#11890: Add GCS Json schema changes (#14559)

* Issue-11890: Add GCS Json schema changes

* Modify Gcs Security config

* Add Test Def for gcs

* Add gcs for storage_metadata_config
This commit is contained in:
Ayush Shah 2024-01-07 08:57:36 +05:30 committed by GitHub
parent 07e9c1a55d
commit d513dc4d4d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 118 additions and 12 deletions

View File

@ -20,6 +20,9 @@ import requests
from metadata.generated.schema.entity.services.connections.database.datalake.azureConfig import (
AzureConfig,
)
from metadata.generated.schema.entity.services.connections.database.datalake.gcsConfig import (
GCSConfig,
)
from metadata.generated.schema.entity.services.connections.database.datalake.s3Config import (
S3Config,
)
@ -29,6 +32,9 @@ from metadata.generated.schema.metadataIngestion.storage.manifestMetadataConfig
from metadata.generated.schema.metadataIngestion.storage.storageMetadataADLSConfig import (
StorageMetadataAdlsConfig,
)
from metadata.generated.schema.metadataIngestion.storage.storageMetadataGCSConfig import (
StorageMetadataGcsConfig,
)
from metadata.generated.schema.metadataIngestion.storage.storageMetadataHttpConfig import (
StorageMetadataHttpConfig,
)
@ -39,6 +45,7 @@ from metadata.generated.schema.metadataIngestion.storage.storageMetadataS3Config
StorageMetadataS3Config,
)
from metadata.readers.file.config_source_factory import get_reader
from metadata.utils.credentials import set_google_credentials
from metadata.utils.logger import ometa_logger
logger = ometa_logger()
@ -172,5 +179,39 @@ def _(config: StorageMetadataAdlsConfig) -> ManifestMetadataConfig:
except Exception as exc:
logger.debug(traceback.format_exc())
raise StorageMetadataConfigException(
f"Error fetching manifest file from s3: {exc}"
f"Error fetching manifest file from adls: {exc}"
)
@get_manifest.register
def _(config: StorageMetadataGcsConfig) -> ManifestMetadataConfig:
    """
    Read the storage metadata manifest file from a GCS bucket.

    The bucket name and the optional object prefix are taken from
    ``config.prefixConfig``. When a prefix is set the manifest is looked up
    under ``<prefix>/<manifest file name>``; otherwise the bare manifest file
    name is used as the path.

    Args:
        config: GCS manifest location plus the credentials used to reach it.

    Returns:
        The manifest content parsed into a ``ManifestMetadataConfig``.

    Raises:
        StorageMetadataConfigException: on any failure while preparing the
            client, reading the file or parsing its JSON content. The
            original exception is chained as the cause.
    """
    try:
        bucket_name, prefix = (
            config.prefixConfig.containerName,
            config.prefixConfig.objectPrefix,
        )

        path = (
            f"{prefix}/{STORAGE_METADATA_MANIFEST_FILE_NAME}"
            if prefix
            else STORAGE_METADATA_MANIFEST_FILE_NAME
        )

        # Imported lazily so the GCS client library is only needed when a GCS
        # manifest is actually requested.
        from google.cloud.storage import (  # pylint: disable=import-outside-toplevel
            Client,
        )

        # NOTE(review): securityConfig is not a required property of the GCS
        # manifest config -- confirm set_google_credentials copes with it
        # being unset.
        set_google_credentials(gcp_credentials=config.securityConfig)
        gcs_client = Client()

        reader = get_reader(
            config_source=GCSConfig(securityConfig=config.securityConfig),
            client=gcs_client,
        )

        manifest = reader.read(path=path, bucket_name=bucket_name)
        return ManifestMetadataConfig.parse_obj(json.loads(manifest))
    except Exception as exc:
        logger.debug(traceback.format_exc())
        # Chain explicitly so the root cause is kept on the raised exception.
        raise StorageMetadataConfigException(
            f"Error fetching manifest file from gcs: {exc}"
        ) from exc

View File

@ -32,7 +32,7 @@ import org.openmetadata.schema.services.connections.database.TrinoConnection;
import org.openmetadata.schema.services.connections.database.datalake.GCSConfig;
import org.openmetadata.schema.services.connections.pipeline.AirflowConnection;
import org.openmetadata.schema.services.connections.search.ElasticSearchConnection;
import org.openmetadata.schema.services.connections.storage.GcsConnection;
import org.openmetadata.schema.services.connections.storage.GCSConnection;
/** Factory class to get a `ClassConverter` based on the service class. */
public final class ClassConverterFactory {
@ -55,7 +55,7 @@ public final class ClassConverterFactory {
Map.entry(DbtPipeline.class, new DbtPipelineClassConverter()),
Map.entry(GCSConfig.class, new GCPConfigClassConverter()),
Map.entry(GCPCredentials.class, new GcpCredentialsClassConverter()),
Map.entry(GcsConnection.class, new GcpConnectionClassConverter()),
Map.entry(GCSConnection.class, new GcpConnectionClassConverter()),
Map.entry(ElasticSearchConnection.class, new ElasticSearchConnectionClassConverter()),
Map.entry(LookerConnection.class, new LookerConnectionClassConverter()),
Map.entry(SSOAuthMechanism.class, new SSOAuthMechanismClassConverter()),

View File

@ -15,19 +15,19 @@ package org.openmetadata.service.secrets.converter;
import java.util.List;
import org.openmetadata.schema.security.credentials.GCPCredentials;
import org.openmetadata.schema.services.connections.storage.GcsConnection;
import org.openmetadata.schema.services.connections.storage.GCSConnection;
import org.openmetadata.service.util.JsonUtils;
/** Converter class to get an `GcsConnection` object. */
/** Converter class to get a `GCSConnection` object. */
public class GcpConnectionClassConverter extends ClassConverter {
public GcpConnectionClassConverter() {
super(GcsConnection.class);
super(GCSConnection.class);
}
@Override
public Object convert(Object object) {
GcsConnection gcsConnection = (GcsConnection) JsonUtils.convertValue(object, this.clazz);
GCSConnection gcsConnection = (GCSConnection) JsonUtils.convertValue(object, this.clazz);
tryToConvertOrFail(gcsConnection.getCredentials(), List.of(GCPCredentials.class))
.ifPresent(obj -> gcsConnection.setCredentials((GCPCredentials) obj));

View File

@ -0,0 +1,35 @@
{
"name": "GCS",
"displayName": "GCS Test Connection",
"description": "This Test Connection validates the access against the storage service and basic metadata extraction of objects.",
"steps": [
{
"name": "ListBuckets",
"description": "List all the buckets available to the user.",
"errorMessage": "Failed to fetch buckets, please validate the credentials if the user has access to list buckets",
"shortCircuit": true,
"mandatory": true
},
{
"name": "ListBlobs",
"description": "List all the blobs available to the user.",
"errorMessage": "Failed to fetch blobs, please validate the credentials if the user has access to list blobs",
"shortCircuit": true,
"mandatory": true
},
{
"name": "GetBucket",
"description": "Get the bucket available to the user.",
"errorMessage": "Failed to fetch bucket, please validate the credentials if the user has access to get bucket",
"shortCircuit": true,
"mandatory": true
},
{
"name": "GetBlob",
"description": "Get the blob available to the user.",
"errorMessage": "Failed to fetch blob, please validate the credentials if the user has access to get blob",
"shortCircuit": true,
"mandatory": true
}
]
}

View File

@ -24,7 +24,7 @@ import org.openmetadata.schema.services.connections.database.datalake.GCSConfig;
import org.openmetadata.schema.services.connections.metadata.OpenMetadataConnection;
import org.openmetadata.schema.services.connections.pipeline.AirflowConnection;
import org.openmetadata.schema.services.connections.search.ElasticSearchConnection;
import org.openmetadata.schema.services.connections.storage.GcsConnection;
import org.openmetadata.schema.services.connections.storage.GCSConnection;
public class ClassConverterFactoryTest {
@ -39,7 +39,7 @@ public class ClassConverterFactoryTest {
DbtGCSConfig.class,
DbtPipeline.class,
GCSConfig.class,
GcsConnection.class,
GCSConnection.class,
ElasticSearchConnection.class,
LookerConnection.class,
OpenMetadataConnection.class,

View File

@ -4,7 +4,7 @@
"title": "GCS Connection",
"description": "GCS Connection.",
"type": "object",
"javaType": "org.openmetadata.schema.services.connections.storage.GcsConnection",
"javaType": "org.openmetadata.schema.services.connections.storage.GCSConnection",
"definitions": {
"gcsType": {
"description": "Gcs service type",

View File

@ -19,6 +19,7 @@
"enum": [
"S3",
"ADLS",
"GCS",
"CustomStorage"
],
"javaEnums": [
@ -28,6 +29,9 @@
{
"name": "ADLS"
},
{
"name": "GCS"
},
{
"name": "CustomStorage"
}
@ -50,6 +54,9 @@
{
"$ref": "connections/storage/adlsConnection.json"
},
{
"$ref": "connections/storage/gcsConnection.json"
},
{
"$ref": "connections/storage/customStorageConnection.json"
}

View File

@ -1,5 +1,5 @@
{
"$id": "https://open-metadata.org/schema/metadataIngestion/storage/storageMetadataS3Config.json",
"$id": "https://open-metadata.org/schema/metadataIngestion/storage/storageMetadataADLSConfig.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Storage Metadata ADLS Config",
"description": "Storage Metadata Manifest file ADLS path config.",
@ -7,7 +7,7 @@
"type": "object",
"properties": {
"securityConfig": {
"title": "S3 Security Config",
"title": "ADLS Security Config",
"$ref": "../../security/credentials/azureCredentials.json"
},
"prefixConfig": {

View File

@ -0,0 +1,20 @@
{
"$id": "https://open-metadata.org/schema/metadataIngestion/storage/storageMetadataGCSConfig.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Storage Metadata GCS Config",
"description": "Storage Metadata Manifest file GCS path config.",
"javaType": "org.openmetadata.schema.metadataIngestion.storage.StorageMetadataGCSConfig",
"type": "object",
"properties": {
"securityConfig": {
"title": "GCS Security Config",
"$ref": "../../security/credentials/gcpCredentials.json"
},
"prefixConfig": {
"title": "Storage Metadata Prefix Config",
"$ref": "./storageBucketDetails.json"
}
},
"additionalProperties": false,
"required": ["prefixConfig"]
}

View File

@ -46,6 +46,9 @@
},
{
"$ref": "./storage/storageMetadataADLSConfig.json"
},
{
"$ref": "./storage/storageMetadataGCSConfig.json"
}
]
},