diff --git a/ingestion/src/metadata/utils/storage_metadata_config.py b/ingestion/src/metadata/utils/storage_metadata_config.py index 060f9846f42..3eb12a1670c 100644 --- a/ingestion/src/metadata/utils/storage_metadata_config.py +++ b/ingestion/src/metadata/utils/storage_metadata_config.py @@ -20,6 +20,9 @@ import requests from metadata.generated.schema.entity.services.connections.database.datalake.azureConfig import ( AzureConfig, ) +from metadata.generated.schema.entity.services.connections.database.datalake.gcsConfig import ( + GCSConfig, +) from metadata.generated.schema.entity.services.connections.database.datalake.s3Config import ( S3Config, ) @@ -29,6 +32,9 @@ from metadata.generated.schema.metadataIngestion.storage.manifestMetadataConfig from metadata.generated.schema.metadataIngestion.storage.storageMetadataADLSConfig import ( StorageMetadataAdlsConfig, ) +from metadata.generated.schema.metadataIngestion.storage.storageMetadataGCSConfig import ( + StorageMetadataGcsConfig, +) from metadata.generated.schema.metadataIngestion.storage.storageMetadataHttpConfig import ( StorageMetadataHttpConfig, ) @@ -39,6 +45,7 @@ from metadata.generated.schema.metadataIngestion.storage.storageMetadataS3Config StorageMetadataS3Config, ) from metadata.readers.file.config_source_factory import get_reader +from metadata.utils.credentials import set_google_credentials from metadata.utils.logger import ometa_logger logger = ometa_logger() @@ -172,5 +179,39 @@ def _(config: StorageMetadataAdlsConfig) -> ManifestMetadataConfig: except Exception as exc: logger.debug(traceback.format_exc()) raise StorageMetadataConfigException( - f"Error fetching manifest file from s3: {exc}" + f"Error fetching manifest file from adls: {exc}" + ) + + +@get_manifest.register +def _(config: StorageMetadataGcsConfig) -> ManifestMetadataConfig: + try: + bucket_name, prefix = ( + config.prefixConfig.containerName, + config.prefixConfig.objectPrefix, + ) + + path = ( + 
f"{prefix}/{STORAGE_METADATA_MANIFEST_FILE_NAME}" + if prefix + else STORAGE_METADATA_MANIFEST_FILE_NAME + ) + + from google.cloud.storage import ( # pylint: disable=import-outside-toplevel + Client, + ) + + set_google_credentials(gcp_credentials=config.securityConfig) + gcs_client = Client() + reader = get_reader( + config_source=GCSConfig(securityConfig=config.securityConfig), + client=gcs_client, + ) + + manifest = reader.read(path=path, bucket_name=bucket_name) + return ManifestMetadataConfig.parse_obj(json.loads(manifest)) + except Exception as exc: + logger.debug(traceback.format_exc()) + raise StorageMetadataConfigException( + f"Error fetching manifest file from gcs: {exc}" ) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/ClassConverterFactory.java b/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/ClassConverterFactory.java index c7bb53de96f..fb44824840f 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/ClassConverterFactory.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/ClassConverterFactory.java @@ -32,7 +32,7 @@ import org.openmetadata.schema.services.connections.database.TrinoConnection; import org.openmetadata.schema.services.connections.database.datalake.GCSConfig; import org.openmetadata.schema.services.connections.pipeline.AirflowConnection; import org.openmetadata.schema.services.connections.search.ElasticSearchConnection; -import org.openmetadata.schema.services.connections.storage.GcsConnection; +import org.openmetadata.schema.services.connections.storage.GCSConnection; /** Factory class to get a `ClassConverter` based on the service class. 
*/ public final class ClassConverterFactory { @@ -55,7 +55,7 @@ public final class ClassConverterFactory { Map.entry(DbtPipeline.class, new DbtPipelineClassConverter()), Map.entry(GCSConfig.class, new GCPConfigClassConverter()), Map.entry(GCPCredentials.class, new GcpCredentialsClassConverter()), - Map.entry(GcsConnection.class, new GcpConnectionClassConverter()), + Map.entry(GCSConnection.class, new GcpConnectionClassConverter()), Map.entry(ElasticSearchConnection.class, new ElasticSearchConnectionClassConverter()), Map.entry(LookerConnection.class, new LookerConnectionClassConverter()), Map.entry(SSOAuthMechanism.class, new SSOAuthMechanismClassConverter()), diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/GcpConnectionClassConverter.java b/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/GcpConnectionClassConverter.java index 0237a3c83e1..e480d6c2e1d 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/GcpConnectionClassConverter.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/GcpConnectionClassConverter.java @@ -15,19 +15,19 @@ package org.openmetadata.service.secrets.converter; import java.util.List; import org.openmetadata.schema.security.credentials.GCPCredentials; -import org.openmetadata.schema.services.connections.storage.GcsConnection; +import org.openmetadata.schema.services.connections.storage.GCSConnection; import org.openmetadata.service.util.JsonUtils; -/** Converter class to get an `GcsConnection` object. */ +/** Converter class to get a `GCSConnection` object. 
*/ public class GcpConnectionClassConverter extends ClassConverter { public GcpConnectionClassConverter() { - super(GcsConnection.class); + super(GCSConnection.class); } @Override public Object convert(Object object) { - GcsConnection gcsConnection = (GcsConnection) JsonUtils.convertValue(object, this.clazz); + GCSConnection gcsConnection = (GCSConnection) JsonUtils.convertValue(object, this.clazz); tryToConvertOrFail(gcsConnection.getCredentials(), List.of(GCPCredentials.class)) .ifPresent(obj -> gcsConnection.setCredentials((GCPCredentials) obj)); diff --git a/openmetadata-service/src/main/resources/json/data/testConnections/storage/gcs.json b/openmetadata-service/src/main/resources/json/data/testConnections/storage/gcs.json new file mode 100644 index 00000000000..ad07409324c --- /dev/null +++ b/openmetadata-service/src/main/resources/json/data/testConnections/storage/gcs.json @@ -0,0 +1,35 @@ +{ + "name": "GCS", + "displayName": "GCS Test Connection", + "description": "This Test Connection validates the access against the storage service and basic metadata extraction of objects.", + "steps": [ + { + "name": "ListBuckets", + "description": "List all the buckets available to the user.", + "errorMessage": "Failed to fetch buckets, please validate the credentials if the user has access to list buckets", + "shortCircuit": true, + "mandatory": true + }, + { + "name": "ListBlobs", + "description": "List all the blobs available to the user.", + "errorMessage": "Failed to fetch blobs, please validate the credentials if the user has access to list blobs", + "shortCircuit": true, + "mandatory": true + }, + { + "name": "GetBucket", + "description": "Get the bucket available to the user.", + "errorMessage": "Failed to fetch bucket, please validate the credentials if the user has access to get bucket", + "shortCircuit": true, + "mandatory": true + }, + { + "name": "GetBlob", + "description": "Get the blob available to the user.", + "errorMessage": "Failed to fetch blob, 
please validate the credentials if the user has access to get blob", + "shortCircuit": true, + "mandatory": true + } + ] +} \ No newline at end of file diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/secrets/converter/ClassConverterFactoryTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/secrets/converter/ClassConverterFactoryTest.java index c97ee57f949..fbdcdfa1efd 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/secrets/converter/ClassConverterFactoryTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/secrets/converter/ClassConverterFactoryTest.java @@ -24,7 +24,7 @@ import org.openmetadata.schema.services.connections.database.datalake.GCSConfig; import org.openmetadata.schema.services.connections.metadata.OpenMetadataConnection; import org.openmetadata.schema.services.connections.pipeline.AirflowConnection; import org.openmetadata.schema.services.connections.search.ElasticSearchConnection; -import org.openmetadata.schema.services.connections.storage.GcsConnection; +import org.openmetadata.schema.services.connections.storage.GCSConnection; public class ClassConverterFactoryTest { @@ -39,7 +39,7 @@ public class ClassConverterFactoryTest { DbtGCSConfig.class, DbtPipeline.class, GCSConfig.class, - GcsConnection.class, + GCSConnection.class, ElasticSearchConnection.class, LookerConnection.class, OpenMetadataConnection.class, diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/storage/gcsConnection.json b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/storage/gcsConnection.json index c467955a216..8482e9789e2 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/storage/gcsConnection.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/storage/gcsConnection.json @@ -4,7 +4,7 @@ "title": "GCS Connection", "description": "GCS Connection.", 
"type": "object", - "javaType": "org.openmetadata.schema.services.connections.storage.GcsConnection", + "javaType": "org.openmetadata.schema.services.connections.storage.GCSConnection", "definitions": { "gcsType": { "description": "Gcs service type", diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/services/storageService.json b/openmetadata-spec/src/main/resources/json/schema/entity/services/storageService.json index 3ee2b5578b1..acf5bfa8d7b 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/services/storageService.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/services/storageService.json @@ -19,6 +19,7 @@ "enum": [ "S3", "ADLS", + "GCS", "CustomStorage" ], "javaEnums": [ @@ -28,6 +29,9 @@ { "name": "ADLS" }, + { + "name": "GCS" + }, { "name": "CustomStorage" } @@ -50,6 +54,9 @@ { "$ref": "connections/storage/adlsConnection.json" }, + { + "$ref": "connections/storage/gcsConnection.json" + }, { "$ref": "connections/storage/customStorageConnection.json" } diff --git a/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/storage/storageMetadataADLSConfig.json b/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/storage/storageMetadataADLSConfig.json index f2a2d6ab79a..a7731208d0e 100644 --- a/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/storage/storageMetadataADLSConfig.json +++ b/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/storage/storageMetadataADLSConfig.json @@ -1,5 +1,5 @@ { - "$id": "https://open-metadata.org/schema/metadataIngestion/storage/storageMetadataS3Config.json", + "$id": "https://open-metadata.org/schema/metadataIngestion/storage/storageMetadataADLSConfig.json", "$schema": "http://json-schema.org/draft-07/schema#", "title": "Storage Metadata ADLS Config", "description": "Storage Metadata Manifest file ADLS path config.", @@ -7,7 +7,7 @@ "type": "object", "properties": { "securityConfig": { - "title": "S3 Security 
Config", + "title": "ADLS Security Config", "$ref": "../../security/credentials/azureCredentials.json" }, "prefixConfig": { diff --git a/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/storage/storageMetadataGCSConfig.json b/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/storage/storageMetadataGCSConfig.json new file mode 100644 index 00000000000..0ca626f9548 --- /dev/null +++ b/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/storage/storageMetadataGCSConfig.json @@ -0,0 +1,20 @@ +{ + "$id": "https://open-metadata.org/schema/metadataIngestion/storage/storageMetadataGCSConfig.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Storage Metadata GCS Config", + "description": "Storage Metadata Manifest file GCS path config.", + "javaType": "org.openmetadata.schema.metadataIngestion.storage.StorageMetadataGCSConfig", + "type": "object", + "properties": { + "securityConfig": { + "title": "GCS Security Config", + "$ref": "../../security/credentials/gcpCredentials.json" + }, + "prefixConfig": { + "title": "Storage Metadata Prefix Config", + "$ref": "./storageBucketDetails.json" + } + }, + "additionalProperties": false, + "required": ["prefixConfig"] +} \ No newline at end of file diff --git a/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/storageServiceMetadataPipeline.json b/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/storageServiceMetadataPipeline.json index cd5d5a3ffa5..1fdd3d949eb 100644 --- a/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/storageServiceMetadataPipeline.json +++ b/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/storageServiceMetadataPipeline.json @@ -46,6 +46,9 @@ }, { "$ref": "./storage/storageMetadataADLSConfig.json" + }, + { + "$ref": "./storage/storageMetadataGCSConfig.json" } ] },