#11890: Add GCS Json schema changes (#14559)

* Issue-11890: Add GCS Json schema changes

* Modify Gcs Security config

* Add Test Def for gcs

* Add gcs for storage_metadata_config
This commit is contained in:
Ayush Shah 2024-01-07 08:57:36 +05:30 committed by GitHub
parent 07e9c1a55d
commit d513dc4d4d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 118 additions and 12 deletions

View File

@ -20,6 +20,9 @@ import requests
from metadata.generated.schema.entity.services.connections.database.datalake.azureConfig import ( from metadata.generated.schema.entity.services.connections.database.datalake.azureConfig import (
AzureConfig, AzureConfig,
) )
from metadata.generated.schema.entity.services.connections.database.datalake.gcsConfig import (
GCSConfig,
)
from metadata.generated.schema.entity.services.connections.database.datalake.s3Config import ( from metadata.generated.schema.entity.services.connections.database.datalake.s3Config import (
S3Config, S3Config,
) )
@ -29,6 +32,9 @@ from metadata.generated.schema.metadataIngestion.storage.manifestMetadataConfig
from metadata.generated.schema.metadataIngestion.storage.storageMetadataADLSConfig import ( from metadata.generated.schema.metadataIngestion.storage.storageMetadataADLSConfig import (
StorageMetadataAdlsConfig, StorageMetadataAdlsConfig,
) )
from metadata.generated.schema.metadataIngestion.storage.storageMetadataGCSConfig import (
StorageMetadataGcsConfig,
)
from metadata.generated.schema.metadataIngestion.storage.storageMetadataHttpConfig import ( from metadata.generated.schema.metadataIngestion.storage.storageMetadataHttpConfig import (
StorageMetadataHttpConfig, StorageMetadataHttpConfig,
) )
@ -39,6 +45,7 @@ from metadata.generated.schema.metadataIngestion.storage.storageMetadataS3Config
StorageMetadataS3Config, StorageMetadataS3Config,
) )
from metadata.readers.file.config_source_factory import get_reader from metadata.readers.file.config_source_factory import get_reader
from metadata.utils.credentials import set_google_credentials
from metadata.utils.logger import ometa_logger from metadata.utils.logger import ometa_logger
logger = ometa_logger() logger = ometa_logger()
@ -172,5 +179,39 @@ def _(config: StorageMetadataAdlsConfig) -> ManifestMetadataConfig:
except Exception as exc: except Exception as exc:
logger.debug(traceback.format_exc()) logger.debug(traceback.format_exc())
raise StorageMetadataConfigException( raise StorageMetadataConfigException(
f"Error fetching manifest file from s3: {exc}" f"Error fetching manifest file from adls: {exc}"
)
@get_manifest.register
def _(config: StorageMetadataGcsConfig) -> ManifestMetadataConfig:
    """Read and parse the storage metadata manifest file from a GCS bucket.

    Args:
        config: GCS manifest settings. ``prefixConfig`` supplies the bucket
            (``containerName``) and the optional ``objectPrefix``;
            ``securityConfig`` supplies the GCP credentials.

    Returns:
        The manifest file content parsed into a ``ManifestMetadataConfig``.

    Raises:
        StorageMetadataConfigException: if any step (credential setup, client
            creation, reading or parsing the manifest) fails. The original
            error is attached as the exception cause.
    """
    try:
        bucket_name, prefix = (
            config.prefixConfig.containerName,
            config.prefixConfig.objectPrefix,
        )
        # Without a prefix the path is just the manifest file name.
        path = (
            f"{prefix}/{STORAGE_METADATA_MANIFEST_FILE_NAME}"
            if prefix
            else STORAGE_METADATA_MANIFEST_FILE_NAME
        )

        # Imported lazily so the GCS client library is only needed when a GCS
        # manifest is actually requested.
        from google.cloud.storage import (  # pylint: disable=import-outside-toplevel
            Client,
        )

        # NOTE(review): presumably this must run before Client() so the client
        # picks up the configured credentials - confirm against
        # set_google_credentials.
        set_google_credentials(gcp_credentials=config.securityConfig)
        gcs_client = Client()

        reader = get_reader(
            config_source=GCSConfig(securityConfig=config.securityConfig),
            client=gcs_client,
        )

        manifest = reader.read(path=path, bucket_name=bucket_name)
        return ManifestMetadataConfig.parse_obj(json.loads(manifest))
    except Exception as exc:
        logger.debug(traceback.format_exc())
        # Chain explicitly so the root cause stays attached to the wrapped error.
        raise StorageMetadataConfigException(
            f"Error fetching manifest file from gcs: {exc}"
        ) from exc

View File

@ -32,7 +32,7 @@ import org.openmetadata.schema.services.connections.database.TrinoConnection;
import org.openmetadata.schema.services.connections.database.datalake.GCSConfig; import org.openmetadata.schema.services.connections.database.datalake.GCSConfig;
import org.openmetadata.schema.services.connections.pipeline.AirflowConnection; import org.openmetadata.schema.services.connections.pipeline.AirflowConnection;
import org.openmetadata.schema.services.connections.search.ElasticSearchConnection; import org.openmetadata.schema.services.connections.search.ElasticSearchConnection;
import org.openmetadata.schema.services.connections.storage.GcsConnection; import org.openmetadata.schema.services.connections.storage.GCSConnection;
/** Factory class to get a `ClassConverter` based on the service class. */ /** Factory class to get a `ClassConverter` based on the service class. */
public final class ClassConverterFactory { public final class ClassConverterFactory {
@ -55,7 +55,7 @@ public final class ClassConverterFactory {
Map.entry(DbtPipeline.class, new DbtPipelineClassConverter()), Map.entry(DbtPipeline.class, new DbtPipelineClassConverter()),
Map.entry(GCSConfig.class, new GCPConfigClassConverter()), Map.entry(GCSConfig.class, new GCPConfigClassConverter()),
Map.entry(GCPCredentials.class, new GcpCredentialsClassConverter()), Map.entry(GCPCredentials.class, new GcpCredentialsClassConverter()),
Map.entry(GcsConnection.class, new GcpConnectionClassConverter()), Map.entry(GCSConnection.class, new GcpConnectionClassConverter()),
Map.entry(ElasticSearchConnection.class, new ElasticSearchConnectionClassConverter()), Map.entry(ElasticSearchConnection.class, new ElasticSearchConnectionClassConverter()),
Map.entry(LookerConnection.class, new LookerConnectionClassConverter()), Map.entry(LookerConnection.class, new LookerConnectionClassConverter()),
Map.entry(SSOAuthMechanism.class, new SSOAuthMechanismClassConverter()), Map.entry(SSOAuthMechanism.class, new SSOAuthMechanismClassConverter()),

View File

@ -15,19 +15,19 @@ package org.openmetadata.service.secrets.converter;
import java.util.List; import java.util.List;
import org.openmetadata.schema.security.credentials.GCPCredentials; import org.openmetadata.schema.security.credentials.GCPCredentials;
import org.openmetadata.schema.services.connections.storage.GcsConnection; import org.openmetadata.schema.services.connections.storage.GCSConnection;
import org.openmetadata.service.util.JsonUtils; import org.openmetadata.service.util.JsonUtils;
/** Converter class to get an `GcsConnection` object. */ /** Converter class to get a `GCSConnection` object. */
public class GcpConnectionClassConverter extends ClassConverter { public class GcpConnectionClassConverter extends ClassConverter {
public GcpConnectionClassConverter() { public GcpConnectionClassConverter() {
super(GcsConnection.class); super(GCSConnection.class);
} }
@Override @Override
public Object convert(Object object) { public Object convert(Object object) {
GcsConnection gcsConnection = (GcsConnection) JsonUtils.convertValue(object, this.clazz); GCSConnection gcsConnection = (GCSConnection) JsonUtils.convertValue(object, this.clazz);
tryToConvertOrFail(gcsConnection.getCredentials(), List.of(GCPCredentials.class)) tryToConvertOrFail(gcsConnection.getCredentials(), List.of(GCPCredentials.class))
.ifPresent(obj -> gcsConnection.setCredentials((GCPCredentials) obj)); .ifPresent(obj -> gcsConnection.setCredentials((GCPCredentials) obj));

View File

@ -0,0 +1,35 @@
{
"name": "GCS",
"displayName": "GCS Test Connection",
"description": "This Test Connection validates the access against the storage service and basic metadata extraction of objects.",
"steps": [
{
"name": "ListBuckets",
"description": "List all the buckets available to the user.",
"errorMessage": "Failed to fetch buckets, please validate the credentials if the user has access to list buckets",
"shortCircuit": true,
"mandatory": true
},
{
"name": "ListBlobs",
"description": "List all the blobs available to the user.",
"errorMessage": "Failed to fetch blobs, please validate the credentials if the user has access to list blobs",
"shortCircuit": true,
"mandatory": true
},
{
"name": "GetBucket",
"description": "Get the bucket available to the user.",
"errorMessage": "Failed to fetch bucket, please validate the credentials if the user has access to get bucket",
"shortCircuit": true,
"mandatory": true
},
{
"name": "GetBlob",
"description": "Get the blob available to the user.",
"errorMessage": "Failed to fetch blob, please validate the credentials if the user has access to get blob",
"shortCircuit": true,
"mandatory": true
}
]
}

View File

@ -24,7 +24,7 @@ import org.openmetadata.schema.services.connections.database.datalake.GCSConfig;
import org.openmetadata.schema.services.connections.metadata.OpenMetadataConnection; import org.openmetadata.schema.services.connections.metadata.OpenMetadataConnection;
import org.openmetadata.schema.services.connections.pipeline.AirflowConnection; import org.openmetadata.schema.services.connections.pipeline.AirflowConnection;
import org.openmetadata.schema.services.connections.search.ElasticSearchConnection; import org.openmetadata.schema.services.connections.search.ElasticSearchConnection;
import org.openmetadata.schema.services.connections.storage.GcsConnection; import org.openmetadata.schema.services.connections.storage.GCSConnection;
public class ClassConverterFactoryTest { public class ClassConverterFactoryTest {
@ -39,7 +39,7 @@ public class ClassConverterFactoryTest {
DbtGCSConfig.class, DbtGCSConfig.class,
DbtPipeline.class, DbtPipeline.class,
GCSConfig.class, GCSConfig.class,
GcsConnection.class, GCSConnection.class,
ElasticSearchConnection.class, ElasticSearchConnection.class,
LookerConnection.class, LookerConnection.class,
OpenMetadataConnection.class, OpenMetadataConnection.class,

View File

@ -4,7 +4,7 @@
"title": "GCS Connection", "title": "GCS Connection",
"description": "GCS Connection.", "description": "GCS Connection.",
"type": "object", "type": "object",
"javaType": "org.openmetadata.schema.services.connections.storage.GcsConnection", "javaType": "org.openmetadata.schema.services.connections.storage.GCSConnection",
"definitions": { "definitions": {
"gcsType": { "gcsType": {
"description": "Gcs service type", "description": "Gcs service type",

View File

@ -19,6 +19,7 @@
"enum": [ "enum": [
"S3", "S3",
"ADLS", "ADLS",
"GCS",
"CustomStorage" "CustomStorage"
], ],
"javaEnums": [ "javaEnums": [
@ -28,6 +29,9 @@
{ {
"name": "ADLS" "name": "ADLS"
}, },
{
"name": "GCS"
},
{ {
"name": "CustomStorage" "name": "CustomStorage"
} }
@ -50,6 +54,9 @@
{ {
"$ref": "connections/storage/adlsConnection.json" "$ref": "connections/storage/adlsConnection.json"
}, },
{
"$ref": "connections/storage/gcsConnection.json"
},
{ {
"$ref": "connections/storage/customStorageConnection.json" "$ref": "connections/storage/customStorageConnection.json"
} }

View File

@ -1,5 +1,5 @@
{ {
"$id": "https://open-metadata.org/schema/metadataIngestion/storage/storageMetadataS3Config.json", "$id": "https://open-metadata.org/schema/metadataIngestion/storage/storageMetadataADLSConfig.json",
"$schema": "http://json-schema.org/draft-07/schema#", "$schema": "http://json-schema.org/draft-07/schema#",
"title": "Storage Metadata ADLS Config", "title": "Storage Metadata ADLS Config",
"description": "Storage Metadata Manifest file ADLS path config.", "description": "Storage Metadata Manifest file ADLS path config.",
@ -7,7 +7,7 @@
"type": "object", "type": "object",
"properties": { "properties": {
"securityConfig": { "securityConfig": {
"title": "S3 Security Config", "title": "ADLS Security Config",
"$ref": "../../security/credentials/azureCredentials.json" "$ref": "../../security/credentials/azureCredentials.json"
}, },
"prefixConfig": { "prefixConfig": {

View File

@ -0,0 +1,20 @@
{
"$id": "https://open-metadata.org/schema/metadataIngestion/storage/storageMetadataGCSConfig.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Storage Metadata GCS Config",
"description": "Storage Metadata Manifest file GCS path config.",
"javaType": "org.openmetadata.schema.metadataIngestion.storage.StorageMetadataGCSConfig",
"type": "object",
"properties": {
"securityConfig": {
"title": "GCS Security Config",
"$ref": "../../security/credentials/gcpCredentials.json"
},
"prefixConfig": {
"title": "Storage Metadata Prefix Config",
"$ref": "./storageBucketDetails.json"
}
},
"additionalProperties": false,
"required": ["prefixConfig"]
}

View File

@ -46,6 +46,9 @@
}, },
{ {
"$ref": "./storage/storageMetadataADLSConfig.json" "$ref": "./storage/storageMetadataADLSConfig.json"
},
{
"$ref": "./storage/storageMetadataGCSConfig.json"
} }
] ]
}, },