refactor(ingest) - remove snowflake_common dependency on aws_common (#4054)

Co-authored-by: Shirshanka Das <shirshanka@apache.org>
This commit is contained in:
Aditya Radhakrishnan 2022-02-04 23:24:20 -08:00 committed by GitHub
parent 1a2f75b1e6
commit 61db4ed152
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 22 additions and 20 deletions

View File

@@ -102,19 +102,3 @@ class AwsSourceConfig(ConfigModel):
def get_sagemaker_client(self) -> "SageMakerClient": def get_sagemaker_client(self) -> "SageMakerClient":
return self.get_session().client("sagemaker") return self.get_session().client("sagemaker")
def make_s3_urn(s3_uri: str, env: str, suffix: Optional[str] = None) -> str:
    """Translate an ``s3://`` URI into a DataHub S3 dataset URN.

    Args:
        s3_uri: Fully-qualified S3 URI, e.g. ``s3://bucket/key``.
        env: Fabric/environment string embedded in the URN (e.g. ``PROD``).
        suffix: Optional extra token appended to the dataset name with ``_``.

    Raises:
        ValueError: If ``s3_uri`` does not start with ``s3://``.
    """
    if not s3_uri.startswith("s3://"):
        raise ValueError("S3 URIs should begin with 's3://'")
    # Strip the scheme; trim a single trailing slash if present.
    path = s3_uri[5:]
    path = path[:-1] if path.endswith("/") else path
    if suffix is None:
        return f"urn:li:dataset:(urn:li:dataPlatform:s3,{path},{env})"
    return f"urn:li:dataset:(urn:li:dataPlatform:s3,{path}_{suffix},{env})"

View File

@@ -15,7 +15,8 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.api.source import Source, SourceReport from datahub.ingestion.api.source import Source, SourceReport
from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.aws.aws_common import AwsSourceConfig, make_s3_urn from datahub.ingestion.source.aws.aws_common import AwsSourceConfig
from datahub.ingestion.source.aws.s3_util import make_s3_urn
from datahub.metadata.com.linkedin.pegasus2avro.common import Status from datahub.metadata.com.linkedin.pegasus2avro.common import Status
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent

View File

@@ -0,0 +1,17 @@
from typing import Optional
def make_s3_urn(s3_uri: str, env: str, suffix: Optional[str] = None) -> str:
    """Build a DataHub dataset URN for an S3 location.

    Args:
        s3_uri: S3 location, which must begin with the ``s3://`` scheme.
        env: Environment/fabric name placed in the URN (e.g. ``PROD``).
        suffix: When given, joined to the dataset name with an underscore.

    Returns:
        A ``urn:li:dataset`` string on the ``s3`` data platform.

    Raises:
        ValueError: If ``s3_uri`` lacks the ``s3://`` prefix.
    """
    if not s3_uri.startswith("s3://"):
        raise ValueError("S3 URIs should begin with 's3://'")
    # Drop the "s3://" scheme, then remove at most one trailing slash
    # so directory-style URIs and object-style URIs normalize identically.
    name = s3_uri[len("s3://"):]
    if name.endswith("/"):
        name = name[:-1]
    dataset = name if suffix is None else f"{name}_{suffix}"
    return f"urn:li:dataset:(urn:li:dataPlatform:s3,{dataset},{env})"

View File

@@ -19,7 +19,7 @@ from typing import (
from datahub.emitter import mce_builder from datahub.emitter import mce_builder
from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.aws.aws_common import make_s3_urn from datahub.ingestion.source.aws.s3_util import make_s3_urn
from datahub.ingestion.source.aws.sagemaker_processors.common import ( from datahub.ingestion.source.aws.sagemaker_processors.common import (
SagemakerSourceReport, SagemakerSourceReport,
) )

View File

@@ -35,7 +35,7 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.api.source import Source, SourceReport from datahub.ingestion.api.source import Source, SourceReport
from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.aws.aws_common import make_s3_urn from datahub.ingestion.source.aws.s3_util import make_s3_urn
from datahub.ingestion.source.data_lake.config import DataLakeSourceConfig from datahub.ingestion.source.data_lake.config import DataLakeSourceConfig
from datahub.ingestion.source.data_lake.profiling import _SingleTableProfiler from datahub.ingestion.source.data_lake.profiling import _SingleTableProfiler
from datahub.ingestion.source.data_lake.report import DataLakeSourceReport from datahub.ingestion.source.data_lake.report import DataLakeSourceReport

View File

@@ -24,7 +24,7 @@ from datahub.configuration.common import AllowDenyPattern
from datahub.configuration.time_window_config import BaseTimeWindowConfig from datahub.configuration.time_window_config import BaseTimeWindowConfig
from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.aws.aws_common import make_s3_urn from datahub.ingestion.source.aws.s3_util import make_s3_urn
from datahub.ingestion.source.sql.sql_common import ( from datahub.ingestion.source.sql.sql_common import (
RecordTypeClass, RecordTypeClass,
SQLAlchemyConfig, SQLAlchemyConfig,