From be8c684b3510bb61d4a4ac09c9a3c69614a3fab5 Mon Sep 17 00:00:00 2001 From: Anush Kumar Date: Fri, 29 Aug 2025 11:37:26 -0700 Subject: [PATCH] refactor(ingestion): renamed redshift lineage_v2 to lineage and other v2 nomenclatures (#14603) --- .../src/datahub/ingestion/source/redshift/config.py | 10 ++++++++-- .../source/redshift/{lineage_v2.py => lineage.py} | 4 ++-- .../src/datahub/ingestion/source/redshift/redshift.py | 6 +++--- .../tests/unit/redshift/test_redshift_lineage.py | 8 ++++---- 4 files changed, 17 insertions(+), 11 deletions(-) rename metadata-ingestion/src/datahub/ingestion/source/redshift/{lineage_v2.py => lineage.py} (99%) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py index d8d09b71c0..325a57b6bc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py @@ -9,6 +9,7 @@ from datahub.configuration import ConfigModel from datahub.configuration.common import AllowDenyPattern from datahub.configuration.source_common import DatasetLineageProviderConfigBase from datahub.configuration.validate_field_removal import pydantic_removed_field +from datahub.configuration.validate_field_rename import pydantic_renamed_field from datahub.ingestion.api.incremental_lineage_helper import ( IncrementalLineageConfigMixin, ) @@ -102,6 +103,11 @@ class RedshiftConfig( _database_alias_removed = pydantic_removed_field("database_alias") _use_lineage_v2_removed = pydantic_removed_field("use_lineage_v2") + _rename_lineage_v2_generate_queries_to_lineage_generate_queries = ( + pydantic_renamed_field( + "lineage_v2_generate_queries", "lineage_generate_queries" + ) + ) default_schema: str = Field( default="public", @@ -113,9 +119,9 @@ class RedshiftConfig( description="Whether target Redshift instance is serverless (alternative is provisioned cluster)", ) - lineage_v2_generate_queries: bool = Field( + lineage_generate_queries: bool = Field( default=True, - description="Whether to generate queries entities for the new SQL-based lineage collector.", + description="Whether to generate queries entities for the SQL-based lineage collector.", ) include_table_lineage: bool = Field( diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py similarity index 99% rename from metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py rename to metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py index 19acee6129..08e2935b1e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py @@ -107,7 +107,7 @@ def parse_alter_table_rename(default_schema: str, query: str) -> Tuple[str, str, return schema, prev_name, new_name -class RedshiftSqlLineageV2(Closeable): +class RedshiftSqlLineage(Closeable): # does lineage and usage based on SQL parsing. def __init__( @@ -131,7 +131,7 @@ class RedshiftSqlLineageV2(Closeable): platform_instance=self.config.platform_instance, env=self.config.env, generate_lineage=True, - generate_queries=self.config.lineage_v2_generate_queries, + generate_queries=self.config.lineage_generate_queries, generate_usage_statistics=False, generate_operations=False, usage_config=self.config, diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index db5dc69cb2..2975ec0c88 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -51,7 +51,7 @@ from datahub.ingestion.source.common.subtypes import ( from datahub.ingestion.source.redshift.config import RedshiftConfig from datahub.ingestion.source.redshift.datashares import RedshiftDatasharesHelper from datahub.ingestion.source.redshift.exception import handle_redshift_exceptions_yield -from datahub.ingestion.source.redshift.lineage_v2 import RedshiftSqlLineageV2 +from datahub.ingestion.source.redshift.lineage import RedshiftSqlLineage from datahub.ingestion.source.redshift.profile import RedshiftProfiler from datahub.ingestion.source.redshift.redshift_data_reader import RedshiftDataReader from datahub.ingestion.source.redshift.redshift_schema import ( @@ -419,7 +419,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource): memory_footprint.total_size(self.db_views) ) - with RedshiftSqlLineageV2( + with RedshiftSqlLineage( config=self.config, report=self.report, context=self.ctx, @@ -953,7 +953,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource): self, connection: redshift_connector.Connection, database: str, - lineage_extractor: RedshiftSqlLineageV2, + lineage_extractor: RedshiftSqlLineage, ) -> Iterable[MetadataWorkUnit]: if self.config.include_share_lineage: outbound_shares = self.data_dictionary.get_outbound_datashares(connection) diff --git a/metadata-ingestion/tests/unit/redshift/test_redshift_lineage.py b/metadata-ingestion/tests/unit/redshift/test_redshift_lineage.py index 1493e6bf28..a610e1f749 100644 --- a/metadata-ingestion/tests/unit/redshift/test_redshift_lineage.py +++ b/metadata-ingestion/tests/unit/redshift/test_redshift_lineage.py @@ -7,10 +7,10 @@ import pytest from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.redshift.config import RedshiftConfig -from datahub.ingestion.source.redshift.lineage_v2 import ( +from datahub.ingestion.source.redshift.lineage import ( LineageCollectorType, LineageDatasetPlatform, - RedshiftSqlLineageV2, + RedshiftSqlLineage, parse_alter_table_rename, ) from datahub.ingestion.source.redshift.redshift_schema import ( @@ -132,7 +132,7 @@ def test_parse_alter_table_rename(): ) -def get_lineage_extractor() -> RedshiftSqlLineageV2: +def get_lineage_extractor() -> RedshiftSqlLineage: config = RedshiftConfig( host_port="localhost:5439", database="test", @@ -142,7 +142,7 @@ def get_lineage_extractor() -> RedshiftSqlLineageV2: ) report = RedshiftReport() - lineage_extractor = RedshiftSqlLineageV2( + lineage_extractor = RedshiftSqlLineage( config, report, PipelineContext(run_id="foo"), config.database )