refactor(ingestion): renamed redshift lineage_v2 to lineage and other v2 nomenclatures (#14603)

This commit is contained in:
Anush Kumar 2025-08-29 11:37:26 -07:00 committed by GitHub
parent 8c6daf3bec
commit be8c684b35
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 17 additions and 11 deletions

View File

@ -9,6 +9,7 @@ from datahub.configuration import ConfigModel
from datahub.configuration.common import AllowDenyPattern
from datahub.configuration.source_common import DatasetLineageProviderConfigBase
from datahub.configuration.validate_field_removal import pydantic_removed_field
from datahub.configuration.validate_field_rename import pydantic_renamed_field
from datahub.ingestion.api.incremental_lineage_helper import (
IncrementalLineageConfigMixin,
)
@ -102,6 +103,11 @@ class RedshiftConfig(
_database_alias_removed = pydantic_removed_field("database_alias")
_use_lineage_v2_removed = pydantic_removed_field("use_lineage_v2")
_rename_lineage_v2_generate_queries_to_lineage_generate_queries = (
pydantic_renamed_field(
"lineage_v2_generate_queries", "lineage_generate_queries"
)
)
default_schema: str = Field(
default="public",
@ -113,9 +119,9 @@ class RedshiftConfig(
description="Whether target Redshift instance is serverless (alternative is provisioned cluster)",
)
lineage_v2_generate_queries: bool = Field(
lineage_generate_queries: bool = Field(
default=True,
description="Whether to generate queries entities for the new SQL-based lineage collector.",
description="Whether to generate queries entities for the SQL-based lineage collector.",
)
include_table_lineage: bool = Field(

View File

@ -107,7 +107,7 @@ def parse_alter_table_rename(default_schema: str, query: str) -> Tuple[str, str,
return schema, prev_name, new_name
class RedshiftSqlLineageV2(Closeable):
class RedshiftSqlLineage(Closeable):
# does lineage and usage based on SQL parsing.
def __init__(
@ -131,7 +131,7 @@ class RedshiftSqlLineageV2(Closeable):
platform_instance=self.config.platform_instance,
env=self.config.env,
generate_lineage=True,
generate_queries=self.config.lineage_v2_generate_queries,
generate_queries=self.config.lineage_generate_queries,
generate_usage_statistics=False,
generate_operations=False,
usage_config=self.config,

View File

@ -51,7 +51,7 @@ from datahub.ingestion.source.common.subtypes import (
from datahub.ingestion.source.redshift.config import RedshiftConfig
from datahub.ingestion.source.redshift.datashares import RedshiftDatasharesHelper
from datahub.ingestion.source.redshift.exception import handle_redshift_exceptions_yield
from datahub.ingestion.source.redshift.lineage_v2 import RedshiftSqlLineageV2
from datahub.ingestion.source.redshift.lineage import RedshiftSqlLineage
from datahub.ingestion.source.redshift.profile import RedshiftProfiler
from datahub.ingestion.source.redshift.redshift_data_reader import RedshiftDataReader
from datahub.ingestion.source.redshift.redshift_schema import (
@ -419,7 +419,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
memory_footprint.total_size(self.db_views)
)
with RedshiftSqlLineageV2(
with RedshiftSqlLineage(
config=self.config,
report=self.report,
context=self.ctx,
@ -953,7 +953,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
self,
connection: redshift_connector.Connection,
database: str,
lineage_extractor: RedshiftSqlLineageV2,
lineage_extractor: RedshiftSqlLineage,
) -> Iterable[MetadataWorkUnit]:
if self.config.include_share_lineage:
outbound_shares = self.data_dictionary.get_outbound_datashares(connection)

View File

@ -7,10 +7,10 @@ import pytest
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.source.redshift.config import RedshiftConfig
from datahub.ingestion.source.redshift.lineage_v2 import (
from datahub.ingestion.source.redshift.lineage import (
LineageCollectorType,
LineageDatasetPlatform,
RedshiftSqlLineageV2,
RedshiftSqlLineage,
parse_alter_table_rename,
)
from datahub.ingestion.source.redshift.redshift_schema import (
@ -132,7 +132,7 @@ def test_parse_alter_table_rename():
)
def get_lineage_extractor() -> RedshiftSqlLineageV2:
def get_lineage_extractor() -> RedshiftSqlLineage:
config = RedshiftConfig(
host_port="localhost:5439",
database="test",
@ -142,7 +142,7 @@ def get_lineage_extractor() -> RedshiftSqlLineageV2:
)
report = RedshiftReport()
lineage_extractor = RedshiftSqlLineageV2(
lineage_extractor = RedshiftSqlLineage(
config, report, PipelineContext(run_id="foo"), config.database
)