From 404cc67911cbb15fa38e7bf9e9bdf1527df64381 Mon Sep 17 00:00:00 2001 From: Mayur Singal <39544459+ulixius9@users.noreply.github.com> Date: Wed, 24 Aug 2022 17:51:52 +0530 Subject: [PATCH] WIP - Fix #6744: allow more than one metadata ingestion workflow (#6831) * Fix #6744: allow more than one metadata ingestion workflow * Rename to markDeletedTablesFromFilterOnly * ui support for new field markDeletedTablesFromFilterOnly Co-authored-by: Chirag Madlani <12962843+chirag-madlani@users.noreply.github.com> --- .../database/oracleConnection.json | 1 + .../databaseServiceMetadataPipeline.json | 5 ++++ .../source/database/common_db_source.py | 27 +++++++++-------- .../source/database/database_service.py | 30 ++++++++++++++----- .../AddIngestion/AddIngestion.component.tsx | 14 +++++++++ .../AddIngestion/Steps/ConfigureIngestion.tsx | 23 ++++++++++++++ .../AddIngestion/addIngestion.interface.ts | 2 ++ 7 files changed, 81 insertions(+), 21 deletions(-) diff --git a/catalog-rest-service/src/main/resources/json/schema/entity/services/connections/database/oracleConnection.json b/catalog-rest-service/src/main/resources/json/schema/entity/services/connections/database/oracleConnection.json index 31162976f77..d2881198a86 100644 --- a/catalog-rest-service/src/main/resources/json/schema/entity/services/connections/database/oracleConnection.json +++ b/catalog-rest-service/src/main/resources/json/schema/entity/services/connections/database/oracleConnection.json @@ -72,6 +72,7 @@ }, "oracleConnectionType": { "title": "Oracle Connection Type", + "type": "object", "description": "Connect with oracle by either passing service name or database schema name.", "oneOf": [ { diff --git a/catalog-rest-service/src/main/resources/json/schema/metadataIngestion/databaseServiceMetadataPipeline.json b/catalog-rest-service/src/main/resources/json/schema/metadataIngestion/databaseServiceMetadataPipeline.json index 070fbf0954b..f5a22fd880f 100644 --- a/catalog-rest-service/src/main/resources/json/schema/metadataIngestion/databaseServiceMetadataPipeline.json +++ b/catalog-rest-service/src/main/resources/json/schema/metadataIngestion/databaseServiceMetadataPipeline.json @@ -126,6 +126,11 @@ "type": "boolean", "default": true }, + "markDeletedTablesFromFilterOnly": { + "description": "Optional configuration to mark deleted tables only to the filtered schema", + "type": "boolean", + "default": false + }, "includeTables": { "description": "Optional configuration to turn off fetching metadata for tables.", "type": "boolean", diff --git a/ingestion/src/metadata/ingestion/source/database/common_db_source.py b/ingestion/src/metadata/ingestion/source/database/common_db_source.py index 1cb092f32d6..e8bff620ce1 100644 --- a/ingestion/src/metadata/ingestion/source/database/common_db_source.py +++ b/ingestion/src/metadata/ingestion/source/database/common_db_source.py @@ -137,23 +137,24 @@ class CommonDbSourceService( ), ) + def get_raw_database_schema_names(self) -> Iterable[str]: + if self.service_connection.__dict__.get("databaseSchema"): + yield self.service_connection.databaseSchema + else: + for schema_name in self.inspector.get_schema_names(): + yield schema_name + def get_database_schema_names(self) -> Iterable[str]: """ return schema names """ - if self.service_connection.__dict__.get("databaseSchema"): - yield self.service_connection.databaseSchema - - else: - for schema_name in self.inspector.get_schema_names(): - - if filter_by_schema( - self.source_config.schemaFilterPattern, schema_name=schema_name - ): - self.status.filter(schema_name, "Schema pattern not allowed") - continue - - yield schema_name + for schema_name in self.get_raw_database_schema_names(): + if filter_by_schema( + self.source_config.schemaFilterPattern, schema_name=schema_name + ): + self.status.filter(schema_name, "Schema pattern not allowed") + continue + yield schema_name def yield_database_schema( self, schema_name: str diff --git a/ingestion/src/metadata/ingestion/source/database/database_service.py b/ingestion/src/metadata/ingestion/source/database/database_service.py index 787c7e6cf19..01ffd3edf68 100644 --- a/ingestion/src/metadata/ingestion/source/database/database_service.py +++ b/ingestion/src/metadata/ingestion/source/database/database_service.py @@ -337,6 +337,12 @@ class DatabaseServiceSource(DBTMixin, TopologyRunnerMixin, Source, ABC): """ return + def get_raw_database_schema_names(self) -> Iterable[str]: + """ + fetch all schema names without any filtering. + """ + yield from self.get_database_schema_names() + def yield_datamodel( self, table_name_and_type: Tuple[str, TableType] ) -> Iterable[DataModelLink]: @@ -440,12 +446,12 @@ class DatabaseServiceSource(DBTMixin, TopologyRunnerMixin, Source, ABC): self.database_source_state.add(table_fqn) self.status.scanned(table_fqn) - def delete_database_tables(self, database_fqn: str) -> Iterable[DeleteTable]: + def delete_schema_tables(self, schema_fqn: str) -> Iterable[DeleteTable]: """ Returns Deleted tables """ database_state = self.metadata.list_all_entities( - entity=Table, params={"database": database_fqn} + entity=Table, params={"database": schema_fqn} ) for table in database_state: if str(table.fullyQualifiedName.__root__) not in self.database_source_state: @@ -459,10 +465,18 @@ class DatabaseServiceSource(DBTMixin, TopologyRunnerMixin, Source, ABC): logger.info( f"Mark Deleted Tables set to True. Processing database [{self.context.database.name.__root__}]" ) - databse_fqn = fqn.build( - self.metadata, - entity_type=Database, - service_name=self.config.serviceName, - database_name=self.context.database.name.__root__, + schema_names_list = ( + self.get_database_schema_names() + if self.source_config.markDeletedTablesFromFilterOnly + else self.get_raw_database_schema_names() ) - yield from self.delete_database_tables(databse_fqn) + for schema_name in schema_names_list: + schema_fqn = fqn.build( + self.metadata, + entity_type=DatabaseSchema, + service_name=self.config.serviceName, + database_name=self.context.database.name.__root__, + schema_name=schema_name, + ) + + yield from self.delete_schema_tables(schema_fqn) diff --git a/openmetadata-ui/src/main/resources/ui/src/components/AddIngestion/AddIngestion.component.tsx b/openmetadata-ui/src/main/resources/ui/src/components/AddIngestion/AddIngestion.component.tsx index 836e29cfe44..74123540cd5 100644 --- a/openmetadata-ui/src/main/resources/ui/src/components/AddIngestion/AddIngestion.component.tsx +++ b/openmetadata-ui/src/main/resources/ui/src/components/AddIngestion/AddIngestion.component.tsx @@ -154,6 +154,15 @@ const AddIngestion = ({ ) : undefined ); + const [markDeletedTablesFromFilterOnly, setMarkDeletedTablesFromFilterOnly] = + useState( + isDatabaseService + ? Boolean( + (data?.sourceConfig.config as ConfigClass) + ?.markDeletedTablesFromFilterOnly ?? false + ) + : undefined + ); const [includeView, setIncludeView] = useState( Boolean((data?.sourceConfig.config as ConfigClass)?.includeViews) ); @@ -400,6 +409,7 @@ const AddIngestion = ({ showTableFilter ), markDeletedTables, + markDeletedTablesFromFilterOnly, ...DatabaseConfigData, type: ConfigType.DatabaseMetadata, }; @@ -636,6 +646,9 @@ const AddIngestion = ({ handleIngestSampleData={() => setIngestSampleData((pre) => !pre)} handleIngestionName={(val) => setIngestionName(val)} handleMarkDeletedTables={() => setMarkDeletedTables((pre) => !pre)} + handleMarkDeletedTablesFromFilterOnly={() => + setMarkDeletedTablesFromFilterOnly((pre) => !pre) + } handleProfileSample={(val) => setProfileSample(val)} handleQueryLogDuration={(val) => setQueryLogDuration(val)} handleResultLimit={setResultLimit} @@ -648,6 +661,7 @@ const AddIngestion = ({ ingestSampleData={ingestSampleData} ingestionName={ingestionName} markDeletedTables={markDeletedTables} + markDeletedTablesFromFilterOnly={markDeletedTablesFromFilterOnly} pipelineFilterPattern={pipelineFilterPattern} pipelineType={pipelineType} profileSample={profileSample} diff --git a/openmetadata-ui/src/main/resources/ui/src/components/AddIngestion/Steps/ConfigureIngestion.tsx b/openmetadata-ui/src/main/resources/ui/src/components/AddIngestion/Steps/ConfigureIngestion.tsx index b2685daac16..435d3ef297d 100644 --- a/openmetadata-ui/src/main/resources/ui/src/components/AddIngestion/Steps/ConfigureIngestion.tsx +++ b/openmetadata-ui/src/main/resources/ui/src/components/AddIngestion/Steps/ConfigureIngestion.tsx @@ -40,6 +40,7 @@ const ConfigureIngestion = ({ includeView, includeTags, markDeletedTables, + markDeletedTablesFromFilterOnly, serviceCategory, pipelineType, showDatabaseFilter, @@ -66,6 +67,7 @@ const ConfigureIngestion = ({ handleIncludeView, handleIncludeTags, handleMarkDeletedTables, + handleMarkDeletedTablesFromFilterOnly, handleIngestSampleData, handleDatasetServiceName, handleQueryLogDuration, @@ -228,6 +230,27 @@ const ConfigureIngestion = ({ {getSeparator('')} )} + {!isNil(markDeletedTablesFromFilterOnly) && ( + +
+ + { + if (handleMarkDeletedTablesFromFilterOnly) { + handleMarkDeletedTablesFromFilterOnly(); + } + }} + testId="mark-deleted-filter-only" + /> +
+

+ Optional configuration to mark deleted tables only to the + filtered schema +

+ {getSeparator('')} +
+ )} ); diff --git a/openmetadata-ui/src/main/resources/ui/src/components/AddIngestion/addIngestion.interface.ts b/openmetadata-ui/src/main/resources/ui/src/components/AddIngestion/addIngestion.interface.ts index f556986d177..1e42c9cce33 100644 --- a/openmetadata-ui/src/main/resources/ui/src/components/AddIngestion/addIngestion.interface.ts +++ b/openmetadata-ui/src/main/resources/ui/src/components/AddIngestion/addIngestion.interface.ts @@ -70,6 +70,7 @@ export interface ConfigureIngestionProps { includeView: boolean; includeTags: boolean; markDeletedTables?: boolean; + markDeletedTablesFromFilterOnly?: boolean; enableDebugLog: boolean; profileSample?: number; ingestSampleData: boolean; @@ -92,6 +93,7 @@ export interface ConfigureIngestionProps { handleIncludeView: () => void; handleIncludeTags: () => void; handleMarkDeletedTables?: () => void; + handleMarkDeletedTablesFromFilterOnly?: () => void; handleEnableDebugLog: () => void; handleIngestSampleData: () => void; getIncludeValue: (value: string[], type: FilterPatternEnum) => void;