From e3d1a95d2c353d5af84bea74ff3666e0e552d345 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Tue, 14 Dec 2021 08:50:05 -0800 Subject: [PATCH] Fix #1737: Add separate filter patterns for database/schema and tables (#1739) * Fix #1737: Add separate filter patterns for database/schema and tables --- ingestion/examples/workflows/bigquery.json | 2 +- ingestion/examples/workflows/mssql.json | 2 +- ingestion/examples/workflows/mysql.json | 4 ++-- ingestion/examples/workflows/redshift.json | 2 +- ingestion/examples/workflows/snowflake.json | 2 +- ingestion/examples/workflows/vertica.json | 2 +- .../src/metadata/ingestion/sink/metadata_rest.py | 14 ++++++++++---- .../src/metadata/ingestion/source/sql_source.py | 9 +++++---- 8 files changed, 22 insertions(+), 15 deletions(-) diff --git a/ingestion/examples/workflows/bigquery.json b/ingestion/examples/workflows/bigquery.json index 9f2ebf7b855..5da6b1cd1e0 100644 --- a/ingestion/examples/workflows/bigquery.json +++ b/ingestion/examples/workflows/bigquery.json @@ -9,7 +9,7 @@ "options": { "credentials_path": "examples/creds/bigquery-cred.json" }, - "filter_pattern": { + "table_filter_pattern": { "excludes": [ "[\\w]*cloudaudit.*", "[\\w]*logging_googleapis_com.*", diff --git a/ingestion/examples/workflows/mssql.json b/ingestion/examples/workflows/mssql.json index 94ec847da66..1f63558b15e 100644 --- a/ingestion/examples/workflows/mssql.json +++ b/ingestion/examples/workflows/mssql.json @@ -8,7 +8,7 @@ "query": "select top 50 * from {}.{}", "username": "sa", "password": "test!Password", - "filter_pattern": { + "table_filter_pattern": { "excludes": ["catalog_test.*"] } } diff --git a/ingestion/examples/workflows/mysql.json b/ingestion/examples/workflows/mysql.json index 49dfeb28efb..d71b91684d0 100644 --- a/ingestion/examples/workflows/mysql.json +++ b/ingestion/examples/workflows/mysql.json @@ -6,8 +6,8 @@ "password": "openmetadata_password", "database": "openmetadata_db", "service_name": "local_mysql", - "filter_pattern": { - "excludes": ["\"mysql.*\", \"information_schema.*\", \"performance_schema.*\", \"sys.*\""] + "schema_filter_pattern": { + "excludes": ["mysql.*", "information_schema.*", "performance_schema.*", "sys.*"] } } }, diff --git a/ingestion/examples/workflows/redshift.json b/ingestion/examples/workflows/redshift.json index 2894a88546b..57cd17de20d 100644 --- a/ingestion/examples/workflows/redshift.json +++ b/ingestion/examples/workflows/redshift.json @@ -7,7 +7,7 @@ "password": "strong_password", "database": "warehouse", "service_name": "aws_redshift", - "filter_pattern": { + "table_filter_pattern": { "excludes": ["information_schema.*", "[\\w]*event_vw.*"] } } diff --git a/ingestion/examples/workflows/snowflake.json b/ingestion/examples/workflows/snowflake.json index 46cfc02cdef..d7725062c1d 100644 --- a/ingestion/examples/workflows/snowflake.json +++ b/ingestion/examples/workflows/snowflake.json @@ -9,7 +9,7 @@ "database": "SNOWFLAKE_SAMPLE_DATA", "account": "account_name", "service_name": "snowflake", - "filter_pattern": { + "table_filter_pattern": { "excludes": [ "tpcds_sf100tcl" ] diff --git a/ingestion/examples/workflows/vertica.json b/ingestion/examples/workflows/vertica.json index c0078b7e54c..0134fedbc89 100644 --- a/ingestion/examples/workflows/vertica.json +++ b/ingestion/examples/workflows/vertica.json @@ -6,7 +6,7 @@ "password": "openmetadata_password", "database": "openmetadata_db", "service_name": "local_vertica", - "filter_pattern": { + "table_filter_pattern": { "excludes": [] } } diff --git a/ingestion/src/metadata/ingestion/sink/metadata_rest.py b/ingestion/src/metadata/ingestion/sink/metadata_rest.py index 945d2d89c19..6720af99a1d 100644 --- a/ingestion/src/metadata/ingestion/sink/metadata_rest.py +++ b/ingestion/src/metadata/ingestion/sink/metadata_rest.py @@ -170,10 +170,16 @@ class MetadataRestSink(Sink[Entity]): location = self.metadata.create_or_update(location_request) self.metadata.add_location(table=created_table, location=location) if db_and_table.table.sampleData is not None: - self.metadata.ingest_table_sample_data( - table=created_table, - sample_data=db_and_table.table.sampleData, - ) + try: + self.metadata.ingest_table_sample_data( + table=created_table, + sample_data=db_and_table.table.sampleData, + ) + except Exception as e: + logging.error( + f"Failed to ingest sample data for table {db_and_table.table.name}" + ) + if db_and_table.table.tableProfile is not None: for tp in db_and_table.table.tableProfile: for pd in tp: diff --git a/ingestion/src/metadata/ingestion/source/sql_source.py b/ingestion/src/metadata/ingestion/source/sql_source.py index 102798f6f12..a5666217e20 100644 --- a/ingestion/src/metadata/ingestion/source/sql_source.py +++ b/ingestion/src/metadata/ingestion/source/sql_source.py @@ -88,7 +88,8 @@ class SQLConnectionConfig(ConfigModel): data_profiler_date: Optional[str] = datetime.now().strftime("%Y-%m-%d") data_profiler_offset: Optional[int] = 0 data_profiler_limit: Optional[int] = 50000 - filter_pattern: IncludeFilterPattern = IncludeFilterPattern.allow_all() + table_filter_pattern: IncludeFilterPattern = IncludeFilterPattern.allow_all() + schema_filter_pattern: IncludeFilterPattern = IncludeFilterPattern.allow_all() dbt_manifest_file: Optional[str] = None dbt_catalog_file: Optional[str] = None @@ -201,7 +202,7 @@ class SQLSource(Source[OMetaDatabaseAndTable]): def next_record(self) -> Iterable[OMetaDatabaseAndTable]: inspector = inspect(self.engine) for schema in inspector.get_schema_names(): - if not self.sql_config.filter_pattern.included(schema): + if not self.sql_config.schema_filter_pattern.included(schema): self.status.filter(schema, "Schema pattern not allowed") continue logger.debug("total tables {}".format(inspector.get_table_names(schema))) @@ -218,7 +219,7 @@ class SQLSource(Source[OMetaDatabaseAndTable]): schema, table_name = self.standardize_schema_table_names( schema, table_name ) - if not self.sql_config.filter_pattern.included(table_name): + if not self.sql_config.table_filter_pattern.included(table_name): self.status.filter( "{}.{}".format(self.config.get_service_name(), table_name), "Table pattern not allowed", @@ -276,7 +277,7 @@ class SQLSource(Source[OMetaDatabaseAndTable]): schema, view_name = self.standardize_schema_table_names( schema, view_name ) - if not self.sql_config.filter_pattern.included(view_name): + if not self.sql_config.table_filter_pattern.included(view_name): self.status.filter( "{}.{}".format(self.config.get_service_name(), view_name), "View pattern not allowed",