Fix #1737: Add separate filter patterns for database/schema and tables (#1739)

* Fix #1737: Add separate filter patterns for database/schema and tables
This commit is contained in:
Sriharsha Chintalapani 2021-12-14 08:50:05 -08:00 committed by GitHub
parent 349f9a18f4
commit e3d1a95d2c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 22 additions and 15 deletions

View File

@ -9,7 +9,7 @@
"options": { "options": {
"credentials_path": "examples/creds/bigquery-cred.json" "credentials_path": "examples/creds/bigquery-cred.json"
}, },
"filter_pattern": { "table_filter_pattern": {
"excludes": [ "excludes": [
"[\\w]*cloudaudit.*", "[\\w]*cloudaudit.*",
"[\\w]*logging_googleapis_com.*", "[\\w]*logging_googleapis_com.*",

View File

@ -8,7 +8,7 @@
"query": "select top 50 * from {}.{}", "query": "select top 50 * from {}.{}",
"username": "sa", "username": "sa",
"password": "test!Password", "password": "test!Password",
"filter_pattern": { "table_filter_pattern": {
"excludes": ["catalog_test.*"] "excludes": ["catalog_test.*"]
} }
} }

View File

@ -6,8 +6,8 @@
"password": "openmetadata_password", "password": "openmetadata_password",
"database": "openmetadata_db", "database": "openmetadata_db",
"service_name": "local_mysql", "service_name": "local_mysql",
"filter_pattern": { "schema_filter_pattern": {
"excludes": ["\"mysql.*\", \"information_schema.*\", \"performance_schema.*\", \"sys.*\""] "excludes": ["mysql.*", "information_schema.*", "performance_schema.*", "sys.*"]
} }
} }
}, },

View File

@ -7,7 +7,7 @@
"password": "strong_password", "password": "strong_password",
"database": "warehouse", "database": "warehouse",
"service_name": "aws_redshift", "service_name": "aws_redshift",
"filter_pattern": { "table_filter_pattern": {
"excludes": ["information_schema.*", "[\\w]*event_vw.*"] "excludes": ["information_schema.*", "[\\w]*event_vw.*"]
} }
} }

View File

@ -9,7 +9,7 @@
"database": "SNOWFLAKE_SAMPLE_DATA", "database": "SNOWFLAKE_SAMPLE_DATA",
"account": "account_name", "account": "account_name",
"service_name": "snowflake", "service_name": "snowflake",
"filter_pattern": { "table_filter_pattern": {
"excludes": [ "excludes": [
"tpcds_sf100tcl" "tpcds_sf100tcl"
] ]

View File

@ -6,7 +6,7 @@
"password": "openmetadata_password", "password": "openmetadata_password",
"database": "openmetadata_db", "database": "openmetadata_db",
"service_name": "local_vertica", "service_name": "local_vertica",
"filter_pattern": { "table_filter_pattern": {
"excludes": [] "excludes": []
} }
} }

View File

@ -170,10 +170,16 @@ class MetadataRestSink(Sink[Entity]):
location = self.metadata.create_or_update(location_request) location = self.metadata.create_or_update(location_request)
self.metadata.add_location(table=created_table, location=location) self.metadata.add_location(table=created_table, location=location)
if db_and_table.table.sampleData is not None: if db_and_table.table.sampleData is not None:
try:
self.metadata.ingest_table_sample_data( self.metadata.ingest_table_sample_data(
table=created_table, table=created_table,
sample_data=db_and_table.table.sampleData, sample_data=db_and_table.table.sampleData,
) )
except Exception as e:
logging.error(
f"Failed to ingest sample data for table {db_and_table.table.name}"
)
if db_and_table.table.tableProfile is not None: if db_and_table.table.tableProfile is not None:
for tp in db_and_table.table.tableProfile: for tp in db_and_table.table.tableProfile:
for pd in tp: for pd in tp:

View File

@ -88,7 +88,8 @@ class SQLConnectionConfig(ConfigModel):
data_profiler_date: Optional[str] = datetime.now().strftime("%Y-%m-%d") data_profiler_date: Optional[str] = datetime.now().strftime("%Y-%m-%d")
data_profiler_offset: Optional[int] = 0 data_profiler_offset: Optional[int] = 0
data_profiler_limit: Optional[int] = 50000 data_profiler_limit: Optional[int] = 50000
filter_pattern: IncludeFilterPattern = IncludeFilterPattern.allow_all() table_filter_pattern: IncludeFilterPattern = IncludeFilterPattern.allow_all()
schema_filter_pattern: IncludeFilterPattern = IncludeFilterPattern.allow_all()
dbt_manifest_file: Optional[str] = None dbt_manifest_file: Optional[str] = None
dbt_catalog_file: Optional[str] = None dbt_catalog_file: Optional[str] = None
@ -201,7 +202,7 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
def next_record(self) -> Iterable[OMetaDatabaseAndTable]: def next_record(self) -> Iterable[OMetaDatabaseAndTable]:
inspector = inspect(self.engine) inspector = inspect(self.engine)
for schema in inspector.get_schema_names(): for schema in inspector.get_schema_names():
if not self.sql_config.filter_pattern.included(schema): if not self.sql_config.schema_filter_pattern.included(schema):
self.status.filter(schema, "Schema pattern not allowed") self.status.filter(schema, "Schema pattern not allowed")
continue continue
logger.debug("total tables {}".format(inspector.get_table_names(schema))) logger.debug("total tables {}".format(inspector.get_table_names(schema)))
@ -218,7 +219,7 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
schema, table_name = self.standardize_schema_table_names( schema, table_name = self.standardize_schema_table_names(
schema, table_name schema, table_name
) )
if not self.sql_config.filter_pattern.included(table_name): if not self.sql_config.table_filter_pattern.included(table_name):
self.status.filter( self.status.filter(
"{}.{}".format(self.config.get_service_name(), table_name), "{}.{}".format(self.config.get_service_name(), table_name),
"Table pattern not allowed", "Table pattern not allowed",
@ -276,7 +277,7 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
schema, view_name = self.standardize_schema_table_names( schema, view_name = self.standardize_schema_table_names(
schema, view_name schema, view_name
) )
if not self.sql_config.filter_pattern.included(view_name): if not self.sql_config.table_filter_pattern.included(view_name):
self.status.filter( self.status.filter(
"{}.{}".format(self.config.get_service_name(), view_name), "{}.{}".format(self.config.get_service_name(), view_name),
"View pattern not allowed", "View pattern not allowed",