Fix #1737: Add separate filter patterns for database/schema and tables (#1739)

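* Fix #1737: Add separate filter patterns for database/schema and tables. The single `filter_pattern` option is replaced by `schema_filter_pattern` (checked against schema names) and `table_filter_pattern` (checked against table and view names); the example configs are updated to the new keys, and the metadata REST sink now logs an error when sample-data ingestion fails instead of letting the exception propagate.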
This commit is contained in:
Sriharsha Chintalapani 2021-12-14 08:50:05 -08:00 committed by GitHub
parent 349f9a18f4
commit e3d1a95d2c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 22 additions and 15 deletions

View File

@ -9,7 +9,7 @@
"options": {
"credentials_path": "examples/creds/bigquery-cred.json"
},
"filter_pattern": {
"table_filter_pattern": {
"excludes": [
"[\\w]*cloudaudit.*",
"[\\w]*logging_googleapis_com.*",

View File

@ -8,7 +8,7 @@
"query": "select top 50 * from {}.{}",
"username": "sa",
"password": "test!Password",
"filter_pattern": {
"table_filter_pattern": {
"excludes": ["catalog_test.*"]
}
}

View File

@ -6,8 +6,8 @@
"password": "openmetadata_password",
"database": "openmetadata_db",
"service_name": "local_mysql",
"filter_pattern": {
"excludes": ["\"mysql.*\", \"information_schema.*\", \"performance_schema.*\", \"sys.*\""]
"schema_filter_pattern": {
"excludes": ["mysql.*", "information_schema.*", "performance_schema.*", "sys.*"]
}
}
},

View File

@ -7,7 +7,7 @@
"password": "strong_password",
"database": "warehouse",
"service_name": "aws_redshift",
"filter_pattern": {
"table_filter_pattern": {
"excludes": ["information_schema.*", "[\\w]*event_vw.*"]
}
}

View File

@ -9,7 +9,7 @@
"database": "SNOWFLAKE_SAMPLE_DATA",
"account": "account_name",
"service_name": "snowflake",
"filter_pattern": {
"table_filter_pattern": {
"excludes": [
"tpcds_sf100tcl"
]

View File

@ -6,7 +6,7 @@
"password": "openmetadata_password",
"database": "openmetadata_db",
"service_name": "local_vertica",
"filter_pattern": {
"table_filter_pattern": {
"excludes": []
}
}

View File

@ -170,10 +170,16 @@ class MetadataRestSink(Sink[Entity]):
location = self.metadata.create_or_update(location_request)
self.metadata.add_location(table=created_table, location=location)
if db_and_table.table.sampleData is not None:
self.metadata.ingest_table_sample_data(
table=created_table,
sample_data=db_and_table.table.sampleData,
)
try:
self.metadata.ingest_table_sample_data(
table=created_table,
sample_data=db_and_table.table.sampleData,
)
except Exception as e:
logging.error(
f"Failed to ingest sample data for table {db_and_table.table.name}"
)
if db_and_table.table.tableProfile is not None:
for tp in db_and_table.table.tableProfile:
for pd in tp:

View File

@ -88,7 +88,8 @@ class SQLConnectionConfig(ConfigModel):
data_profiler_date: Optional[str] = datetime.now().strftime("%Y-%m-%d")
data_profiler_offset: Optional[int] = 0
data_profiler_limit: Optional[int] = 50000
filter_pattern: IncludeFilterPattern = IncludeFilterPattern.allow_all()
table_filter_pattern: IncludeFilterPattern = IncludeFilterPattern.allow_all()
schema_filter_pattern: IncludeFilterPattern = IncludeFilterPattern.allow_all()
dbt_manifest_file: Optional[str] = None
dbt_catalog_file: Optional[str] = None
@ -201,7 +202,7 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
def next_record(self) -> Iterable[OMetaDatabaseAndTable]:
inspector = inspect(self.engine)
for schema in inspector.get_schema_names():
if not self.sql_config.filter_pattern.included(schema):
if not self.sql_config.schema_filter_pattern.included(schema):
self.status.filter(schema, "Schema pattern not allowed")
continue
logger.debug("total tables {}".format(inspector.get_table_names(schema)))
@ -218,7 +219,7 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
schema, table_name = self.standardize_schema_table_names(
schema, table_name
)
if not self.sql_config.filter_pattern.included(table_name):
if not self.sql_config.table_filter_pattern.included(table_name):
self.status.filter(
"{}.{}".format(self.config.get_service_name(), table_name),
"Table pattern not allowed",
@ -276,7 +277,7 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
schema, view_name = self.standardize_schema_table_names(
schema, view_name
)
if not self.sql_config.filter_pattern.included(view_name):
if not self.sql_config.table_filter_pattern.included(view_name):
self.status.filter(
"{}.{}".format(self.config.get_service_name(), view_name),
"View pattern not allowed",