diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 40d652463b..6311ddf7e2 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -252,12 +252,13 @@ plugins: Dict[str, Set[str]] = { | bigquery_common | {"sqlalchemy-bigquery>=1.4.1", "sqllineage==1.3.6", "sqlparse"}, "bigquery-usage-legacy": bigquery_common | usage_common | {"cachetools"}, - "bigquery": sql_common | bigquery_common | {"sqllineage==1.3.6", "sql_metadata"}, + "bigquery": sql_common | bigquery_common | {"sqllineage==1.3.6", "sql_metadata", "sqlalchemy-bigquery>=1.4.1"}, "bigquery-beta": sql_common | bigquery_common | { "sqllineage==1.3.6", "sql_metadata", + "sqlalchemy-bigquery>=1.4.1" }, # deprecated, but keeping the extra for backwards compatibility "clickhouse": sql_common | clickhouse_common, "clickhouse-usage": sql_common | usage_common | clickhouse_common, @@ -376,7 +377,8 @@ mypy_stubs = { "types-ujson>=5.2.0", "types-termcolor>=1.0.0", "types-Deprecated", - "types-protobuf", + # Mypy complains with 4.21.0.0 => error: Library stubs not installed for "google.protobuf.descriptor" + "types-protobuf<4.21.0.0", } base_dev_requirements = { diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py index 383bd91c99..5de36facb6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py @@ -349,8 +349,8 @@ class BigQueryDataDictionary: ) if "last_altered" in table else None, - size_in_bytes=table.bytes if "bytes" in table else None, - rows_count=table.row_count if "row_count" in table else None, + size_in_bytes=table.get("bytes"), + rows_count=table.get("row_count"), comment=table.comment, ddl=table.ddl, expires=tables[table.table_name].expires if tables else None, @@ -361,24 +361,16 @@ class BigQueryDataDictionary: clustering_fields=tables[table.table_name].clustering_fields if tables else None, - max_partition_id=table.max_partition_id - if "max_partition_id" in table - else None, + max_partition_id=table.get("max_partition_id"), max_shard_id=BigqueryTableIdentifier.get_table_and_shard( table.table_name )[1] if len(BigqueryTableIdentifier.get_table_and_shard(table.table_name)) == 2 else None, - num_partitions=table.num_partitions - if "num_partitions" in table - else None, - active_billable_bytes=table.active_billable_bytes - if "active_billable_bytes" in table - else None, - long_term_billable_bytes=table.long_term_billable_bytes - if "long_term_billable_bytes" in table - else None, + num_partitions=table.get("num_partitions"), + active_billable_bytes=table.get("active_billable_bytes"), + long_term_billable_bytes=table.get("long_term_billable_bytes"), ) for table in cur ]