fix(ingest): bigquery-beta - fix for missing key error if dataset was empty (#6133)

This commit is contained in:
Tamas Nemeth 2022-10-06 18:20:49 +02:00 committed by GitHub
parent 928f29438a
commit 6db0925b26
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 19 additions and 2 deletions

View File

@ -25,6 +25,9 @@ If you have multiple projects in your BigQuery setup, the role should be granted
| `bigquery.readsessions.getData` | Get data from the read session. |
| `resourcemanager.projects.get` | Retrieve project names and metadata. |
Alternatively, you can use the following predefined IAM role, which includes all of the required permissions:
- [roles/bigquery.metadataViewer](https://cloud.google.com/bigquery/docs/access-control#bigquery.metadataViewer)
##### Lineage/usage generation requirements
Additional requirements are needed on top of the basic requirements.

View File

@ -433,11 +433,17 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
yield wu
def get_workunits(self) -> Iterable[WorkUnit]:
logger.info("Getting projects")
conn: bigquery.Client = self.get_bigquery_client()
self.add_config_to_report()
projects: List[BigqueryProject] = BigQueryDataDictionary.get_projects(conn)
if len(projects) == 0:
logger.warning(
"Get projects didn't return any project. Maybe resourcemanager.projects.get permission is missing for the service account. You can assign predefined roles/bigquery.metadataViewer role to your service account."
)
return
for project_id in projects:
if not self.config.project_id_pattern.allowed(project_id.id):
self.report.report_dropped(project_id.id)
@ -446,6 +452,7 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
yield from self._process_project(conn, project_id)
if self.config.profiling.enabled:
logger.info("Starting profiling...")
yield from self.profiler.get_workunits(self.db_tables)
# Clean up stale entities if configured.
@ -475,6 +482,12 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
)
return None
if len(bigquery_project.datasets) == 0:
logger.warning(
f"No dataset found in {project_id}. Either there are no datasets in this project or missing bigquery.datasets.get permission. You can assign predefined roles/bigquery.metadataViewer role to your service account."
)
return
for bigquery_dataset in bigquery_project.datasets:
if not self.config.dataset_pattern.allowed(bigquery_dataset.name):

View File

@ -761,7 +761,8 @@ class BigQueryUsageExtractor:
try:
resource = event.read_event.resource.get_sanitized_table_ref()
if (
resource.table_identifier.get_table_display_name()
resource.table_identifier.dataset not in tables
or resource.table_identifier.get_table_display_name()
not in tables[resource.table_identifier.dataset]
):
logger.debug(f"Skipping non existing {resource} from usage")