mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-28 10:28:22 +00:00
fix(ingest): bigquery-beta - fix for missing key error if dataset was empty (#6133)
This commit is contained in:
parent
928f29438a
commit
6db0925b26
@ -25,6 +25,9 @@ If you have multiple projects in your BigQuery setup, the role should be granted
|
||||
| `bigquery.readsessions.getData` | Get data from the read session. |
|
||||
| `resourcemanager.projects.get` | Retrieve project names and metadata. |
|
||||
|
||||
You can also use the following predefined IAM role, which has all the needed permissions:
|
||||
- [roles/bigquery.metadataViewer](https://cloud.google.com/bigquery/docs/access-control#bigquery.metadataViewer)
|
||||
|
||||
##### Lineage/usage generation requirements
|
||||
|
||||
Additional requirements needed on top of the basic requirements.
|
||||
|
||||
@ -433,11 +433,17 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
|
||||
yield wu
|
||||
|
||||
def get_workunits(self) -> Iterable[WorkUnit]:
|
||||
|
||||
logger.info("Getting projects")
|
||||
conn: bigquery.Client = self.get_bigquery_client()
|
||||
self.add_config_to_report()
|
||||
|
||||
projects: List[BigqueryProject] = BigQueryDataDictionary.get_projects(conn)
|
||||
if len(projects) == 0:
|
||||
logger.warning(
|
||||
"Get projects didn't return any project. Maybe resourcemanager.projects.get permission is missing for the service account. You can assign predefined roles/bigquery.metadataViewer role to your service account."
|
||||
)
|
||||
return
|
||||
|
||||
for project_id in projects:
|
||||
if not self.config.project_id_pattern.allowed(project_id.id):
|
||||
self.report.report_dropped(project_id.id)
|
||||
@ -446,6 +452,7 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
|
||||
yield from self._process_project(conn, project_id)
|
||||
|
||||
if self.config.profiling.enabled:
|
||||
logger.info("Starting profiling...")
|
||||
yield from self.profiler.get_workunits(self.db_tables)
|
||||
|
||||
# Clean up stale entities if configured.
|
||||
@ -475,6 +482,12 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
|
||||
)
|
||||
return None
|
||||
|
||||
if len(bigquery_project.datasets) == 0:
|
||||
logger.warning(
|
||||
f"No dataset found in {project_id}. Either there are no datasets in this project or missing bigquery.datasets.get permission. You can assign predefined roles/bigquery.metadataViewer role to your service account."
|
||||
)
|
||||
return
|
||||
|
||||
for bigquery_dataset in bigquery_project.datasets:
|
||||
|
||||
if not self.config.dataset_pattern.allowed(bigquery_dataset.name):
|
||||
|
||||
@ -761,7 +761,8 @@ class BigQueryUsageExtractor:
|
||||
try:
|
||||
resource = event.read_event.resource.get_sanitized_table_ref()
|
||||
if (
|
||||
resource.table_identifier.get_table_display_name()
|
||||
resource.table_identifier.dataset not in tables
|
||||
or resource.table_identifier.get_table_display_name()
|
||||
not in tables[resource.table_identifier.dataset]
|
||||
):
|
||||
logger.debug(f"Skipping non existing {resource} from usage")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user