diff --git a/ingestion/src/metadata/ingestion/source/storage/gcs/client.py b/ingestion/src/metadata/ingestion/source/storage/gcs/client.py index 0f6f6119019..87cd06400b6 100644 --- a/ingestion/src/metadata/ingestion/source/storage/gcs/client.py +++ b/ingestion/src/metadata/ingestion/source/storage/gcs/client.py @@ -16,8 +16,6 @@ from google import auth from google.cloud.monitoring_v3 import MetricServiceClient from google.cloud.storage import Client -NoProject = object() - class MultiProjectClient: """Google Cloud Client does not support ad-hoc project switching. This class wraps the client and allows @@ -37,19 +35,16 @@ class MultiProjectClient: project_ids: Optional[List[str]] = None, **client_kwargs, ): + self.default_project = None if project_ids: self.clients = { project_id: client_class(project=project_id, **client_kwargs) for project_id in project_ids } else: - self.clients = {NoProject: client_class(**client_kwargs)} - - def project_ids(self): - if NoProject in self.clients: _, project_id = auth.default() - return [project_id] - return list(self.clients.keys()) + self.default_project = project_id + self.clients = {project_id: client_class(**client_kwargs)} def __getattr__(self, client_method): """Return the underlying client method as a partial function so we can inject the project_id.""" @@ -57,7 +52,7 @@ class MultiProjectClient: def _call(self, method, project_id, *args, **kwargs): """Call the method on the client for the given project_id. The args and kwargs are passed through.""" - client = self.clients.get(project_id, self.clients.get(NoProject)) + client = self.clients.get(project_id, self.clients.get(self.default_project)) if not client: raise ValueError(f"Project {project_id} not found") return getattr(client, method)(*args, **kwargs) diff --git a/ingestion/src/metadata/ingestion/source/storage/gcs/metadata.py b/ingestion/src/metadata/ingestion/source/storage/gcs/metadata.py index bd89d3a4321..095a8987636 100644 --- a/ingestion/src/metadata/ingestion/source/storage/gcs/metadata.py +++ b/ingestion/src/metadata/ingestion/source/storage/gcs/metadata.py @@ -259,6 +259,10 @@ class GcsSource(StorageServiceSource): try: bucket = client.get_bucket(bucket_name) except NotFound: + logger.warning( + f"Bucket {bucket_name} not found in project {project_id}" + ) + self.status.warning(f"{project_id}.{bucket_name}", "Bucket Not Found") continue return GCSBucketResponse( name=bucket.name, diff --git a/openmetadata-docs/content/v1.5.x-SNAPSHOT/connectors/database/bigquery/yaml.md b/openmetadata-docs/content/v1.5.x-SNAPSHOT/connectors/database/bigquery/yaml.md index 319f88fa65f..9480d31c4c4 100644 --- a/openmetadata-docs/content/v1.5.x-SNAPSHOT/connectors/database/bigquery/yaml.md +++ b/openmetadata-docs/content/v1.5.x-SNAPSHOT/connectors/database/bigquery/yaml.md @@ -199,7 +199,7 @@ source: Super secret key -----END PRIVATE KEY----- clientEmail: role@project.iam.gserviceaccount.com - clientId: 1234 + clientId: "1234" # authUri: https://accounts.google.com/o/oauth2/auth (default) # tokenUri: https://oauth2.googleapis.com/token (default) # authProviderX509CertUrl: https://www.googleapis.com/oauth2/v1/certs (default) diff --git a/openmetadata-docs/content/v1.5.x-SNAPSHOT/connectors/storage/gcs/yaml.md b/openmetadata-docs/content/v1.5.x-SNAPSHOT/connectors/storage/gcs/yaml.md index 82b6d260159..324f4ed000a 100644 --- a/openmetadata-docs/content/v1.5.x-SNAPSHOT/connectors/storage/gcs/yaml.md +++ b/openmetadata-docs/content/v1.5.x-SNAPSHOT/connectors/storage/gcs/yaml.md @@ -22,6 +22,12 @@ Configure and schedule GCS metadata workflows from the CLI: ## Requirements +To run the GCS ingestion, you will need to install: + +```bash +pip3 install "openmetadata-ingestion[datalake-gcs]" +``` + {%inlineCallout icon="description" bold="OpenMetadata 1.0 or later" href="/deployment"%} To deploy OpenMetadata, check the Deployment guides. {%/inlineCallout%} @@ -98,7 +104,8 @@ source: config: type: GCS credentials: - gcpConfig: + gcpConfig: + path: ``` - If you want to use [ADC authentication](https://cloud.google.com/docs/authentication#adc) for GCP you can just leave @@ -162,7 +169,7 @@ source: Super secret key -----END PRIVATE KEY----- clientEmail: client@mail.com - clientId: 1234 + clientId: "1234" # authUri: https://accounts.google.com/o/oauth2/auth (default) # tokenUri: https://oauth2.googleapis.com/token (default) # authProviderX509CertUrl: https://www.googleapis.com/oauth2/v1/certs (default)