1. updated docs to specify the GCP credentials file under `gcpConfig.path`.
2. updated example `clientId` in docs
3. fixed client to work with implicit project
4. fixed workflow to warn about missing buckets
This commit is contained in:
Imri Paran 2024-08-20 08:14:46 +02:00 committed by GitHub
parent 31c2ec8c57
commit 2722eadc33
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 18 additions and 12 deletions

View File

@ -16,8 +16,6 @@ from google import auth
from google.cloud.monitoring_v3 import MetricServiceClient from google.cloud.monitoring_v3 import MetricServiceClient
from google.cloud.storage import Client from google.cloud.storage import Client
NoProject = object()
class MultiProjectClient: class MultiProjectClient:
"""Google Cloud Client does not support ad-hoc project switching. This class wraps the client and allows """Google Cloud Client does not support ad-hoc project switching. This class wraps the client and allows
@ -37,19 +35,16 @@ class MultiProjectClient:
project_ids: Optional[List[str]] = None, project_ids: Optional[List[str]] = None,
**client_kwargs, **client_kwargs,
): ):
self.default_project = None
if project_ids: if project_ids:
self.clients = { self.clients = {
project_id: client_class(project=project_id, **client_kwargs) project_id: client_class(project=project_id, **client_kwargs)
for project_id in project_ids for project_id in project_ids
} }
else: else:
self.clients = {NoProject: client_class(**client_kwargs)}
def project_ids(self):
if NoProject in self.clients:
_, project_id = auth.default() _, project_id = auth.default()
return [project_id] self.default_project = project_id
return list(self.clients.keys()) self.clients = {project_id: client_class(**client_kwargs)}
def __getattr__(self, client_method): def __getattr__(self, client_method):
"""Return the underlying client method as a partial function so we can inject the project_id.""" """Return the underlying client method as a partial function so we can inject the project_id."""
@ -57,7 +52,7 @@ class MultiProjectClient:
def _call(self, method, project_id, *args, **kwargs): def _call(self, method, project_id, *args, **kwargs):
"""Call the method on the client for the given project_id. The args and kwargs are passed through.""" """Call the method on the client for the given project_id. The args and kwargs are passed through."""
client = self.clients.get(project_id, self.clients.get(NoProject)) client = self.clients.get(project_id, self.clients.get(self.default_project))
if not client: if not client:
raise ValueError(f"Project {project_id} not found") raise ValueError(f"Project {project_id} not found")
return getattr(client, method)(*args, **kwargs) return getattr(client, method)(*args, **kwargs)

View File

@ -259,6 +259,10 @@ class GcsSource(StorageServiceSource):
try: try:
bucket = client.get_bucket(bucket_name) bucket = client.get_bucket(bucket_name)
except NotFound: except NotFound:
logger.warning(
f"Bucket {bucket_name} not found in project {project_id}"
)
self.status.warning(f"{project_id}.{bucket_name}", "Bucket Not Found")
continue continue
return GCSBucketResponse( return GCSBucketResponse(
name=bucket.name, name=bucket.name,

View File

@ -199,7 +199,7 @@ source:
Super secret key Super secret key
-----END PRIVATE KEY----- -----END PRIVATE KEY-----
clientEmail: role@project.iam.gserviceaccount.com clientEmail: role@project.iam.gserviceaccount.com
clientId: 1234 clientId: "1234"
# authUri: https://accounts.google.com/o/oauth2/auth (default) # authUri: https://accounts.google.com/o/oauth2/auth (default)
# tokenUri: https://oauth2.googleapis.com/token (default) # tokenUri: https://oauth2.googleapis.com/token (default)
# authProviderX509CertUrl: https://www.googleapis.com/oauth2/v1/certs (default) # authProviderX509CertUrl: https://www.googleapis.com/oauth2/v1/certs (default)

View File

@ -22,6 +22,12 @@ Configure and schedule GCS metadata workflows from the CLI:
## Requirements ## Requirements
To run the GCS ingestion, you will need to install:
```bash
pip3 install "openmetadata-ingestion[datalake-gcs]"
```
{%inlineCallout icon="description" bold="OpenMetadata 1.0 or later" href="/deployment"%} {%inlineCallout icon="description" bold="OpenMetadata 1.0 or later" href="/deployment"%}
To deploy OpenMetadata, check the Deployment guides. To deploy OpenMetadata, check the Deployment guides.
{%/inlineCallout%} {%/inlineCallout%}
@ -98,7 +104,8 @@ source:
config: config:
type: GCS type: GCS
credentials: credentials:
gcpConfig: <path to file> gcpConfig:
path: <path to file>
``` ```
- If you want to use [ADC authentication](https://cloud.google.com/docs/authentication#adc) for GCP you can just leave - If you want to use [ADC authentication](https://cloud.google.com/docs/authentication#adc) for GCP you can just leave
@ -162,7 +169,7 @@ source:
Super secret key Super secret key
-----END PRIVATE KEY----- -----END PRIVATE KEY-----
clientEmail: client@mail.com clientEmail: client@mail.com
clientId: 1234 clientId: "1234"
# authUri: https://accounts.google.com/o/oauth2/auth (default) # authUri: https://accounts.google.com/o/oauth2/auth (default)
# tokenUri: https://oauth2.googleapis.com/token (default) # tokenUri: https://oauth2.googleapis.com/token (default)
# authProviderX509CertUrl: https://www.googleapis.com/oauth2/v1/certs (default) # authProviderX509CertUrl: https://www.googleapis.com/oauth2/v1/certs (default)