mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00

### Description To avoid requiring additional dependencies on cloud-related CLIs (i.e. gcloud and az), use Python and the existing dependencies already used to run our code to interact with those providers for the overhead work associated with destination ingest tests.
56 lines
1.8 KiB
Python
56 lines
1.8 KiB
Python
#!/usr/bin/env python
|
|
import click
|
|
from google.cloud import storage
|
|
from google.oauth2 import service_account
|
|
|
|
|
|
@click.group(name="gcs-ingest")
def cli():
    # Root click group named "gcs-ingest"; the subcommands attached to it do
    # the real work, so the group callback itself has nothing to do.
    # (Kept as a comment, not a docstring, so the generated --help text is
    # unchanged.)
    return None
|
|
|
|
|
|
@cli.command()
@click.option("--service-account-file", type=click.Path(), required=True)
@click.option("--bucket", type=str, required=True)
@click.option("--blob-path", type=str, required=True)
def down(service_account_file: str, bucket: str, blob_path: str):
    # Tear-down helper: delete every blob listed in `bucket` under the
    # `blob_path` prefix, printing each blob name before it is removed.
    #
    # service_account_file: path to the service-account key file used to
    #     build the credentials.
    # bucket: bucket name passed to list_blobs.
    # blob_path: prefix passed to list_blobs to select the blobs to delete.
    #
    # (Comments are used instead of a docstring so click's --help output is
    # unchanged.)
    creds = service_account.Credentials.from_service_account_file(
        filename=service_account_file
    )
    client = storage.Client(credentials=creds)

    matching_blobs = client.list_blobs(bucket_or_name=bucket, prefix=blob_path)
    for entry in matching_blobs:
        print(f"deleting {entry.name}")
        entry.delete()
|
|
|
|
|
|
@cli.command()
@click.option("--service-account-file", type=click.Path(), required=True)
@click.option("--bucket", type=str, required=True)
@click.option("--blob-path", type=str, required=True)
@click.option("--expected-files", type=int, required=True)
def check(service_account_file: str, bucket: str, blob_path: str, expected_files: int):
    # Verify that the number of blobs listed in `bucket` under the `blob_path`
    # prefix whose names end with "json" equals `expected_files`.
    #
    # service_account_file: path to the service-account key file used to
    #     build the credentials.
    # bucket: bucket name passed to list_blobs.
    # blob_path: prefix passed to list_blobs.
    # expected_files: the exact count of matching blobs required to succeed.
    #
    # Raises AssertionError when the counts differ.
    #
    # (Comments are used instead of a docstring so click's --help output is
    # unchanged.)
    credentials = service_account.Credentials.from_service_account_file(
        filename=service_account_file
    )
    storage_client = storage.Client(credentials=credentials)

    # Only the count is needed, so tally matches instead of collecting names.
    found = sum(
        1
        for blob in storage_client.list_blobs(bucket_or_name=bucket, prefix=blob_path)
        if blob.name.endswith("json")
    )
    print(
        f"Checking that the number of files found ({found}) "
        f"matches what's expected: {expected_files}"
    )

    # Raise explicitly rather than using an `assert` statement: asserts are
    # stripped when Python runs with -O / PYTHONOPTIMIZE, which would make this
    # check silently pass. The exception type and message are unchanged.
    if found != expected_files:
        raise AssertionError(
            f"number of files found ({found}) doesn't match what's expected: {expected_files}"
        )
    print("successfully checked the number of files!")
|
|
|
|
|
|
# Script entry point: dispatch to the click group only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    cli()
|