mirror of
				https://github.com/Unstructured-IO/unstructured.git
				synced 2025-10-31 01:54:25 +00:00 
			
		
		
		
	 ead2a7f1eb
			
		
	
	
		ead2a7f1eb
		
			
		
	
	
	
	
		
			
			### Description To avoid requiring additional dependencies on cloud-related CLIs (i.e. gcloud and az), use Python and the existing dependencies already used to run our code to interact with those providers for the overhead work associated with destination ingest tests.
		
			
				
	
	
		
			56 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			56 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python
 | |
| import click
 | |
| from google.cloud import storage
 | |
| from google.oauth2 import service_account
 | |
| 
 | |
| 
 | |
@click.group(name="gcs-ingest")
def cli():
    """Helper commands for Google Cloud Storage blobs written by ingest runs."""
    # The group callback intentionally does nothing: it only serves as the
    # entry point that the subcommands below register themselves on. The
    # docstring above is what click prints for `gcs-ingest --help`.
 | |
| 
 | |
| 
 | |
# Subcommand `down`: delete every blob in --bucket whose name starts with
# --blob-path, authenticating with the given service-account key file.
# (Documented with comments rather than a docstring so that the text click
# prints for `--help` stays exactly as it was.)
@cli.command()
@click.option("--service-account-file", type=click.Path(), required=True)
@click.option("--bucket", type=str, required=True)
@click.option("--blob-path", type=str, required=True)
def down(service_account_file: str, bucket: str, blob_path: str):
    # Build an authenticated client straight from the key file.
    client = storage.Client(
        credentials=service_account.Credentials.from_service_account_file(
            filename=service_account_file
        )
    )

    # Announce each blob before removing it so the log shows what was deleted.
    matching_blobs = client.list_blobs(bucket_or_name=bucket, prefix=blob_path)
    for blob in matching_blobs:
        print(f"deleting {blob.name}")
        blob.delete()
 | |
| 
 | |
| 
 | |
@cli.command()
@click.option("--service-account-file", type=click.Path(), required=True)
@click.option("--bucket", type=str, required=True)
@click.option("--blob-path", type=str, required=True)
@click.option("--expected-files", type=int, required=True)
def check(service_account_file: str, bucket: str, blob_path: str, expected_files: int):
    """Check that the number of JSON blobs under a prefix matches the expected count.

    Fails with an AssertionError (non-zero exit) when the counts differ.
    """
    credentials = service_account.Credentials.from_service_account_file(
        filename=service_account_file
    )

    storage_client = storage.Client(credentials=credentials)
    # NOTE(review): this matches any blob name ending in "json" (not only
    # ".json") -- left unchanged; confirm whether ".json" was intended.
    blob_json_list = [
        f.name
        for f in storage_client.list_blobs(bucket_or_name=bucket, prefix=blob_path)
        if f.name.endswith("json")
    ]
    found = len(blob_json_list)
    print(
        f"Checking that the number of files found ({found}) "
        f"matches what's expected: {expected_files}"
    )
    # Raise explicitly instead of using an `assert` statement: asserts are
    # removed when Python runs with -O / PYTHONOPTIMIZE, which would make this
    # check silently pass. The exception type and message are unchanged.
    if found != expected_files:
        raise AssertionError(
            f"number of files found ({found}) doesn't match what's expected: {expected_files}"
        )
    print("successfully checked the number of files!")
 | |
| 
 | |
| 
 | |
if __name__ == "__main__":
    # Run the click group only when executed as a script, so importing this
    # module does not trigger argument parsing.
    cli()
 |