mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00

### Description Given the filtering in the ingest logger, anything going to console should go through that. This adds a linter that only checks for `print()` statements in the ingest code and ignored it elsewhere for now.
55 lines
1.8 KiB
Python
55 lines
1.8 KiB
Python
#!/usr/bin/env python
|
|
import click
|
|
from azure.storage.blob import ContainerClient
|
|
|
|
|
|
@click.group(name="azure-ingest")
|
|
def cli():
|
|
pass
|
|
|
|
|
|
@cli.command()
|
|
@click.option("--connection-string", type=str, required=True)
|
|
@click.option("--container", type=str, required=True)
|
|
@click.option("--blob-path", type=str, required=True)
|
|
def down(connection_string: str, container: str, blob_path: str):
|
|
container_client = ContainerClient.from_connection_string(
|
|
conn_str=connection_string, container_name=container
|
|
)
|
|
blob_list = [b.name for b in list(container_client.list_blobs(name_starts_with=blob_path))]
|
|
print(f"deleting all content from {container}/{blob_path}")
|
|
# Delete all content in folder first
|
|
container_client.delete_blobs(*[b for b in blob_list if b != blob_path])
|
|
|
|
# Delete folder itself
|
|
container_client.delete_blob(blob_path)
|
|
|
|
|
|
@cli.command()
|
|
@click.option("--connection-string", type=str, required=True)
|
|
@click.option("--container", type=str, required=True)
|
|
@click.option("--blob-path", type=str, required=True)
|
|
@click.option("--expected-files", type=int, required=True)
|
|
def check(connection_string: str, container: str, blob_path: str, expected_files: int):
|
|
container_client = ContainerClient.from_connection_string(
|
|
conn_str=connection_string, container_name=container
|
|
)
|
|
blob_json_list = [
|
|
b.name
|
|
for b in list(container_client.list_blobs(name_starts_with=blob_path))
|
|
if b.name.endswith("json")
|
|
]
|
|
found = len(blob_json_list)
|
|
print(
|
|
f"Checking that the number of files found ({found}) "
|
|
f"matches what's expected: {expected_files}"
|
|
)
|
|
assert (
|
|
found == expected_files
|
|
), f"number of files found ({found}) doesn't match what's expected: {expected_files}"
|
|
print("successfully checked the number of files!")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
cli()
|