2024-04-25 10:55:15 -07:00
|
|
|
import os
|
|
|
|
|
2023-12-11 16:29:41 -05:00
|
|
|
from unstructured.ingest.connector.fsspec.azure import (
|
|
|
|
AzureAccessConfig,
|
|
|
|
SimpleAzureBlobStorageConfig,
|
|
|
|
)
|
|
|
|
from unstructured.ingest.interfaces import (
|
|
|
|
PartitionConfig,
|
|
|
|
ProcessorConfig,
|
|
|
|
ReadConfig,
|
|
|
|
)
|
|
|
|
from unstructured.ingest.runner import AzureRunner
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Example entry point: assemble each ingest configuration as a named
    # value, hand them all to AzureRunner, then start the run.

    # Processor settings: verbose output, the output directory, and the
    # number of processes to use.
    processor_settings = ProcessorConfig(
        verbose=True,
        output_dir="azure-ingest-output",
        num_processes=2,
    )

    # Read and partition steps use their default configuration.
    read_settings = ReadConfig()
    partition_settings = PartitionConfig()

    # The storage account name comes from the environment; os.getenv yields
    # None when AZURE_ACCOUNT_NAME is not set.
    azure_access = AzureAccessConfig(
        account_name=os.getenv("AZURE_ACCOUNT_NAME"),
    )

    # Connector pointing at the remote container URL.
    azure_connector = SimpleAzureBlobStorageConfig(
        access_config=azure_access,
        remote_url="abfs://container1/",
    )

    runner = AzureRunner(
        processor_config=processor_settings,
        read_config=read_settings,
        partition_config=partition_settings,
        connector_config=azure_connector,
    )
    runner.run()
|