Mirror of https://github.com/Unstructured-IO/unstructured.git (synced 2025-07-04 07:27:34 +00:00)
26 lines
749 B
Python
26 lines
749 B
Python
![]() |
from unstructured.ingest.connector.fsspec.s3 import S3AccessConfig, SimpleS3Config
|
||
|
from unstructured.ingest.interfaces import (
|
||
|
PartitionConfig,
|
||
|
ProcessorConfig,
|
||
|
ReadConfig,
|
||
|
)
|
||
|
from unstructured.ingest.runner import S3Runner
|
||
|
|
||
|
if __name__ == "__main__":
    # Example script: assemble an S3Runner from its four config objects and
    # run it. Each config is built separately so the pieces are easy to tweak.

    # Processing options: verbose output, two processes, and the output
    # directory name passed as `output_dir`.
    processor_config = ProcessorConfig(
        verbose=True,
        output_dir="s3-small-batch-output",
        num_processes=2,
    )

    # Read and partition settings are constructed with no arguments.
    read_config = ReadConfig()
    partition_config = PartitionConfig()

    # S3 connector settings: the remote URL to read from, plus an access
    # config created with anon=True.
    # NOTE(review): anon=True presumably means unauthenticated access to a
    # public bucket -- confirm against the S3AccessConfig documentation.
    s3_access = S3AccessConfig(anon=True)
    connector_config = SimpleS3Config(
        access_config=s3_access,
        remote_url="s3://utic-dev-tech-fixtures/small-pdf-set/",
    )

    runner = S3Runner(
        processor_config=processor_config,
        read_config=read_config,
        partition_config=partition_config,
        connector_config=connector_config,
    )
    runner.run()
|