mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-03 23:20:35 +00:00
20 lines
672 B
Python
20 lines
672 B
Python
"""Example script: run the Wikipedia ingest runner for a single page.

When executed directly, this builds a ``WikipediaRunner`` configured for the
page titled "Open Source Software" and calls its ``run()`` method. Importing
the module has no effect beyond the imports themselves.
"""
from unstructured.ingest.connector.wikipedia import SimpleWikipediaConfig
from unstructured.ingest.interfaces import PartitionConfig, ProcessorConfig, ReadConfig
from unstructured.ingest.runner import WikipediaRunner

if __name__ == "__main__":
    runner = WikipediaRunner(
        # Processing options: verbose mode, a named output directory, and
        # two processes.
        # NOTE(review): presumably results are written under `output_dir`
        # relative to the current working directory -- confirm against the
        # runner implementation.
        processor_config=ProcessorConfig(
            verbose=True,
            output_dir="wikipedia-ingest-output",
            num_processes=2,
        ),
        # Read and partition settings are constructed with no arguments,
        # i.e. whatever defaults those classes define.
        read_config=ReadConfig(),
        partition_config=PartitionConfig(),
        # The page is selected by title, with `auto_suggest` turned off.
        connector_config=SimpleWikipediaConfig(
            page_title="Open Source Software",
            auto_suggest=False,
        ),
    )
    runner.run()