mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-08 17:46:54 +00:00

Improves the documentation code. Standardizes the Unstructured API key. Replaces miscellaneous hard-coded values. Replaces `azureunstructured1` with a generic value.
28 lines
974 B
Python
28 lines
974 B
Python
import os

from unstructured.ingest.connector.reddit import RedditAccessConfig, SimpleRedditConfig
from unstructured.ingest.interfaces import PartitionConfig, ProcessorConfig, ReadConfig
from unstructured.ingest.runner import RedditRunner

# Example script: configure a RedditRunner for the "machinelearning" subreddit
# and run it. Credentials are read from the REDDIT_CLIENT_ID and
# REDDIT_CLIENT_SECRET environment variables; os.getenv yields None for any
# variable that is not set, and that None is passed through unchanged.
if __name__ == "__main__":
    # Processing options: verbose mode on, output directory name, and
    # num_processes=2 (presumably the worker-process count -- confirm against
    # ProcessorConfig).
    processing = ProcessorConfig(
        verbose=True,
        output_dir="reddit-ingest-output",
        num_processes=2,
    )

    # Read and partition stages use their default settings.
    reading = ReadConfig()
    partitioning = PartitionConfig()

    # The client secret is wrapped in its own access-config object; the client
    # id is passed directly to the connector config below.
    credentials = RedditAccessConfig(
        client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
    )

    # Source description: which subreddit to use, the search query, and
    # num_posts=10 (presumably an upper bound on fetched posts -- confirm
    # against SimpleRedditConfig).
    reddit_source = SimpleRedditConfig(
        access_config=credentials,
        subreddit_name="machinelearning",
        client_id=os.getenv("REDDIT_CLIENT_ID"),
        user_agent=r"Unstructured Ingest Subreddit fetcher by \\u\...",
        search_query="Unstructured",
        num_posts=10,
    )

    # Assemble the runner from the pieces above and start it.
    RedditRunner(
        processor_config=processing,
        read_config=reading,
        partition_config=partitioning,
        connector_config=reddit_source,
    ).run()