mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-04 07:27:34 +00:00

Adds MongoDB as a source (we already had it as a destination connector) --------- Co-authored-by: potter-potter <david.potter@gmail.com>
15 lines
547 B
Bash
Executable File
15 lines
547 B
Bash
Executable File
#!/usr/bin/env bash

# Example invocation of the unstructured ingest CLI with MongoDB as the source.
#
# Usage: run from the repository root. Both PYTHONPATH=. and the
# ./unstructured/ingest/main.py path are relative to the current directory.
# Replace every "<...>" placeholder below with a real value first.
#
# Structured outputs are stored in mongodb-ingest-output
# NOTE(review): no output-location flag is passed below, so the directory name
# above is presumably a CLI default -- confirm against the connector docs.
# NOTE(review): --uri and --host/--port look like alternative ways to address
# the server -- confirm whether all three should be supplied together.

PYTHONPATH=. ./unstructured/ingest/main.py \
  mongodb \
  --uri "<MongoDB hosted uri>" \
  --database "<MongoDB database>" \
  --collection "<Collection name to ingest data from>" \
  --host "<Host where mongodb database is served>" \
  --port "<Port where mongodb database is served>" \
  --batch-size "<How many records to read at a time per process>" \
  --num-processes "<Number of processes to be used to download, ie. 2>"