diff --git a/CHANGELOG.md b/CHANGELOG.md index c9f51a997..9c247b646 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +## 0.4.17-dev0 + +### Enhancements + +### Features + +### Fixes + ## 0.4.16 ### Enhancements diff --git a/requirements/base.txt b/requirements/base.txt index 70eb3c4e9..7c0b47616 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -20,10 +20,6 @@ charset-normalizer==3.0.1 # via requests click==8.1.3 # via nltk -colorama==0.4.6 - # via - # click - # tqdm deprecated==1.2.13 # via argilla et-xmlfile==1.1.0 diff --git a/requirements/build.txt b/requirements/build.txt index cb7fbcb77..b24835d4b 100644 --- a/requirements/build.txt +++ b/requirements/build.txt @@ -16,8 +16,6 @@ certifi==2022.12.7 # requests charset-normalizer==3.0.1 # via requests -colorama==0.4.6 - # via sphinx docutils==0.18.1 # via # sphinx diff --git a/requirements/dev.txt b/requirements/dev.txt index 1963a6715..437271ed3 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -6,6 +6,10 @@ # anyio==3.6.2 # via jupyter-server +appnope==0.1.3 + # via + # ipykernel + # ipython argon2-cffi==21.3.0 # via # jupyter-server @@ -31,11 +35,6 @@ cffi==1.15.1 # via argon2-cffi-bindings click==8.1.3 # via pip-tools -colorama==0.4.6 - # via - # build - # click - # ipython comm==0.1.2 # via ipykernel debugpy==1.6.6 @@ -108,7 +107,7 @@ jupyter-client==8.0.3 # nbclient # notebook # qtconsole -jupyter-console==6.6.1 +jupyter-console==6.6.2 # via jupyter jupyter-core==5.2.0 # via @@ -182,6 +181,8 @@ pandocfilters==1.5.0 # via nbconvert parso==0.8.3 # via jedi +pexpect==4.8.0 + # via ipython pickleshare==0.7.5 # via ipython pip-tools==6.12.2 @@ -201,6 +202,10 @@ prompt-toolkit==3.0.37 # jupyter-console psutil==5.9.4 # via ipykernel +ptyprocess==0.7.0 + # via + # pexpect + # terminado pure-eval==0.2.2 # via stack-data pycparser==2.21 diff --git a/requirements/huggingface.txt b/requirements/huggingface.txt index b39cf0a83..48a56af3b 100644 --- a/requirements/huggingface.txt +++ b/requirements/huggingface.txt @@ -22,10 +22,6 @@ click==8.1.3 # via # nltk # sacremoses -colorama==0.4.6 - # via - # click - # tqdm deprecated==1.2.13 # via argilla et-xmlfile==1.1.0 diff --git a/requirements/ingest-github.txt b/requirements/ingest-github.txt index d0bed21bc..e4d29f8f7 100644 --- a/requirements/ingest-github.txt +++ b/requirements/ingest-github.txt @@ -8,7 +8,7 @@ anyio==3.6.2 # via # -r requirements/base.txt # httpcore -argilla==1.3.0 +argilla==1.3.1 # via # -r requirements/base.txt # unstructured (setup.py) @@ -33,10 +33,6 @@ click==8.1.3 # via # -r requirements/base.txt # nltk -colorama==0.4.6 - # via - # click - # tqdm deprecated==1.2.13 # via # -r requirements/base.txt @@ -64,6 +60,10 @@ idna==3.4 # anyio # requests # rfc3986 +importlib-metadata==6.0.0 + # via + # -r requirements/base.txt + # markdown joblib==1.2.0 # via # -r requirements/base.txt @@ -74,6 +74,10 @@ lxml==4.9.2 # python-docx # python-pptx # unstructured (setup.py) +markdown==3.4.1 + # via + # -r requirements/base.txt + # unstructured (setup.py) monotonic==1.6 # via # -r requirements/base.txt @@ -107,7 +111,7 @@ pillow==9.4.0 # unstructured (setup.py) pycparser==2.21 # via cffi -pydantic==1.10.4 +pydantic==1.10.5 # via # -r requirements/base.txt # argilla @@ -165,7 +169,7 @@ tqdm==4.64.1 # -r requirements/base.txt # argilla # nltk -typing-extensions==4.4.0 +typing-extensions==4.5.0 # via # -r requirements/base.txt # pydantic @@ -182,3 +186,7 @@ xlsxwriter==3.0.8 # via # -r requirements/base.txt # python-pptx +zipp==3.15.0 + # via + # -r requirements/base.txt + # importlib-metadata diff --git a/requirements/ingest-reddit.txt b/requirements/ingest-reddit.txt index 7015cffd1..4f1089aab 100644 --- a/requirements/ingest-reddit.txt +++ b/requirements/ingest-reddit.txt @@ -57,6 +57,10 @@ idna==3.4 # anyio # requests # rfc3986 +importlib-metadata==6.0.0 + # via + # -r requirements/base.txt + # markdown joblib==1.2.0 # via # -r requirements/base.txt @@ -67,6 +71,10 @@ lxml==4.9.2 # python-docx # python-pptx # unstructured (setup.py) +markdown==3.4.1 + # via + # -r requirements/base.txt + # unstructured (setup.py) monotonic==1.6 # via # -r requirements/base.txt @@ -176,3 +184,7 @@ xlsxwriter==3.0.8 # via # -r requirements/base.txt # python-pptx +zipp==3.15.0 + # via + # -r requirements/base.txt + # importlib-metadata diff --git a/requirements/ingest-s3.txt b/requirements/ingest-s3.txt index c8cf5107f..82e0057c5 100644 --- a/requirements/ingest-s3.txt +++ b/requirements/ingest-s3.txt @@ -16,9 +16,9 @@ backoff==2.2.1 # via # -r requirements/base.txt # argilla -boto3==1.26.79 +boto3==1.26.80 # via unstructured (setup.py) -botocore==1.29.79 +botocore==1.29.80 # via # boto3 # s3transfer @@ -37,11 +37,6 @@ click==8.1.3 # via # -r requirements/base.txt # nltk -colorama==0.4.6 - # via - # -r requirements/base.txt - # click - # tqdm deprecated==1.2.13 # via # -r requirements/base.txt diff --git a/requirements/local-inference.txt b/requirements/local-inference.txt index 19e87a269..7ae718d60 100644 --- a/requirements/local-inference.txt +++ b/requirements/local-inference.txt @@ -30,10 +30,6 @@ click==8.1.3 # via # nltk # uvicorn -colorama==0.4.6 - # via - # click - # tqdm coloredlogs==15.0.1 # via onnxruntime contourpy==1.0.7 @@ -122,7 +118,7 @@ numpy==1.23.5 # transformers omegaconf==2.3.0 # via effdet -onnxruntime==1.13.1 +onnxruntime==1.14.1 # via unstructured-inference opencv-python==4.6.0.66 # via @@ -173,8 +169,6 @@ pydantic==1.10.5 # fastapi pyparsing==3.0.9 # via matplotlib -pyreadline3==3.4.1 - # via humanfriendly pytesseract==0.3.10 # via layoutparser python-dateutil==2.8.2 @@ -272,4 +266,6 @@ wrapt==1.14.1 xlsxwriter==3.0.8 # via python-pptx zipp==3.15.0 - # via importlib-resources + # via + # importlib-metadata + # importlib-resources diff --git a/test_unstructured_ingest/test-ingest-github.sh b/test_unstructured_ingest/test-ingest-github.sh index bde3359be..552891868 100755 --- a/test_unstructured_ingest/test-ingest-github.sh +++ b/test_unstructured_ingest/test-ingest-github.sh @@ -3,6 +3,13 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) cd "$SCRIPT_DIR"/.. || exit 1 +if [[ "$CI" == "true" && "$(( RANDOM % 10))" != "1" ]]; then + # NOTE(crag): proper fix is being tracked here: https://github.com/Unstructured-IO/unstructured/issues/306 + echo "Skipping ingest 90% of github ingest tests to avoid rate limiting issue." + exit 0 +fi + + PYTHONPATH=. ./unstructured/ingest/main.py --github-url dcneiner/Downloadify --github-file-glob '*.html,*.txt' --structured-output-dir github-downloadify-output --verbose if ! diff -ru github-downloadify-output test_unstructured_ingest/expected-structured-output/github-downloadify ; then @@ -10,9 +17,9 @@ if ! diff -ru github-downloadify-output test_unstructured_ingest/expected-struct echo "There are differences from the previously checked-in structured outputs." echo echo "If these differences are acceptable, copy the outputs from" - echo "s3-small-batch-output/ to test_unstructured_ingest/expected-structured-output/s3-small-batch/ after running" + echo "github-downloadify-output/ to test_unstructured_ingest/expected-structured-output/github-downloadify/ after running" echo - echo " PYTHONPATH=. python examples/ingest/s3-small-batch/main.py --structured-output-dir s3-small-batch-output" + echo " PYTHONPATH=. ./unstructured/ingest/main.py --github-url dcneiner/Downloadify --github-file-glob '*.html,*.txt' --structured-output-dir github-downloadify-output --verbose" echo exit 1 fi diff --git a/unstructured/__version__.py b/unstructured/__version__.py index c2f7fdb40..919ea7d9d 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.4.16" # pragma: no cover +__version__ = "0.4.17-dev0" # pragma: no cover