From 782b4352ec48202e0c86973f52b14ac90a83ebc0 Mon Sep 17 00:00:00 2001 From: Matt Robinson Date: Mon, 6 Feb 2023 11:39:29 -0500 Subject: [PATCH] build(deps): weekly dependency update; reduce dependabot frequency (#194) * deps: pip-compile to update dependencies * bump version * linting, linting, linting * typo --- .github/dependabot.yml | 4 +-- CHANGELOG.md | 2 ++ docs/requirements.txt | 4 +-- requirements/base.txt | 4 +-- requirements/build.txt | 4 +-- requirements/dev.txt | 13 +++++----- requirements/huggingface.txt | 4 +-- requirements/local-inference.txt | 26 ++++++++++++++++--- requirements/test.in | 5 +++- requirements/test.txt | 8 +++--- .../staging/test_base_staging.py | 1 - test_unstructured/staging/test_huggingface.py | 1 - unstructured/__version__.py | 2 +- unstructured/partition/email.py | 3 --- unstructured/partition/text.py | 1 - 15 files changed, 52 insertions(+), 30 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 26108f1ea..917cfa312 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -3,11 +3,11 @@ updates: - package-ecosystem: "pip" directory: "/requirements" schedule: - interval: "weekly" + interval: "monthly" - package-ecosystem: "github-actions" # NOTE(robinson) - Workflow files stored in the # default location of `.github/workflows` directory: "/" schedule: - interval: "weekly" + interval: "monthly" diff --git a/CHANGELOG.md b/CHANGELOG.md index f069e7524..58cfc477a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,5 @@ +## 0.4.7-dev0 + ## 0.4.6 * Loosen the default cap threshold to `0.5`. diff --git a/docs/requirements.txt b/docs/requirements.txt index 3d6a859ee..a44c3900a 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -48,7 +48,7 @@ sphinxcontrib-applehelp==1.0.4 # via sphinx sphinxcontrib-devhelp==1.0.2 # via sphinx -sphinxcontrib-htmlhelp==2.0.0 +sphinxcontrib-htmlhelp==2.0.1 # via sphinx sphinxcontrib-jquery==3.0.0 # via sphinx-rtd-theme @@ -60,7 +60,7 @@ sphinxcontrib-serializinghtml==1.1.5 # via sphinx urllib3==1.26.14 # via requests -zipp==3.12.0 +zipp==3.12.1 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: diff --git a/requirements/base.txt b/requirements/base.txt index 700cbfd82..3717b48e1 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -50,7 +50,7 @@ numpy==1.23.5 # via # argilla # pandas -openpyxl==3.0.10 +openpyxl==3.1.0 # via unstructured (setup.py) packaging==23.0 # via argilla @@ -99,5 +99,5 @@ wrapt==1.14.1 # via # argilla # deprecated -xlsxwriter==3.0.7 +xlsxwriter==3.0.8 # via python-pptx diff --git a/requirements/build.txt b/requirements/build.txt index 3d6a859ee..a44c3900a 100644 --- a/requirements/build.txt +++ b/requirements/build.txt @@ -48,7 +48,7 @@ sphinxcontrib-applehelp==1.0.4 # via sphinx sphinxcontrib-devhelp==1.0.2 # via sphinx -sphinxcontrib-htmlhelp==2.0.0 +sphinxcontrib-htmlhelp==2.0.1 # via sphinx sphinxcontrib-jquery==3.0.0 # via sphinx-rtd-theme @@ -60,7 +60,7 @@ sphinxcontrib-serializinghtml==1.1.5 # via sphinx urllib3==1.26.14 # via requests -zipp==3.12.0 +zipp==3.12.1 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: diff --git a/requirements/dev.txt b/requirements/dev.txt index d8f253fca..0ea310169 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -25,7 +25,7 @@ attrs==22.2.0 # via jsonschema backcall==0.2.0 # via ipython -beautifulsoup4==4.11.1 +beautifulsoup4==4.11.2 # via nbconvert bleach==6.0.0 # via nbconvert @@ -59,7 +59,7 @@ importlib-metadata==6.0.0 # nbconvert importlib-resources==5.10.2 # via jsonschema -ipykernel==6.21.0 +ipykernel==6.21.1 # via # ipywidgets # jupyter @@ -123,7 +123,7 @@ jupyter-core==5.2.0 # qtconsole jupyter-events==0.6.3 # via jupyter-server -jupyter-server==2.1.0 +jupyter-server==2.2.1 # via # nbclassic # notebook-shim @@ -162,6 +162,7 @@ nbformat==5.7.3 # notebook nest-asyncio==1.5.6 # via + # ipykernel # nbclassic # notebook notebook==6.5.2 @@ -183,7 +184,7 @@ pexpect==4.8.0 # via ipython pickleshare==0.7.5 # via ipython -pip-tools==6.12.1 +pip-tools==6.12.2 # via -r requirements/dev.in pkgutil-resolve-name==1.3.10 # via jsonschema @@ -310,7 +311,7 @@ webencodings==0.5.1 # via # bleach # tinycss2 -websocket-client==1.5.0 +websocket-client==1.5.1 # via jupyter-server wheel==0.38.4 # via @@ -318,7 +319,7 @@ wheel==0.38.4 # pip-tools widgetsnbextension==4.0.5 # via ipywidgets -zipp==3.12.0 +zipp==3.12.1 # via # importlib-metadata # importlib-resources diff --git a/requirements/huggingface.txt b/requirements/huggingface.txt index 3f5a554a6..d00f8b69b 100644 --- a/requirements/huggingface.txt +++ b/requirements/huggingface.txt @@ -63,7 +63,7 @@ numpy==1.23.5 # argilla # pandas # transformers -openpyxl==3.0.10 +openpyxl==3.1.0 # via unstructured (setup.py) packaging==23.0 # via @@ -144,5 +144,5 @@ wrapt==1.14.1 # via # argilla # deprecated -xlsxwriter==3.0.7 +xlsxwriter==3.0.8 # via python-pptx diff --git a/requirements/local-inference.txt b/requirements/local-inference.txt index e3d880deb..ce5ab0549 100644 --- a/requirements/local-inference.txt +++ b/requirements/local-inference.txt @@ -30,6 +30,8 @@ click==8.1.3 # via # nltk # uvicorn +coloredlogs==15.0.1 + # via onnxruntime contourpy==1.0.7 # via matplotlib cryptography==39.0.0 @@ -46,6 +48,8 @@ fastapi==0.89.1 # via unstructured-inference filelock==3.9.0 # via huggingface-hub +flatbuffers==23.1.21 + # via onnxruntime fonttools==4.38.0 # via matplotlib h11==0.14.0 @@ -60,6 +64,8 @@ huggingface-hub==0.12.0 # via # timm # unstructured-inference +humanfriendly==10.0 + # via coloredlogs idna==3.4 # via # anyio @@ -69,6 +75,8 @@ iopath==0.1.10 # via layoutparser joblib==1.2.0 # via nltk +jsons==1.6.3 + # via unstructured-inference kiwisolver==1.4.4 # via matplotlib layoutparser[layoutmodels,tesseract]==0.3.4 @@ -82,6 +90,8 @@ matplotlib==3.6.3 # via pycocotools monotonic==1.6 # via argilla +mpmath==1.2.1 + # via sympy nltk==3.8.1 # via unstructured (setup.py) numpy==1.23.5 @@ -90,6 +100,7 @@ numpy==1.23.5 # contourpy # layoutparser # matplotlib + # onnxruntime # opencv-python # pandas # pycocotools @@ -97,17 +108,20 @@ numpy==1.23.5 # torchvision omegaconf==2.3.0 # via effdet +onnxruntime==1.13.1 + # via unstructured-inference opencv-python==4.6.0.66 # via # layoutparser # unstructured-inference -openpyxl==3.0.10 +openpyxl==3.1.0 # via unstructured (setup.py) packaging==23.0 # via # argilla # huggingface-hub # matplotlib + # onnxruntime # pytesseract pandas==1.5.3 # via @@ -132,6 +146,8 @@ pillow==9.4.0 # unstructured (setup.py) portalocker==2.7.0 # via iopath +protobuf==4.21.12 + # via onnxruntime pycocotools==2.0.6 # via effdet pycparser==2.21 @@ -186,6 +202,8 @@ sniffio==1.3.0 # httpx starlette==0.22.0 # via fastapi +sympy==1.11.1 + # via onnxruntime timm==0.6.12 # via effdet torch==1.13.1 @@ -213,7 +231,9 @@ typing-extensions==4.4.0 # starlette # torch # torchvision -unstructured-inference==0.2.4 +typish==1.9.3 + # via jsons +unstructured-inference==0.2.5 # via unstructured (setup.py) urllib3==1.26.14 # via requests @@ -225,5 +245,5 @@ wrapt==1.14.1 # via # argilla # deprecated -xlsxwriter==3.0.7 +xlsxwriter==3.0.8 # via python-pptx diff --git a/requirements/test.in b/requirements/test.in index 52932e804..bdeba9a08 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -7,7 +7,10 @@ click>=8.1 flake8 mypy pytest-cov -label_studio_sdk +# NOTE(robinson) - Currently tests do not pass with 0.0.18. Added the following +# issue to address +# ref: https://github.com/Unstructured-IO/unstructured/issues/200 +label_studio_sdk==0.0.17 vcrpy # NOTE(robinson) - The following pins are to address diff --git a/requirements/test.txt b/requirements/test.txt index 526c8f288..b9e095bd8 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -6,7 +6,7 @@ # attrs==22.2.0 # via pytest -black==22.12.0 +black==23.1.0 # via -r requirements/test.in certifi==2022.12.7 # via @@ -42,12 +42,14 @@ multidict==6.0.4 # via yarl mypy==0.991 # via -r requirements/test.in -mypy-extensions==0.4.3 +mypy-extensions==1.0.0 # via # black # mypy packaging==23.0 - # via pytest + # via + # black + # pytest pathspec==0.11.0 # via black platformdirs==2.6.2 diff --git a/test_unstructured/staging/test_base_staging.py b/test_unstructured/staging/test_base_staging.py index bff32b39a..19a53b976 100644 --- a/test_unstructured/staging/test_base_staging.py +++ b/test_unstructured/staging/test_base_staging.py @@ -43,7 +43,6 @@ def test_isd_to_elements(): def test_convert_to_isd_csv(output_csv_file): - elements = [Title(text="Title 1"), NarrativeText(text="Narrative 1")] with open(output_csv_file, "w+") as csv_file: isd_csv_string = base.convert_to_isd_csv(elements) diff --git a/test_unstructured/staging/test_huggingface.py b/test_unstructured/staging/test_huggingface.py index 169415145..cb95bf17c 100644 --- a/test_unstructured/staging/test_huggingface.py +++ b/test_unstructured/staging/test_huggingface.py @@ -5,7 +5,6 @@ import unstructured.staging.huggingface as huggingface class MockTokenizer: - model_max_length = 20 def tokenize(self, text): diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 62e1ee83a..b3a836b02 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.4.6" # pragma: no cover +__version__ = "0.4.7-dev0" # pragma: no cover diff --git a/unstructured/partition/email.py b/unstructured/partition/email.py index c6dca905d..7e5b0b4a6 100644 --- a/unstructured/partition/email.py +++ b/unstructured/partition/email.py @@ -33,7 +33,6 @@ VALID_CONTENT_SOURCES: Final[List[str]] = ["text/html", "text/plain"] def _parse_received_data(data: str) -> List[Element]: - ip_address_names = extract_ip_address_name(data) ip_addresses = extract_ip_address(data) mapi_id = extract_mapi_id(data) @@ -111,7 +110,6 @@ def extract_attachment_info( def has_embedded_image(element): - PATTERN = re.compile("\[image: .+\]") # noqa: W605 NOTE(harrell) return PATTERN.search(element.text) @@ -119,7 +117,6 @@ def has_embedded_image(element): def find_embedded_image( element: Union[NarrativeText, Title], indices: re.Match ) -> Tuple[Element, Element]: - start, end = indices.start(), indices.end() image_raw_info = element.text[start:end] diff --git a/unstructured/partition/text.py b/unstructured/partition/text.py index d3fc38cd6..0b4c0d2de 100644 --- a/unstructured/partition/text.py +++ b/unstructured/partition/text.py @@ -53,7 +53,6 @@ def partition_text( elements: List[Element] = list() for ctext in file_content: - ctext = ctext.strip() if ctext == "":