mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-12-26 14:45:31 +00:00
build(deps): weekly dependency update; reduce dependabot frequency (#194)
* deps: pip-compile to update dependencies * bump version * linting, linting, linting * typo
This commit is contained in:
parent
014585e872
commit
782b4352ec
4
.github/dependabot.yml
vendored
4
.github/dependabot.yml
vendored
@ -3,11 +3,11 @@ updates:
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/requirements"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
interval: "monthly"
|
||||
|
||||
- package-ecosystem: "github-actions"
|
||||
# NOTE(robinson) - Workflow files stored in the
|
||||
# default location of `.github/workflows`
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
interval: "monthly"
|
||||
|
||||
@ -1,3 +1,5 @@
|
||||
## 0.4.7-dev0
|
||||
|
||||
## 0.4.6
|
||||
|
||||
* Loosen the default cap threshold to `0.5`.
|
||||
|
||||
@ -48,7 +48,7 @@ sphinxcontrib-applehelp==1.0.4
|
||||
# via sphinx
|
||||
sphinxcontrib-devhelp==1.0.2
|
||||
# via sphinx
|
||||
sphinxcontrib-htmlhelp==2.0.0
|
||||
sphinxcontrib-htmlhelp==2.0.1
|
||||
# via sphinx
|
||||
sphinxcontrib-jquery==3.0.0
|
||||
# via sphinx-rtd-theme
|
||||
@ -60,7 +60,7 @@ sphinxcontrib-serializinghtml==1.1.5
|
||||
# via sphinx
|
||||
urllib3==1.26.14
|
||||
# via requests
|
||||
zipp==3.12.0
|
||||
zipp==3.12.1
|
||||
# via importlib-metadata
|
||||
|
||||
# The following packages are considered to be unsafe in a requirements file:
|
||||
|
||||
@ -50,7 +50,7 @@ numpy==1.23.5
|
||||
# via
|
||||
# argilla
|
||||
# pandas
|
||||
openpyxl==3.0.10
|
||||
openpyxl==3.1.0
|
||||
# via unstructured (setup.py)
|
||||
packaging==23.0
|
||||
# via argilla
|
||||
@ -99,5 +99,5 @@ wrapt==1.14.1
|
||||
# via
|
||||
# argilla
|
||||
# deprecated
|
||||
xlsxwriter==3.0.7
|
||||
xlsxwriter==3.0.8
|
||||
# via python-pptx
|
||||
|
||||
@ -48,7 +48,7 @@ sphinxcontrib-applehelp==1.0.4
|
||||
# via sphinx
|
||||
sphinxcontrib-devhelp==1.0.2
|
||||
# via sphinx
|
||||
sphinxcontrib-htmlhelp==2.0.0
|
||||
sphinxcontrib-htmlhelp==2.0.1
|
||||
# via sphinx
|
||||
sphinxcontrib-jquery==3.0.0
|
||||
# via sphinx-rtd-theme
|
||||
@ -60,7 +60,7 @@ sphinxcontrib-serializinghtml==1.1.5
|
||||
# via sphinx
|
||||
urllib3==1.26.14
|
||||
# via requests
|
||||
zipp==3.12.0
|
||||
zipp==3.12.1
|
||||
# via importlib-metadata
|
||||
|
||||
# The following packages are considered to be unsafe in a requirements file:
|
||||
|
||||
@ -25,7 +25,7 @@ attrs==22.2.0
|
||||
# via jsonschema
|
||||
backcall==0.2.0
|
||||
# via ipython
|
||||
beautifulsoup4==4.11.1
|
||||
beautifulsoup4==4.11.2
|
||||
# via nbconvert
|
||||
bleach==6.0.0
|
||||
# via nbconvert
|
||||
@ -59,7 +59,7 @@ importlib-metadata==6.0.0
|
||||
# nbconvert
|
||||
importlib-resources==5.10.2
|
||||
# via jsonschema
|
||||
ipykernel==6.21.0
|
||||
ipykernel==6.21.1
|
||||
# via
|
||||
# ipywidgets
|
||||
# jupyter
|
||||
@ -123,7 +123,7 @@ jupyter-core==5.2.0
|
||||
# qtconsole
|
||||
jupyter-events==0.6.3
|
||||
# via jupyter-server
|
||||
jupyter-server==2.1.0
|
||||
jupyter-server==2.2.1
|
||||
# via
|
||||
# nbclassic
|
||||
# notebook-shim
|
||||
@ -162,6 +162,7 @@ nbformat==5.7.3
|
||||
# notebook
|
||||
nest-asyncio==1.5.6
|
||||
# via
|
||||
# ipykernel
|
||||
# nbclassic
|
||||
# notebook
|
||||
notebook==6.5.2
|
||||
@ -183,7 +184,7 @@ pexpect==4.8.0
|
||||
# via ipython
|
||||
pickleshare==0.7.5
|
||||
# via ipython
|
||||
pip-tools==6.12.1
|
||||
pip-tools==6.12.2
|
||||
# via -r requirements/dev.in
|
||||
pkgutil-resolve-name==1.3.10
|
||||
# via jsonschema
|
||||
@ -310,7 +311,7 @@ webencodings==0.5.1
|
||||
# via
|
||||
# bleach
|
||||
# tinycss2
|
||||
websocket-client==1.5.0
|
||||
websocket-client==1.5.1
|
||||
# via jupyter-server
|
||||
wheel==0.38.4
|
||||
# via
|
||||
@ -318,7 +319,7 @@ wheel==0.38.4
|
||||
# pip-tools
|
||||
widgetsnbextension==4.0.5
|
||||
# via ipywidgets
|
||||
zipp==3.12.0
|
||||
zipp==3.12.1
|
||||
# via
|
||||
# importlib-metadata
|
||||
# importlib-resources
|
||||
|
||||
@ -63,7 +63,7 @@ numpy==1.23.5
|
||||
# argilla
|
||||
# pandas
|
||||
# transformers
|
||||
openpyxl==3.0.10
|
||||
openpyxl==3.1.0
|
||||
# via unstructured (setup.py)
|
||||
packaging==23.0
|
||||
# via
|
||||
@ -144,5 +144,5 @@ wrapt==1.14.1
|
||||
# via
|
||||
# argilla
|
||||
# deprecated
|
||||
xlsxwriter==3.0.7
|
||||
xlsxwriter==3.0.8
|
||||
# via python-pptx
|
||||
|
||||
@ -30,6 +30,8 @@ click==8.1.3
|
||||
# via
|
||||
# nltk
|
||||
# uvicorn
|
||||
coloredlogs==15.0.1
|
||||
# via onnxruntime
|
||||
contourpy==1.0.7
|
||||
# via matplotlib
|
||||
cryptography==39.0.0
|
||||
@ -46,6 +48,8 @@ fastapi==0.89.1
|
||||
# via unstructured-inference
|
||||
filelock==3.9.0
|
||||
# via huggingface-hub
|
||||
flatbuffers==23.1.21
|
||||
# via onnxruntime
|
||||
fonttools==4.38.0
|
||||
# via matplotlib
|
||||
h11==0.14.0
|
||||
@ -60,6 +64,8 @@ huggingface-hub==0.12.0
|
||||
# via
|
||||
# timm
|
||||
# unstructured-inference
|
||||
humanfriendly==10.0
|
||||
# via coloredlogs
|
||||
idna==3.4
|
||||
# via
|
||||
# anyio
|
||||
@ -69,6 +75,8 @@ iopath==0.1.10
|
||||
# via layoutparser
|
||||
joblib==1.2.0
|
||||
# via nltk
|
||||
jsons==1.6.3
|
||||
# via unstructured-inference
|
||||
kiwisolver==1.4.4
|
||||
# via matplotlib
|
||||
layoutparser[layoutmodels,tesseract]==0.3.4
|
||||
@ -82,6 +90,8 @@ matplotlib==3.6.3
|
||||
# via pycocotools
|
||||
monotonic==1.6
|
||||
# via argilla
|
||||
mpmath==1.2.1
|
||||
# via sympy
|
||||
nltk==3.8.1
|
||||
# via unstructured (setup.py)
|
||||
numpy==1.23.5
|
||||
@ -90,6 +100,7 @@ numpy==1.23.5
|
||||
# contourpy
|
||||
# layoutparser
|
||||
# matplotlib
|
||||
# onnxruntime
|
||||
# opencv-python
|
||||
# pandas
|
||||
# pycocotools
|
||||
@ -97,17 +108,20 @@ numpy==1.23.5
|
||||
# torchvision
|
||||
omegaconf==2.3.0
|
||||
# via effdet
|
||||
onnxruntime==1.13.1
|
||||
# via unstructured-inference
|
||||
opencv-python==4.6.0.66
|
||||
# via
|
||||
# layoutparser
|
||||
# unstructured-inference
|
||||
openpyxl==3.0.10
|
||||
openpyxl==3.1.0
|
||||
# via unstructured (setup.py)
|
||||
packaging==23.0
|
||||
# via
|
||||
# argilla
|
||||
# huggingface-hub
|
||||
# matplotlib
|
||||
# onnxruntime
|
||||
# pytesseract
|
||||
pandas==1.5.3
|
||||
# via
|
||||
@ -132,6 +146,8 @@ pillow==9.4.0
|
||||
# unstructured (setup.py)
|
||||
portalocker==2.7.0
|
||||
# via iopath
|
||||
protobuf==4.21.12
|
||||
# via onnxruntime
|
||||
pycocotools==2.0.6
|
||||
# via effdet
|
||||
pycparser==2.21
|
||||
@ -186,6 +202,8 @@ sniffio==1.3.0
|
||||
# httpx
|
||||
starlette==0.22.0
|
||||
# via fastapi
|
||||
sympy==1.11.1
|
||||
# via onnxruntime
|
||||
timm==0.6.12
|
||||
# via effdet
|
||||
torch==1.13.1
|
||||
@ -213,7 +231,9 @@ typing-extensions==4.4.0
|
||||
# starlette
|
||||
# torch
|
||||
# torchvision
|
||||
unstructured-inference==0.2.4
|
||||
typish==1.9.3
|
||||
# via jsons
|
||||
unstructured-inference==0.2.5
|
||||
# via unstructured (setup.py)
|
||||
urllib3==1.26.14
|
||||
# via requests
|
||||
@ -225,5 +245,5 @@ wrapt==1.14.1
|
||||
# via
|
||||
# argilla
|
||||
# deprecated
|
||||
xlsxwriter==3.0.7
|
||||
xlsxwriter==3.0.8
|
||||
# via python-pptx
|
||||
|
||||
@ -7,7 +7,10 @@ click>=8.1
|
||||
flake8
|
||||
mypy
|
||||
pytest-cov
|
||||
label_studio_sdk
|
||||
# NOTE(robinson) - Currently tests do not pass with 0.0.18. Added the following
|
||||
# issue to address
|
||||
# ref: https://github.com/Unstructured-IO/unstructured/issues/200
|
||||
label_studio_sdk==0.0.17
|
||||
vcrpy
|
||||
|
||||
# NOTE(robinson) - The following pins are to address
|
||||
|
||||
@ -6,7 +6,7 @@
|
||||
#
|
||||
attrs==22.2.0
|
||||
# via pytest
|
||||
black==22.12.0
|
||||
black==23.1.0
|
||||
# via -r requirements/test.in
|
||||
certifi==2022.12.7
|
||||
# via
|
||||
@ -42,12 +42,14 @@ multidict==6.0.4
|
||||
# via yarl
|
||||
mypy==0.991
|
||||
# via -r requirements/test.in
|
||||
mypy-extensions==0.4.3
|
||||
mypy-extensions==1.0.0
|
||||
# via
|
||||
# black
|
||||
# mypy
|
||||
packaging==23.0
|
||||
# via pytest
|
||||
# via
|
||||
# black
|
||||
# pytest
|
||||
pathspec==0.11.0
|
||||
# via black
|
||||
platformdirs==2.6.2
|
||||
|
||||
@ -43,7 +43,6 @@ def test_isd_to_elements():
|
||||
|
||||
|
||||
def test_convert_to_isd_csv(output_csv_file):
|
||||
|
||||
elements = [Title(text="Title 1"), NarrativeText(text="Narrative 1")]
|
||||
with open(output_csv_file, "w+") as csv_file:
|
||||
isd_csv_string = base.convert_to_isd_csv(elements)
|
||||
|
||||
@ -5,7 +5,6 @@ import unstructured.staging.huggingface as huggingface
|
||||
|
||||
|
||||
class MockTokenizer:
|
||||
|
||||
model_max_length = 20
|
||||
|
||||
def tokenize(self, text):
|
||||
|
||||
@ -1 +1 @@
|
||||
__version__ = "0.4.6" # pragma: no cover
|
||||
__version__ = "0.4.7-dev0" # pragma: no cover
|
||||
|
||||
@ -33,7 +33,6 @@ VALID_CONTENT_SOURCES: Final[List[str]] = ["text/html", "text/plain"]
|
||||
|
||||
|
||||
def _parse_received_data(data: str) -> List[Element]:
|
||||
|
||||
ip_address_names = extract_ip_address_name(data)
|
||||
ip_addresses = extract_ip_address(data)
|
||||
mapi_id = extract_mapi_id(data)
|
||||
@ -111,7 +110,6 @@ def extract_attachment_info(
|
||||
|
||||
|
||||
def has_embedded_image(element):
|
||||
|
||||
PATTERN = re.compile("\[image: .+\]") # noqa: W605 NOTE(harrell)
|
||||
return PATTERN.search(element.text)
|
||||
|
||||
@ -119,7 +117,6 @@ def has_embedded_image(element):
|
||||
def find_embedded_image(
|
||||
element: Union[NarrativeText, Title], indices: re.Match
|
||||
) -> Tuple[Element, Element]:
|
||||
|
||||
start, end = indices.start(), indices.end()
|
||||
|
||||
image_raw_info = element.text[start:end]
|
||||
|
||||
@ -53,7 +53,6 @@ def partition_text(
|
||||
|
||||
elements: List[Element] = list()
|
||||
for ctext in file_content:
|
||||
|
||||
ctext = ctext.strip()
|
||||
|
||||
if ctext == "":
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user