mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-11-08 06:28:55 +00:00
feat: add ability to pass headers in partition_html (#397)
Also adds the pytest-mock requirement; those fixtures are nice to have! Implements issue/feature #396.
This commit is contained in:
parent
a4394f6f16
commit
ce9fc26009
@ -1,8 +1,9 @@
|
|||||||
## 0.5.7-dev2
|
## 0.5.7-dev3
|
||||||
|
|
||||||
### Enhancements
|
### Enhancements
|
||||||
|
|
||||||
* Refactored codebase using `exactly_one`
|
* Refactored codebase using `exactly_one`
|
||||||
|
* Adds ability to pass headers when passing a url in partition_html()
|
||||||
|
|
||||||
### Features
|
### Features
|
||||||
|
|
||||||
|
|||||||
@ -8,7 +8,7 @@ alabaster==0.7.13
|
|||||||
# via sphinx
|
# via sphinx
|
||||||
babel==2.12.1
|
babel==2.12.1
|
||||||
# via sphinx
|
# via sphinx
|
||||||
beautifulsoup4==4.11.2
|
beautifulsoup4==4.12.0
|
||||||
# via furo
|
# via furo
|
||||||
certifi==2022.12.7
|
certifi==2022.12.7
|
||||||
# via
|
# via
|
||||||
@ -20,13 +20,13 @@ docutils==0.18.1
|
|||||||
# via
|
# via
|
||||||
# sphinx
|
# sphinx
|
||||||
# sphinx-rtd-theme
|
# sphinx-rtd-theme
|
||||||
furo==2022.12.7
|
furo==2023.3.23
|
||||||
# via -r requirements/build.in
|
# via -r requirements/build.in
|
||||||
idna==3.4
|
idna==3.4
|
||||||
# via requests
|
# via requests
|
||||||
imagesize==1.4.1
|
imagesize==1.4.1
|
||||||
# via sphinx
|
# via sphinx
|
||||||
importlib-metadata==6.0.0
|
importlib-metadata==6.1.0
|
||||||
# via sphinx
|
# via sphinx
|
||||||
jinja2==3.1.2
|
jinja2==3.1.2
|
||||||
# via sphinx
|
# via sphinx
|
||||||
@ -52,6 +52,7 @@ sphinx==6.1.3
|
|||||||
# furo
|
# furo
|
||||||
# sphinx-basic-ng
|
# sphinx-basic-ng
|
||||||
# sphinx-rtd-theme
|
# sphinx-rtd-theme
|
||||||
|
# sphinxcontrib-jquery
|
||||||
sphinx-basic-ng==1.0.0b1
|
sphinx-basic-ng==1.0.0b1
|
||||||
# via furo
|
# via furo
|
||||||
sphinx-rtd-theme==1.2.0rc3
|
sphinx-rtd-theme==1.2.0rc3
|
||||||
@ -62,7 +63,7 @@ sphinxcontrib-devhelp==1.0.2
|
|||||||
# via sphinx
|
# via sphinx
|
||||||
sphinxcontrib-htmlhelp==2.0.1
|
sphinxcontrib-htmlhelp==2.0.1
|
||||||
# via sphinx
|
# via sphinx
|
||||||
sphinxcontrib-jquery==3.0.0
|
sphinxcontrib-jquery==4.1
|
||||||
# via sphinx-rtd-theme
|
# via sphinx-rtd-theme
|
||||||
sphinxcontrib-jsmath==1.0.1
|
sphinxcontrib-jsmath==1.0.1
|
||||||
# via sphinx
|
# via sphinx
|
||||||
@ -70,10 +71,7 @@ sphinxcontrib-qthelp==1.0.3
|
|||||||
# via sphinx
|
# via sphinx
|
||||||
sphinxcontrib-serializinghtml==1.1.5
|
sphinxcontrib-serializinghtml==1.1.5
|
||||||
# via sphinx
|
# via sphinx
|
||||||
urllib3==1.26.14
|
urllib3==1.26.15
|
||||||
# via requests
|
# via requests
|
||||||
zipp==3.15.0
|
zipp==3.15.0
|
||||||
# via importlib-metadata
|
# via importlib-metadata
|
||||||
|
|
||||||
# The following packages are considered to be unsafe in a requirements file:
|
|
||||||
# setuptools
|
|
||||||
|
|||||||
@ -210,10 +210,13 @@ Examples:
|
|||||||
|
|
||||||
The ``partition_html`` function partitions an HTML document and returns a list
|
The ``partition_html`` function partitions an HTML document and returns a list
|
||||||
of document ``Element`` objects. ``partition_html`` can take a filename, file-like
|
of document ``Element`` objects. ``partition_html`` can take a filename, file-like
|
||||||
object, or string as input. The three examples below all produce the same output.
|
object, string, or url as input.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
|
|
||||||
|
The following three invocations of partition_html() are essentially equivalent:
|
||||||
|
|
||||||
|
|
||||||
.. code:: python
|
.. code:: python
|
||||||
|
|
||||||
from unstructured.partition.html import partition_html
|
from unstructured.partition.html import partition_html
|
||||||
@ -228,6 +231,22 @@ Examples:
|
|||||||
elements = partition_html(text=text)
|
elements = partition_html(text=text)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
The following illustrates fetching a url and partitioning the response content.
|
||||||
|
|
||||||
|
.. code:: python
|
||||||
|
|
||||||
|
from unstructured.partition.html import partition_html
|
||||||
|
|
||||||
|
elements = partition_html(url="https://python.org/")
|
||||||
|
|
||||||
|
# you can also provide custom headers:
|
||||||
|
|
||||||
|
elements = partition_html(url="https://python.org/",
|
||||||
|
headers={"User-Agent": "YourScriptName/1.0 ..."})
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
``partition_pdf``
|
``partition_pdf``
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|||||||
@ -65,7 +65,7 @@ def get_forms_by_cik(session: requests.Session, cik: Union[str, int]) -> dict:
|
|||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
content = json.loads(response.content)
|
content = json.loads(response.content)
|
||||||
recent_forms = content["filings"]["recent"]
|
recent_forms = content["filings"]["recent"]
|
||||||
form_types = {k: v for k, v in zip(recent_forms["accessionNumber"], recent_forms["form"])}
|
form_types = dict(zip(recent_forms["accessionNumber"], recent_forms["form"]))
|
||||||
return form_types
|
return form_types
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -4,12 +4,9 @@
|
|||||||
#
|
#
|
||||||
# pip-compile --output-file=requirements/base.txt
|
# pip-compile --output-file=requirements/base.txt
|
||||||
#
|
#
|
||||||
--extra-index-url https://pypi.ngc.nvidia.com
|
|
||||||
--trusted-host pypi.ngc.nvidia.com
|
|
||||||
|
|
||||||
anyio==3.6.2
|
anyio==3.6.2
|
||||||
# via httpcore
|
# via httpcore
|
||||||
argilla==1.4.0
|
argilla==1.5.0
|
||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
backoff==2.2.1
|
backoff==2.2.1
|
||||||
# via argilla
|
# via argilla
|
||||||
@ -40,7 +37,7 @@ idna==3.4
|
|||||||
# anyio
|
# anyio
|
||||||
# requests
|
# requests
|
||||||
# rfc3986
|
# rfc3986
|
||||||
importlib-metadata==6.0.0
|
importlib-metadata==6.1.0
|
||||||
# via markdown
|
# via markdown
|
||||||
joblib==1.2.0
|
joblib==1.2.0
|
||||||
# via nltk
|
# via nltk
|
||||||
@ -49,7 +46,7 @@ lxml==4.9.2
|
|||||||
# python-docx
|
# python-docx
|
||||||
# python-pptx
|
# python-pptx
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
markdown==3.4.1
|
markdown==3.4.3
|
||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
monotonic==1.6
|
monotonic==1.6
|
||||||
# via argilla
|
# via argilla
|
||||||
@ -59,7 +56,7 @@ numpy==1.23.5
|
|||||||
# via
|
# via
|
||||||
# argilla
|
# argilla
|
||||||
# pandas
|
# pandas
|
||||||
openpyxl==3.1.1
|
openpyxl==3.1.2
|
||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
packaging==23.0
|
packaging==23.0
|
||||||
# via argilla
|
# via argilla
|
||||||
@ -71,7 +68,7 @@ pillow==9.4.0
|
|||||||
# via
|
# via
|
||||||
# python-pptx
|
# python-pptx
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
pydantic==1.10.6
|
pydantic==1.10.7
|
||||||
# via argilla
|
# via argilla
|
||||||
pygments==2.14.0
|
pygments==2.14.0
|
||||||
# via rich
|
# via rich
|
||||||
@ -87,7 +84,7 @@ python-pptx==0.6.21
|
|||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
pytz==2022.7.1
|
pytz==2022.7.1
|
||||||
# via pandas
|
# via pandas
|
||||||
regex==2022.10.31
|
regex==2023.3.23
|
||||||
# via nltk
|
# via nltk
|
||||||
requests==2.28.2
|
requests==2.28.2
|
||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
@ -110,7 +107,7 @@ typing-extensions==4.5.0
|
|||||||
# via
|
# via
|
||||||
# pydantic
|
# pydantic
|
||||||
# rich
|
# rich
|
||||||
urllib3==1.26.14
|
urllib3==1.26.15
|
||||||
# via requests
|
# via requests
|
||||||
wrapt==1.14.1
|
wrapt==1.14.1
|
||||||
# via
|
# via
|
||||||
|
|||||||
@ -8,7 +8,7 @@ alabaster==0.7.13
|
|||||||
# via sphinx
|
# via sphinx
|
||||||
babel==2.12.1
|
babel==2.12.1
|
||||||
# via sphinx
|
# via sphinx
|
||||||
beautifulsoup4==4.11.2
|
beautifulsoup4==4.12.0
|
||||||
# via furo
|
# via furo
|
||||||
certifi==2022.12.7
|
certifi==2022.12.7
|
||||||
# via
|
# via
|
||||||
@ -20,13 +20,13 @@ docutils==0.18.1
|
|||||||
# via
|
# via
|
||||||
# sphinx
|
# sphinx
|
||||||
# sphinx-rtd-theme
|
# sphinx-rtd-theme
|
||||||
furo==2022.12.7
|
furo==2023.3.23
|
||||||
# via -r requirements/build.in
|
# via -r requirements/build.in
|
||||||
idna==3.4
|
idna==3.4
|
||||||
# via requests
|
# via requests
|
||||||
imagesize==1.4.1
|
imagesize==1.4.1
|
||||||
# via sphinx
|
# via sphinx
|
||||||
importlib-metadata==6.0.0
|
importlib-metadata==6.1.0
|
||||||
# via sphinx
|
# via sphinx
|
||||||
jinja2==3.1.2
|
jinja2==3.1.2
|
||||||
# via sphinx
|
# via sphinx
|
||||||
@ -52,6 +52,7 @@ sphinx==6.1.3
|
|||||||
# furo
|
# furo
|
||||||
# sphinx-basic-ng
|
# sphinx-basic-ng
|
||||||
# sphinx-rtd-theme
|
# sphinx-rtd-theme
|
||||||
|
# sphinxcontrib-jquery
|
||||||
sphinx-basic-ng==1.0.0b1
|
sphinx-basic-ng==1.0.0b1
|
||||||
# via furo
|
# via furo
|
||||||
sphinx-rtd-theme==1.2.0rc3
|
sphinx-rtd-theme==1.2.0rc3
|
||||||
@ -62,7 +63,7 @@ sphinxcontrib-devhelp==1.0.2
|
|||||||
# via sphinx
|
# via sphinx
|
||||||
sphinxcontrib-htmlhelp==2.0.1
|
sphinxcontrib-htmlhelp==2.0.1
|
||||||
# via sphinx
|
# via sphinx
|
||||||
sphinxcontrib-jquery==3.0.0
|
sphinxcontrib-jquery==4.1
|
||||||
# via sphinx-rtd-theme
|
# via sphinx-rtd-theme
|
||||||
sphinxcontrib-jsmath==1.0.1
|
sphinxcontrib-jsmath==1.0.1
|
||||||
# via sphinx
|
# via sphinx
|
||||||
@ -70,10 +71,7 @@ sphinxcontrib-qthelp==1.0.3
|
|||||||
# via sphinx
|
# via sphinx
|
||||||
sphinxcontrib-serializinghtml==1.1.5
|
sphinxcontrib-serializinghtml==1.1.5
|
||||||
# via sphinx
|
# via sphinx
|
||||||
urllib3==1.26.14
|
urllib3==1.26.15
|
||||||
# via requests
|
# via requests
|
||||||
zipp==3.15.0
|
zipp==3.15.0
|
||||||
# via importlib-metadata
|
# via importlib-metadata
|
||||||
|
|
||||||
# The following packages are considered to be unsafe in a requirements file:
|
|
||||||
# setuptools
|
|
||||||
|
|||||||
@ -25,7 +25,7 @@ attrs==22.2.0
|
|||||||
# via jsonschema
|
# via jsonschema
|
||||||
backcall==0.2.0
|
backcall==0.2.0
|
||||||
# via ipython
|
# via ipython
|
||||||
beautifulsoup4==4.11.2
|
beautifulsoup4==4.12.0
|
||||||
# via nbconvert
|
# via nbconvert
|
||||||
bleach==6.0.0
|
bleach==6.0.0
|
||||||
# via nbconvert
|
# via nbconvert
|
||||||
@ -37,7 +37,7 @@ cfgv==3.3.1
|
|||||||
# via pre-commit
|
# via pre-commit
|
||||||
click==8.1.3
|
click==8.1.3
|
||||||
# via pip-tools
|
# via pip-tools
|
||||||
comm==0.1.2
|
comm==0.1.3
|
||||||
# via ipykernel
|
# via ipykernel
|
||||||
debugpy==1.6.6
|
debugpy==1.6.6
|
||||||
# via ipykernel
|
# via ipykernel
|
||||||
@ -51,25 +51,24 @@ executing==1.2.0
|
|||||||
# via stack-data
|
# via stack-data
|
||||||
fastjsonschema==2.16.3
|
fastjsonschema==2.16.3
|
||||||
# via nbformat
|
# via nbformat
|
||||||
filelock==3.9.0
|
filelock==3.10.3
|
||||||
# via virtualenv
|
# via virtualenv
|
||||||
fqdn==1.5.1
|
fqdn==1.5.1
|
||||||
# via jsonschema
|
# via jsonschema
|
||||||
identify==2.5.19
|
identify==2.5.21
|
||||||
# via pre-commit
|
# via pre-commit
|
||||||
idna==3.4
|
idna==3.4
|
||||||
# via
|
# via
|
||||||
# anyio
|
# anyio
|
||||||
# jsonschema
|
# jsonschema
|
||||||
importlib-metadata==6.0.0
|
importlib-metadata==6.1.0
|
||||||
# via
|
# via
|
||||||
# jupyter-client
|
# jupyter-client
|
||||||
# nbconvert
|
# nbconvert
|
||||||
importlib-resources==5.12.0
|
importlib-resources==5.12.0
|
||||||
# via jsonschema
|
# via jsonschema
|
||||||
ipykernel==6.21.3
|
ipykernel==6.22.0
|
||||||
# via
|
# via
|
||||||
# ipywidgets
|
|
||||||
# jupyter
|
# jupyter
|
||||||
# jupyter-console
|
# jupyter-console
|
||||||
# nbclassic
|
# nbclassic
|
||||||
@ -86,7 +85,7 @@ ipython-genutils==0.2.0
|
|||||||
# nbclassic
|
# nbclassic
|
||||||
# notebook
|
# notebook
|
||||||
# qtconsole
|
# qtconsole
|
||||||
ipywidgets==8.0.4
|
ipywidgets==8.0.5
|
||||||
# via jupyter
|
# via jupyter
|
||||||
isoduration==20.11.0
|
isoduration==20.11.0
|
||||||
# via jsonschema
|
# via jsonschema
|
||||||
@ -106,7 +105,7 @@ jsonschema[format-nongpl]==4.17.3
|
|||||||
# nbformat
|
# nbformat
|
||||||
jupyter==1.0.0
|
jupyter==1.0.0
|
||||||
# via -r requirements/dev.in
|
# via -r requirements/dev.in
|
||||||
jupyter-client==8.0.3
|
jupyter-client==8.1.0
|
||||||
# via
|
# via
|
||||||
# ipykernel
|
# ipykernel
|
||||||
# jupyter-console
|
# jupyter-console
|
||||||
@ -117,7 +116,7 @@ jupyter-client==8.0.3
|
|||||||
# qtconsole
|
# qtconsole
|
||||||
jupyter-console==6.6.3
|
jupyter-console==6.6.3
|
||||||
# via jupyter
|
# via jupyter
|
||||||
jupyter-core==5.2.0
|
jupyter-core==5.3.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/dev.in
|
# -r requirements/dev.in
|
||||||
# ipykernel
|
# ipykernel
|
||||||
@ -132,7 +131,7 @@ jupyter-core==5.2.0
|
|||||||
# qtconsole
|
# qtconsole
|
||||||
jupyter-events==0.6.3
|
jupyter-events==0.6.3
|
||||||
# via jupyter-server
|
# via jupyter-server
|
||||||
jupyter-server==2.4.0
|
jupyter-server==2.5.0
|
||||||
# via
|
# via
|
||||||
# nbclassic
|
# nbclassic
|
||||||
# notebook-shim
|
# notebook-shim
|
||||||
@ -140,7 +139,7 @@ jupyter-server-terminals==0.4.4
|
|||||||
# via jupyter-server
|
# via jupyter-server
|
||||||
jupyterlab-pygments==0.2.2
|
jupyterlab-pygments==0.2.2
|
||||||
# via nbconvert
|
# via nbconvert
|
||||||
jupyterlab-widgets==3.0.5
|
jupyterlab-widgets==3.0.6
|
||||||
# via ipywidgets
|
# via ipywidgets
|
||||||
markupsafe==2.1.2
|
markupsafe==2.1.2
|
||||||
# via
|
# via
|
||||||
@ -156,13 +155,13 @@ nbclassic==0.5.3
|
|||||||
# via notebook
|
# via notebook
|
||||||
nbclient==0.7.2
|
nbclient==0.7.2
|
||||||
# via nbconvert
|
# via nbconvert
|
||||||
nbconvert==7.2.9
|
nbconvert==7.2.10
|
||||||
# via
|
# via
|
||||||
# jupyter
|
# jupyter
|
||||||
# jupyter-server
|
# jupyter-server
|
||||||
# nbclassic
|
# nbclassic
|
||||||
# notebook
|
# notebook
|
||||||
nbformat==5.7.3
|
nbformat==5.8.0
|
||||||
# via
|
# via
|
||||||
# jupyter-server
|
# jupyter-server
|
||||||
# nbclassic
|
# nbclassic
|
||||||
@ -186,6 +185,7 @@ packaging==23.0
|
|||||||
# ipykernel
|
# ipykernel
|
||||||
# jupyter-server
|
# jupyter-server
|
||||||
# nbconvert
|
# nbconvert
|
||||||
|
# qtconsole
|
||||||
# qtpy
|
# qtpy
|
||||||
pandocfilters==1.5.0
|
pandocfilters==1.5.0
|
||||||
# via nbconvert
|
# via nbconvert
|
||||||
@ -203,7 +203,7 @@ platformdirs==3.1.1
|
|||||||
# via
|
# via
|
||||||
# jupyter-core
|
# jupyter-core
|
||||||
# virtualenv
|
# virtualenv
|
||||||
pre-commit==3.1.1
|
pre-commit==3.2.0
|
||||||
# via -r requirements/dev.in
|
# via -r requirements/dev.in
|
||||||
prometheus-client==0.16.0
|
prometheus-client==0.16.0
|
||||||
# via
|
# via
|
||||||
@ -244,7 +244,7 @@ pyyaml==6.0
|
|||||||
# via
|
# via
|
||||||
# jupyter-events
|
# jupyter-events
|
||||||
# pre-commit
|
# pre-commit
|
||||||
pyzmq==25.0.0
|
pyzmq==25.0.2
|
||||||
# via
|
# via
|
||||||
# ipykernel
|
# ipykernel
|
||||||
# jupyter-client
|
# jupyter-client
|
||||||
@ -253,7 +253,7 @@ pyzmq==25.0.0
|
|||||||
# nbclassic
|
# nbclassic
|
||||||
# notebook
|
# notebook
|
||||||
# qtconsole
|
# qtconsole
|
||||||
qtconsole==5.4.0
|
qtconsole==5.4.1
|
||||||
# via jupyter
|
# via jupyter
|
||||||
qtpy==2.3.0
|
qtpy==2.3.0
|
||||||
# via qtconsole
|
# via qtconsole
|
||||||
@ -322,7 +322,7 @@ traitlets==5.9.0
|
|||||||
# qtconsole
|
# qtconsole
|
||||||
uri-template==1.2.0
|
uri-template==1.2.0
|
||||||
# via jsonschema
|
# via jsonschema
|
||||||
virtualenv==20.20.0
|
virtualenv==20.21.0
|
||||||
# via pre-commit
|
# via pre-commit
|
||||||
wcwidth==0.2.6
|
wcwidth==0.2.6
|
||||||
# via prompt-toolkit
|
# via prompt-toolkit
|
||||||
@ -334,11 +334,11 @@ webencodings==0.5.1
|
|||||||
# tinycss2
|
# tinycss2
|
||||||
websocket-client==1.5.1
|
websocket-client==1.5.1
|
||||||
# via jupyter-server
|
# via jupyter-server
|
||||||
wheel==0.38.4
|
wheel==0.40.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/dev.in
|
# -r requirements/dev.in
|
||||||
# pip-tools
|
# pip-tools
|
||||||
widgetsnbextension==4.0.5
|
widgetsnbextension==4.0.6
|
||||||
# via ipywidgets
|
# via ipywidgets
|
||||||
zipp==3.15.0
|
zipp==3.15.0
|
||||||
# via
|
# via
|
||||||
|
|||||||
@ -6,7 +6,7 @@
|
|||||||
#
|
#
|
||||||
anyio==3.6.2
|
anyio==3.6.2
|
||||||
# via httpcore
|
# via httpcore
|
||||||
argilla==1.4.0
|
argilla==1.5.0
|
||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
backoff==2.2.1
|
backoff==2.2.1
|
||||||
# via argilla
|
# via argilla
|
||||||
@ -28,9 +28,10 @@ deprecated==1.2.13
|
|||||||
# via argilla
|
# via argilla
|
||||||
et-xmlfile==1.1.0
|
et-xmlfile==1.1.0
|
||||||
# via openpyxl
|
# via openpyxl
|
||||||
filelock==3.9.0
|
filelock==3.10.3
|
||||||
# via
|
# via
|
||||||
# huggingface-hub
|
# huggingface-hub
|
||||||
|
# torch
|
||||||
# transformers
|
# transformers
|
||||||
h11==0.14.0
|
h11==0.14.0
|
||||||
# via httpcore
|
# via httpcore
|
||||||
@ -38,15 +39,17 @@ httpcore==0.16.3
|
|||||||
# via httpx
|
# via httpx
|
||||||
httpx==0.23.3
|
httpx==0.23.3
|
||||||
# via argilla
|
# via argilla
|
||||||
huggingface-hub==0.13.1
|
huggingface-hub==0.13.3
|
||||||
# via transformers
|
# via transformers
|
||||||
idna==3.4
|
idna==3.4
|
||||||
# via
|
# via
|
||||||
# anyio
|
# anyio
|
||||||
# requests
|
# requests
|
||||||
# rfc3986
|
# rfc3986
|
||||||
importlib-metadata==6.0.0
|
importlib-metadata==6.1.0
|
||||||
# via markdown
|
# via markdown
|
||||||
|
jinja2==3.1.2
|
||||||
|
# via torch
|
||||||
joblib==1.2.0
|
joblib==1.2.0
|
||||||
# via
|
# via
|
||||||
# nltk
|
# nltk
|
||||||
@ -58,10 +61,16 @@ lxml==4.9.2
|
|||||||
# python-docx
|
# python-docx
|
||||||
# python-pptx
|
# python-pptx
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
markdown==3.4.1
|
markdown==3.4.3
|
||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
|
markupsafe==2.1.2
|
||||||
|
# via jinja2
|
||||||
monotonic==1.6
|
monotonic==1.6
|
||||||
# via argilla
|
# via argilla
|
||||||
|
mpmath==1.3.0
|
||||||
|
# via sympy
|
||||||
|
networkx==3.0
|
||||||
|
# via torch
|
||||||
nltk==3.8.1
|
nltk==3.8.1
|
||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
numpy==1.23.5
|
numpy==1.23.5
|
||||||
@ -69,7 +78,7 @@ numpy==1.23.5
|
|||||||
# argilla
|
# argilla
|
||||||
# pandas
|
# pandas
|
||||||
# transformers
|
# transformers
|
||||||
openpyxl==3.1.1
|
openpyxl==3.1.2
|
||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
packaging==23.0
|
packaging==23.0
|
||||||
# via
|
# via
|
||||||
@ -84,10 +93,12 @@ pillow==9.4.0
|
|||||||
# via
|
# via
|
||||||
# python-pptx
|
# python-pptx
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
pydantic==1.10.6
|
pydantic==1.10.7
|
||||||
# via argilla
|
# via argilla
|
||||||
pygments==2.14.0
|
pygments==2.14.0
|
||||||
# via rich
|
# via rich
|
||||||
|
pypandoc==1.11
|
||||||
|
# via unstructured (setup.py)
|
||||||
python-dateutil==2.8.2
|
python-dateutil==2.8.2
|
||||||
# via pandas
|
# via pandas
|
||||||
python-docx==0.8.11
|
python-docx==0.8.11
|
||||||
@ -102,7 +113,7 @@ pyyaml==6.0
|
|||||||
# via
|
# via
|
||||||
# huggingface-hub
|
# huggingface-hub
|
||||||
# transformers
|
# transformers
|
||||||
regex==2022.10.31
|
regex==2023.3.23
|
||||||
# via
|
# via
|
||||||
# nltk
|
# nltk
|
||||||
# sacremoses
|
# sacremoses
|
||||||
@ -130,9 +141,11 @@ sniffio==1.3.0
|
|||||||
# anyio
|
# anyio
|
||||||
# httpcore
|
# httpcore
|
||||||
# httpx
|
# httpx
|
||||||
|
sympy==1.11.1
|
||||||
|
# via torch
|
||||||
tokenizers==0.13.2
|
tokenizers==0.13.2
|
||||||
# via transformers
|
# via transformers
|
||||||
torch==1.13.1
|
torch==2.0.0
|
||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
tqdm==4.65.0
|
tqdm==4.65.0
|
||||||
# via
|
# via
|
||||||
@ -141,7 +154,7 @@ tqdm==4.65.0
|
|||||||
# nltk
|
# nltk
|
||||||
# sacremoses
|
# sacremoses
|
||||||
# transformers
|
# transformers
|
||||||
transformers==4.26.1
|
transformers==4.27.3
|
||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
typing-extensions==4.5.0
|
typing-extensions==4.5.0
|
||||||
# via
|
# via
|
||||||
@ -149,7 +162,7 @@ typing-extensions==4.5.0
|
|||||||
# pydantic
|
# pydantic
|
||||||
# rich
|
# rich
|
||||||
# torch
|
# torch
|
||||||
urllib3==1.26.14
|
urllib3==1.26.15
|
||||||
# via requests
|
# via requests
|
||||||
wrapt==1.14.1
|
wrapt==1.14.1
|
||||||
# via
|
# via
|
||||||
|
|||||||
@ -16,7 +16,7 @@ anyio==3.6.2
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# httpcore
|
# httpcore
|
||||||
argilla==1.3.1
|
argilla==1.5.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -50,7 +50,7 @@ cffi==1.15.1
|
|||||||
# via
|
# via
|
||||||
# azure-datalake-store
|
# azure-datalake-store
|
||||||
# cryptography
|
# cryptography
|
||||||
charset-normalizer==3.0.1
|
charset-normalizer==3.1.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# aiohttp
|
# aiohttp
|
||||||
@ -59,7 +59,11 @@ click==8.1.3
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# nltk
|
# nltk
|
||||||
cryptography==39.0.1
|
commonmark==0.9.1
|
||||||
|
# via
|
||||||
|
# -r requirements/base.txt
|
||||||
|
# rich
|
||||||
|
cryptography==39.0.2
|
||||||
# via
|
# via
|
||||||
# adal
|
# adal
|
||||||
# azure-identity
|
# azure-identity
|
||||||
@ -78,7 +82,7 @@ frozenlist==1.3.3
|
|||||||
# via
|
# via
|
||||||
# aiohttp
|
# aiohttp
|
||||||
# aiosignal
|
# aiosignal
|
||||||
fsspec==2023.1.0
|
fsspec==2023.3.0
|
||||||
# via
|
# via
|
||||||
# adlfs
|
# adlfs
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -101,7 +105,7 @@ idna==3.4
|
|||||||
# requests
|
# requests
|
||||||
# rfc3986
|
# rfc3986
|
||||||
# yarl
|
# yarl
|
||||||
importlib-metadata==6.0.0
|
importlib-metadata==6.1.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# markdown
|
# markdown
|
||||||
@ -117,7 +121,7 @@ lxml==4.9.2
|
|||||||
# python-docx
|
# python-docx
|
||||||
# python-pptx
|
# python-pptx
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
markdown==3.4.1
|
markdown==3.4.3
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -144,7 +148,7 @@ numpy==1.23.5
|
|||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
# pandas
|
# pandas
|
||||||
openpyxl==3.1.1
|
openpyxl==3.1.2
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -166,14 +170,22 @@ portalocker==2.7.0
|
|||||||
# via msal-extensions
|
# via msal-extensions
|
||||||
pycparser==2.21
|
pycparser==2.21
|
||||||
# via cffi
|
# via cffi
|
||||||
pydantic==1.10.5
|
pydantic==1.10.7
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
|
pygments==2.14.0
|
||||||
|
# via
|
||||||
|
# -r requirements/base.txt
|
||||||
|
# rich
|
||||||
pyjwt[crypto]==2.6.0
|
pyjwt[crypto]==2.6.0
|
||||||
# via
|
# via
|
||||||
# adal
|
# adal
|
||||||
# msal
|
# msal
|
||||||
|
pypandoc==1.11
|
||||||
|
# via
|
||||||
|
# -r requirements/base.txt
|
||||||
|
# unstructured (setup.py)
|
||||||
python-dateutil==2.8.2
|
python-dateutil==2.8.2
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
@ -195,7 +207,7 @@ pytz==2022.7.1
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# pandas
|
# pandas
|
||||||
regex==2022.10.31
|
regex==2023.3.23
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# nltk
|
# nltk
|
||||||
@ -211,6 +223,10 @@ rfc3986[idna2008]==1.5.0
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# httpx
|
# httpx
|
||||||
|
rich==13.0.1
|
||||||
|
# via
|
||||||
|
# -r requirements/base.txt
|
||||||
|
# argilla
|
||||||
six==1.16.0
|
six==1.16.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
@ -224,7 +240,7 @@ sniffio==1.3.0
|
|||||||
# anyio
|
# anyio
|
||||||
# httpcore
|
# httpcore
|
||||||
# httpx
|
# httpx
|
||||||
tqdm==4.64.1
|
tqdm==4.65.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
@ -235,7 +251,8 @@ typing-extensions==4.5.0
|
|||||||
# azure-core
|
# azure-core
|
||||||
# azure-storage-blob
|
# azure-storage-blob
|
||||||
# pydantic
|
# pydantic
|
||||||
urllib3==1.26.14
|
# rich
|
||||||
|
urllib3==1.26.15
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# requests
|
# requests
|
||||||
@ -244,7 +261,7 @@ wrapt==1.14.1
|
|||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
# deprecated
|
# deprecated
|
||||||
xlsxwriter==3.0.8
|
xlsxwriter==3.0.9
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# python-pptx
|
# python-pptx
|
||||||
|
|||||||
@ -8,7 +8,7 @@ anyio==3.6.2
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# httpcore
|
# httpcore
|
||||||
argilla==1.4.0
|
argilla==1.5.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -64,7 +64,7 @@ idna==3.4
|
|||||||
# anyio
|
# anyio
|
||||||
# requests
|
# requests
|
||||||
# rfc3986
|
# rfc3986
|
||||||
importlib-metadata==6.0.0
|
importlib-metadata==6.1.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# markdown
|
# markdown
|
||||||
@ -78,7 +78,7 @@ lxml==4.9.2
|
|||||||
# python-docx
|
# python-docx
|
||||||
# python-pptx
|
# python-pptx
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
markdown==3.4.1
|
markdown==3.4.3
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -95,7 +95,7 @@ numpy==1.23.5
|
|||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
# pandas
|
# pandas
|
||||||
openpyxl==3.1.1
|
openpyxl==3.1.2
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -115,7 +115,7 @@ pillow==9.4.0
|
|||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
pycparser==2.21
|
pycparser==2.21
|
||||||
# via cffi
|
# via cffi
|
||||||
pydantic==1.10.6
|
pydantic==1.10.7
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
@ -129,6 +129,10 @@ pyjwt==2.6.0
|
|||||||
# via pygithub
|
# via pygithub
|
||||||
pynacl==1.5.0
|
pynacl==1.5.0
|
||||||
# via pygithub
|
# via pygithub
|
||||||
|
pypandoc==1.11
|
||||||
|
# via
|
||||||
|
# -r requirements/base.txt
|
||||||
|
# unstructured (setup.py)
|
||||||
python-dateutil==2.8.2
|
python-dateutil==2.8.2
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
@ -149,7 +153,7 @@ pytz==2022.7.1
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# pandas
|
# pandas
|
||||||
regex==2022.10.31
|
regex==2023.3.23
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# nltk
|
# nltk
|
||||||
@ -186,7 +190,7 @@ typing-extensions==4.5.0
|
|||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# pydantic
|
# pydantic
|
||||||
# rich
|
# rich
|
||||||
urllib3==1.26.14
|
urllib3==1.26.15
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# requests
|
# requests
|
||||||
|
|||||||
@ -8,7 +8,7 @@ anyio==3.6.2
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# httpcore
|
# httpcore
|
||||||
argilla==1.4.0
|
argilla==1.5.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -61,7 +61,7 @@ idna==3.4
|
|||||||
# anyio
|
# anyio
|
||||||
# requests
|
# requests
|
||||||
# rfc3986
|
# rfc3986
|
||||||
importlib-metadata==6.0.0
|
importlib-metadata==6.1.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# markdown
|
# markdown
|
||||||
@ -75,7 +75,7 @@ lxml==4.9.2
|
|||||||
# python-docx
|
# python-docx
|
||||||
# python-pptx
|
# python-pptx
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
markdown==3.4.1
|
markdown==3.4.3
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -92,7 +92,7 @@ numpy==1.23.5
|
|||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
# pandas
|
# pandas
|
||||||
openpyxl==3.1.1
|
openpyxl==3.1.2
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -110,7 +110,7 @@ pillow==9.4.0
|
|||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# python-pptx
|
# python-pptx
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
pydantic==1.10.6
|
pydantic==1.10.7
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
@ -118,6 +118,10 @@ pygments==2.14.0
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# rich
|
# rich
|
||||||
|
pypandoc==1.11
|
||||||
|
# via
|
||||||
|
# -r requirements/base.txt
|
||||||
|
# unstructured (setup.py)
|
||||||
python-dateutil==2.8.2
|
python-dateutil==2.8.2
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
@ -140,7 +144,7 @@ pytz==2022.7.1
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# pandas
|
# pandas
|
||||||
regex==2022.10.31
|
regex==2023.3.23
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# nltk
|
# nltk
|
||||||
@ -180,7 +184,7 @@ typing-extensions==4.5.0
|
|||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# pydantic
|
# pydantic
|
||||||
# rich
|
# rich
|
||||||
urllib3==1.26.14
|
urllib3==1.26.15
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# requests
|
# requests
|
||||||
|
|||||||
@ -8,7 +8,7 @@ anyio==3.6.2
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# httpcore
|
# httpcore
|
||||||
argilla==1.4.0
|
argilla==1.5.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -47,7 +47,7 @@ et-xmlfile==1.1.0
|
|||||||
# openpyxl
|
# openpyxl
|
||||||
google-api-core==2.11.0
|
google-api-core==2.11.0
|
||||||
# via google-api-python-client
|
# via google-api-python-client
|
||||||
google-api-python-client==2.80.0
|
google-api-python-client==2.82.0
|
||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
google-auth==2.16.2
|
google-auth==2.16.2
|
||||||
# via
|
# via
|
||||||
@ -56,7 +56,7 @@ google-auth==2.16.2
|
|||||||
# google-auth-httplib2
|
# google-auth-httplib2
|
||||||
google-auth-httplib2==0.1.0
|
google-auth-httplib2==0.1.0
|
||||||
# via google-api-python-client
|
# via google-api-python-client
|
||||||
googleapis-common-protos==1.58.0
|
googleapis-common-protos==1.59.0
|
||||||
# via google-api-core
|
# via google-api-core
|
||||||
h11==0.14.0
|
h11==0.14.0
|
||||||
# via
|
# via
|
||||||
@ -66,7 +66,7 @@ httpcore==0.16.3
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# httpx
|
# httpx
|
||||||
httplib2==0.21.0
|
httplib2==0.22.0
|
||||||
# via
|
# via
|
||||||
# google-api-python-client
|
# google-api-python-client
|
||||||
# google-auth-httplib2
|
# google-auth-httplib2
|
||||||
@ -80,7 +80,7 @@ idna==3.4
|
|||||||
# anyio
|
# anyio
|
||||||
# requests
|
# requests
|
||||||
# rfc3986
|
# rfc3986
|
||||||
importlib-metadata==6.0.0
|
importlib-metadata==6.1.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# markdown
|
# markdown
|
||||||
@ -94,7 +94,7 @@ lxml==4.9.2
|
|||||||
# python-docx
|
# python-docx
|
||||||
# python-pptx
|
# python-pptx
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
markdown==3.4.1
|
markdown==3.4.3
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -111,7 +111,7 @@ numpy==1.23.5
|
|||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
# pandas
|
# pandas
|
||||||
openpyxl==3.1.1
|
openpyxl==3.1.2
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -139,7 +139,7 @@ pyasn1==0.4.8
|
|||||||
# rsa
|
# rsa
|
||||||
pyasn1-modules==0.2.8
|
pyasn1-modules==0.2.8
|
||||||
# via google-auth
|
# via google-auth
|
||||||
pydantic==1.10.6
|
pydantic==1.10.7
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
@ -147,6 +147,10 @@ pygments==2.14.0
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# rich
|
# rich
|
||||||
|
pypandoc==1.11
|
||||||
|
# via
|
||||||
|
# -r requirements/base.txt
|
||||||
|
# unstructured (setup.py)
|
||||||
pyparsing==3.0.9
|
pyparsing==3.0.9
|
||||||
# via httplib2
|
# via httplib2
|
||||||
python-dateutil==2.8.2
|
python-dateutil==2.8.2
|
||||||
@ -169,7 +173,7 @@ pytz==2022.7.1
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# pandas
|
# pandas
|
||||||
regex==2022.10.31
|
regex==2023.3.23
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# nltk
|
# nltk
|
||||||
@ -212,7 +216,7 @@ typing-extensions==4.5.0
|
|||||||
# rich
|
# rich
|
||||||
uritemplate==4.1.1
|
uritemplate==4.1.1
|
||||||
# via google-api-python-client
|
# via google-api-python-client
|
||||||
urllib3==1.26.14
|
urllib3==1.26.15
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# requests
|
# requests
|
||||||
|
|||||||
@ -8,7 +8,7 @@ anyio==3.6.2
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# httpcore
|
# httpcore
|
||||||
argilla==1.4.0
|
argilla==1.5.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -61,7 +61,7 @@ idna==3.4
|
|||||||
# anyio
|
# anyio
|
||||||
# requests
|
# requests
|
||||||
# rfc3986
|
# rfc3986
|
||||||
importlib-metadata==6.0.0
|
importlib-metadata==6.1.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# markdown
|
# markdown
|
||||||
@ -75,7 +75,7 @@ lxml==4.9.2
|
|||||||
# python-docx
|
# python-docx
|
||||||
# python-pptx
|
# python-pptx
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
markdown==3.4.1
|
markdown==3.4.3
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -92,7 +92,7 @@ numpy==1.23.5
|
|||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
# pandas
|
# pandas
|
||||||
openpyxl==3.1.1
|
openpyxl==3.1.2
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -114,7 +114,7 @@ praw==7.7.0
|
|||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
prawcore==2.3.0
|
prawcore==2.3.0
|
||||||
# via praw
|
# via praw
|
||||||
pydantic==1.10.6
|
pydantic==1.10.7
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
@ -122,6 +122,10 @@ pygments==2.14.0
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# rich
|
# rich
|
||||||
|
pypandoc==1.11
|
||||||
|
# via
|
||||||
|
# -r requirements/base.txt
|
||||||
|
# unstructured (setup.py)
|
||||||
python-dateutil==2.8.2
|
python-dateutil==2.8.2
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
@ -142,7 +146,7 @@ pytz==2022.7.1
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# pandas
|
# pandas
|
||||||
regex==2022.10.31
|
regex==2023.3.23
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# nltk
|
# nltk
|
||||||
@ -182,7 +186,7 @@ typing-extensions==4.5.0
|
|||||||
# rich
|
# rich
|
||||||
update-checker==0.18.0
|
update-checker==0.18.0
|
||||||
# via praw
|
# via praw
|
||||||
urllib3==1.26.14
|
urllib3==1.26.15
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# requests
|
# requests
|
||||||
|
|||||||
@ -18,7 +18,7 @@ anyio==3.6.2
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# httpcore
|
# httpcore
|
||||||
argilla==1.4.0
|
argilla==1.5.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -87,7 +87,7 @@ idna==3.4
|
|||||||
# requests
|
# requests
|
||||||
# rfc3986
|
# rfc3986
|
||||||
# yarl
|
# yarl
|
||||||
importlib-metadata==6.0.0
|
importlib-metadata==6.1.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# markdown
|
# markdown
|
||||||
@ -103,7 +103,7 @@ lxml==4.9.2
|
|||||||
# python-docx
|
# python-docx
|
||||||
# python-pptx
|
# python-pptx
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
markdown==3.4.1
|
markdown==3.4.3
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -124,7 +124,7 @@ numpy==1.23.5
|
|||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
# pandas
|
# pandas
|
||||||
openpyxl==3.1.1
|
openpyxl==3.1.2
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -142,7 +142,7 @@ pillow==9.4.0
|
|||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# python-pptx
|
# python-pptx
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
pydantic==1.10.6
|
pydantic==1.10.7
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
@ -150,6 +150,10 @@ pygments==2.14.0
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# rich
|
# rich
|
||||||
|
pypandoc==1.11
|
||||||
|
# via
|
||||||
|
# -r requirements/base.txt
|
||||||
|
# unstructured (setup.py)
|
||||||
python-dateutil==2.8.2
|
python-dateutil==2.8.2
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
@ -171,7 +175,7 @@ pytz==2022.7.1
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# pandas
|
# pandas
|
||||||
regex==2022.10.31
|
regex==2023.3.23
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# nltk
|
# nltk
|
||||||
@ -210,7 +214,7 @@ typing-extensions==4.5.0
|
|||||||
# aioitertools
|
# aioitertools
|
||||||
# pydantic
|
# pydantic
|
||||||
# rich
|
# rich
|
||||||
urllib3==1.26.14
|
urllib3==1.26.15
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# botocore
|
# botocore
|
||||||
|
|||||||
@ -8,7 +8,7 @@ anyio==3.6.2
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# httpcore
|
# httpcore
|
||||||
argilla==1.4.0
|
argilla==1.5.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -16,7 +16,7 @@ backoff==2.2.1
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
beautifulsoup4==4.11.2
|
beautifulsoup4==4.12.0
|
||||||
# via wikipedia
|
# via wikipedia
|
||||||
certifi==2022.12.7
|
certifi==2022.12.7
|
||||||
# via
|
# via
|
||||||
@ -63,7 +63,7 @@ idna==3.4
|
|||||||
# anyio
|
# anyio
|
||||||
# requests
|
# requests
|
||||||
# rfc3986
|
# rfc3986
|
||||||
importlib-metadata==6.0.0
|
importlib-metadata==6.1.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# markdown
|
# markdown
|
||||||
@ -77,7 +77,7 @@ lxml==4.9.2
|
|||||||
# python-docx
|
# python-docx
|
||||||
# python-pptx
|
# python-pptx
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
markdown==3.4.1
|
markdown==3.4.3
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -94,7 +94,7 @@ numpy==1.23.5
|
|||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
# pandas
|
# pandas
|
||||||
openpyxl==3.1.1
|
openpyxl==3.1.2
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -112,7 +112,7 @@ pillow==9.4.0
|
|||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# python-pptx
|
# python-pptx
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
pydantic==1.10.6
|
pydantic==1.10.7
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
@ -120,6 +120,10 @@ pygments==2.14.0
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# rich
|
# rich
|
||||||
|
pypandoc==1.11
|
||||||
|
# via
|
||||||
|
# -r requirements/base.txt
|
||||||
|
# unstructured (setup.py)
|
||||||
python-dateutil==2.8.2
|
python-dateutil==2.8.2
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
@ -140,7 +144,7 @@ pytz==2022.7.1
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# pandas
|
# pandas
|
||||||
regex==2022.10.31
|
regex==2023.3.23
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# nltk
|
# nltk
|
||||||
@ -179,7 +183,7 @@ typing-extensions==4.5.0
|
|||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# pydantic
|
# pydantic
|
||||||
# rich
|
# rich
|
||||||
urllib3==1.26.14
|
urllib3==1.26.15
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# requests
|
# requests
|
||||||
|
|||||||
@ -10,7 +10,7 @@ anyio==3.6.2
|
|||||||
# via
|
# via
|
||||||
# httpcore
|
# httpcore
|
||||||
# starlette
|
# starlette
|
||||||
argilla==1.4.0
|
argilla==1.5.0
|
||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
backoff==2.2.1
|
backoff==2.2.1
|
||||||
# via argilla
|
# via argilla
|
||||||
@ -46,15 +46,16 @@ effdet==0.3.0
|
|||||||
# via layoutparser
|
# via layoutparser
|
||||||
et-xmlfile==1.1.0
|
et-xmlfile==1.1.0
|
||||||
# via openpyxl
|
# via openpyxl
|
||||||
fastapi==0.94.0
|
fastapi==0.95.0
|
||||||
# via unstructured-inference
|
# via unstructured-inference
|
||||||
filelock==3.9.0
|
filelock==3.10.3
|
||||||
# via
|
# via
|
||||||
# huggingface-hub
|
# huggingface-hub
|
||||||
|
# torch
|
||||||
# transformers
|
# transformers
|
||||||
flatbuffers==23.3.3
|
flatbuffers==23.3.3
|
||||||
# via onnxruntime
|
# via onnxruntime
|
||||||
fonttools==4.39.0
|
fonttools==4.39.2
|
||||||
# via matplotlib
|
# via matplotlib
|
||||||
h11==0.14.0
|
h11==0.14.0
|
||||||
# via
|
# via
|
||||||
@ -64,7 +65,7 @@ httpcore==0.16.3
|
|||||||
# via httpx
|
# via httpx
|
||||||
httpx==0.23.3
|
httpx==0.23.3
|
||||||
# via argilla
|
# via argilla
|
||||||
huggingface-hub==0.13.1
|
huggingface-hub==0.13.3
|
||||||
# via
|
# via
|
||||||
# timm
|
# timm
|
||||||
# transformers
|
# transformers
|
||||||
@ -76,12 +77,14 @@ idna==3.4
|
|||||||
# anyio
|
# anyio
|
||||||
# requests
|
# requests
|
||||||
# rfc3986
|
# rfc3986
|
||||||
importlib-metadata==6.0.0
|
importlib-metadata==6.1.0
|
||||||
# via markdown
|
# via markdown
|
||||||
importlib-resources==5.12.0
|
importlib-resources==5.12.0
|
||||||
# via matplotlib
|
# via matplotlib
|
||||||
iopath==0.1.10
|
iopath==0.1.10
|
||||||
# via layoutparser
|
# via layoutparser
|
||||||
|
jinja2==3.1.2
|
||||||
|
# via torch
|
||||||
joblib==1.2.0
|
joblib==1.2.0
|
||||||
# via nltk
|
# via nltk
|
||||||
kiwisolver==1.4.4
|
kiwisolver==1.4.4
|
||||||
@ -93,14 +96,18 @@ lxml==4.9.2
|
|||||||
# python-docx
|
# python-docx
|
||||||
# python-pptx
|
# python-pptx
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
markdown==3.4.1
|
markdown==3.4.3
|
||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
|
markupsafe==2.1.2
|
||||||
|
# via jinja2
|
||||||
matplotlib==3.7.1
|
matplotlib==3.7.1
|
||||||
# via pycocotools
|
# via pycocotools
|
||||||
monotonic==1.6
|
monotonic==1.6
|
||||||
# via argilla
|
# via argilla
|
||||||
mpmath==1.3.0
|
mpmath==1.3.0
|
||||||
# via sympy
|
# via sympy
|
||||||
|
networkx==3.0
|
||||||
|
# via torch
|
||||||
nltk==3.8.1
|
nltk==3.8.1
|
||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
numpy==1.23.5
|
numpy==1.23.5
|
||||||
@ -124,7 +131,7 @@ opencv-python==4.6.0.66
|
|||||||
# via
|
# via
|
||||||
# layoutparser
|
# layoutparser
|
||||||
# unstructured-inference
|
# unstructured-inference
|
||||||
openpyxl==3.1.1
|
openpyxl==3.1.2
|
||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
packaging==23.0
|
packaging==23.0
|
||||||
# via
|
# via
|
||||||
@ -163,12 +170,14 @@ pycocotools==2.0.6
|
|||||||
# via effdet
|
# via effdet
|
||||||
pycparser==2.21
|
pycparser==2.21
|
||||||
# via cffi
|
# via cffi
|
||||||
pydantic==1.10.6
|
pydantic==1.10.7
|
||||||
# via
|
# via
|
||||||
# argilla
|
# argilla
|
||||||
# fastapi
|
# fastapi
|
||||||
pygments==2.14.0
|
pygments==2.14.0
|
||||||
# via rich
|
# via rich
|
||||||
|
pypandoc==1.11
|
||||||
|
# via unstructured (setup.py)
|
||||||
pyparsing==3.0.9
|
pyparsing==3.0.9
|
||||||
# via matplotlib
|
# via matplotlib
|
||||||
pytesseract==0.3.10
|
pytesseract==0.3.10
|
||||||
@ -194,7 +203,7 @@ pyyaml==6.0
|
|||||||
# omegaconf
|
# omegaconf
|
||||||
# timm
|
# timm
|
||||||
# transformers
|
# transformers
|
||||||
regex==2022.10.31
|
regex==2023.3.23
|
||||||
# via
|
# via
|
||||||
# nltk
|
# nltk
|
||||||
# transformers
|
# transformers
|
||||||
@ -217,21 +226,23 @@ sniffio==1.3.0
|
|||||||
# anyio
|
# anyio
|
||||||
# httpcore
|
# httpcore
|
||||||
# httpx
|
# httpx
|
||||||
starlette==0.26.0.post1
|
starlette==0.26.1
|
||||||
# via fastapi
|
# via fastapi
|
||||||
sympy==1.11.1
|
sympy==1.11.1
|
||||||
# via onnxruntime
|
# via
|
||||||
|
# onnxruntime
|
||||||
|
# torch
|
||||||
timm==0.6.12
|
timm==0.6.12
|
||||||
# via effdet
|
# via effdet
|
||||||
tokenizers==0.13.2
|
tokenizers==0.13.2
|
||||||
# via transformers
|
# via transformers
|
||||||
torch==1.13.1
|
torch==2.0.0
|
||||||
# via
|
# via
|
||||||
# effdet
|
# effdet
|
||||||
# layoutparser
|
# layoutparser
|
||||||
# timm
|
# timm
|
||||||
# torchvision
|
# torchvision
|
||||||
torchvision==0.14.1
|
torchvision==0.15.1
|
||||||
# via
|
# via
|
||||||
# effdet
|
# effdet
|
||||||
# layoutparser
|
# layoutparser
|
||||||
@ -243,7 +254,7 @@ tqdm==4.65.0
|
|||||||
# iopath
|
# iopath
|
||||||
# nltk
|
# nltk
|
||||||
# transformers
|
# transformers
|
||||||
transformers==4.26.1
|
transformers==4.27.3
|
||||||
# via unstructured-inference
|
# via unstructured-inference
|
||||||
typing-extensions==4.5.0
|
typing-extensions==4.5.0
|
||||||
# via
|
# via
|
||||||
@ -253,12 +264,11 @@ typing-extensions==4.5.0
|
|||||||
# rich
|
# rich
|
||||||
# starlette
|
# starlette
|
||||||
# torch
|
# torch
|
||||||
# torchvision
|
|
||||||
unstructured-inference==0.2.11
|
unstructured-inference==0.2.11
|
||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
urllib3==1.26.14
|
urllib3==1.26.15
|
||||||
# via requests
|
# via requests
|
||||||
uvicorn==0.21.0
|
uvicorn==0.21.1
|
||||||
# via unstructured-inference
|
# via unstructured-inference
|
||||||
wand==0.6.11
|
wand==0.6.11
|
||||||
# via pdfplumber
|
# via pdfplumber
|
||||||
|
|||||||
@ -8,6 +8,7 @@ flake8
|
|||||||
mypy
|
mypy
|
||||||
types-Markdown
|
types-Markdown
|
||||||
pytest-cov
|
pytest-cov
|
||||||
|
pytest-mock
|
||||||
label_studio_sdk
|
label_studio_sdk
|
||||||
types-requests
|
types-requests
|
||||||
vcrpy
|
vcrpy
|
||||||
|
|||||||
@ -4,9 +4,6 @@
|
|||||||
#
|
#
|
||||||
# pip-compile requirements/test.in
|
# pip-compile requirements/test.in
|
||||||
#
|
#
|
||||||
--extra-index-url https://pypi.ngc.nvidia.com
|
|
||||||
--trusted-host pypi.ngc.nvidia.com
|
|
||||||
|
|
||||||
appdirs==1.4.4
|
appdirs==1.4.4
|
||||||
# via label-studio-tools
|
# via label-studio-tools
|
||||||
attrs==22.2.0
|
attrs==22.2.0
|
||||||
@ -23,7 +20,7 @@ click==8.1.3
|
|||||||
# via
|
# via
|
||||||
# -r requirements/test.in
|
# -r requirements/test.in
|
||||||
# black
|
# black
|
||||||
coverage[toml]==7.2.1
|
coverage[toml]==7.2.2
|
||||||
# via
|
# via
|
||||||
# -r requirements/test.in
|
# -r requirements/test.in
|
||||||
# pytest-cov
|
# pytest-cov
|
||||||
@ -67,19 +64,23 @@ pluggy==1.0.0
|
|||||||
# via pytest
|
# via pytest
|
||||||
pycodestyle==2.10.0
|
pycodestyle==2.10.0
|
||||||
# via flake8
|
# via flake8
|
||||||
pydantic==1.10.6
|
pydantic==1.10.7
|
||||||
# via label-studio-sdk
|
# via label-studio-sdk
|
||||||
pyflakes==3.0.1
|
pyflakes==3.0.1
|
||||||
# via flake8
|
# via flake8
|
||||||
pytest==7.2.2
|
pytest==7.2.2
|
||||||
# via pytest-cov
|
# via
|
||||||
|
# pytest-cov
|
||||||
|
# pytest-mock
|
||||||
pytest-cov==4.0.0
|
pytest-cov==4.0.0
|
||||||
# via -r requirements/test.in
|
# via -r requirements/test.in
|
||||||
|
pytest-mock==3.10.0
|
||||||
|
# via -r requirements/test.in
|
||||||
pyyaml==6.0
|
pyyaml==6.0
|
||||||
# via vcrpy
|
# via vcrpy
|
||||||
requests==2.28.2
|
requests==2.28.2
|
||||||
# via label-studio-sdk
|
# via label-studio-sdk
|
||||||
ruff==0.0.256
|
ruff==0.0.259
|
||||||
# via -r requirements/test.in
|
# via -r requirements/test.in
|
||||||
six==1.16.0
|
six==1.16.0
|
||||||
# via vcrpy
|
# via vcrpy
|
||||||
@ -91,7 +92,7 @@ tomli==2.0.1
|
|||||||
# pytest
|
# pytest
|
||||||
types-markdown==3.4.2.5
|
types-markdown==3.4.2.5
|
||||||
# via -r requirements/test.in
|
# via -r requirements/test.in
|
||||||
types-requests==2.28.11.15
|
types-requests==2.28.11.16
|
||||||
# via -r requirements/test.in
|
# via -r requirements/test.in
|
||||||
types-urllib3==1.26.25.8
|
types-urllib3==1.26.25.8
|
||||||
# via types-requests
|
# via types-requests
|
||||||
|
|||||||
@ -4,6 +4,7 @@ from unittest.mock import patch
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import requests
|
import requests
|
||||||
|
from requests.models import Response
|
||||||
|
|
||||||
from unstructured.documents.elements import PageBreak
|
from unstructured.documents.elements import PageBreak
|
||||||
from unstructured.partition.html import partition_html
|
from unstructured.partition.html import partition_html
|
||||||
@ -86,6 +87,25 @@ def test_partition_html_from_url_raises_with_bad_content_type():
|
|||||||
partition_html(url="https://fake.url")
|
partition_html(url="https://fake.url")
|
||||||
|
|
||||||
|
|
||||||
|
def test_partition_from_url_uses_headers(mocker):
|
||||||
|
test_url = "https://example.com"
|
||||||
|
test_headers = {"User-Agent": "test"}
|
||||||
|
|
||||||
|
response = Response()
|
||||||
|
response.status_code = 200
|
||||||
|
response._content = (
|
||||||
|
b"<html><head></head><body><p>What do i know? Who needs to know it?</p></body></html>"
|
||||||
|
)
|
||||||
|
response.headers = {"Content-Type": "text/html"}
|
||||||
|
|
||||||
|
mock_get = mocker.patch("requests.get", return_value=response)
|
||||||
|
|
||||||
|
partition_html(url=test_url, headers=test_headers)
|
||||||
|
|
||||||
|
# Check if requests.get was called with the correct arguments
|
||||||
|
mock_get.assert_called_once_with(test_url, headers=test_headers)
|
||||||
|
|
||||||
|
|
||||||
def test_partition_html_raises_with_none_specified():
|
def test_partition_html_raises_with_none_specified():
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
partition_html()
|
partition_html()
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
__version__ = "0.5.7-dev2" # pragma: no cover
|
__version__ = "0.5.7-dev3" # pragma: no cover
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
from typing import IO, List, Optional
|
from typing import IO, Dict, List, Optional
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
@ -20,6 +20,7 @@ def partition_html(
|
|||||||
encoding: Optional[str] = None,
|
encoding: Optional[str] = None,
|
||||||
include_page_breaks: bool = False,
|
include_page_breaks: bool = False,
|
||||||
include_metadata: bool = True,
|
include_metadata: bool = True,
|
||||||
|
headers: Dict[str, str] = {},
|
||||||
parser: VALID_PARSERS = None,
|
parser: VALID_PARSERS = None,
|
||||||
) -> List[Element]:
|
) -> List[Element]:
|
||||||
"""Partitions an HTML document into its constituent elements.
|
"""Partitions an HTML document into its constituent elements.
|
||||||
@ -67,7 +68,7 @@ def partition_html(
|
|||||||
document = HTMLDocument.from_string(_text, parser=parser)
|
document = HTMLDocument.from_string(_text, parser=parser)
|
||||||
|
|
||||||
elif url is not None:
|
elif url is not None:
|
||||||
response = requests.get(url)
|
response = requests.get(url, headers=headers)
|
||||||
if not response.ok:
|
if not response.ok:
|
||||||
raise ValueError(f"URL return an error: {response.status_code}")
|
raise ValueError(f"URL return an error: {response.status_code}")
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user