Update partition_via_api to not post a strategy value if not user specified (#967)

* remove default strategy

* working on test

* fixed test, coordinates param needed to be included

* nits

* update changelog

* lint

* update requirements
This commit is contained in:
shreyanid 2023-07-26 09:56:39 -07:00 committed by GitHub
parent 08fc41cde2
commit 71a24b2887
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 169 additions and 128 deletions

View File

@ -37,6 +37,7 @@
has a `text/plain` MIME type. has a `text/plain` MIME type.
* Enables filters to be passed to `partition_doc` so it doesn't error with LibreOffice7. * Enables filters to be passed to `partition_doc` so it doesn't error with LibreOffice7.
* Removed old error message that's superseded by `requires_dependencies`. * Removed old error message that's superseded by `requires_dependencies`.
* Removes `hi_res` as the default strategy value for `partition_via_api` and `partition_multiple_via_api`.
## 0.8.1 ## 0.8.1

View File

@ -10,7 +10,7 @@ babel==2.12.1
# via sphinx # via sphinx
beautifulsoup4==4.12.2 beautifulsoup4==4.12.2
# via furo # via furo
certifi==2023.5.7 certifi==2023.7.22
# via # via
# -r requirements/build.in # -r requirements/build.in
# requests # requests
@ -71,7 +71,7 @@ sphinxcontrib-qthelp==1.0.3
# via sphinx # via sphinx
sphinxcontrib-serializinghtml==1.1.5 sphinxcontrib-serializinghtml==1.1.5
# via sphinx # via sphinx
urllib3==2.0.3 urllib3==2.0.4
# via requests # via requests
zipp==3.16.1 zipp==3.16.2
# via importlib-metadata # via importlib-metadata

View File

@ -4,7 +4,7 @@
# #
# pip-compile requirements/base.in # pip-compile requirements/base.in
# #
certifi==2023.5.7 certifi==2023.7.22
# via # via
# -c requirements/constraints.in # -c requirements/constraints.in
# requests # requests
@ -16,7 +16,7 @@ charset-normalizer==3.2.0
# via # via
# pdfminer-six # pdfminer-six
# requests # requests
click==8.1.5 click==8.1.6
# via nltk # via nltk
cryptography==41.0.2 cryptography==41.0.2
# via pdfminer-six # via pdfminer-six
@ -35,7 +35,7 @@ lxml==4.9.3
# -r requirements/base.in # -r requirements/base.in
# python-docx # python-docx
# python-pptx # python-pptx
markdown==3.4.3 markdown==3.4.4
# via -r requirements/base.in # via -r requirements/base.in
msg-parser==1.2.0 msg-parser==1.2.0
# via -r requirements/base.in # via -r requirements/base.in
@ -92,5 +92,5 @@ xlrd==2.0.1
# via -r requirements/base.in # via -r requirements/base.in
xlsxwriter==3.1.2 xlsxwriter==3.1.2
# via python-pptx # via python-pptx
zipp==3.16.1 zipp==3.16.2
# via importlib-metadata # via importlib-metadata

View File

@ -10,7 +10,7 @@ babel==2.12.1
# via sphinx # via sphinx
beautifulsoup4==4.12.2 beautifulsoup4==4.12.2
# via furo # via furo
certifi==2023.5.7 certifi==2023.7.22
# via # via
# -r requirements/build.in # -r requirements/build.in
# requests # requests
@ -71,7 +71,7 @@ sphinxcontrib-qthelp==1.0.3
# via sphinx # via sphinx
sphinxcontrib-serializinghtml==1.1.5 sphinxcontrib-serializinghtml==1.1.5
# via sphinx # via sphinx
urllib3==2.0.3 urllib3==2.0.4
# via requests # via requests
zipp==3.16.1 zipp==3.16.2
# via importlib-metadata # via importlib-metadata

View File

@ -11,20 +11,21 @@ appnope==0.1.3
# ipykernel # ipykernel
# ipython # ipython
argon2-cffi==21.3.0 argon2-cffi==21.3.0
# via # via jupyter-server
# jupyter-server
# nbclassic
# notebook
argon2-cffi-bindings==21.2.0 argon2-cffi-bindings==21.2.0
# via argon2-cffi # via argon2-cffi
arrow==1.2.3 arrow==1.2.3
# via isoduration # via isoduration
asttokens==2.2.1 asttokens==2.2.1
# via stack-data # via stack-data
async-lru==2.0.3
# via jupyterlab
attrs==23.1.0 attrs==23.1.0
# via # via
# jsonschema # jsonschema
# referencing # referencing
babel==2.12.1
# via jupyterlab-server
backcall==0.2.0 backcall==0.2.0
# via ipython # via ipython
beautifulsoup4==4.12.2 beautifulsoup4==4.12.2
@ -33,13 +34,24 @@ bleach==6.0.0
# via nbconvert # via nbconvert
build==0.10.0 build==0.10.0
# via pip-tools # via pip-tools
certifi==2023.7.22
# via
# -c requirements/base.txt
# -c requirements/constraints.in
# -c requirements/test.txt
# requests
cffi==1.15.1 cffi==1.15.1
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# argon2-cffi-bindings # argon2-cffi-bindings
cfgv==3.3.1 cfgv==3.3.1
# via pre-commit # via pre-commit
click==8.1.5 charset-normalizer==3.2.0
# via
# -c requirements/base.txt
# -c requirements/test.txt
# requests
click==8.1.6
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# -c requirements/test.txt # -c requirements/test.txt
@ -52,7 +64,7 @@ decorator==5.1.1
# via ipython # via ipython
defusedxml==0.7.1 defusedxml==0.7.1
# via nbconvert # via nbconvert
distlib==0.3.6 distlib==0.3.7
# via virtualenv # via virtualenv
exceptiongroup==1.1.2 exceptiongroup==1.1.2
# via # via
@ -60,13 +72,13 @@ exceptiongroup==1.1.2
# anyio # anyio
executing==1.2.0 executing==1.2.0
# via stack-data # via stack-data
fastjsonschema==2.17.1 fastjsonschema==2.18.0
# via nbformat # via nbformat
filelock==3.12.2 filelock==3.12.2
# via virtualenv # via virtualenv
fqdn==1.5.1 fqdn==1.5.1
# via jsonschema # via jsonschema
identify==2.5.24 identify==2.5.26
# via pre-commit # via pre-commit
idna==3.4 idna==3.4
# via # via
@ -74,22 +86,27 @@ idna==3.4
# -c requirements/test.txt # -c requirements/test.txt
# anyio # anyio
# jsonschema # jsonschema
# requests
importlib-metadata==6.8.0 importlib-metadata==6.8.0
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# jupyter-client # jupyter-client
# jupyter-lsp
# jupyterlab
# jupyterlab-server
# nbconvert # nbconvert
importlib-resources==6.0.0 importlib-resources==6.0.0
# via # via
# jsonschema # jsonschema
# jsonschema-specifications # jsonschema-specifications
ipykernel==6.24.0 # jupyterlab
# notebook
ipykernel==6.25.0
# via # via
# ipywidgets # ipywidgets
# jupyter # jupyter
# jupyter-console # jupyter-console
# nbclassic # jupyterlab
# notebook
# qtconsole # qtconsole
ipython==8.12.2 ipython==8.12.2
# via # via
@ -98,10 +115,7 @@ ipython==8.12.2
# ipywidgets # ipywidgets
# jupyter-console # jupyter-console
ipython-genutils==0.2.0 ipython-genutils==0.2.0
# via # via qtconsole
# nbclassic
# notebook
# qtconsole
ipywidgets==8.0.7 ipywidgets==8.0.7
# via jupyter # via jupyter
isoduration==20.11.0 isoduration==20.11.0
@ -111,16 +125,19 @@ jedi==0.18.2
jinja2==3.1.2 jinja2==3.1.2
# via # via
# jupyter-server # jupyter-server
# nbclassic # jupyterlab
# jupyterlab-server
# nbconvert # nbconvert
# notebook json5==0.9.14
# via jupyterlab-server
jsonpointer==2.4 jsonpointer==2.4
# via jsonschema # via jsonschema
jsonschema[format-nongpl]==4.18.3 jsonschema[format-nongpl]==4.18.4
# via # via
# jupyter-events # jupyter-events
# jupyterlab-server
# nbformat # nbformat
jsonschema-specifications==2023.6.1 jsonschema-specifications==2023.7.1
# via jsonschema # via jsonschema
jupyter==1.0.0 jupyter==1.0.0
# via -r requirements/dev.in # via -r requirements/dev.in
@ -129,9 +146,7 @@ jupyter-client==8.3.0
# ipykernel # ipykernel
# jupyter-console # jupyter-console
# jupyter-server # jupyter-server
# nbclassic
# nbclient # nbclient
# notebook
# qtconsole # qtconsole
jupyter-console==6.6.3 jupyter-console==6.6.3
# via jupyter # via jupyter
@ -142,22 +157,32 @@ jupyter-core==5.3.1
# jupyter-client # jupyter-client
# jupyter-console # jupyter-console
# jupyter-server # jupyter-server
# nbclassic # jupyterlab
# nbclient # nbclient
# nbconvert # nbconvert
# nbformat # nbformat
# notebook
# qtconsole # qtconsole
jupyter-events==0.6.3 jupyter-events==0.6.3
# via jupyter-server # via jupyter-server
jupyter-lsp==2.2.0
# via jupyterlab
jupyter-server==2.7.0 jupyter-server==2.7.0
# via # via
# nbclassic # jupyter-lsp
# jupyterlab
# jupyterlab-server
# notebook
# notebook-shim # notebook-shim
jupyter-server-terminals==0.4.4 jupyter-server-terminals==0.4.4
# via jupyter-server # via jupyter-server
jupyterlab==4.0.3
# via notebook
jupyterlab-pygments==0.2.2 jupyterlab-pygments==0.2.2
# via nbconvert # via nbconvert
jupyterlab-server==2.24.0
# via
# jupyterlab
# notebook
jupyterlab-widgets==3.0.8 jupyterlab-widgets==3.0.8
# via ipywidgets # via ipywidgets
markupsafe==2.1.3 markupsafe==2.1.3
@ -170,34 +195,27 @@ matplotlib-inline==0.1.6
# ipython # ipython
mistune==3.0.1 mistune==3.0.1
# via nbconvert # via nbconvert
nbclassic==1.0.0
# via notebook
nbclient==0.8.0 nbclient==0.8.0
# via nbconvert # via nbconvert
nbconvert==7.6.0 nbconvert==7.7.3
# via # via
# jupyter # jupyter
# jupyter-server # jupyter-server
# nbclassic
# notebook
nbformat==5.9.1 nbformat==5.9.1
# via # via
# jupyter-server # jupyter-server
# nbclassic
# nbclient # nbclient
# nbconvert # nbconvert
# notebook
nest-asyncio==1.5.6 nest-asyncio==1.5.6
# via # via ipykernel
# ipykernel
# nbclassic
# notebook
nodeenv==1.8.0 nodeenv==1.8.0
# via pre-commit # via pre-commit
notebook==6.5.4 notebook==7.0.0
# via jupyter # via jupyter
notebook-shim==0.2.3 notebook-shim==0.2.3
# via nbclassic # via
# jupyterlab
# notebook
overrides==7.3.1 overrides==7.3.1
# via jupyter-server # via jupyter-server
packaging==23.1 packaging==23.1
@ -206,6 +224,8 @@ packaging==23.1
# build # build
# ipykernel # ipykernel
# jupyter-server # jupyter-server
# jupyterlab
# jupyterlab-server
# nbconvert # nbconvert
# qtconsole # qtconsole
# qtpy # qtpy
@ -217,11 +237,11 @@ pexpect==4.8.0
# via ipython # via ipython
pickleshare==0.7.5 pickleshare==0.7.5
# via ipython # via ipython
pip-tools==6.14.0 pip-tools==7.1.0
# via -r requirements/dev.in # via -r requirements/dev.in
pkgutil-resolve-name==1.3.10 pkgutil-resolve-name==1.3.10
# via jsonschema # via jsonschema
platformdirs==3.8.1 platformdirs==3.9.1
# via # via
# -c requirements/test.txt # -c requirements/test.txt
# jupyter-core # jupyter-core
@ -229,10 +249,7 @@ platformdirs==3.8.1
pre-commit==3.3.3 pre-commit==3.3.3
# via -r requirements/dev.in # via -r requirements/dev.in
prometheus-client==0.17.1 prometheus-client==0.17.1
# via # via jupyter-server
# jupyter-server
# nbclassic
# notebook
prompt-toolkit==3.0.39 prompt-toolkit==3.0.39
# via # via
# ipython # ipython
@ -265,7 +282,11 @@ python-dateutil==2.8.2
# jupyter-client # jupyter-client
python-json-logger==2.0.7 python-json-logger==2.0.7
# via jupyter-events # via jupyter-events
pyyaml==6.0 pytz==2023.3
# via
# -c requirements/base.txt
# babel
pyyaml==6.0.1
# via # via
# -c requirements/test.txt # -c requirements/test.txt
# jupyter-events # jupyter-events
@ -276,17 +297,20 @@ pyzmq==25.1.0
# jupyter-client # jupyter-client
# jupyter-console # jupyter-console
# jupyter-server # jupyter-server
# nbclassic
# notebook
# qtconsole # qtconsole
qtconsole==5.4.3 qtconsole==5.4.3
# via jupyter # via jupyter
qtpy==2.3.1 qtpy==2.3.1
# via qtconsole # via qtconsole
referencing==0.29.1 referencing==0.30.0
# via # via
# jsonschema # jsonschema
# jsonschema-specifications # jsonschema-specifications
requests==2.31.0
# via
# -c requirements/base.txt
# -c requirements/test.txt
# jupyterlab-server
rfc3339-validator==0.1.4 rfc3339-validator==0.1.4
# via # via
# jsonschema # jsonschema
@ -295,15 +319,12 @@ rfc3986-validator==0.1.1
# via # via
# jsonschema # jsonschema
# jupyter-events # jupyter-events
rpds-py==0.8.10 rpds-py==0.9.2
# via # via
# jsonschema # jsonschema
# referencing # referencing
send2trash==1.8.2 send2trash==1.8.2
# via # via jupyter-server
# jupyter-server
# nbclassic
# notebook
six==1.16.0 six==1.16.0
# via # via
# -c requirements/base.txt # -c requirements/base.txt
@ -322,14 +343,13 @@ terminado==0.17.1
# via # via
# jupyter-server # jupyter-server
# jupyter-server-terminals # jupyter-server-terminals
# nbclassic
# notebook
tinycss2==1.2.1 tinycss2==1.2.1
# via nbconvert # via nbconvert
tomli==2.0.1 tomli==2.0.1
# via # via
# -c requirements/test.txt # -c requirements/test.txt
# build # build
# jupyterlab
# pip-tools # pip-tools
# pyproject-hooks # pyproject-hooks
tornado==6.3.2 tornado==6.3.2
@ -337,7 +357,7 @@ tornado==6.3.2
# ipykernel # ipykernel
# jupyter-client # jupyter-client
# jupyter-server # jupyter-server
# nbclassic # jupyterlab
# notebook # notebook
# terminado # terminado
traitlets==5.9.0 traitlets==5.9.0
@ -351,20 +371,26 @@ traitlets==5.9.0
# jupyter-core # jupyter-core
# jupyter-events # jupyter-events
# jupyter-server # jupyter-server
# jupyterlab
# matplotlib-inline # matplotlib-inline
# nbclassic
# nbclient # nbclient
# nbconvert # nbconvert
# nbformat # nbformat
# notebook
# qtconsole # qtconsole
typing-extensions==4.7.1 typing-extensions==4.7.1
# via # via
# -c requirements/test.txt # -c requirements/test.txt
# async-lru
# ipython # ipython
uri-template==1.3.0 uri-template==1.3.0
# via jsonschema # via jsonschema
virtualenv==20.23.1 urllib3==1.26.16
# via
# -c requirements/base.txt
# -c requirements/constraints.in
# -c requirements/test.txt
# requests
virtualenv==20.24.2
# via pre-commit # via pre-commit
wcwidth==0.2.6 wcwidth==0.2.6
# via prompt-toolkit # via prompt-toolkit
@ -376,13 +402,13 @@ webencodings==0.5.1
# tinycss2 # tinycss2
websocket-client==1.6.1 websocket-client==1.6.1
# via jupyter-server # via jupyter-server
wheel==0.40.0 wheel==0.41.0
# via # via
# -c requirements/constraints.in # -c requirements/constraints.in
# pip-tools # pip-tools
widgetsnbextension==4.0.8 widgetsnbextension==4.0.8
# via ipywidgets # via ipywidgets
zipp==3.16.1 zipp==3.16.2
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# importlib-metadata # importlib-metadata

View File

@ -4,7 +4,7 @@
# #
# pip-compile requirements/huggingface.in # pip-compile requirements/huggingface.in
# #
certifi==2023.5.7 certifi==2023.7.22
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# -c requirements/constraints.in # -c requirements/constraints.in
@ -13,7 +13,7 @@ charset-normalizer==3.2.0
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# requests # requests
click==8.1.5 click==8.1.6
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# sacremoses # sacremoses
@ -52,7 +52,7 @@ packaging==23.1
# via # via
# huggingface-hub # huggingface-hub
# transformers # transformers
pyyaml==6.0 pyyaml==6.0.1
# via # via
# huggingface-hub # huggingface-hub
# transformers # transformers
@ -89,7 +89,7 @@ tqdm==4.65.0
# huggingface-hub # huggingface-hub
# sacremoses # sacremoses
# transformers # transformers
transformers==4.30.2 transformers==4.31.0
# via -r requirements/huggingface.in # via -r requirements/huggingface.in
typing-extensions==4.7.1 typing-extensions==4.7.1
# via # via

View File

@ -6,7 +6,7 @@
# #
adlfs==2023.4.0 adlfs==2023.4.0
# via -r requirements/ingest-azure.in # via -r requirements/ingest-azure.in
aiohttp==3.8.4 aiohttp==3.8.5
# via adlfs # via adlfs
aiosignal==1.3.1 aiosignal==1.3.1
# via aiohttp # via aiohttp
@ -25,7 +25,7 @@ azure-identity==1.13.0
# via adlfs # via adlfs
azure-storage-blob==12.17.0 azure-storage-blob==12.17.0
# via adlfs # via adlfs
certifi==2023.5.7 certifi==2023.7.22
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# -c requirements/constraints.in # -c requirements/constraints.in
@ -62,7 +62,7 @@ idna==3.4
# yarl # yarl
isodate==0.6.1 isodate==0.6.1
# via azure-storage-blob # via azure-storage-blob
msal==1.22.0 msal==1.23.0
# via # via
# azure-datalake-store # azure-datalake-store
# azure-identity # azure-identity
@ -79,7 +79,7 @@ pycparser==2.21
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# cffi # cffi
pyjwt[crypto]==2.7.0 pyjwt[crypto]==2.8.0
# via msal # via msal
requests==2.31.0 requests==2.31.0
# via # via

View File

@ -1,12 +1,12 @@
# #
# This file is autogenerated by pip-compile with Python 3.10 # This file is autogenerated by pip-compile with Python 3.8
# by the following command: # by the following command:
# #
# pip-compile requirements/ingest-confluence.in # pip-compile requirements/ingest-confluence.in
# #
atlassian-python-api==3.39.0 atlassian-python-api==3.39.0
# via -r requirements/ingest-confluence.in # via -r requirements/ingest-confluence.in
certifi==2023.5.7 certifi==2023.7.22
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# -c requirements/constraints.in # -c requirements/constraints.in

View File

@ -4,7 +4,7 @@
# #
# pip-compile requirements/ingest-discord.in # pip-compile requirements/ingest-discord.in
# #
aiohttp==3.8.4 aiohttp==3.8.5
# via discord-py # via discord-py
aiosignal==1.3.1 aiosignal==1.3.1
# via aiohttp # via aiohttp

View File

@ -4,7 +4,7 @@
# #
# pip-compile requirements/ingest-dropbox.in # pip-compile requirements/ingest-dropbox.in
# #
certifi==2023.5.7 certifi==2023.7.22
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# -c requirements/constraints.in # -c requirements/constraints.in

View File

@ -4,7 +4,7 @@
# #
# pip-compile requirements/ingest-elasticsearch.in # pip-compile requirements/ingest-elasticsearch.in
# #
certifi==2023.5.7 certifi==2023.7.22
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# -c requirements/constraints.in # -c requirements/constraints.in

View File

@ -4,7 +4,7 @@
# #
# pip-compile requirements/ingest-gcs.in # pip-compile requirements/ingest-gcs.in
# #
aiohttp==3.8.4 aiohttp==3.8.5
# via gcsfs # via gcsfs
aiosignal==1.3.1 aiosignal==1.3.1
# via aiohttp # via aiohttp
@ -14,7 +14,7 @@ attrs==23.1.0
# via aiohttp # via aiohttp
cachetools==5.3.1 cachetools==5.3.1
# via google-auth # via google-auth
certifi==2023.5.7 certifi==2023.7.22
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# -c requirements/constraints.in # -c requirements/constraints.in
@ -74,6 +74,7 @@ protobuf==4.23.4
# via # via
# -c requirements/constraints.in # -c requirements/constraints.in
# google-api-core # google-api-core
# googleapis-common-protos
pyasn1==0.5.0 pyasn1==0.5.0
# via # via
# pyasn1-modules # pyasn1-modules

View File

@ -4,7 +4,7 @@
# #
# pip-compile requirements/ingest-github.in # pip-compile requirements/ingest-github.in
# #
certifi==2023.5.7 certifi==2023.7.22
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# -c requirements/constraints.in # -c requirements/constraints.in
@ -34,7 +34,7 @@ pycparser==2.21
# cffi # cffi
pygithub==1.58.2 pygithub==1.58.2
# via -r requirements/ingest-github.in # via -r requirements/ingest-github.in
pyjwt[crypto]==2.7.0 pyjwt[crypto]==2.8.0
# via pygithub # via pygithub
pynacl==1.5.0 pynacl==1.5.0
# via pygithub # via pygithub

View File

@ -4,7 +4,7 @@
# #
# pip-compile requirements/ingest-gitlab.in # pip-compile requirements/ingest-gitlab.in
# #
certifi==2023.5.7 certifi==2023.7.22
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# -c requirements/constraints.in # -c requirements/constraints.in

View File

@ -6,7 +6,7 @@
# #
cachetools==5.3.1 cachetools==5.3.1
# via google-auth # via google-auth
certifi==2023.5.7 certifi==2023.7.22
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# -c requirements/constraints.in # -c requirements/constraints.in
@ -17,7 +17,7 @@ charset-normalizer==3.2.0
# requests # requests
google-api-core==2.11.1 google-api-core==2.11.1
# via google-api-python-client # via google-api-python-client
google-api-python-client==2.93.0 google-api-python-client==2.95.0
# via -r requirements/ingest-google-drive.in # via -r requirements/ingest-google-drive.in
google-auth==2.22.0 google-auth==2.22.0
# via # via

View File

@ -4,7 +4,7 @@
# #
# pip-compile requirements/ingest-onedrive.in # pip-compile requirements/ingest-onedrive.in
# #
certifi==2023.5.7 certifi==2023.7.22
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# -c requirements/constraints.in # -c requirements/constraints.in
@ -27,17 +27,17 @@ idna==3.4
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# requests # requests
msal==1.22.0 msal==1.23.0
# via # via
# -r requirements/ingest-onedrive.in # -r requirements/ingest-onedrive.in
# office365-rest-python-client # office365-rest-python-client
office365-rest-python-client==2.4.2 office365-rest-python-client==2.4.3
# via -r requirements/ingest-onedrive.in # via -r requirements/ingest-onedrive.in
pycparser==2.21 pycparser==2.21
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# cffi # cffi
pyjwt[crypto]==2.7.0 pyjwt[crypto]==2.8.0
# via msal # via msal
pytz==2023.3 pytz==2023.3
# via # via

View File

@ -4,7 +4,7 @@
# #
# pip-compile requirements/ingest-reddit.in # pip-compile requirements/ingest-reddit.in
# #
certifi==2023.5.7 certifi==2023.7.22
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# -c requirements/constraints.in # -c requirements/constraints.in

View File

@ -6,7 +6,7 @@
# #
aiobotocore==2.5.2 aiobotocore==2.5.2
# via s3fs # via s3fs
aiohttp==3.8.4 aiohttp==3.8.5
# via # via
# aiobotocore # aiobotocore
# s3fs # s3fs

View File

@ -6,7 +6,7 @@
# #
beautifulsoup4==4.12.2 beautifulsoup4==4.12.2
# via wikipedia # via wikipedia
certifi==2023.5.7 certifi==2023.7.22
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# -c requirements/constraints.in # -c requirements/constraints.in

View File

@ -6,7 +6,7 @@
# #
antlr4-python3-runtime==4.9.3 antlr4-python3-runtime==4.9.3
# via omegaconf # via omegaconf
certifi==2023.5.7 certifi==2023.7.22
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# -c requirements/constraints.in # -c requirements/constraints.in
@ -39,7 +39,7 @@ filelock==3.12.2
# transformers # transformers
flatbuffers==23.5.26 flatbuffers==23.5.26
# via onnxruntime # via onnxruntime
fonttools==4.41.0 fonttools==4.41.1
# via matplotlib # via matplotlib
fsspec==2023.6.0 fsspec==2023.6.0
# via huggingface-hub # via huggingface-hub
@ -112,7 +112,7 @@ pdfminer-six==20221105
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# pdfplumber # pdfplumber
pdfplumber==0.9.0 pdfplumber==0.10.1
# via layoutparser # via layoutparser
pillow==10.0.0 pillow==10.0.0
# via # via
@ -139,6 +139,8 @@ pyparsing==3.0.9
# via # via
# -c requirements/constraints.in # -c requirements/constraints.in
# matplotlib # matplotlib
pypdfium2==4.18.0
# via pdfplumber
pytesseract==0.3.10 pytesseract==0.3.10
# via layoutparser # via layoutparser
python-dateutil==2.8.2 python-dateutil==2.8.2
@ -152,7 +154,7 @@ pytz==2023.3
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# pandas # pandas
pyyaml==6.0 pyyaml==6.0.1
# via # via
# huggingface-hub # huggingface-hub
# layoutparser # layoutparser
@ -204,7 +206,7 @@ tqdm==4.65.0
# huggingface-hub # huggingface-hub
# iopath # iopath
# transformers # transformers
transformers==4.30.2 transformers==4.31.0
# via unstructured-inference # via unstructured-inference
typing-extensions==4.7.1 typing-extensions==4.7.1
# via # via
@ -222,9 +224,7 @@ urllib3==1.26.16
# -c requirements/base.txt # -c requirements/base.txt
# -c requirements/constraints.in # -c requirements/constraints.in
# requests # requests
wand==0.6.11 zipp==3.16.2
# via pdfplumber
zipp==3.16.1
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# importlib-resources # importlib-resources

View File

@ -8,7 +8,7 @@ appdirs==1.4.4
# via label-studio-tools # via label-studio-tools
black==23.7.0 black==23.7.0
# via -r requirements/test.in # via -r requirements/test.in
certifi==2023.5.7 certifi==2023.7.22
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# -c requirements/constraints.in # -c requirements/constraints.in
@ -17,7 +17,7 @@ charset-normalizer==3.2.0
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# requests # requests
click==8.1.5 click==8.1.6
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# -r requirements/test.in # -r requirements/test.in
@ -32,7 +32,7 @@ flake8==6.0.0
# via -r requirements/test.in # via -r requirements/test.in
freezegun==1.2.2 freezegun==1.2.2
# via -r requirements/test.in # via -r requirements/test.in
grpcio==1.56.0 grpcio==1.56.2
# via -r requirements/test.in # via -r requirements/test.in
idna==3.4 idna==3.4
# via # via
@ -66,13 +66,13 @@ packaging==23.1
# pytest # pytest
pathspec==0.11.1 pathspec==0.11.1
# via black # via black
platformdirs==3.8.1 platformdirs==3.9.1
# via black # via black
pluggy==1.2.0 pluggy==1.2.0
# via pytest # via pytest
pycodestyle==2.10.0 pycodestyle==2.10.0
# via flake8 # via flake8
pydantic==1.10.11 pydantic==1.10.12
# via label-studio-sdk # via label-studio-sdk
pyflakes==3.0.1 pyflakes==3.0.1
# via flake8 # via flake8
@ -88,13 +88,13 @@ python-dateutil==2.8.2
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# freezegun # freezegun
pyyaml==6.0 pyyaml==6.0.1
# via vcrpy # via vcrpy
requests==2.31.0 requests==2.31.0
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# label-studio-sdk # label-studio-sdk
ruff==0.0.278 ruff==0.0.280
# via -r requirements/test.in # via -r requirements/test.in
six==1.16.0 six==1.16.0
# via # via
@ -107,13 +107,13 @@ tomli==2.0.1
# coverage # coverage
# mypy # mypy
# pytest # pytest
types-markdown==3.4.2.9 types-markdown==3.4.2.10
# via -r requirements/test.in # via -r requirements/test.in
types-requests==2.31.0.1 types-requests==2.31.0.2
# via -r requirements/test.in # via -r requirements/test.in
types-tabulate==0.9.0.2 types-tabulate==0.9.0.3
# via -r requirements/test.in # via -r requirements/test.in
types-urllib3==1.26.25.13 types-urllib3==1.26.25.14
# via types-requests # via types-requests
typing-extensions==4.7.1 typing-extensions==4.7.1
# via # via

View File

@ -97,6 +97,31 @@ def test_partition_via_api_raises_with_bad_response(monkeypatch):
partition_via_api(filename=filename) partition_via_api(filename=filename)
def test_partition_via_api_with_no_strategy():
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.jpg")
elements_no_strategy = partition_via_api(filename=filename, api_key=get_api_key())
elements_hi_res = partition_via_api(filename=filename, strategy="hi_res", api_key=get_api_key())
# confirm that hi_res strategy was not passed as default to partition by comparing outputs
assert elements_no_strategy[0].text.startswith("arXiv")
assert elements_hi_res[0].text.startswith("LayoutParser")
def test_partition_via_api_with_image_hi_res_strategy_includes_coordinates():
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.jpg")
# coordinates not included by default to limit payload size
elements = partition_via_api(
filename=filename,
strategy="hi_res",
coordinates="true",
api_key=get_api_key(),
)
assert elements[0].metadata.coordinates is not None
@pytest.mark.skipif(skip_outside_ci, reason="Skipping test run outside of CI") @pytest.mark.skipif(skip_outside_ci, reason="Skipping test run outside of CI")
@pytest.mark.skipif(skip_not_on_main, reason="Skipping test run outside of main branch") @pytest.mark.skipif(skip_not_on_main, reason="Skipping test run outside of main branch")
def test_partition_via_api_valid_request_data_kwargs(): def test_partition_via_api_valid_request_data_kwargs():

View File

@ -53,10 +53,6 @@ def partition_via_api(
"UNSTRUCTURED-API-KEY": api_key, "UNSTRUCTURED-API-KEY": api_key,
} }
# set default values for kwargs
strategy = request_kwargs.pop("strategy", "hi_res")
request_kwargs["strategy"] = strategy
if filename is not None: if filename is not None:
with open(filename, "rb") as f: with open(filename, "rb") as f:
files = [ files = [
@ -118,10 +114,6 @@ def partition_multiple_via_api(
A list of file-like object using "rb" mode --> open(filename, "rb"). A list of file-like object using "rb" mode --> open(filename, "rb").
file_filename file_filename
When file is not None, the filename (string) to store in element metadata. E.g. "foo.txt" When file is not None, the filename (string) to store in element metadata. E.g. "foo.txt"
strategy
The strategy to use for partitioning the PDF. Uses a layout detection model if set
to 'hi_res', otherwise partition_pdf simply extracts the text from the document
and processes it.
api_url api_url
The URL for the Unstructured API. Defaults to the hosted Unstructured API. The URL for the Unstructured API. Defaults to the hosted Unstructured API.
api_key api_key
@ -135,10 +127,6 @@ def partition_multiple_via_api(
"UNSTRUCTURED-API-KEY": api_key, "UNSTRUCTURED-API-KEY": api_key,
} }
# set default values for kwargs
strategy = request_kwargs.pop("strategy", "hi_res")
request_kwargs["strategy"] = strategy
if filenames is not None: if filenames is not None:
if content_types and len(content_types) != len(filenames): if content_types and len(content_types) != len(filenames):
raise ValueError("content_types and filenames must have the same length.") raise ValueError("content_types and filenames must have the same length.")