mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-11-03 03:23:25 +00:00
feat: modify test-ingest-src and evaluation-metrics to allow EXPORT_DIR (#2551)
The current `test-ingest-src.sh` and `evaluation-metrics` scripts do not allow passing an `EXPORT_DIR` (`OUTPUT_ROOT` in `evaluation-metrics`). Output is currently saved in the script directory (`unstructured/test_unstructured_ingest`). When running the eval from `core-product`, all outputs are therefore saved at `core-product/upstream-unstructured/test_unstructured_ingest`, which is undesirable. This PR modifies the two scripts to make the output location configurable: 1. `test-ingest-src.sh` - assigns `EVAL_OUTPUT_ROOT` the value set in the environment if it exists, or the script directory (`$SCRIPT_DIR`) if not, and then passes it along when calling `evaluation-metrics.sh`. 2. `evaluation-metrics.sh` - accepts the output root as a second parameter (as passed by `test-ingest-src.sh`) if given, otherwise uses the `OUTPUT_ROOT` value set in the environment if it exists, or the script directory (`$SCRIPT_DIR`) if not. (Note: I also added the parameter to `evaluation-metrics.sh` because it makes sense to allow a standalone run to specify an export directory.) This PR should work in sync with another PR under `core-product`, whose link I will add here later. **To test:** Run the script below, changing `$SCRIPT_DIR` as needed to see the result. ``` export OVERWRITE_FIXTURES=true ./upstream-unstructured/test_unstructured_ingest/src/s3.sh SCRIPT_DIR=$(dirname "$(realpath "$0")") bash -x ./upstream-unstructured/test_unstructured_ingest/evaluation-metrics.sh text-extraction "$SCRIPT_DIR" ``` ---- This PR also updates the requirements via `make pip-compile`, since the `click` module was not found.
This commit is contained in:
parent
ad561b7939
commit
d06936d35a
@ -52,7 +52,7 @@ mypy-extensions==1.0.0
|
||||
# unstructured-client
|
||||
nltk==3.8.1
|
||||
# via -r base.in
|
||||
numpy==1.26.3
|
||||
numpy==1.26.4
|
||||
# via -r base.in
|
||||
packaging==23.2
|
||||
# via
|
||||
@ -60,7 +60,7 @@ packaging==23.2
|
||||
# unstructured-client
|
||||
python-dateutil==2.8.2
|
||||
# via unstructured-client
|
||||
python-iso639==2024.1.2
|
||||
python-iso639==2024.2.7
|
||||
# via -r base.in
|
||||
python-magic==0.4.27
|
||||
# via -r base.in
|
||||
@ -81,7 +81,7 @@ soupsieve==2.5
|
||||
# via beautifulsoup4
|
||||
tabulate==0.9.0
|
||||
# via -r base.in
|
||||
tqdm==4.66.1
|
||||
tqdm==4.66.2
|
||||
# via nltk
|
||||
typing-extensions==4.9.0
|
||||
# via
|
||||
@ -93,7 +93,7 @@ typing-inspect==0.9.0
|
||||
# dataclasses-json
|
||||
# dataclasses-json-speakeasy
|
||||
# unstructured-client
|
||||
unstructured-client==0.17.0
|
||||
unstructured-client==0.18.0
|
||||
# via -r base.in
|
||||
urllib3==1.26.18
|
||||
# via
|
||||
|
||||
@ -9,7 +9,7 @@ anyio==3.7.1
|
||||
# -c constraints.in
|
||||
# httpx
|
||||
# jupyter-server
|
||||
appnope==0.1.3
|
||||
appnope==0.1.4
|
||||
# via
|
||||
# ipykernel
|
||||
# ipython
|
||||
@ -65,7 +65,7 @@ comm==0.2.1
|
||||
# via
|
||||
# ipykernel
|
||||
# ipywidgets
|
||||
debugpy==1.8.0
|
||||
debugpy==1.8.1
|
||||
# via ipykernel
|
||||
decorator==5.1.1
|
||||
# via ipython
|
||||
@ -87,11 +87,11 @@ fqdn==1.5.1
|
||||
# via jsonschema
|
||||
h11==0.14.0
|
||||
# via httpcore
|
||||
httpcore==1.0.2
|
||||
httpcore==1.0.3
|
||||
# via httpx
|
||||
httpx==0.26.0
|
||||
# via jupyterlab
|
||||
identify==2.5.33
|
||||
identify==2.5.34
|
||||
# via pre-commit
|
||||
idna==3.6
|
||||
# via
|
||||
@ -109,7 +109,7 @@ importlib-metadata==7.0.1
|
||||
# jupyterlab
|
||||
# jupyterlab-server
|
||||
# nbconvert
|
||||
ipykernel==6.29.0
|
||||
ipykernel==6.29.2
|
||||
# via
|
||||
# jupyter
|
||||
# jupyter-console
|
||||
@ -122,7 +122,7 @@ ipython==8.12.3
|
||||
# ipykernel
|
||||
# ipywidgets
|
||||
# jupyter-console
|
||||
ipywidgets==8.1.1
|
||||
ipywidgets==8.1.2
|
||||
# via jupyter
|
||||
isoduration==20.11.0
|
||||
# via jsonschema
|
||||
@ -181,15 +181,15 @@ jupyter-server==2.12.5
|
||||
# notebook-shim
|
||||
jupyter-server-terminals==0.5.2
|
||||
# via jupyter-server
|
||||
jupyterlab==4.1.0
|
||||
jupyterlab==4.1.1
|
||||
# via notebook
|
||||
jupyterlab-pygments==0.3.0
|
||||
# via nbconvert
|
||||
jupyterlab-server==2.25.2
|
||||
jupyterlab-server==2.25.3
|
||||
# via
|
||||
# jupyterlab
|
||||
# notebook
|
||||
jupyterlab-widgets==3.0.9
|
||||
jupyterlab-widgets==3.0.10
|
||||
# via ipywidgets
|
||||
markupsafe==2.1.5
|
||||
# via
|
||||
@ -203,7 +203,7 @@ mistune==3.0.2
|
||||
# via nbconvert
|
||||
nbclient==0.9.0
|
||||
# via nbconvert
|
||||
nbconvert==7.14.2
|
||||
nbconvert==7.16.0
|
||||
# via
|
||||
# jupyter
|
||||
# jupyter-server
|
||||
@ -216,9 +216,9 @@ nest-asyncio==1.6.0
|
||||
# via ipykernel
|
||||
nodeenv==1.8.0
|
||||
# via pre-commit
|
||||
notebook==7.0.7
|
||||
notebook==7.1.0
|
||||
# via jupyter
|
||||
notebook-shim==0.2.3
|
||||
notebook-shim==0.2.4
|
||||
# via
|
||||
# jupyterlab
|
||||
# notebook
|
||||
@ -252,9 +252,9 @@ platformdirs==3.10.0
|
||||
# -c test.txt
|
||||
# jupyter-core
|
||||
# virtualenv
|
||||
pre-commit==3.6.0
|
||||
pre-commit==3.6.1
|
||||
# via -r dev.in
|
||||
prometheus-client==0.19.0
|
||||
prometheus-client==0.20.0
|
||||
# via jupyter-server
|
||||
prompt-toolkit==3.0.43
|
||||
# via
|
||||
@ -320,7 +320,7 @@ rfc3986-validator==0.1.1
|
||||
# via
|
||||
# jsonschema
|
||||
# jupyter-events
|
||||
rpds-py==0.17.1
|
||||
rpds-py==0.18.0
|
||||
# via
|
||||
# jsonschema
|
||||
# referencing
|
||||
@ -414,7 +414,7 @@ wheel==0.42.0
|
||||
# via
|
||||
# -c constraints.in
|
||||
# pip-tools
|
||||
widgetsnbextension==4.0.9
|
||||
widgetsnbextension==4.0.10
|
||||
# via ipywidgets
|
||||
zipp==3.17.0
|
||||
# via importlib-metadata
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
#
|
||||
# pip-compile --output-file=extra-csv.txt extra-csv.in
|
||||
#
|
||||
numpy==1.26.3
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c base.txt
|
||||
# pandas
|
||||
@ -20,5 +20,5 @@ six==1.16.0
|
||||
# via
|
||||
# -c base.txt
|
||||
# python-dateutil
|
||||
tzdata==2023.4
|
||||
tzdata==2024.1
|
||||
# via pandas
|
||||
|
||||
@ -45,7 +45,7 @@ flask==3.0.2
|
||||
# visualdl
|
||||
flask-babel==4.0.0
|
||||
# via visualdl
|
||||
fonttools==4.47.2
|
||||
fonttools==4.49.0
|
||||
# via matplotlib
|
||||
future==0.18.3
|
||||
# via bce-python-sdk
|
||||
@ -53,7 +53,7 @@ idna==3.6
|
||||
# via
|
||||
# -c base.txt
|
||||
# requests
|
||||
imageio==2.33.1
|
||||
imageio==2.34.0
|
||||
# via
|
||||
# imgaug
|
||||
# scikit-image
|
||||
@ -93,7 +93,7 @@ matplotlib==3.7.2
|
||||
# visualdl
|
||||
networkx==3.2.1
|
||||
# via scikit-image
|
||||
numpy==1.26.3
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c base.txt
|
||||
# contourpy
|
||||
@ -197,13 +197,13 @@ six==1.16.0
|
||||
# imgaug
|
||||
# python-dateutil
|
||||
# visualdl
|
||||
tifffile==2024.1.30
|
||||
tifffile==2024.2.12
|
||||
# via scikit-image
|
||||
tqdm==4.66.1
|
||||
tqdm==4.66.2
|
||||
# via
|
||||
# -c base.txt
|
||||
# unstructured-paddleocr
|
||||
tzdata==2023.4
|
||||
tzdata==2024.1
|
||||
# via pandas
|
||||
unstructured-paddleocr==2.6.1.3
|
||||
# via -r extra-paddleocr.in
|
||||
|
||||
@ -37,7 +37,7 @@ filelock==3.13.1
|
||||
# transformers
|
||||
flatbuffers==23.5.26
|
||||
# via onnxruntime
|
||||
fonttools==4.47.2
|
||||
fonttools==4.49.0
|
||||
# via matplotlib
|
||||
fsspec==2024.2.0
|
||||
# via
|
||||
@ -79,7 +79,7 @@ mpmath==1.3.0
|
||||
# via sympy
|
||||
networkx==3.2.1
|
||||
# via torch
|
||||
numpy==1.26.3
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c base.txt
|
||||
# contourpy
|
||||
@ -128,7 +128,7 @@ pdfminer-six==20221105
|
||||
# via
|
||||
# -r extra-pdf-image.in
|
||||
# pdfplumber
|
||||
pdfplumber==0.10.3
|
||||
pdfplumber==0.10.4
|
||||
# via layoutparser
|
||||
pikepdf==8.11.0
|
||||
# via -r extra-pdf-image.in
|
||||
@ -164,7 +164,7 @@ pyparsing==3.0.9
|
||||
# matplotlib
|
||||
pypdf==4.0.1
|
||||
# via -r extra-pdf-image.in
|
||||
pypdfium2==4.26.0
|
||||
pypdfium2==4.27.0
|
||||
# via pdfplumber
|
||||
pytesseract==0.3.10
|
||||
# via layoutparser
|
||||
@ -173,7 +173,7 @@ python-dateutil==2.8.2
|
||||
# -c base.txt
|
||||
# matplotlib
|
||||
# pandas
|
||||
python-multipart==0.0.7
|
||||
python-multipart==0.0.9
|
||||
# via unstructured-inference
|
||||
pytz==2024.1
|
||||
# via pandas
|
||||
@ -217,7 +217,7 @@ sympy==1.12
|
||||
# torch
|
||||
timm==0.9.12
|
||||
# via effdet
|
||||
tokenizers==0.15.1
|
||||
tokenizers==0.15.2
|
||||
# via transformers
|
||||
torch==2.2.0
|
||||
# via
|
||||
@ -231,7 +231,7 @@ torchvision==0.17.0
|
||||
# effdet
|
||||
# layoutparser
|
||||
# timm
|
||||
tqdm==4.66.1
|
||||
tqdm==4.66.2
|
||||
# via
|
||||
# -c base.txt
|
||||
# huggingface-hub
|
||||
@ -246,7 +246,7 @@ typing-extensions==4.9.0
|
||||
# iopath
|
||||
# pypdf
|
||||
# torch
|
||||
tzdata==2023.4
|
||||
tzdata==2024.1
|
||||
# via pandas
|
||||
unstructured-inference==0.7.23
|
||||
# via -r extra-pdf-image.in
|
||||
|
||||
@ -8,7 +8,7 @@ et-xmlfile==1.1.0
|
||||
# via openpyxl
|
||||
networkx==3.2.1
|
||||
# via -r extra-xlsx.in
|
||||
numpy==1.26.3
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c base.txt
|
||||
# pandas
|
||||
@ -26,7 +26,7 @@ six==1.16.0
|
||||
# via
|
||||
# -c base.txt
|
||||
# python-dateutil
|
||||
tzdata==2023.4
|
||||
tzdata==2024.1
|
||||
# via pandas
|
||||
xlrd==2.0.1
|
||||
# via -r extra-xlsx.in
|
||||
|
||||
@ -50,7 +50,7 @@ mpmath==1.3.0
|
||||
# via sympy
|
||||
networkx==3.2.1
|
||||
# via torch
|
||||
numpy==1.26.3
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c base.txt
|
||||
# transformers
|
||||
@ -87,13 +87,13 @@ six==1.16.0
|
||||
# langdetect
|
||||
sympy==1.12
|
||||
# via torch
|
||||
tokenizers==0.15.1
|
||||
tokenizers==0.15.2
|
||||
# via transformers
|
||||
torch==2.2.0
|
||||
# via
|
||||
# -c constraints.in
|
||||
# -r huggingface.in
|
||||
tqdm==4.66.1
|
||||
tqdm==4.66.2
|
||||
# via
|
||||
# -c base.txt
|
||||
# huggingface-hub
|
||||
|
||||
@ -59,7 +59,7 @@ flatbuffers==23.5.26
|
||||
# via onnxruntime
|
||||
fsspec==2024.2.0
|
||||
# via huggingface-hub
|
||||
google-auth==2.27.0
|
||||
google-auth==2.28.0
|
||||
# via kubernetes
|
||||
googleapis-common-protos==1.62.0
|
||||
# via opentelemetry-exporter-otlp-proto-grpc
|
||||
@ -94,7 +94,7 @@ monotonic==1.6
|
||||
# via posthog
|
||||
mpmath==1.3.0
|
||||
# via sympy
|
||||
numpy==1.26.3
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c ingest/../base.txt
|
||||
# chroma-hnswlib
|
||||
@ -153,7 +153,7 @@ packaging==23.2
|
||||
# build
|
||||
# huggingface-hub
|
||||
# onnxruntime
|
||||
posthog==3.4.0
|
||||
posthog==3.4.1
|
||||
# via chromadb
|
||||
protobuf==4.23.4
|
||||
# via
|
||||
@ -217,13 +217,13 @@ sympy==1.12
|
||||
# via onnxruntime
|
||||
tenacity==8.2.3
|
||||
# via chromadb
|
||||
tokenizers==0.15.1
|
||||
tokenizers==0.15.2
|
||||
# via chromadb
|
||||
tomli==2.0.1
|
||||
# via
|
||||
# build
|
||||
# pyproject-hooks
|
||||
tqdm==4.66.1
|
||||
tqdm==4.66.2
|
||||
# via
|
||||
# -c ingest/../base.txt
|
||||
# chromadb
|
||||
@ -248,7 +248,7 @@ urllib3==1.26.18
|
||||
# -c ingest/../constraints.in
|
||||
# kubernetes
|
||||
# requests
|
||||
uvicorn[standard]==0.27.0.post1
|
||||
uvicorn[standard]==0.27.1
|
||||
# via
|
||||
# chromadb
|
||||
# uvicorn
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
#
|
||||
# pip-compile --output-file=ingest/confluence.txt ingest/confluence.in
|
||||
#
|
||||
atlassian-python-api==3.41.9
|
||||
atlassian-python-api==3.41.10
|
||||
# via -r ingest/confluence.in
|
||||
beautifulsoup4==4.12.3
|
||||
# via
|
||||
|
||||
@ -15,9 +15,9 @@ charset-normalizer==3.3.2
|
||||
# via
|
||||
# -c ingest/../base.txt
|
||||
# requests
|
||||
databricks-sdk==0.18.0
|
||||
databricks-sdk==0.19.1
|
||||
# via -r ingest/databricks-volumes.in
|
||||
google-auth==2.27.0
|
||||
google-auth==2.28.0
|
||||
# via databricks-sdk
|
||||
idna==3.6
|
||||
# via
|
||||
|
||||
@ -4,11 +4,11 @@
|
||||
#
|
||||
# pip-compile --output-file=ingest/delta-table.txt ingest/delta-table.in
|
||||
#
|
||||
deltalake==0.15.2
|
||||
deltalake==0.15.3
|
||||
# via -r ingest/delta-table.in
|
||||
fsspec==2024.2.0
|
||||
# via -r ingest/delta-table.in
|
||||
numpy==1.26.3
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c ingest/../base.txt
|
||||
# pyarrow
|
||||
|
||||
@ -58,11 +58,11 @@ jsonpatch==1.33
|
||||
# via langchain-core
|
||||
jsonpointer==2.4
|
||||
# via jsonpatch
|
||||
langchain-community==0.0.17
|
||||
langchain-community==0.0.20
|
||||
# via -r ingest/embed-aws-bedrock.in
|
||||
langchain-core==0.1.18
|
||||
langchain-core==0.1.23
|
||||
# via langchain-community
|
||||
langsmith==0.0.86
|
||||
langsmith==0.0.87
|
||||
# via
|
||||
# langchain-community
|
||||
# langchain-core
|
||||
@ -78,7 +78,7 @@ mypy-extensions==1.0.0
|
||||
# via
|
||||
# -c ingest/../base.txt
|
||||
# typing-inspect
|
||||
numpy==1.26.3
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c ingest/../base.txt
|
||||
# langchain-community
|
||||
@ -114,7 +114,7 @@ six==1.16.0
|
||||
# python-dateutil
|
||||
sniffio==1.3.0
|
||||
# via anyio
|
||||
sqlalchemy==2.0.25
|
||||
sqlalchemy==2.0.27
|
||||
# via langchain-community
|
||||
tenacity==8.2.3
|
||||
# via
|
||||
|
||||
@ -72,11 +72,11 @@ jsonpatch==1.33
|
||||
# via langchain-core
|
||||
jsonpointer==2.4
|
||||
# via jsonpatch
|
||||
langchain-community==0.0.17
|
||||
langchain-community==0.0.20
|
||||
# via -r ingest/embed-huggingface.in
|
||||
langchain-core==0.1.18
|
||||
langchain-core==0.1.23
|
||||
# via langchain-community
|
||||
langsmith==0.0.86
|
||||
langsmith==0.0.87
|
||||
# via
|
||||
# langchain-community
|
||||
# langchain-core
|
||||
@ -102,7 +102,7 @@ nltk==3.8.1
|
||||
# via
|
||||
# -c ingest/../base.txt
|
||||
# sentence-transformers
|
||||
numpy==1.26.3
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c ingest/../base.txt
|
||||
# langchain-community
|
||||
@ -160,7 +160,7 @@ sentencepiece==0.1.99
|
||||
# via sentence-transformers
|
||||
sniffio==1.3.0
|
||||
# via anyio
|
||||
sqlalchemy==2.0.25
|
||||
sqlalchemy==2.0.27
|
||||
# via langchain-community
|
||||
sympy==1.12
|
||||
# via torch
|
||||
@ -168,15 +168,15 @@ tenacity==8.2.3
|
||||
# via
|
||||
# langchain-community
|
||||
# langchain-core
|
||||
threadpoolctl==3.2.0
|
||||
threadpoolctl==3.3.0
|
||||
# via scikit-learn
|
||||
tokenizers==0.15.1
|
||||
tokenizers==0.15.2
|
||||
# via transformers
|
||||
torch==2.2.0
|
||||
# via
|
||||
# -c ingest/../constraints.in
|
||||
# sentence-transformers
|
||||
tqdm==4.66.1
|
||||
tqdm==4.66.2
|
||||
# via
|
||||
# -c ingest/../base.txt
|
||||
# huggingface-hub
|
||||
|
||||
@ -43,7 +43,7 @@ frozenlist==1.4.1
|
||||
# aiosignal
|
||||
h11==0.14.0
|
||||
# via httpcore
|
||||
httpcore==1.0.2
|
||||
httpcore==1.0.3
|
||||
# via httpx
|
||||
httpx==0.26.0
|
||||
# via openai
|
||||
@ -58,11 +58,11 @@ jsonpatch==1.33
|
||||
# via langchain-core
|
||||
jsonpointer==2.4
|
||||
# via jsonpatch
|
||||
langchain-community==0.0.17
|
||||
langchain-community==0.0.20
|
||||
# via -r ingest/embed-openai.in
|
||||
langchain-core==0.1.18
|
||||
langchain-core==0.1.23
|
||||
# via langchain-community
|
||||
langsmith==0.0.86
|
||||
langsmith==0.0.87
|
||||
# via
|
||||
# langchain-community
|
||||
# langchain-core
|
||||
@ -78,11 +78,11 @@ mypy-extensions==1.0.0
|
||||
# via
|
||||
# -c ingest/../base.txt
|
||||
# typing-inspect
|
||||
numpy==1.26.3
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c ingest/../base.txt
|
||||
# langchain-community
|
||||
openai==1.11.1
|
||||
openai==1.12.0
|
||||
# via -r ingest/embed-openai.in
|
||||
packaging==23.2
|
||||
# via
|
||||
@ -115,15 +115,15 @@ sniffio==1.3.0
|
||||
# anyio
|
||||
# httpx
|
||||
# openai
|
||||
sqlalchemy==2.0.25
|
||||
sqlalchemy==2.0.27
|
||||
# via langchain-community
|
||||
tenacity==8.2.3
|
||||
# via
|
||||
# langchain-community
|
||||
# langchain-core
|
||||
tiktoken==0.5.2
|
||||
tiktoken==0.6.0
|
||||
# via -r ingest/embed-openai.in
|
||||
tqdm==4.66.1
|
||||
tqdm==4.66.2
|
||||
# via
|
||||
# -c ingest/../base.txt
|
||||
# openai
|
||||
|
||||
@ -41,11 +41,11 @@ fsspec==2024.2.0
|
||||
# gcsfs
|
||||
gcsfs==2024.2.0
|
||||
# via -r ingest/gcs.in
|
||||
google-api-core==2.16.2
|
||||
google-api-core==2.17.1
|
||||
# via
|
||||
# google-cloud-core
|
||||
# google-cloud-storage
|
||||
google-auth==2.27.0
|
||||
google-auth==2.28.0
|
||||
# via
|
||||
# gcsfs
|
||||
# google-api-core
|
||||
|
||||
@ -15,11 +15,11 @@ charset-normalizer==3.3.2
|
||||
# via
|
||||
# -c ingest/../base.txt
|
||||
# requests
|
||||
google-api-core==2.16.2
|
||||
google-api-core==2.17.1
|
||||
# via google-api-python-client
|
||||
google-api-python-client==2.116.0
|
||||
google-api-python-client==2.118.0
|
||||
# via -r ingest/google-drive.in
|
||||
google-auth==2.27.0
|
||||
google-auth==2.28.0
|
||||
# via
|
||||
# google-api-core
|
||||
# google-api-python-client
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
#
|
||||
# pip-compile --output-file=ingest/jira.txt ingest/jira.in
|
||||
#
|
||||
atlassian-python-api==3.41.9
|
||||
atlassian-python-api==3.41.10
|
||||
# via -r ingest/jira.in
|
||||
beautifulsoup4==4.12.3
|
||||
# via
|
||||
|
||||
@ -20,7 +20,7 @@ h11==0.14.0
|
||||
# via httpcore
|
||||
htmlbuilder==1.0.0
|
||||
# via -r ingest/notion.in
|
||||
httpcore==1.0.2
|
||||
httpcore==1.0.3
|
||||
# via httpx
|
||||
httpx==0.26.0
|
||||
# via notion-client
|
||||
|
||||
@ -21,7 +21,7 @@ idna==3.6
|
||||
# requests
|
||||
loguru==0.7.2
|
||||
# via pinecone-client
|
||||
numpy==1.26.3
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c ingest/../base.txt
|
||||
# pinecone-client
|
||||
@ -41,7 +41,7 @@ six==1.16.0
|
||||
# via
|
||||
# -c ingest/../base.txt
|
||||
# python-dateutil
|
||||
tqdm==4.66.1
|
||||
tqdm==4.66.2
|
||||
# via
|
||||
# -c ingest/../base.txt
|
||||
# pinecone-client
|
||||
|
||||
@ -28,7 +28,7 @@ h2==4.1.0
|
||||
# via httpx
|
||||
hpack==4.0.0
|
||||
# via h2
|
||||
httpcore==1.0.2
|
||||
httpcore==1.0.3
|
||||
# via httpx
|
||||
httpx[http2]==0.26.0
|
||||
# via
|
||||
@ -41,7 +41,7 @@ idna==3.6
|
||||
# -c ingest/../base.txt
|
||||
# anyio
|
||||
# httpx
|
||||
numpy==1.26.3
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c ingest/../base.txt
|
||||
# qdrant-client
|
||||
@ -55,7 +55,7 @@ pydantic==1.10.14
|
||||
# via
|
||||
# -c ingest/../constraints.in
|
||||
# qdrant-client
|
||||
qdrant-client==1.7.2
|
||||
qdrant-client==1.7.3
|
||||
# via -r ingest/qdrant.in
|
||||
sniffio==1.3.0
|
||||
# via
|
||||
|
||||
@ -68,7 +68,7 @@ six==1.16.0
|
||||
# python-dateutil
|
||||
time-machine==2.13.0
|
||||
# via pendulum
|
||||
tzdata==2023.4
|
||||
tzdata==2024.1
|
||||
# via pendulum
|
||||
urllib3==1.26.18
|
||||
# via
|
||||
|
||||
@ -4,5 +4,5 @@
|
||||
#
|
||||
# pip-compile --output-file=ingest/slack.txt ingest/slack.in
|
||||
#
|
||||
slack-sdk==3.26.2
|
||||
slack-sdk==3.27.0
|
||||
# via -r ingest/slack.in
|
||||
|
||||
@ -8,7 +8,7 @@ appdirs==1.4.4
|
||||
# via label-studio-tools
|
||||
autoflake==2.2.1
|
||||
# via -r test.in
|
||||
black==24.1.1
|
||||
black==24.2.0
|
||||
# via -r test.in
|
||||
certifi==2024.2.2
|
||||
# via
|
||||
@ -111,7 +111,7 @@ requests==2.31.0
|
||||
# via
|
||||
# -c base.txt
|
||||
# label-studio-sdk
|
||||
ruff==0.2.0
|
||||
ruff==0.2.1
|
||||
# via -r test.in
|
||||
six==1.16.0
|
||||
# via
|
||||
|
||||
@ -5,13 +5,13 @@ set -e
|
||||
SCRIPT_DIR=$(dirname "$(realpath "$0")")
|
||||
cd "$SCRIPT_DIR"/.. || exit 1
|
||||
|
||||
EVAL_NAME="$1"
|
||||
|
||||
# List all structured outputs to use in this evaluation
|
||||
OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR}
|
||||
OUTPUT_ROOT=${2:-${OUTPUT_ROOT:-$SCRIPT_DIR}}
|
||||
OUTPUT_DIR=$OUTPUT_ROOT/structured-output-eval
|
||||
mkdir -p "$OUTPUT_DIR"
|
||||
|
||||
EVAL_NAME="$1"
|
||||
|
||||
if [ "$EVAL_NAME" == "text-extraction" ]; then
|
||||
METRIC_STRATEGY="measure-text-extraction-accuracy-command"
|
||||
elif [ "$EVAL_NAME" == "element-type" ]; then
|
||||
|
||||
@ -11,6 +11,8 @@ fi
|
||||
touch "$SKIPPED_FILES_LOG"
|
||||
cd "$SCRIPT_DIR"/.. || exit 1
|
||||
|
||||
EVAL_OUTPUT_ROOT=${EVAL_OUTPUT_ROOT:-$SCRIPT_DIR}
|
||||
|
||||
# NOTE(crag): sets number of tesseract threads to 1 which may help with more reproducible outputs
|
||||
export OMP_THREAD_LIMIT=1
|
||||
|
||||
@ -121,6 +123,6 @@ all_eval=(
|
||||
for eval in "${all_eval[@]}"; do
|
||||
CURRENT_TEST="evaluation-metrics.sh $eval"
|
||||
echo "--------- RUNNING SCRIPT evaluation-metrics.sh $eval ---------"
|
||||
./test_unstructured_ingest/evaluation-metrics.sh "$eval"
|
||||
./test_unstructured_ingest/evaluation-metrics.sh "$eval" "$EVAL_OUTPUT_ROOT"
|
||||
echo "--------- FINISHED SCRIPT evaluation-metrics.sh $eval ---------"
|
||||
done
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user