feat: modify test-ingest-src and evaluation-metrics to allow EXPORT_DIR (#2551)

The current `test-ingest-src.sh` and `evaluation-metrics.sh` do not allow
passing an `EXPORT_DIR` (`OUTPUT_ROOT` in `evaluation-metrics.sh`). Output is
currently saved in the current working directory
(`unstructured/test_unstructured_ingest`). When running the eval from
`core-product`, all outputs are therefore saved at
`core-product/upstream-unstructured/test_unstructured_ingest`, which is
undesirable.

This PR modifies two scripts to support a configurable output directory:
1. `test-ingest-src.sh` - assigns `EVAL_OUTPUT_ROOT` to the value set
in the environment if it exists, or to the script's directory
(`$SCRIPT_DIR`) if not, then passes it to `evaluation-metrics.sh`.
2. `evaluation-metrics.sh` - uses the parameter passed by
`test-ingest-src.sh` if it exists, otherwise the `OUTPUT_ROOT` value set in
the environment if it exists, or the script's directory (`$SCRIPT_DIR`) if not.

(Note: I also added a parameter to `evaluation-metrics.sh` because it makes
sense to let a standalone run specify its own export directory.)

This PR should work in sync with another PR under `core-product`; I will
add a link to it here later.

**To test:**

Run the script below, changing `$SCRIPT_DIR` as needed, to see the result.

```
export OVERWRITE_FIXTURES=true

./upstream-unstructured/test_unstructured_ingest/src/s3.sh

SCRIPT_DIR=$(dirname "$(realpath "$0")")
bash -x ./upstream-unstructured/test_unstructured_ingest/evaluation-metrics.sh text-extraction "$SCRIPT_DIR"
```

----

This PR also updates the requirements by running `make pip-compile`, since
the `click` module was not found.
This commit is contained in:
Klaijan 2024-02-17 12:21:15 +07:00 committed by GitHub
parent ad561b7939
commit d06936d35a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
25 changed files with 96 additions and 94 deletions

View File

@ -52,7 +52,7 @@ mypy-extensions==1.0.0
# unstructured-client # unstructured-client
nltk==3.8.1 nltk==3.8.1
# via -r base.in # via -r base.in
numpy==1.26.3 numpy==1.26.4
# via -r base.in # via -r base.in
packaging==23.2 packaging==23.2
# via # via
@ -60,7 +60,7 @@ packaging==23.2
# unstructured-client # unstructured-client
python-dateutil==2.8.2 python-dateutil==2.8.2
# via unstructured-client # via unstructured-client
python-iso639==2024.1.2 python-iso639==2024.2.7
# via -r base.in # via -r base.in
python-magic==0.4.27 python-magic==0.4.27
# via -r base.in # via -r base.in
@ -81,7 +81,7 @@ soupsieve==2.5
# via beautifulsoup4 # via beautifulsoup4
tabulate==0.9.0 tabulate==0.9.0
# via -r base.in # via -r base.in
tqdm==4.66.1 tqdm==4.66.2
# via nltk # via nltk
typing-extensions==4.9.0 typing-extensions==4.9.0
# via # via
@ -93,7 +93,7 @@ typing-inspect==0.9.0
# dataclasses-json # dataclasses-json
# dataclasses-json-speakeasy # dataclasses-json-speakeasy
# unstructured-client # unstructured-client
unstructured-client==0.17.0 unstructured-client==0.18.0
# via -r base.in # via -r base.in
urllib3==1.26.18 urllib3==1.26.18
# via # via

View File

@ -9,7 +9,7 @@ anyio==3.7.1
# -c constraints.in # -c constraints.in
# httpx # httpx
# jupyter-server # jupyter-server
appnope==0.1.3 appnope==0.1.4
# via # via
# ipykernel # ipykernel
# ipython # ipython
@ -65,7 +65,7 @@ comm==0.2.1
# via # via
# ipykernel # ipykernel
# ipywidgets # ipywidgets
debugpy==1.8.0 debugpy==1.8.1
# via ipykernel # via ipykernel
decorator==5.1.1 decorator==5.1.1
# via ipython # via ipython
@ -87,11 +87,11 @@ fqdn==1.5.1
# via jsonschema # via jsonschema
h11==0.14.0 h11==0.14.0
# via httpcore # via httpcore
httpcore==1.0.2 httpcore==1.0.3
# via httpx # via httpx
httpx==0.26.0 httpx==0.26.0
# via jupyterlab # via jupyterlab
identify==2.5.33 identify==2.5.34
# via pre-commit # via pre-commit
idna==3.6 idna==3.6
# via # via
@ -109,7 +109,7 @@ importlib-metadata==7.0.1
# jupyterlab # jupyterlab
# jupyterlab-server # jupyterlab-server
# nbconvert # nbconvert
ipykernel==6.29.0 ipykernel==6.29.2
# via # via
# jupyter # jupyter
# jupyter-console # jupyter-console
@ -122,7 +122,7 @@ ipython==8.12.3
# ipykernel # ipykernel
# ipywidgets # ipywidgets
# jupyter-console # jupyter-console
ipywidgets==8.1.1 ipywidgets==8.1.2
# via jupyter # via jupyter
isoduration==20.11.0 isoduration==20.11.0
# via jsonschema # via jsonschema
@ -181,15 +181,15 @@ jupyter-server==2.12.5
# notebook-shim # notebook-shim
jupyter-server-terminals==0.5.2 jupyter-server-terminals==0.5.2
# via jupyter-server # via jupyter-server
jupyterlab==4.1.0 jupyterlab==4.1.1
# via notebook # via notebook
jupyterlab-pygments==0.3.0 jupyterlab-pygments==0.3.0
# via nbconvert # via nbconvert
jupyterlab-server==2.25.2 jupyterlab-server==2.25.3
# via # via
# jupyterlab # jupyterlab
# notebook # notebook
jupyterlab-widgets==3.0.9 jupyterlab-widgets==3.0.10
# via ipywidgets # via ipywidgets
markupsafe==2.1.5 markupsafe==2.1.5
# via # via
@ -203,7 +203,7 @@ mistune==3.0.2
# via nbconvert # via nbconvert
nbclient==0.9.0 nbclient==0.9.0
# via nbconvert # via nbconvert
nbconvert==7.14.2 nbconvert==7.16.0
# via # via
# jupyter # jupyter
# jupyter-server # jupyter-server
@ -216,9 +216,9 @@ nest-asyncio==1.6.0
# via ipykernel # via ipykernel
nodeenv==1.8.0 nodeenv==1.8.0
# via pre-commit # via pre-commit
notebook==7.0.7 notebook==7.1.0
# via jupyter # via jupyter
notebook-shim==0.2.3 notebook-shim==0.2.4
# via # via
# jupyterlab # jupyterlab
# notebook # notebook
@ -252,9 +252,9 @@ platformdirs==3.10.0
# -c test.txt # -c test.txt
# jupyter-core # jupyter-core
# virtualenv # virtualenv
pre-commit==3.6.0 pre-commit==3.6.1
# via -r dev.in # via -r dev.in
prometheus-client==0.19.0 prometheus-client==0.20.0
# via jupyter-server # via jupyter-server
prompt-toolkit==3.0.43 prompt-toolkit==3.0.43
# via # via
@ -320,7 +320,7 @@ rfc3986-validator==0.1.1
# via # via
# jsonschema # jsonschema
# jupyter-events # jupyter-events
rpds-py==0.17.1 rpds-py==0.18.0
# via # via
# jsonschema # jsonschema
# referencing # referencing
@ -414,7 +414,7 @@ wheel==0.42.0
# via # via
# -c constraints.in # -c constraints.in
# pip-tools # pip-tools
widgetsnbextension==4.0.9 widgetsnbextension==4.0.10
# via ipywidgets # via ipywidgets
zipp==3.17.0 zipp==3.17.0
# via importlib-metadata # via importlib-metadata

View File

@ -4,7 +4,7 @@
# #
# pip-compile --output-file=extra-csv.txt extra-csv.in # pip-compile --output-file=extra-csv.txt extra-csv.in
# #
numpy==1.26.3 numpy==1.26.4
# via # via
# -c base.txt # -c base.txt
# pandas # pandas
@ -20,5 +20,5 @@ six==1.16.0
# via # via
# -c base.txt # -c base.txt
# python-dateutil # python-dateutil
tzdata==2023.4 tzdata==2024.1
# via pandas # via pandas

View File

@ -45,7 +45,7 @@ flask==3.0.2
# visualdl # visualdl
flask-babel==4.0.0 flask-babel==4.0.0
# via visualdl # via visualdl
fonttools==4.47.2 fonttools==4.49.0
# via matplotlib # via matplotlib
future==0.18.3 future==0.18.3
# via bce-python-sdk # via bce-python-sdk
@ -53,7 +53,7 @@ idna==3.6
# via # via
# -c base.txt # -c base.txt
# requests # requests
imageio==2.33.1 imageio==2.34.0
# via # via
# imgaug # imgaug
# scikit-image # scikit-image
@ -93,7 +93,7 @@ matplotlib==3.7.2
# visualdl # visualdl
networkx==3.2.1 networkx==3.2.1
# via scikit-image # via scikit-image
numpy==1.26.3 numpy==1.26.4
# via # via
# -c base.txt # -c base.txt
# contourpy # contourpy
@ -197,13 +197,13 @@ six==1.16.0
# imgaug # imgaug
# python-dateutil # python-dateutil
# visualdl # visualdl
tifffile==2024.1.30 tifffile==2024.2.12
# via scikit-image # via scikit-image
tqdm==4.66.1 tqdm==4.66.2
# via # via
# -c base.txt # -c base.txt
# unstructured-paddleocr # unstructured-paddleocr
tzdata==2023.4 tzdata==2024.1
# via pandas # via pandas
unstructured-paddleocr==2.6.1.3 unstructured-paddleocr==2.6.1.3
# via -r extra-paddleocr.in # via -r extra-paddleocr.in

View File

@ -37,7 +37,7 @@ filelock==3.13.1
# transformers # transformers
flatbuffers==23.5.26 flatbuffers==23.5.26
# via onnxruntime # via onnxruntime
fonttools==4.47.2 fonttools==4.49.0
# via matplotlib # via matplotlib
fsspec==2024.2.0 fsspec==2024.2.0
# via # via
@ -79,7 +79,7 @@ mpmath==1.3.0
# via sympy # via sympy
networkx==3.2.1 networkx==3.2.1
# via torch # via torch
numpy==1.26.3 numpy==1.26.4
# via # via
# -c base.txt # -c base.txt
# contourpy # contourpy
@ -128,7 +128,7 @@ pdfminer-six==20221105
# via # via
# -r extra-pdf-image.in # -r extra-pdf-image.in
# pdfplumber # pdfplumber
pdfplumber==0.10.3 pdfplumber==0.10.4
# via layoutparser # via layoutparser
pikepdf==8.11.0 pikepdf==8.11.0
# via -r extra-pdf-image.in # via -r extra-pdf-image.in
@ -164,7 +164,7 @@ pyparsing==3.0.9
# matplotlib # matplotlib
pypdf==4.0.1 pypdf==4.0.1
# via -r extra-pdf-image.in # via -r extra-pdf-image.in
pypdfium2==4.26.0 pypdfium2==4.27.0
# via pdfplumber # via pdfplumber
pytesseract==0.3.10 pytesseract==0.3.10
# via layoutparser # via layoutparser
@ -173,7 +173,7 @@ python-dateutil==2.8.2
# -c base.txt # -c base.txt
# matplotlib # matplotlib
# pandas # pandas
python-multipart==0.0.7 python-multipart==0.0.9
# via unstructured-inference # via unstructured-inference
pytz==2024.1 pytz==2024.1
# via pandas # via pandas
@ -217,7 +217,7 @@ sympy==1.12
# torch # torch
timm==0.9.12 timm==0.9.12
# via effdet # via effdet
tokenizers==0.15.1 tokenizers==0.15.2
# via transformers # via transformers
torch==2.2.0 torch==2.2.0
# via # via
@ -231,7 +231,7 @@ torchvision==0.17.0
# effdet # effdet
# layoutparser # layoutparser
# timm # timm
tqdm==4.66.1 tqdm==4.66.2
# via # via
# -c base.txt # -c base.txt
# huggingface-hub # huggingface-hub
@ -246,7 +246,7 @@ typing-extensions==4.9.0
# iopath # iopath
# pypdf # pypdf
# torch # torch
tzdata==2023.4 tzdata==2024.1
# via pandas # via pandas
unstructured-inference==0.7.23 unstructured-inference==0.7.23
# via -r extra-pdf-image.in # via -r extra-pdf-image.in

View File

@ -8,7 +8,7 @@ et-xmlfile==1.1.0
# via openpyxl # via openpyxl
networkx==3.2.1 networkx==3.2.1
# via -r extra-xlsx.in # via -r extra-xlsx.in
numpy==1.26.3 numpy==1.26.4
# via # via
# -c base.txt # -c base.txt
# pandas # pandas
@ -26,7 +26,7 @@ six==1.16.0
# via # via
# -c base.txt # -c base.txt
# python-dateutil # python-dateutil
tzdata==2023.4 tzdata==2024.1
# via pandas # via pandas
xlrd==2.0.1 xlrd==2.0.1
# via -r extra-xlsx.in # via -r extra-xlsx.in

View File

@ -50,7 +50,7 @@ mpmath==1.3.0
# via sympy # via sympy
networkx==3.2.1 networkx==3.2.1
# via torch # via torch
numpy==1.26.3 numpy==1.26.4
# via # via
# -c base.txt # -c base.txt
# transformers # transformers
@ -87,13 +87,13 @@ six==1.16.0
# langdetect # langdetect
sympy==1.12 sympy==1.12
# via torch # via torch
tokenizers==0.15.1 tokenizers==0.15.2
# via transformers # via transformers
torch==2.2.0 torch==2.2.0
# via # via
# -c constraints.in # -c constraints.in
# -r huggingface.in # -r huggingface.in
tqdm==4.66.1 tqdm==4.66.2
# via # via
# -c base.txt # -c base.txt
# huggingface-hub # huggingface-hub

View File

@ -59,7 +59,7 @@ flatbuffers==23.5.26
# via onnxruntime # via onnxruntime
fsspec==2024.2.0 fsspec==2024.2.0
# via huggingface-hub # via huggingface-hub
google-auth==2.27.0 google-auth==2.28.0
# via kubernetes # via kubernetes
googleapis-common-protos==1.62.0 googleapis-common-protos==1.62.0
# via opentelemetry-exporter-otlp-proto-grpc # via opentelemetry-exporter-otlp-proto-grpc
@ -94,7 +94,7 @@ monotonic==1.6
# via posthog # via posthog
mpmath==1.3.0 mpmath==1.3.0
# via sympy # via sympy
numpy==1.26.3 numpy==1.26.4
# via # via
# -c ingest/../base.txt # -c ingest/../base.txt
# chroma-hnswlib # chroma-hnswlib
@ -153,7 +153,7 @@ packaging==23.2
# build # build
# huggingface-hub # huggingface-hub
# onnxruntime # onnxruntime
posthog==3.4.0 posthog==3.4.1
# via chromadb # via chromadb
protobuf==4.23.4 protobuf==4.23.4
# via # via
@ -217,13 +217,13 @@ sympy==1.12
# via onnxruntime # via onnxruntime
tenacity==8.2.3 tenacity==8.2.3
# via chromadb # via chromadb
tokenizers==0.15.1 tokenizers==0.15.2
# via chromadb # via chromadb
tomli==2.0.1 tomli==2.0.1
# via # via
# build # build
# pyproject-hooks # pyproject-hooks
tqdm==4.66.1 tqdm==4.66.2
# via # via
# -c ingest/../base.txt # -c ingest/../base.txt
# chromadb # chromadb
@ -248,7 +248,7 @@ urllib3==1.26.18
# -c ingest/../constraints.in # -c ingest/../constraints.in
# kubernetes # kubernetes
# requests # requests
uvicorn[standard]==0.27.0.post1 uvicorn[standard]==0.27.1
# via # via
# chromadb # chromadb
# uvicorn # uvicorn

View File

@ -4,7 +4,7 @@
# #
# pip-compile --output-file=ingest/confluence.txt ingest/confluence.in # pip-compile --output-file=ingest/confluence.txt ingest/confluence.in
# #
atlassian-python-api==3.41.9 atlassian-python-api==3.41.10
# via -r ingest/confluence.in # via -r ingest/confluence.in
beautifulsoup4==4.12.3 beautifulsoup4==4.12.3
# via # via

View File

@ -15,9 +15,9 @@ charset-normalizer==3.3.2
# via # via
# -c ingest/../base.txt # -c ingest/../base.txt
# requests # requests
databricks-sdk==0.18.0 databricks-sdk==0.19.1
# via -r ingest/databricks-volumes.in # via -r ingest/databricks-volumes.in
google-auth==2.27.0 google-auth==2.28.0
# via databricks-sdk # via databricks-sdk
idna==3.6 idna==3.6
# via # via

View File

@ -4,11 +4,11 @@
# #
# pip-compile --output-file=ingest/delta-table.txt ingest/delta-table.in # pip-compile --output-file=ingest/delta-table.txt ingest/delta-table.in
# #
deltalake==0.15.2 deltalake==0.15.3
# via -r ingest/delta-table.in # via -r ingest/delta-table.in
fsspec==2024.2.0 fsspec==2024.2.0
# via -r ingest/delta-table.in # via -r ingest/delta-table.in
numpy==1.26.3 numpy==1.26.4
# via # via
# -c ingest/../base.txt # -c ingest/../base.txt
# pyarrow # pyarrow

View File

@ -58,11 +58,11 @@ jsonpatch==1.33
# via langchain-core # via langchain-core
jsonpointer==2.4 jsonpointer==2.4
# via jsonpatch # via jsonpatch
langchain-community==0.0.17 langchain-community==0.0.20
# via -r ingest/embed-aws-bedrock.in # via -r ingest/embed-aws-bedrock.in
langchain-core==0.1.18 langchain-core==0.1.23
# via langchain-community # via langchain-community
langsmith==0.0.86 langsmith==0.0.87
# via # via
# langchain-community # langchain-community
# langchain-core # langchain-core
@ -78,7 +78,7 @@ mypy-extensions==1.0.0
# via # via
# -c ingest/../base.txt # -c ingest/../base.txt
# typing-inspect # typing-inspect
numpy==1.26.3 numpy==1.26.4
# via # via
# -c ingest/../base.txt # -c ingest/../base.txt
# langchain-community # langchain-community
@ -114,7 +114,7 @@ six==1.16.0
# python-dateutil # python-dateutil
sniffio==1.3.0 sniffio==1.3.0
# via anyio # via anyio
sqlalchemy==2.0.25 sqlalchemy==2.0.27
# via langchain-community # via langchain-community
tenacity==8.2.3 tenacity==8.2.3
# via # via

View File

@ -72,11 +72,11 @@ jsonpatch==1.33
# via langchain-core # via langchain-core
jsonpointer==2.4 jsonpointer==2.4
# via jsonpatch # via jsonpatch
langchain-community==0.0.17 langchain-community==0.0.20
# via -r ingest/embed-huggingface.in # via -r ingest/embed-huggingface.in
langchain-core==0.1.18 langchain-core==0.1.23
# via langchain-community # via langchain-community
langsmith==0.0.86 langsmith==0.0.87
# via # via
# langchain-community # langchain-community
# langchain-core # langchain-core
@ -102,7 +102,7 @@ nltk==3.8.1
# via # via
# -c ingest/../base.txt # -c ingest/../base.txt
# sentence-transformers # sentence-transformers
numpy==1.26.3 numpy==1.26.4
# via # via
# -c ingest/../base.txt # -c ingest/../base.txt
# langchain-community # langchain-community
@ -160,7 +160,7 @@ sentencepiece==0.1.99
# via sentence-transformers # via sentence-transformers
sniffio==1.3.0 sniffio==1.3.0
# via anyio # via anyio
sqlalchemy==2.0.25 sqlalchemy==2.0.27
# via langchain-community # via langchain-community
sympy==1.12 sympy==1.12
# via torch # via torch
@ -168,15 +168,15 @@ tenacity==8.2.3
# via # via
# langchain-community # langchain-community
# langchain-core # langchain-core
threadpoolctl==3.2.0 threadpoolctl==3.3.0
# via scikit-learn # via scikit-learn
tokenizers==0.15.1 tokenizers==0.15.2
# via transformers # via transformers
torch==2.2.0 torch==2.2.0
# via # via
# -c ingest/../constraints.in # -c ingest/../constraints.in
# sentence-transformers # sentence-transformers
tqdm==4.66.1 tqdm==4.66.2
# via # via
# -c ingest/../base.txt # -c ingest/../base.txt
# huggingface-hub # huggingface-hub

View File

@ -43,7 +43,7 @@ frozenlist==1.4.1
# aiosignal # aiosignal
h11==0.14.0 h11==0.14.0
# via httpcore # via httpcore
httpcore==1.0.2 httpcore==1.0.3
# via httpx # via httpx
httpx==0.26.0 httpx==0.26.0
# via openai # via openai
@ -58,11 +58,11 @@ jsonpatch==1.33
# via langchain-core # via langchain-core
jsonpointer==2.4 jsonpointer==2.4
# via jsonpatch # via jsonpatch
langchain-community==0.0.17 langchain-community==0.0.20
# via -r ingest/embed-openai.in # via -r ingest/embed-openai.in
langchain-core==0.1.18 langchain-core==0.1.23
# via langchain-community # via langchain-community
langsmith==0.0.86 langsmith==0.0.87
# via # via
# langchain-community # langchain-community
# langchain-core # langchain-core
@ -78,11 +78,11 @@ mypy-extensions==1.0.0
# via # via
# -c ingest/../base.txt # -c ingest/../base.txt
# typing-inspect # typing-inspect
numpy==1.26.3 numpy==1.26.4
# via # via
# -c ingest/../base.txt # -c ingest/../base.txt
# langchain-community # langchain-community
openai==1.11.1 openai==1.12.0
# via -r ingest/embed-openai.in # via -r ingest/embed-openai.in
packaging==23.2 packaging==23.2
# via # via
@ -115,15 +115,15 @@ sniffio==1.3.0
# anyio # anyio
# httpx # httpx
# openai # openai
sqlalchemy==2.0.25 sqlalchemy==2.0.27
# via langchain-community # via langchain-community
tenacity==8.2.3 tenacity==8.2.3
# via # via
# langchain-community # langchain-community
# langchain-core # langchain-core
tiktoken==0.5.2 tiktoken==0.6.0
# via -r ingest/embed-openai.in # via -r ingest/embed-openai.in
tqdm==4.66.1 tqdm==4.66.2
# via # via
# -c ingest/../base.txt # -c ingest/../base.txt
# openai # openai

View File

@ -41,11 +41,11 @@ fsspec==2024.2.0
# gcsfs # gcsfs
gcsfs==2024.2.0 gcsfs==2024.2.0
# via -r ingest/gcs.in # via -r ingest/gcs.in
google-api-core==2.16.2 google-api-core==2.17.1
# via # via
# google-cloud-core # google-cloud-core
# google-cloud-storage # google-cloud-storage
google-auth==2.27.0 google-auth==2.28.0
# via # via
# gcsfs # gcsfs
# google-api-core # google-api-core

View File

@ -15,11 +15,11 @@ charset-normalizer==3.3.2
# via # via
# -c ingest/../base.txt # -c ingest/../base.txt
# requests # requests
google-api-core==2.16.2 google-api-core==2.17.1
# via google-api-python-client # via google-api-python-client
google-api-python-client==2.116.0 google-api-python-client==2.118.0
# via -r ingest/google-drive.in # via -r ingest/google-drive.in
google-auth==2.27.0 google-auth==2.28.0
# via # via
# google-api-core # google-api-core
# google-api-python-client # google-api-python-client

View File

@ -4,7 +4,7 @@
# #
# pip-compile --output-file=ingest/jira.txt ingest/jira.in # pip-compile --output-file=ingest/jira.txt ingest/jira.in
# #
atlassian-python-api==3.41.9 atlassian-python-api==3.41.10
# via -r ingest/jira.in # via -r ingest/jira.in
beautifulsoup4==4.12.3 beautifulsoup4==4.12.3
# via # via

View File

@ -20,7 +20,7 @@ h11==0.14.0
# via httpcore # via httpcore
htmlbuilder==1.0.0 htmlbuilder==1.0.0
# via -r ingest/notion.in # via -r ingest/notion.in
httpcore==1.0.2 httpcore==1.0.3
# via httpx # via httpx
httpx==0.26.0 httpx==0.26.0
# via notion-client # via notion-client

View File

@ -21,7 +21,7 @@ idna==3.6
# requests # requests
loguru==0.7.2 loguru==0.7.2
# via pinecone-client # via pinecone-client
numpy==1.26.3 numpy==1.26.4
# via # via
# -c ingest/../base.txt # -c ingest/../base.txt
# pinecone-client # pinecone-client
@ -41,7 +41,7 @@ six==1.16.0
# via # via
# -c ingest/../base.txt # -c ingest/../base.txt
# python-dateutil # python-dateutil
tqdm==4.66.1 tqdm==4.66.2
# via # via
# -c ingest/../base.txt # -c ingest/../base.txt
# pinecone-client # pinecone-client

View File

@ -28,7 +28,7 @@ h2==4.1.0
# via httpx # via httpx
hpack==4.0.0 hpack==4.0.0
# via h2 # via h2
httpcore==1.0.2 httpcore==1.0.3
# via httpx # via httpx
httpx[http2]==0.26.0 httpx[http2]==0.26.0
# via # via
@ -41,7 +41,7 @@ idna==3.6
# -c ingest/../base.txt # -c ingest/../base.txt
# anyio # anyio
# httpx # httpx
numpy==1.26.3 numpy==1.26.4
# via # via
# -c ingest/../base.txt # -c ingest/../base.txt
# qdrant-client # qdrant-client
@ -55,7 +55,7 @@ pydantic==1.10.14
# via # via
# -c ingest/../constraints.in # -c ingest/../constraints.in
# qdrant-client # qdrant-client
qdrant-client==1.7.2 qdrant-client==1.7.3
# via -r ingest/qdrant.in # via -r ingest/qdrant.in
sniffio==1.3.0 sniffio==1.3.0
# via # via

View File

@ -68,7 +68,7 @@ six==1.16.0
# python-dateutil # python-dateutil
time-machine==2.13.0 time-machine==2.13.0
# via pendulum # via pendulum
tzdata==2023.4 tzdata==2024.1
# via pendulum # via pendulum
urllib3==1.26.18 urllib3==1.26.18
# via # via

View File

@ -4,5 +4,5 @@
# #
# pip-compile --output-file=ingest/slack.txt ingest/slack.in # pip-compile --output-file=ingest/slack.txt ingest/slack.in
# #
slack-sdk==3.26.2 slack-sdk==3.27.0
# via -r ingest/slack.in # via -r ingest/slack.in

View File

@ -8,7 +8,7 @@ appdirs==1.4.4
# via label-studio-tools # via label-studio-tools
autoflake==2.2.1 autoflake==2.2.1
# via -r test.in # via -r test.in
black==24.1.1 black==24.2.0
# via -r test.in # via -r test.in
certifi==2024.2.2 certifi==2024.2.2
# via # via
@ -111,7 +111,7 @@ requests==2.31.0
# via # via
# -c base.txt # -c base.txt
# label-studio-sdk # label-studio-sdk
ruff==0.2.0 ruff==0.2.1
# via -r test.in # via -r test.in
six==1.16.0 six==1.16.0
# via # via

View File

@ -5,13 +5,13 @@ set -e
SCRIPT_DIR=$(dirname "$(realpath "$0")") SCRIPT_DIR=$(dirname "$(realpath "$0")")
cd "$SCRIPT_DIR"/.. || exit 1 cd "$SCRIPT_DIR"/.. || exit 1
EVAL_NAME="$1"
# List all structured outputs to use in this evaluation # List all structured outputs to use in this evaluation
OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} OUTPUT_ROOT=${2:-${OUTPUT_ROOT:-$SCRIPT_DIR}}
OUTPUT_DIR=$OUTPUT_ROOT/structured-output-eval OUTPUT_DIR=$OUTPUT_ROOT/structured-output-eval
mkdir -p "$OUTPUT_DIR" mkdir -p "$OUTPUT_DIR"
EVAL_NAME="$1"
if [ "$EVAL_NAME" == "text-extraction" ]; then if [ "$EVAL_NAME" == "text-extraction" ]; then
METRIC_STRATEGY="measure-text-extraction-accuracy-command" METRIC_STRATEGY="measure-text-extraction-accuracy-command"
elif [ "$EVAL_NAME" == "element-type" ]; then elif [ "$EVAL_NAME" == "element-type" ]; then

View File

@ -11,6 +11,8 @@ fi
touch "$SKIPPED_FILES_LOG" touch "$SKIPPED_FILES_LOG"
cd "$SCRIPT_DIR"/.. || exit 1 cd "$SCRIPT_DIR"/.. || exit 1
EVAL_OUTPUT_ROOT=${EVAL_OUTPUT_ROOT:-$SCRIPT_DIR}
# NOTE(crag): sets number of tesseract threads to 1 which may help with more reproducible outputs # NOTE(crag): sets number of tesseract threads to 1 which may help with more reproducible outputs
export OMP_THREAD_LIMIT=1 export OMP_THREAD_LIMIT=1
@ -121,6 +123,6 @@ all_eval=(
for eval in "${all_eval[@]}"; do for eval in "${all_eval[@]}"; do
CURRENT_TEST="evaluation-metrics.sh $eval" CURRENT_TEST="evaluation-metrics.sh $eval"
echo "--------- RUNNING SCRIPT evaluation-metrics.sh $eval ---------" echo "--------- RUNNING SCRIPT evaluation-metrics.sh $eval ---------"
./test_unstructured_ingest/evaluation-metrics.sh "$eval" ./test_unstructured_ingest/evaluation-metrics.sh "$eval" "$EVAL_OUTPUT_ROOT"
echo "--------- FINISHED SCRIPT evaluation-metrics.sh $eval ---------" echo "--------- FINISHED SCRIPT evaluation-metrics.sh $eval ---------"
done done