mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-10-07 14:27:19 +00:00
add requirements files to ingest cache hash key (#3641)
This PR adds the base and extras requirement files to the ingest cache's hash key. - The current workflow uses only the ingest requirements to generate the hash key for the GitHub Actions cache. - Sometimes only the base or extra requirements (like extra-pdf.txt) are updated, but not any ingest requirements -> this means the ingest tests would fetch a cache with outdated non-ingest dependencies. - When we generate a new ingest cache, we actually first check the base and extra requirements and generate a base env before layering the ingest dependencies on top. - This PR allows the ingest step to recognize changes to non-ingest dependencies and trigger new cache generation when either the ingest or the base/extra requirement files change. This PR also bumps the setup-python action version in the cache actions; it also adds installation of `virtualenv` to the ingest cache action to avoid errors like https://github.com/Unstructured-IO/unstructured/actions/runs/10905551870/job/30265057515?pr=3641#step:3:111
This commit is contained in:
parent
2d3cd45b23
commit
22998354db
2
.github/actions/base-cache/action.yml
vendored
2
.github/actions/base-cache/action.yml
vendored
@ -22,7 +22,7 @@ runs:
|
|||||||
lookup-only: ${{ inputs.check-only }}
|
lookup-only: ${{ inputs.check-only }}
|
||||||
- name: Set up Python ${{ inputs.python-version }}
|
- name: Set up Python ${{ inputs.python-version }}
|
||||||
if: steps.virtualenv-cache-restore.outputs.cache-hit != 'true'
|
if: steps.virtualenv-cache-restore.outputs.cache-hit != 'true'
|
||||||
uses: actions/setup-python@v4
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
- name: Setup virtual environment (no cache hit)
|
- name: Setup virtual environment (no cache hit)
|
||||||
|
21
.github/actions/base-ingest-cache/action.yml
vendored
21
.github/actions/base-ingest-cache/action.yml
vendored
@ -18,24 +18,27 @@ runs:
|
|||||||
path: |
|
path: |
|
||||||
.venv
|
.venv
|
||||||
nltk_data
|
nltk_data
|
||||||
key: unstructured-ingest-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('requirements/ingest/*.txt') }}
|
key: unstructured-ingest-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('requirements/ingest/*.txt') }}-${{ hashFiles('requirements/*.txt') }}
|
||||||
lookup-only: ${{ inputs.check-only }}
|
lookup-only: ${{ inputs.check-only }}
|
||||||
- name: Restore base virtual environment
|
|
||||||
if: steps.ingest-virtualenv-cache-restore.outputs.cache-hit != 'true'
|
|
||||||
uses: ./.github/actions/base-cache
|
|
||||||
with:
|
|
||||||
python-version: ${{ inputs.python-version }}
|
|
||||||
- name: Set up Python ${{ inputs.python-version }}
|
- name: Set up Python ${{ inputs.python-version }}
|
||||||
if: steps.ingest-virtualenv-cache-restore.outputs.cache-hit != 'true'
|
if: steps.ingest-virtualenv-cache-restore.outputs.cache-hit != 'true'
|
||||||
uses: actions/setup-python@v4
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
- name: Setup virtual environment (no cache hit)
|
- name: Setup virtual environment (no cache hit)
|
||||||
if: steps.ingest-virtualenv-cache-restore.outputs.cache-hit != 'true'
|
if: steps.ingest-virtualenv-cache-restore.outputs.cache-hit != 'true'
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
python${{ inputs.python-version }} -m venv .venv
|
python${{ inputs.python-version }} -m pip install --upgrade virtualenv
|
||||||
|
if [ ! -d ".venv" ]; then
|
||||||
|
python${{ inputs.python-version }} -m venv .venv
|
||||||
|
fi
|
||||||
source .venv/bin/activate
|
source .venv/bin/activate
|
||||||
|
if [ "${{ inputs.python-version == '3.12' }}" == "true" ]; then
|
||||||
|
python -m ensurepip --upgrade
|
||||||
|
python -m pip install --upgrade setuptools
|
||||||
|
fi
|
||||||
|
make install-ci
|
||||||
make install-all-ingest
|
make install-all-ingest
|
||||||
- name: Save Ingest Cache
|
- name: Save Ingest Cache
|
||||||
if: steps.ingest-virtualenv-cache-restore.outputs.cache-hit != 'true'
|
if: steps.ingest-virtualenv-cache-restore.outputs.cache-hit != 'true'
|
||||||
@ -45,5 +48,5 @@ runs:
|
|||||||
path: |
|
path: |
|
||||||
.venv
|
.venv
|
||||||
nltk_data
|
nltk_data
|
||||||
key: unstructured-ingest-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('requirements/ingest/*.txt') }}
|
key: unstructured-ingest-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('requirements/ingest/*.txt') }}-${{ hashFiles('requirements/*.txt') }}
|
||||||
|
|
||||||
|
26
.github/workflows/ci.yml
vendored
26
.github/workflows/ci.yml
vendored
@ -96,6 +96,7 @@ jobs:
|
|||||||
- name: Lint
|
- name: Lint
|
||||||
run: |
|
run: |
|
||||||
source .venv/bin/activate
|
source .venv/bin/activate
|
||||||
|
make install-ci
|
||||||
make check
|
make check
|
||||||
|
|
||||||
shellcheck:
|
shellcheck:
|
||||||
@ -174,9 +175,10 @@ jobs:
|
|||||||
- name: Test
|
- name: Test
|
||||||
env:
|
env:
|
||||||
UNS_API_KEY: ${{ secrets.UNS_API_KEY }}
|
UNS_API_KEY: ${{ secrets.UNS_API_KEY }}
|
||||||
|
PYTHON: python${{ matrix.python-version }}
|
||||||
|
NLTK_DATA: ${{ github.workspace }}/nltk_data
|
||||||
run: |
|
run: |
|
||||||
source .venv/bin/activate
|
source .venv/bin/activate
|
||||||
make install-nltk-models
|
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install -y poppler-utils
|
sudo apt-get install -y poppler-utils
|
||||||
make install-pandoc install-test
|
make install-pandoc install-test
|
||||||
@ -207,6 +209,7 @@ jobs:
|
|||||||
UNS_API_KEY: ${{ secrets.UNS_API_KEY }}
|
UNS_API_KEY: ${{ secrets.UNS_API_KEY }}
|
||||||
run: |
|
run: |
|
||||||
source .venv/bin/activate
|
source .venv/bin/activate
|
||||||
|
make install-ci
|
||||||
make install-nltk-models
|
make install-nltk-models
|
||||||
make test-no-extras CI=true
|
make test-no-extras CI=true
|
||||||
|
|
||||||
@ -267,6 +270,7 @@ jobs:
|
|||||||
- uses: ./.github/actions/base-ingest-cache
|
- uses: ./.github/actions/base-ingest-cache
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
|
check-only: 'true'
|
||||||
|
|
||||||
test_ingest_unit:
|
test_ingest_unit:
|
||||||
strategy:
|
strategy:
|
||||||
@ -289,10 +293,14 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
- name: Test Ingest (unit)
|
- name: Test Ingest (unit)
|
||||||
|
env:
|
||||||
|
NLTK_DATA: ${{ github.workspace }}/nltk_data
|
||||||
|
PYTHON: python${{ matrix.python-version }}
|
||||||
run: |
|
run: |
|
||||||
source .venv/bin/activate
|
source .venv/bin/activate
|
||||||
make install-nltk-models
|
make install-ci
|
||||||
PYTHONPATH=. pytest test_unstructured_ingest/unit
|
make install-all-ingest
|
||||||
|
PYTHONPATH=. ${PYTHON} -m pytest test_unstructured_ingest/unit
|
||||||
|
|
||||||
|
|
||||||
test_ingest_src:
|
test_ingest_src:
|
||||||
@ -365,9 +373,12 @@ jobs:
|
|||||||
MXBAI_API_KEY: ${{secrets.MXBAI_API_KEY}}
|
MXBAI_API_KEY: ${{secrets.MXBAI_API_KEY}}
|
||||||
OCR_AGENT: "unstructured.partition.utils.ocr_models.tesseract_ocr.OCRAgentTesseract"
|
OCR_AGENT: "unstructured.partition.utils.ocr_models.tesseract_ocr.OCRAgentTesseract"
|
||||||
CI: "true"
|
CI: "true"
|
||||||
|
NLTK_DATA: ${{ github.workspace }}/nltk_data
|
||||||
|
PYTHON: python${{ matrix.python-version }}
|
||||||
run: |
|
run: |
|
||||||
source .venv/bin/activate
|
source .venv/bin/activate
|
||||||
make install-nltk-models
|
make install-ci
|
||||||
|
make install-all-ingest
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install -y libmagic-dev poppler-utils libreoffice
|
sudo apt-get install -y libmagic-dev poppler-utils libreoffice
|
||||||
make install-pandoc
|
make install-pandoc
|
||||||
@ -435,9 +446,12 @@ jobs:
|
|||||||
DATABRICKS_CATALOG: ${{secrets.DATABRICKS_CATALOG}}
|
DATABRICKS_CATALOG: ${{secrets.DATABRICKS_CATALOG}}
|
||||||
OCR_AGENT: "unstructured.partition.utils.ocr_models.tesseract_ocr.OCRAgentTesseract"
|
OCR_AGENT: "unstructured.partition.utils.ocr_models.tesseract_ocr.OCRAgentTesseract"
|
||||||
CI: "true"
|
CI: "true"
|
||||||
|
NLTK_DATA: ${{ github.workspace }}/nltk_data
|
||||||
|
PYTHON: python${{ matrix.python-version }}
|
||||||
run: |
|
run: |
|
||||||
source .venv/bin/activate
|
source .venv/bin/activate
|
||||||
make install-nltk-models
|
make install-ci
|
||||||
|
make install-all-ingest
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install -y libmagic-dev poppler-utils libreoffice
|
sudo apt-get install -y libmagic-dev poppler-utils libreoffice
|
||||||
make install-pandoc
|
make install-pandoc
|
||||||
@ -469,6 +483,8 @@ jobs:
|
|||||||
- name: Validate --help
|
- name: Validate --help
|
||||||
run: |
|
run: |
|
||||||
source .venv/bin/activate
|
source .venv/bin/activate
|
||||||
|
make install-ci
|
||||||
|
make install-all-ingest
|
||||||
./test_unstructured_ingest/test-help.sh
|
./test_unstructured_ingest/test-help.sh
|
||||||
|
|
||||||
|
|
||||||
|
155
Makefile
155
Makefile
@ -2,6 +2,7 @@ PACKAGE_NAME := unstructured
|
|||||||
PIP_VERSION := 23.2.1
|
PIP_VERSION := 23.2.1
|
||||||
CURRENT_DIR := $(shell pwd)
|
CURRENT_DIR := $(shell pwd)
|
||||||
ARCH := $(shell uname -m)
|
ARCH := $(shell uname -m)
|
||||||
|
PYTHON ?= python3
|
||||||
|
|
||||||
.PHONY: help
|
.PHONY: help
|
||||||
help: Makefile
|
help: Makefile
|
||||||
@ -28,177 +29,177 @@ install-base-ci: install-base-pip-packages install-nltk-models install-test inst
|
|||||||
|
|
||||||
.PHONY: install-base-pip-packages
|
.PHONY: install-base-pip-packages
|
||||||
install-base-pip-packages:
|
install-base-pip-packages:
|
||||||
python3 -m pip install pip==${PIP_VERSION}
|
${PYTHON} -m pip install pip==${PIP_VERSION}
|
||||||
python3 -m pip install -r requirements/base.txt
|
${PYTHON} -m pip install -r requirements/base.txt
|
||||||
|
|
||||||
.PHONY: install-huggingface
|
.PHONY: install-huggingface
|
||||||
install-huggingface:
|
install-huggingface:
|
||||||
python3 -m pip install pip==${PIP_VERSION}
|
${PYTHON} -m pip install pip==${PIP_VERSION}
|
||||||
python3 -m pip install -r requirements/huggingface.txt
|
${PYTHON} -m pip install -r requirements/huggingface.txt
|
||||||
|
|
||||||
.PHONY: install-nltk-models
|
.PHONY: install-nltk-models
|
||||||
install-nltk-models:
|
install-nltk-models:
|
||||||
python3 -c "from unstructured.nlp.tokenize import download_nltk_packages; download_nltk_packages()"
|
${PYTHON} -c "from unstructured.nlp.tokenize import download_nltk_packages; download_nltk_packages()"
|
||||||
|
|
||||||
.PHONY: install-test
|
.PHONY: install-test
|
||||||
install-test:
|
install-test:
|
||||||
python3 -m pip install -r requirements/test.txt
|
${PYTHON} -m pip install -r requirements/test.txt
|
||||||
# NOTE(yao) - CI seem to always install tesseract to test so it would make sense to also require
|
# NOTE(yao) - CI seem to always install tesseract to test so it would make sense to also require
|
||||||
# pytesseract installation into the virtual env for testing
|
# pytesseract installation into the virtual env for testing
|
||||||
python3 -m pip install unstructured_pytesseract
|
${PYTHON} -m pip install unstructured_pytesseract
|
||||||
# python3 -m pip install argilla==1.28.0 -c requirements/deps/constraints.txt
|
# ${PYTHON} -m pip install argilla==1.28.0 -c requirements/deps/constraints.txt
|
||||||
# NOTE(robinson) - Installing weaviate-client separately here because the requests
|
# NOTE(robinson) - Installing weaviate-client separately here because the requests
|
||||||
# version conflicts with label_studio_sdk
|
# version conflicts with label_studio_sdk
|
||||||
python3 -m pip install weaviate-client -c requirements/deps/constraints.txt
|
${PYTHON} -m pip install weaviate-client -c requirements/deps/constraints.txt
|
||||||
|
|
||||||
.PHONY: install-dev
|
.PHONY: install-dev
|
||||||
install-dev:
|
install-dev:
|
||||||
python3 -m pip install -r requirements/dev.txt
|
${PYTHON} -m pip install -r requirements/dev.txt
|
||||||
|
|
||||||
.PHONY: install-build
|
.PHONY: install-build
|
||||||
install-build:
|
install-build:
|
||||||
python3 -m pip install -r requirements/build.txt
|
${PYTHON} -m pip install -r requirements/build.txt
|
||||||
|
|
||||||
.PHONY: install-csv
|
.PHONY: install-csv
|
||||||
install-csv:
|
install-csv:
|
||||||
python3 -m pip install -r requirements/extra-csv.txt
|
${PYTHON} -m pip install -r requirements/extra-csv.txt
|
||||||
|
|
||||||
.PHONY: install-docx
|
.PHONY: install-docx
|
||||||
install-docx:
|
install-docx:
|
||||||
python3 -m pip install -r requirements/extra-docx.txt
|
${PYTHON} -m pip install -r requirements/extra-docx.txt
|
||||||
|
|
||||||
.PHONY: install-epub
|
.PHONY: install-epub
|
||||||
install-epub:
|
install-epub:
|
||||||
python3 -m pip install -r requirements/extra-epub.txt
|
${PYTHON} -m pip install -r requirements/extra-epub.txt
|
||||||
|
|
||||||
.PHONY: install-odt
|
.PHONY: install-odt
|
||||||
install-odt:
|
install-odt:
|
||||||
python3 -m pip install -r requirements/extra-odt.txt
|
${PYTHON} -m pip install -r requirements/extra-odt.txt
|
||||||
|
|
||||||
.PHONY: install-pypandoc
|
.PHONY: install-pypandoc
|
||||||
install-pypandoc:
|
install-pypandoc:
|
||||||
python3 -m pip install -r requirements/extra-pandoc.txt
|
${PYTHON} -m pip install -r requirements/extra-pandoc.txt
|
||||||
|
|
||||||
.PHONY: install-markdown
|
.PHONY: install-markdown
|
||||||
install-markdown:
|
install-markdown:
|
||||||
python3 -m pip install -r requirements/extra-markdown.txt
|
${PYTHON} -m pip install -r requirements/extra-markdown.txt
|
||||||
|
|
||||||
.PHONY: install-pdf-image
|
.PHONY: install-pdf-image
|
||||||
install-pdf-image:
|
install-pdf-image:
|
||||||
python3 -m pip install -r requirements/extra-pdf-image.txt
|
${PYTHON} -m pip install -r requirements/extra-pdf-image.txt
|
||||||
|
|
||||||
.PHONY: install-pptx
|
.PHONY: install-pptx
|
||||||
install-pptx:
|
install-pptx:
|
||||||
python3 -m pip install -r requirements/extra-pptx.txt
|
${PYTHON} -m pip install -r requirements/extra-pptx.txt
|
||||||
|
|
||||||
.PHONY: install-xlsx
|
.PHONY: install-xlsx
|
||||||
install-xlsx:
|
install-xlsx:
|
||||||
python3 -m pip install -r requirements/extra-xlsx.txt
|
${PYTHON} -m pip install -r requirements/extra-xlsx.txt
|
||||||
|
|
||||||
.PHONY: install-all-docs
|
.PHONY: install-all-docs
|
||||||
install-all-docs: install-base install-csv install-docx install-epub install-odt install-pypandoc install-markdown install-pdf-image install-pptx install-xlsx
|
install-all-docs: install-base install-csv install-docx install-epub install-odt install-pypandoc install-markdown install-pdf-image install-pptx install-xlsx
|
||||||
|
|
||||||
.PHONY: install-all-ingest
|
.PHONY: install-all-ingest
|
||||||
install-all-ingest:
|
install-all-ingest:
|
||||||
find requirements/ingest -type f -name "*.txt" -exec python3 -m pip install -r '{}' ';'
|
find requirements/ingest -type f -name "*.txt" -exec ${PYTHON} -m pip install -r '{}' ';'
|
||||||
|
|
||||||
|
|
||||||
.PHONY: install-ingest-google-drive
|
.PHONY: install-ingest-google-drive
|
||||||
install-ingest-google-drive:
|
install-ingest-google-drive:
|
||||||
python3 -m pip install -r requirements/ingest/google-drive.txt
|
${PYTHON} -m pip install -r requirements/ingest/google-drive.txt
|
||||||
|
|
||||||
## install-ingest-s3: install requirements for the s3 connector
|
## install-ingest-s3: install requirements for the s3 connector
|
||||||
.PHONY: install-ingest-s3
|
.PHONY: install-ingest-s3
|
||||||
install-ingest-s3:
|
install-ingest-s3:
|
||||||
python3 -m pip install -r requirements/ingest/s3.txt
|
${PYTHON} -m pip install -r requirements/ingest/s3.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-gcs
|
.PHONY: install-ingest-gcs
|
||||||
install-ingest-gcs:
|
install-ingest-gcs:
|
||||||
python3 -m pip install -r requirements/ingest/gcs.txt
|
${PYTHON} -m pip install -r requirements/ingest/gcs.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-dropbox
|
.PHONY: install-ingest-dropbox
|
||||||
install-ingest-dropbox:
|
install-ingest-dropbox:
|
||||||
python3 -m pip install -r requirements/ingest/dropbox.txt
|
${PYTHON} -m pip install -r requirements/ingest/dropbox.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-azure
|
.PHONY: install-ingest-azure
|
||||||
install-ingest-azure:
|
install-ingest-azure:
|
||||||
python3 -m pip install -r requirements/ingest/azure.txt
|
${PYTHON} -m pip install -r requirements/ingest/azure.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-box
|
.PHONY: install-ingest-box
|
||||||
install-ingest-box:
|
install-ingest-box:
|
||||||
python3 -m pip install -r requirements/ingest/box.txt
|
${PYTHON} -m pip install -r requirements/ingest/box.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-delta-table
|
.PHONY: install-ingest-delta-table
|
||||||
install-ingest-delta-table:
|
install-ingest-delta-table:
|
||||||
python3 -m pip install -r requirements/ingest/delta-table.txt
|
${PYTHON} -m pip install -r requirements/ingest/delta-table.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-discord
|
.PHONY: install-ingest-discord
|
||||||
install-ingest-discord:
|
install-ingest-discord:
|
||||||
pip install -r requirements/ingest/discord.txt
|
${PYTHON} -m pip install -r requirements/ingest/discord.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-github
|
.PHONY: install-ingest-github
|
||||||
install-ingest-github:
|
install-ingest-github:
|
||||||
python3 -m pip install -r requirements/ingest/github.txt
|
${PYTHON} -m pip install -r requirements/ingest/github.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-biomed
|
.PHONY: install-ingest-biomed
|
||||||
install-ingest-biomed:
|
install-ingest-biomed:
|
||||||
python3 -m pip install -r requirements/ingest/biomed.txt
|
${PYTHON} -m pip install -r requirements/ingest/biomed.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-gitlab
|
.PHONY: install-ingest-gitlab
|
||||||
install-ingest-gitlab:
|
install-ingest-gitlab:
|
||||||
python3 -m pip install -r requirements/ingest/gitlab.txt
|
${PYTHON} -m pip install -r requirements/ingest/gitlab.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-onedrive
|
.PHONY: install-ingest-onedrive
|
||||||
install-ingest-onedrive:
|
install-ingest-onedrive:
|
||||||
python3 -m pip install -r requirements/ingest/onedrive.txt
|
${PYTHON} -m pip install -r requirements/ingest/onedrive.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-outlook
|
.PHONY: install-ingest-outlook
|
||||||
install-ingest-outlook:
|
install-ingest-outlook:
|
||||||
python3 -m pip install -r requirements/ingest/outlook.txt
|
${PYTHON} -m pip install -r requirements/ingest/outlook.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-reddit
|
.PHONY: install-ingest-reddit
|
||||||
install-ingest-reddit:
|
install-ingest-reddit:
|
||||||
python3 -m pip install -r requirements/ingest/reddit.txt
|
${PYTHON} -m pip install -r requirements/ingest/reddit.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-slack
|
.PHONY: install-ingest-slack
|
||||||
install-ingest-slack:
|
install-ingest-slack:
|
||||||
pip install -r requirements/ingest/slack.txt
|
${PYTHON} -m pip install -r requirements/ingest/slack.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-kafka
|
.PHONY: install-ingest-kafka
|
||||||
install-ingest-kafka:
|
install-ingest-kafka:
|
||||||
python3 -m pip install -r requirements/ingest/kafka.txt
|
${PYTHON} -m pip install -r requirements/ingest/kafka.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-wikipedia
|
.PHONY: install-ingest-wikipedia
|
||||||
install-ingest-wikipedia:
|
install-ingest-wikipedia:
|
||||||
python3 -m pip install -r requirements/ingest/wikipedia.txt
|
${PYTHON} -m pip install -r requirements/ingest/wikipedia.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-elasticsearch
|
.PHONY: install-ingest-elasticsearch
|
||||||
install-ingest-elasticsearch:
|
install-ingest-elasticsearch:
|
||||||
python3 -m pip install -r requirements/ingest/elasticsearch.txt
|
${PYTHON} -m pip install -r requirements/ingest/elasticsearch.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-opensearch
|
.PHONY: install-ingest-opensearch
|
||||||
install-ingest-opensearch:
|
install-ingest-opensearch:
|
||||||
python3 -m pip install -r requirements/ingest/opensearch.txt
|
${PYTHON} -m pip install -r requirements/ingest/opensearch.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-confluence
|
.PHONY: install-ingest-confluence
|
||||||
install-ingest-confluence:
|
install-ingest-confluence:
|
||||||
python3 -m pip install -r requirements/ingest/confluence.txt
|
${PYTHON} -m pip install -r requirements/ingest/confluence.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-airtable
|
.PHONY: install-ingest-airtable
|
||||||
install-ingest-airtable:
|
install-ingest-airtable:
|
||||||
python3 -m pip install -r requirements/ingest/airtable.txt
|
${PYTHON} -m pip install -r requirements/ingest/airtable.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-sharepoint
|
.PHONY: install-ingest-sharepoint
|
||||||
install-ingest-sharepoint:
|
install-ingest-sharepoint:
|
||||||
python3 -m pip install -r requirements/ingest/sharepoint.txt
|
${PYTHON} -m pip install -r requirements/ingest/sharepoint.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-singlestore
|
.PHONY: install-ingest-singlestore
|
||||||
install-ingest-singlestore:
|
install-ingest-singlestore:
|
||||||
python3 -m pip install -r requirements/ingest/singlestore.txt
|
${PYTHON} -m pip install -r requirements/ingest/singlestore.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-weaviate
|
.PHONY: install-ingest-weaviate
|
||||||
install-ingest-weaviate:
|
install-ingest-weaviate:
|
||||||
python3 -m pip install -r requirements/ingest/weaviate.txt
|
${PYTHON} -m pip install -r requirements/ingest/weaviate.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-local
|
.PHONY: install-ingest-local
|
||||||
install-ingest-local:
|
install-ingest-local:
|
||||||
@ -206,63 +207,63 @@ install-ingest-local:
|
|||||||
|
|
||||||
.PHONY: install-ingest-notion
|
.PHONY: install-ingest-notion
|
||||||
install-ingest-notion:
|
install-ingest-notion:
|
||||||
python3 -m pip install -r requirements/ingest/notion.txt
|
${PYTHON} -m pip install -r requirements/ingest/notion.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-salesforce
|
.PHONY: install-ingest-salesforce
|
||||||
install-ingest-salesforce:
|
install-ingest-salesforce:
|
||||||
python3 -m pip install -r requirements/ingest/salesforce.txt
|
${PYTHON} -m pip install -r requirements/ingest/salesforce.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-jira
|
.PHONY: install-ingest-jira
|
||||||
install-ingest-jira:
|
install-ingest-jira:
|
||||||
python3 -m pip install -r requirements/ingest/jira.txt
|
${PYTHON} -m pip install -r requirements/ingest/jira.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-hubspot
|
.PHONY: install-ingest-hubspot
|
||||||
install-ingest-hubspot:
|
install-ingest-hubspot:
|
||||||
python3 -m pip install -r requirements/ingest/hubspot.txt
|
${PYTHON} -m pip install -r requirements/ingest/hubspot.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-sftp
|
.PHONY: install-ingest-sftp
|
||||||
install-ingest-sftp:
|
install-ingest-sftp:
|
||||||
python3 -m pip install -r requirements/ingest/sftp.txt
|
${PYTHON} -m pip install -r requirements/ingest/sftp.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-pinecone
|
.PHONY: install-ingest-pinecone
|
||||||
install-ingest-pinecone:
|
install-ingest-pinecone:
|
||||||
python3 -m pip install -r requirements/ingest/pinecone.txt
|
${PYTHON} -m pip install -r requirements/ingest/pinecone.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-qdrant
|
.PHONY: install-ingest-qdrant
|
||||||
install-ingest-qdrant:
|
install-ingest-qdrant:
|
||||||
python3 -m pip install -r requirements/ingest/qdrant.txt
|
${PYTHON} -m pip install -r requirements/ingest/qdrant.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-chroma
|
.PHONY: install-ingest-chroma
|
||||||
install-ingest-chroma:
|
install-ingest-chroma:
|
||||||
python3 -m pip install -r requirements/ingest/chroma.txt
|
${PYTHON} -m pip install -r requirements/ingest/chroma.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-postgres
|
.PHONY: install-ingest-postgres
|
||||||
install-ingest-postgres:
|
install-ingest-postgres:
|
||||||
python3 -m pip install -r requirements/ingest/postgres.txt
|
${PYTHON} -m pip install -r requirements/ingest/postgres.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-mongodb
|
.PHONY: install-ingest-mongodb
|
||||||
install-ingest-mongodb:
|
install-ingest-mongodb:
|
||||||
python3 -m pip install -r requirements/ingest/mongodb.txt
|
${PYTHON} -m pip install -r requirements/ingest/mongodb.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-databricks-volumes
|
.PHONY: install-ingest-databricks-volumes
|
||||||
install-ingest-databricks-volumes:
|
install-ingest-databricks-volumes:
|
||||||
python3 -m pip install -r requirements/ingest/databricks-volumes.txt
|
${PYTHON} -m pip install -r requirements/ingest/databricks-volumes.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-astradb
|
.PHONY: install-ingest-astradb
|
||||||
install-ingest-astradb:
|
install-ingest-astradb:
|
||||||
python3 -m pip install -r requirements/ingest/astradb.txt
|
${PYTHON} -m pip install -r requirements/ingest/astradb.txt
|
||||||
|
|
||||||
.PHONY: install-ingest-clarifai
|
.PHONY: install-ingest-clarifai
|
||||||
install-ingest-clarifai:
|
install-ingest-clarifai:
|
||||||
python3 -m pip install -r requirements/ingest/clarifai.txt
|
${PYTHON} -m pip install -r requirements/ingest/clarifai.txt
|
||||||
|
|
||||||
.PHONY: install-embed-huggingface
|
.PHONY: install-embed-huggingface
|
||||||
install-embed-huggingface:
|
install-embed-huggingface:
|
||||||
python3 -m pip install -r requirements/ingest/embed-huggingface.txt
|
${PYTHON} -m pip install -r requirements/ingest/embed-huggingface.txt
|
||||||
|
|
||||||
.PHONY: install-unstructured-inference
|
.PHONY: install-unstructured-inference
|
||||||
install-unstructured-inference:
|
install-unstructured-inference:
|
||||||
python3 -m pip install -r requirements/ingest/local-inference.txt
|
${PYTHON} -m pip install -r requirements/ingest/local-inference.txt
|
||||||
|
|
||||||
## install-local-inference: installs requirements for local inference
|
## install-local-inference: installs requirements for local inference
|
||||||
.PHONY: install-local-inference
|
.PHONY: install-local-inference
|
||||||
@ -281,12 +282,12 @@ pip-compile:
|
|||||||
.PHONY: install-project-local
|
.PHONY: install-project-local
|
||||||
install-project-local: install
|
install-project-local: install
|
||||||
# MAYBE TODO: fail if already exists?
|
# MAYBE TODO: fail if already exists?
|
||||||
pip install -e .
|
${PYTHON} -m pip install -e .
|
||||||
|
|
||||||
## uninstall-project-local: uninstall unstructured from your local python environment
|
## uninstall-project-local: uninstall unstructured from your local python environment
|
||||||
.PHONY: uninstall-project-local
|
.PHONY: uninstall-project-local
|
||||||
uninstall-project-local:
|
uninstall-project-local:
|
||||||
pip uninstall ${PACKAGE_NAME}
|
${PYTHON} -m pip uninstall ${PACKAGE_NAME}
|
||||||
|
|
||||||
#################
|
#################
|
||||||
# Test and Lint #
|
# Test and Lint #
|
||||||
@ -299,12 +300,12 @@ export UNSTRUCTURED_INCLUDE_DEBUG_METADATA ?= false
|
|||||||
.PHONY: test
|
.PHONY: test
|
||||||
test:
|
test:
|
||||||
PYTHONPATH=. CI=$(CI) \
|
PYTHONPATH=. CI=$(CI) \
|
||||||
UNSTRUCTURED_INCLUDE_DEBUG_METADATA=$(UNSTRUCTURED_INCLUDE_DEBUG_METADATA) pytest test_${PACKAGE_NAME} -m "not chipper" --cov=${PACKAGE_NAME} --cov-report term-missing --durations=40
|
UNSTRUCTURED_INCLUDE_DEBUG_METADATA=$(UNSTRUCTURED_INCLUDE_DEBUG_METADATA) ${PYTHON} -m pytest test_${PACKAGE_NAME} -m "not chipper" --cov=${PACKAGE_NAME} --cov-report term-missing --durations=40
|
||||||
|
|
||||||
.PHONY: test-chipper
|
.PHONY: test-chipper
|
||||||
test-chipper:
|
test-chipper:
|
||||||
PYTHONPATH=. CI=$(CI) \
|
PYTHONPATH=. CI=$(CI) \
|
||||||
UNSTRUCTURED_INCLUDE_DEBUG_METADATA=$(UNSTRUCTURED_INCLUDE_DEBUG_METADATA) pytest test_${PACKAGE_NAME} -m "chipper" --cov=${PACKAGE_NAME} --cov-report term-missing --durations=40
|
UNSTRUCTURED_INCLUDE_DEBUG_METADATA=$(UNSTRUCTURED_INCLUDE_DEBUG_METADATA) ${PYTHON} -m pytest test_${PACKAGE_NAME} -m "chipper" --cov=${PACKAGE_NAME} --cov-report term-missing --durations=40
|
||||||
|
|
||||||
.PHONY: test-unstructured-api-unit
|
.PHONY: test-unstructured-api-unit
|
||||||
test-unstructured-api-unit:
|
test-unstructured-api-unit:
|
||||||
@ -313,7 +314,7 @@ test-unstructured-api-unit:
|
|||||||
.PHONY: test-no-extras
|
.PHONY: test-no-extras
|
||||||
test-no-extras:
|
test-no-extras:
|
||||||
PYTHONPATH=. CI=$(CI) \
|
PYTHONPATH=. CI=$(CI) \
|
||||||
UNSTRUCTURED_INCLUDE_DEBUG_METADATA=$(UNSTRUCTURED_INCLUDE_DEBUG_METADATA) pytest \
|
UNSTRUCTURED_INCLUDE_DEBUG_METADATA=$(UNSTRUCTURED_INCLUDE_DEBUG_METADATA) ${PYTHON} -m pytest \
|
||||||
test_${PACKAGE_NAME}/partition/test_text.py \
|
test_${PACKAGE_NAME}/partition/test_text.py \
|
||||||
test_${PACKAGE_NAME}/partition/test_email.py \
|
test_${PACKAGE_NAME}/partition/test_email.py \
|
||||||
test_${PACKAGE_NAME}/partition/html/test_partition.py \
|
test_${PACKAGE_NAME}/partition/html/test_partition.py \
|
||||||
@ -321,48 +322,48 @@ test-no-extras:
|
|||||||
|
|
||||||
.PHONY: test-extra-csv
|
.PHONY: test-extra-csv
|
||||||
test-extra-csv:
|
test-extra-csv:
|
||||||
PYTHONPATH=. CI=$(CI) pytest \
|
PYTHONPATH=. CI=$(CI) ${PYTHON} -m pytest \
|
||||||
test_unstructured/partition/test_csv.py \
|
test_unstructured/partition/test_csv.py \
|
||||||
test_unstructured/partition/test_tsv.py
|
test_unstructured/partition/test_tsv.py
|
||||||
|
|
||||||
.PHONY: test-extra-docx
|
.PHONY: test-extra-docx
|
||||||
test-extra-docx:
|
test-extra-docx:
|
||||||
PYTHONPATH=. CI=$(CI) pytest \
|
PYTHONPATH=. CI=$(CI) ${PYTHON} -m pytest \
|
||||||
test_unstructured/partition/test_doc.py \
|
test_unstructured/partition/test_doc.py \
|
||||||
test_unstructured/partition/test_docx.py
|
test_unstructured/partition/test_docx.py
|
||||||
|
|
||||||
.PHONY: test-extra-epub
|
.PHONY: test-extra-epub
|
||||||
test-extra-epub:
|
test-extra-epub:
|
||||||
PYTHONPATH=. CI=$(CI) pytest test_unstructured/partition/test_epub.py
|
PYTHONPATH=. CI=$(CI) ${PYTHON} -m pytest test_unstructured/partition/test_epub.py
|
||||||
|
|
||||||
.PHONY: test-extra-markdown
|
.PHONY: test-extra-markdown
|
||||||
test-extra-markdown:
|
test-extra-markdown:
|
||||||
PYTHONPATH=. CI=$(CI) pytest test_unstructured/partition/test_md.py
|
PYTHONPATH=. CI=$(CI) ${PYTHON} -m pytest test_unstructured/partition/test_md.py
|
||||||
|
|
||||||
.PHONY: test-extra-odt
|
.PHONY: test-extra-odt
|
||||||
test-extra-odt:
|
test-extra-odt:
|
||||||
PYTHONPATH=. CI=$(CI) pytest test_unstructured/partition/test_odt.py
|
PYTHONPATH=. CI=$(CI) ${PYTHON} -m pytest test_unstructured/partition/test_odt.py
|
||||||
|
|
||||||
.PHONY: test-extra-pdf-image
|
.PHONY: test-extra-pdf-image
|
||||||
test-extra-pdf-image:
|
test-extra-pdf-image:
|
||||||
PYTHONPATH=. CI=$(CI) pytest test_unstructured/partition/pdf_image
|
PYTHONPATH=. CI=$(CI) ${PYTHON} -m pytest test_unstructured/partition/pdf_image
|
||||||
|
|
||||||
.PHONY: test-extra-pptx
|
.PHONY: test-extra-pptx
|
||||||
test-extra-pptx:
|
test-extra-pptx:
|
||||||
PYTHONPATH=. CI=$(CI) pytest \
|
PYTHONPATH=. CI=$(CI) ${PYTHON} -m pytest \
|
||||||
test_unstructured/partition/test_ppt.py \
|
test_unstructured/partition/test_ppt.py \
|
||||||
test_unstructured/partition/test_pptx.py
|
test_unstructured/partition/test_pptx.py
|
||||||
|
|
||||||
.PHONY: test-extra-pypandoc
|
.PHONY: test-extra-pypandoc
|
||||||
test-extra-pypandoc:
|
test-extra-pypandoc:
|
||||||
PYTHONPATH=. CI=$(CI) pytest \
|
PYTHONPATH=. CI=$(CI) ${PYTHON} -m pytest \
|
||||||
test_unstructured/partition/test_org.py \
|
test_unstructured/partition/test_org.py \
|
||||||
test_unstructured/partition/test_rst.py \
|
test_unstructured/partition/test_rst.py \
|
||||||
test_unstructured/partition/test_rtf.py
|
test_unstructured/partition/test_rtf.py
|
||||||
|
|
||||||
.PHONY: test-extra-xlsx
|
.PHONY: test-extra-xlsx
|
||||||
test-extra-xlsx:
|
test-extra-xlsx:
|
||||||
PYTHONPATH=. CI=$(CI) pytest test_unstructured/partition/test_xlsx.py
|
PYTHONPATH=. CI=$(CI) ${PYTHON} -m pytest test_unstructured/partition/test_xlsx.py
|
||||||
|
|
||||||
## check: runs linters (includes tests)
|
## check: runs linters (includes tests)
|
||||||
.PHONY: check
|
.PHONY: check
|
||||||
@ -374,11 +375,11 @@ check-shfmt:
|
|||||||
|
|
||||||
.PHONY: check-black
|
.PHONY: check-black
|
||||||
check-black:
|
check-black:
|
||||||
black . --check --line-length=100
|
${PYTHON} -m black . --check --line-length=100
|
||||||
|
|
||||||
.PHONY: check-flake8
|
.PHONY: check-flake8
|
||||||
check-flake8:
|
check-flake8:
|
||||||
flake8 .
|
${PYTHON} -m flake8 .
|
||||||
|
|
||||||
.PHONY: check-licenses
|
.PHONY: check-licenses
|
||||||
check-licenses:
|
check-licenses:
|
||||||
@ -434,7 +435,7 @@ version-sync:
|
|||||||
|
|
||||||
.PHONY: check-coverage
|
.PHONY: check-coverage
|
||||||
check-coverage:
|
check-coverage:
|
||||||
coverage report --fail-under=90
|
${PYTHON} -m coverage report --fail-under=90
|
||||||
|
|
||||||
## check-deps: check consistency of dependencies
|
## check-deps: check consistency of dependencies
|
||||||
.PHONY: check-deps
|
.PHONY: check-deps
|
||||||
|
@ -4,6 +4,7 @@ set -u -o pipefail
|
|||||||
|
|
||||||
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
|
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
|
||||||
SKIPPED_FILES_LOG=$SCRIPT_DIR/skipped-files.txt
|
SKIPPED_FILES_LOG=$SCRIPT_DIR/skipped-files.txt
|
||||||
|
PYTHON=${PYTHON:-python}
|
||||||
# If the file already exists, reset it
|
# If the file already exists, reset it
|
||||||
if [ -f "$SKIPPED_FILES_LOG" ]; then
|
if [ -f "$SKIPPED_FILES_LOG" ]; then
|
||||||
rm "$SKIPPED_FILES_LOG"
|
rm "$SKIPPED_FILES_LOG"
|
||||||
@ -92,7 +93,7 @@ function print_last_run() {
|
|||||||
|
|
||||||
trap print_last_run EXIT
|
trap print_last_run EXIT
|
||||||
|
|
||||||
python_version=$(python --version 2>&1)
|
python_version=$(${PYTHON} --version 2>&1)
|
||||||
|
|
||||||
tests_to_ignore=(
|
tests_to_ignore=(
|
||||||
'notion.sh'
|
'notion.sh'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user