From df1ba399056f6a598f111a8206f15a44c7196bf9 Mon Sep 17 00:00:00 2001
From: Yuming Long <63475068+yuming-long@users.noreply.github.com>
Date: Wed, 26 Jul 2023 16:55:35 -0400
Subject: [PATCH] Chore: add uns api repo unittests (#954)

* stage
* git clone
* ci ignore markdown file
* make install
* use env instead
* remove md
* add script
* wrong env value
* add note
* maybe don't rm
* no cd../

---------

Co-authored-by: cragwolfe
---
Review note (this section is ignored when the patch is applied):
scripts/test-unstructured-api-unit.sh now captures the test exit status with
`|| test_exit_status=$?`, so `set -e` no longer aborts the script before the
failure branch can clean up the cloned directory. The line count of that file
is unchanged; the blob id on its `index` line was not recomputed.

 .github/workflows/ci.yml              | 47 +++++++++++++++++++++++++++
 CHANGELOG.md                          |  3 +-
 Makefile                              |  4 +++
 scripts/test-unstructured-api-unit.sh | 39 ++++++++++++++++++++++
 unstructured/__version__.py           |  2 +-
 5 files changed, 93 insertions(+), 2 deletions(-)
 create mode 100755 scripts/test-unstructured-api-unit.sh

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1cda04b60..22e410e12 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -220,6 +220,53 @@ jobs:
         make install-ingest-wikipedia
         ./test_unstructured_ingest/test-ingest.sh
 
+  test_unstructured_api_unit:
+    strategy:
+      matrix:
+        # NOTE(yuming): Unstructured API only uses Python 3.8
+        python-version: ["3.8"]
+    runs-on: ubuntu-latest
+    env:
+      NLTK_DATA: ${{ github.workspace }}/nltk_data
+    needs: [setup, lint]
+    steps:
+    - uses: actions/checkout@v3
+    - uses: actions/cache/restore@v3
+      id: virtualenv-cache
+      with:
+        path: |
+          .venv
+          nltk_data
+        key: unstructured-${{ runner.os }}-${{ matrix.python-version }}-${{ env.GHA_CACHE_KEY_VERSION }}-${{ hashFiles('requirements/*.txt') }}
+    - name: Set up flag for running Unstructured API unit tests
+      run: |
+        # NOTE: Change env `SKIP_API_UNIT_FOR_BREAKING_CHANGE` to true if there is a breaking change in Unstructured repo that will break unstructured api unit tests
+        # TODO: Change env back to false once API unit tests are in sync with unstructured repo
+        echo "SKIP_API_UNIT_FOR_BREAKING_CHANGE=true" >> $GITHUB_ENV
+    - name: Set up Python ${{ matrix.python-version }}
+      if: env.SKIP_API_UNIT_FOR_BREAKING_CHANGE == 'false'
+      uses: actions/setup-python@v4
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Setup virtual environment (no cache hit)
+      if: steps.virtualenv-cache.outputs.cache-hit != 'true' && env.SKIP_API_UNIT_FOR_BREAKING_CHANGE == 'false'
+      run: |
+        python${{ matrix.python-version}} -m venv .venv
+        source .venv/bin/activate
+        mkdir "$NLTK_DATA"
+        make install-ci
+    - name: Test Unstructured API Unit
+      if: env.SKIP_API_UNIT_FOR_BREAKING_CHANGE == 'false'
+      run: |
+        source .venv/bin/activate
+        sudo apt-get update && sudo apt-get install --yes poppler-utils libreoffice
+        make install-pandoc
+        sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5
+        sudo apt-get install -y tesseract-ocr tesseract-ocr-kor
+        tesseract --version
+        make install-nltk-models
+        make test-unstructured-api-unit
+
   changelog:
     runs-on: ubuntu-latest
     steps:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 60921905d..8b90464c4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,8 @@
-## 0.8.5-dev0
+## 0.8.5-dev1
 
 ### Enhancements
 
+* Adds optional Unstructured API unit tests in CI
 * Tracks last modified date for all document types.
 
 ### Features
diff --git a/Makefile b/Makefile
index a7cfb209f..097b9ccbc 100644
--- a/Makefile
+++ b/Makefile
@@ -177,6 +177,10 @@ export CI ?= false
 test:
 	PYTHONPATH=. CI=$(CI) pytest test_${PACKAGE_NAME} --cov=${PACKAGE_NAME} --cov-report term-missing
 
+.PHONY: test-unstructured-api-unit
+test-unstructured-api-unit:
+	scripts/test-unstructured-api-unit.sh
+
 ## check: runs linters (includes tests)
 .PHONY: check
 check: check-src check-tests check-version
diff --git a/scripts/test-unstructured-api-unit.sh b/scripts/test-unstructured-api-unit.sh
new file mode 100755
index 000000000..bf0718471
--- /dev/null
+++ b/scripts/test-unstructured-api-unit.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+cleanup() {
+  rm -rf unstructured-api
+}
+
+handle_error() {
+  cleanup
+  exit 1
+}
+
+# Remove the unstructured-api directory if it exists
+if [ -d "unstructured-api" ]; then
+  rm -rf unstructured-api
+fi
+
+# Clone the repository
+git clone https://github.com/Unstructured-IO/unstructured-api.git --depth 1
+
+# Install dependencies and project locally
+cd unstructured-api && make install && cd ../
+make install-project-local
+pip show unstructured | grep Version
+
+# Run tests; `||` keeps `set -e` from exiting before the status is captured
+test_exit_status=0
+(cd unstructured-api && make test) || test_exit_status=$?
+
+# Check the exit status and handle errors
+if [ "$test_exit_status" -ne 0 ]; then
+  echo "Test failed, see the error message above."
+  handle_error
+fi
+
+cleanup
+
+echo "Test and cleanup completed successfully."
diff --git a/unstructured/__version__.py b/unstructured/__version__.py
index ac2543761..1b15eacd6 100644
--- a/unstructured/__version__.py
+++ b/unstructured/__version__.py
@@ -1 +1 @@
-__version__ = "0.8.5-dev0"  # pragma: no cover
+__version__ = "0.8.5-dev1"  # pragma: no cover