Chore: add uns api repo unittests (#954)

* stage

* git clone

* ci ignore markdown file

* make install

* use env instead

* remove md

* add script

* wrong env value

* add note

* maybe don't rm

* no cd../

---------

Co-authored-by: cragwolfe <crag@unstructured.io>
This commit is contained in:
Yuming Long 2023-07-26 16:55:35 -04:00 committed by GitHub
parent d9aed66b65
commit df1ba39905
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 93 additions and 2 deletions

View File

@ -220,6 +220,53 @@ jobs:
make install-ingest-wikipedia make install-ingest-wikipedia
./test_unstructured_ingest/test-ingest.sh ./test_unstructured_ingest/test-ingest.sh
# Job: runs `make test-unstructured-api-unit` once the `setup` and `lint`
# jobs have finished. Every step after the flag step below is gated on
# SKIP_API_UNIT_FOR_BREAKING_CHANGE being 'false'.
test_unstructured_api_unit:
strategy:
matrix:
# NOTE(yuming): Unstructured API only uses Python 3.8
python-version: ["3.8"]
runs-on: ubuntu-latest
env:
NLTK_DATA: ${{ github.workspace }}/nltk_data
needs: [setup, lint]
steps:
- uses: actions/checkout@v3
# Restore the cached virtualenv and NLTK data. The key combines runner OS,
# Python version, GHA_CACHE_KEY_VERSION and a hash of requirements/*.txt.
- uses: actions/cache/restore@v3
id: virtualenv-cache
with:
path: |
.venv
nltk_data
key: unstructured-${{ runner.os }}-${{ matrix.python-version }}-${{ env.GHA_CACHE_KEY_VERSION }}-${{ hashFiles('requirements/*.txt') }}
# Publishes SKIP_API_UNIT_FOR_BREAKING_CHANGE to the following steps via
# $GITHUB_ENV. The value written here is "true", so as committed the steps
# gated on == 'false' below do not run.
- name: Set up flag for running Unstructured API unit tests
run: |
# NOTE: Change env `SKIP_API_UNIT_FOR_BREAKING_CHANGE` to true if there is a breaking change in Unstructured repo that will break unstructured api unit tests
# TODO: Change env back to false once API unit tests is in sync with unstructured repo
echo "SKIP_API_UNIT_FOR_BREAKING_CHANGE=true" >> $GITHUB_ENV
- name: Set up Python ${{ matrix.python-version }}
if: env.SKIP_API_UNIT_FOR_BREAKING_CHANGE == 'false'
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
# Cache miss only: build the virtualenv from scratch, create the NLTK data
# directory and run `make install-ci`.
- name: Setup virtual environment (no cache hit)
if: steps.virtualenv-cache.outputs.cache-hit != 'true' && env.SKIP_API_UNIT_FOR_BREAKING_CHANGE == 'false'
run: |
python${{ matrix.python-version}} -m venv .venv
source .venv/bin/activate
mkdir "$NLTK_DATA"
make install-ci
# Installs the system packages (poppler-utils, libreoffice, pandoc,
# tesseract-ocr, tesseract-ocr-kor) and the NLTK models, then runs the
# `test-unstructured-api-unit` make target.
- name: Test Unstructured API Unit
if: env.SKIP_API_UNIT_FOR_BREAKING_CHANGE == 'false'
run: |
source .venv/bin/activate
sudo apt-get update && sudo apt-get install --yes poppler-utils libreoffice
make install-pandoc
sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5
sudo apt-get install -y tesseract-ocr tesseract-ocr-kor
tesseract --version
make install-nltk-models
make test-unstructured-api-unit
changelog: changelog:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:

View File

@ -1,7 +1,8 @@
## 0.8.5-dev0 ## 0.8.5-dev1
### Enhancements ### Enhancements
* Adds optional Unstructured API unit tests in CI
* Tracks last modified date for all document types. * Tracks last modified date for all document types.
### Features ### Features

View File

@ -177,6 +177,10 @@ export CI ?= false
test: test:
PYTHONPATH=. CI=$(CI) pytest test_${PACKAGE_NAME} --cov=${PACKAGE_NAME} --cov-report term-missing PYTHONPATH=. CI=$(CI) pytest test_${PACKAGE_NAME} --cov=${PACKAGE_NAME} --cov-report term-missing
## test-unstructured-api-unit: runs scripts/test-unstructured-api-unit.sh
.PHONY: test-unstructured-api-unit
test-unstructured-api-unit:
scripts/test-unstructured-api-unit.sh
## check: runs linters (includes tests) ## check: runs linters (includes tests)
.PHONY: check .PHONY: check
check: check-src check-tests check-version check: check-src check-tests check-version

View File

@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Runs the unit tests of the unstructured-api repository with the local
# project installed on top of that repository's own dependencies.
#
# Steps:
#   1. Shallow-clone unstructured-api into ./unstructured-api (replacing any
#      existing directory of that name).
#   2. Run `make install` inside the clone.
#   3. Run `make install-project-local` from the current directory.
#   4. Run `make test` inside the clone.
#
# Exit status: 0 when the tests pass, 1 when they fail; any other failing
# step aborts the script with that step's status (errexit). The clone is
# removed on every exit path.
set -euo pipefail

# Delete the temporary clone. Safe to call more than once.
cleanup() {
  rm -rf unstructured-api
}

# Remove the clone and report failure to the caller.
handle_error() {
  cleanup
  exit 1
}

# Make sure the clone never outlives the script, including when errexit
# aborts it during clone or install.
trap cleanup EXIT

# Remove the unstructured-api directory if it exists
if [ -d "unstructured-api" ]; then
  rm -rf unstructured-api
fi

# Clone the repository
git clone https://github.com/Unstructured-IO/unstructured-api.git --depth 1

# Install dependencies and project locally.
# The install runs in a subshell: in a plain `cd dir && make install && cd ..`
# list, errexit ignores a failing `make install` (it is not the last command
# of the list) and the script would carry on inside the clone directory.
(cd unstructured-api && make install)
make install-project-local
pip show unstructured | grep Version

# Run tests and capture exit status.
# `|| ...` keeps errexit from terminating the script before the status can
# be inspected, so the failure message below is actually reachable.
test_exit_status=0
(cd unstructured-api && make test) || test_exit_status=$?

# Check the exit status and handle errors
if [ "$test_exit_status" -ne 0 ]; then
  echo "Test failed, see the error message above."
  handle_error
fi

cleanup
echo "Test and cleanup completed successfully."

View File

@ -1 +1 @@
__version__ = "0.8.5-dev0" # pragma: no cover __version__ = "0.8.5-dev1" # pragma: no cover