mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-11-05 20:37:36 +00:00
Chore: add uns api repo unittests (#954)
* stage * git clone * ci ignore markdown file * make install * use env instead * remove md * add script * wrong env value * add note * maybe don't rm * no cd../ --------- Co-authored-by: cragwolfe <crag@unstructured.io>
This commit is contained in:
parent
d9aed66b65
commit
df1ba39905
47
.github/workflows/ci.yml
vendored
47
.github/workflows/ci.yml
vendored
@ -220,6 +220,53 @@ jobs:
|
|||||||
make install-ingest-wikipedia
|
make install-ingest-wikipedia
|
||||||
./test_unstructured_ingest/test-ingest.sh
|
./test_unstructured_ingest/test-ingest.sh
|
||||||
|
|
||||||
|
# Job: runs the unstructured-api unit tests (`make test-unstructured-api-unit`).
# The Python setup, virtualenv setup and test steps are all gated on the
# SKIP_API_UNIT_FOR_BREAKING_CHANGE env value being 'false'.
test_unstructured_api_unit:
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
# NOTE(yuming): Unstructured API only uses Python 3.8
|
||||||
|
python-version: ["3.8"]
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
env:
|
||||||
|
NLTK_DATA: ${{ github.workspace }}/nltk_data
|
||||||
|
needs: [setup, lint]
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
# Restore-only cache step for `.venv` and `nltk_data`; the key includes a hash of requirements/*.txt.
- uses: actions/cache/restore@v3
|
||||||
|
id: virtualenv-cache
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
.venv
|
||||||
|
nltk_data
|
||||||
|
key: unstructured-${{ runner.os }}-${{ matrix.python-version }}-${{ env.GHA_CACHE_KEY_VERSION }}-${{ hashFiles('requirements/*.txt') }}
|
||||||
|
# Writes the skip flag to $GITHUB_ENV. It is currently written as `true`, so the gated steps below do not run.
- name: Set up flag for running Unstructured API unit tests
|
||||||
|
run: |
|
||||||
|
# NOTE: Change env `SKIP_API_UNIT_FOR_BREAKING_CHANGE` to true if there is a breaking change in Unstructured repo that will break unstructured api unit tests
|
||||||
|
# TODO: Change env back to false once the API unit tests are in sync with the unstructured repo
|
||||||
|
echo "SKIP_API_UNIT_FOR_BREAKING_CHANGE=true" >> $GITHUB_ENV
|
||||||
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
|
if: env.SKIP_API_UNIT_FOR_BREAKING_CHANGE == 'false'
|
||||||
|
uses: actions/setup-python@v4
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
# Builds the virtualenv from scratch only when the cache restore above did not report a hit.
- name: Setup virtual environment (no cache hit)
|
||||||
|
if: steps.virtualenv-cache.outputs.cache-hit != 'true' && env.SKIP_API_UNIT_FOR_BREAKING_CHANGE == 'false'
|
||||||
|
run: |
|
||||||
|
python${{ matrix.python-version}} -m venv .venv
|
||||||
|
source .venv/bin/activate
|
||||||
|
mkdir "$NLTK_DATA"
|
||||||
|
make install-ci
|
||||||
|
# Installs system packages (poppler, libreoffice, pandoc, tesseract), then runs the API unit tests.
- name: Test Unstructured API Unit
|
||||||
|
if: env.SKIP_API_UNIT_FOR_BREAKING_CHANGE == 'false'
|
||||||
|
run: |
|
||||||
|
source .venv/bin/activate
|
||||||
|
sudo apt-get update && sudo apt-get install --yes poppler-utils libreoffice
|
||||||
|
make install-pandoc
|
||||||
|
sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5
|
||||||
|
sudo apt-get install -y tesseract-ocr tesseract-ocr-kor
|
||||||
|
tesseract --version
|
||||||
|
make install-nltk-models
|
||||||
|
make test-unstructured-api-unit
|
||||||
|
|
||||||
changelog:
|
changelog:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
|
|||||||
@ -1,7 +1,8 @@
|
|||||||
## 0.8.5-dev0
|
## 0.8.5-dev1
|
||||||
|
|
||||||
### Enhancements
|
### Enhancements
|
||||||
|
|
||||||
|
* Adds optional Unstructured API unit tests in CI
|
||||||
* Tracks last modified date for all document types.
|
* Tracks last modified date for all document types.
|
||||||
|
|
||||||
### Features
|
### Features
|
||||||
|
|||||||
4
Makefile
4
Makefile
@ -177,6 +177,10 @@ export CI ?= false
|
|||||||
test:
|
test:
|
||||||
PYTHONPATH=. CI=$(CI) pytest test_${PACKAGE_NAME} --cov=${PACKAGE_NAME} --cov-report term-missing
|
PYTHONPATH=. CI=$(CI) pytest test_${PACKAGE_NAME} --cov=${PACKAGE_NAME} --cov-report term-missing
|
||||||
|
|
||||||
|
## test-unstructured-api-unit: runs scripts/test-unstructured-api-unit.sh (unstructured-api unit tests)
.PHONY: test-unstructured-api-unit
|
||||||
|
test-unstructured-api-unit:
|
||||||
|
scripts/test-unstructured-api-unit.sh
|
||||||
|
|
||||||
## check: runs linters (includes tests)
|
## check: runs linters (includes tests)
|
||||||
.PHONY: check
|
.PHONY: check
|
||||||
check: check-src check-tests check-version
|
check: check-src check-tests check-version
|
||||||
|
|||||||
39
scripts/test-unstructured-api-unit.sh
Executable file
39
scripts/test-unstructured-api-unit.sh
Executable file
@ -0,0 +1,39 @@
|
|||||||
|
#!/usr/bin/env bash

# Clones the unstructured-api repository, installs its dependencies, installs
# the local project on top of them (`make install-project-local`), and runs the
# unstructured-api test suite (`make test`).
#
# Must be run from the directory that contains this repo's Makefile, because
# both the clone path and `make install-project-local` are relative to the
# current working directory.
#
# Exit status: 0 when the tests pass, non-zero when any step fails.
# The `unstructured-api` clone is removed on every exit path.

set -euo pipefail

# Remove the cloned unstructured-api checkout (safe to call more than once).
cleanup() {
  rm -rf unstructured-api
}

# Run cleanup on every exit path. With `set -e` a failing command ends the
# script immediately, so cleanup cannot rely on being called further down.
trap cleanup EXIT

# Start from a fresh clone; `rm -rf` is a no-op when the directory is absent.
cleanup

# Clone the repository
git clone https://github.com/Unstructured-IO/unstructured-api.git --depth 1

# Install dependencies and project locally.
# The install runs in a subshell so that (a) a failing `make install` is the
# final command of its `&&` list and therefore trips `set -e`, and (b) the
# working directory of this script never changes.
(cd unstructured-api && make install)
make install-project-local
pip show unstructured | grep Version

# Run tests and capture the exit status. The `||` keeps `set -e` from aborting
# the script before the failure message below can be printed.
test_exit_status=0
(cd unstructured-api && make test) || test_exit_status=$?

# Check the exit status and handle errors
if [ "$test_exit_status" -ne 0 ]; then
  echo "Test failed, see the error message above."
  exit 1
fi

# Clean up before reporting success so the message is accurate; the EXIT trap
# will run cleanup again, which is harmless.
cleanup

echo "Test and cleanup completed successfully."
|
||||||
@ -1 +1 @@
|
|||||||
__version__ = "0.8.5-dev0" # pragma: no cover
|
__version__ = "0.8.5-dev1" # pragma: no cover
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user