From 60fe231f0886b56fc9d7b10e8c839f172f9e0720 Mon Sep 17 00:00:00 2001 From: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com> Date: Thu, 29 Jun 2023 10:31:01 -0700 Subject: [PATCH] fix: use api key where needed in tests (#843) * passes api key for unstructured-api to unit and ingest tests as needed. * adds check for env var CI to otherwise skip tests that require an api key --- .github/workflows/ci.yml | 8 +++- .../ingest-test-fixtures-update-pr.yml | 2 +- CHANGELOG.md | 3 +- Makefile | 7 ++- test_unstructured/partition/test_api.py | 28 ++++++++---- .../test-ingest-against-api.sh | 43 +++++++++---------- unstructured/__version__.py | 2 +- unstructured/partition/api.py | 2 +- 8 files changed, 57 insertions(+), 38 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 430f3a528..123c096e4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -133,6 +133,8 @@ jobs: mkdir "$NLTK_DATA" make install-ci - name: Test + env: + UNS_API_KEY: ${{ secrets.UNS_API_KEY }} run: | source .venv/bin/activate sudo apt-get update @@ -144,7 +146,7 @@ jobs: # NOTE(robinson) - Installing weaviate-client separately here because the requests # version conflicts with label_studio_sdk pip install weaviate-client - make test + make test CI=true make check-coverage test_ingest: @@ -185,6 +187,7 @@ jobs: SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }} DISCORD_TOKEN: ${{ secrets.DISCORD_TOKEN }} GCP_INGEST_SERVICE_KEY: ${{ secrets.GCP_INGEST_SERVICE_KEY }} + UNS_API_KEY: ${{ secrets.UNS_API_KEY }} run: | source .venv/bin/activate sudo apt-get update @@ -242,5 +245,6 @@ jobs: - name: Test Dockerfile run: | source .venv/bin/activate + echo "UNS_API_KEY=${{ secrets.UNS_API_KEY }}" > uns_test_env_file make docker-build - make docker-test + make docker-test CI=true diff --git a/.github/workflows/ingest-test-fixtures-update-pr.yml b/.github/workflows/ingest-test-fixtures-update-pr.yml index 9b5d4464f..c63a20c41 100644 --- a/.github/workflows/ingest-test-fixtures-update-pr.yml +++ b/.github/workflows/ingest-test-fixtures-update-pr.yml @@ -62,6 +62,7 @@ jobs: SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }} DISCORD_TOKEN: ${{ secrets.DISCORD_TOKEN }} GCP_INGEST_SERVICE_KEY: ${{ secrets.GCP_INGEST_SERVICE_KEY }} + UNS_API_KEY: ${{ secrets.UNS_API_KEY }} OVERWRITE_FIXTURES: "true" run: | source .venv/bin/activate @@ -80,7 +81,6 @@ jobs: make install-ingest-slack make install-ingest-wikipedia ./test_unstructured_ingest/test-ingest.sh - make install - name: Save branch name to environment file id: branch diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a8f86252..00121cd5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.7.11-dev0 +## 0.7.11-dev1 ### Enhancements @@ -6,6 +6,7 @@ ### Fixes +* Fix tests that call unstructured-api by passing through an api-key * Fixed page breaks being given (incorrect) page numbers ## 0.7.10 diff --git a/Makefile b/Makefile index 72936a256..9308a3027 100644 --- a/Makefile +++ b/Makefile @@ -146,10 +146,12 @@ uninstall-project-local: # Test and Lint # ################# +export CI ?= false + ## test: runs all unittests .PHONY: test test: - PYTHONPATH=. pytest test_${PACKAGE_NAME} --cov=${PACKAGE_NAME} --cov-report term-missing + PYTHONPATH=. CI=$(CI) pytest test_${PACKAGE_NAME} --cov=${PACKAGE_NAME} --cov-report term-missing ## check: runs linters (includes tests) .PHONY: check @@ -224,8 +226,9 @@ docker-test: docker run --rm \ -v ${CURRENT_DIR}/test_unstructured:/home/test_unstructured \ -v ${CURRENT_DIR}/test_unstructured_ingest:/home/test_unstructured_ingest \ + $(if $(wildcard uns_test_env_file),--env-file uns_test_env_file,) \ $(DOCKER_IMAGE) \ - bash -c "pytest $(if $(TEST_NAME),-k $(TEST_NAME),) test_unstructured" + bash -c "CI=$(CI) pytest $(if $(TEST_NAME),-k $(TEST_NAME),) test_unstructured" .PHONY: docker-smoke-test docker-smoke-test: diff --git a/test_unstructured/partition/test_api.py b/test_unstructured/partition/test_api.py index d270782c2..d922a3e20 100644 --- a/test_unstructured/partition/test_api.py +++ b/test_unstructured/partition/test_api.py @@ -13,6 +13,8 @@ DIRECTORY = pathlib.Path(__file__).parent.resolve() EML_TEST_FILE = "eml/fake-email.eml" +skip_outside_ci = os.getenv("CI", "").lower() in {"", "false", "f", "0"} + class MockResponse: def __init__(self, status_code): @@ -94,19 +96,18 @@ def test_partition_via_api_raises_with_bad_response(monkeypatch): partition_via_api(filename=filename) -@pytest.mark.skip(reason="Temporary skip until since API key is now required") def test_partition_via_api_valid_request_data_kwargs(): filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.pdf") - elements = partition_via_api(filename=filename, strategy="fast") + elements = partition_via_api(filename=filename, strategy="fast", api_key=get_api_key()) + assert isinstance(elements, list) -@pytest.mark.skip(reason="Temporary skip until since API key is now required") def test_partition_via_api_invalid_request_data_kwargs(): filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.pdf") with pytest.raises(ValueError): - partition_via_api(filename=filename, strategy="not_a_strategy") + partition_via_api(filename=filename, strategy="not_a_strategy", api_key=get_api_key()) class MockMultipleResponse: @@ -291,26 +292,37 @@ def test_partition_multiple_via_api_from_files_raises_without_filenames(monkeypa ) -@pytest.mark.skip(reason="Temporary skip until since API key is now required") +def get_api_key(): + api_key = os.getenv("UNS_API_KEY") + if api_key is None: + raise ValueError("UNS_API_KEY environment variable not set") + return api_key + + +@pytest.mark.skipif(skip_outside_ci, reason="Skipping test run outside of CI") def test_partition_multiple_via_api_valid_request_data_kwargs(): filenames = [ os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.pdf"), os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.jpg"), ] - elements = partition_multiple_via_api(filenames=filenames, strategy="fast") + elements = partition_multiple_via_api( + filenames=filenames, + strategy="fast", + api_key=get_api_key(), + ) assert isinstance(elements, list) -@pytest.mark.skip(reason="Temporary skip until since API key is now required") +@pytest.mark.skipif(skip_outside_ci, reason="Skipping test run outside of CI") def test_partition_multiple_via_api_invalid_request_data_kwargs(): filenames = [ os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.pdf"), os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.jpg"), ] - with pytest.raises(ValueError): partition_multiple_via_api( filenames=filenames, strategy="not_a_strategy", + api_key=get_api_key(), ) diff --git a/test_unstructured_ingest/test-ingest-against-api.sh b/test_unstructured_ingest/test-ingest-against-api.sh index 50fe1d2b4..ffa9d88cb 100755 --- a/test_unstructured_ingest/test-ingest-against-api.sh +++ b/test_unstructured_ingest/test-ingest-against-api.sh @@ -2,26 +2,25 @@ set -e -# TODO(crag): do not exit 0 but proceed with the test if an API key env var is defined -# shellcheck disable=SC2317 -exit 0 +if [ -z "$UNS_API_KEY" ]; then + echo "Skipping ingest test against api because the UNS_API_KEY env var is not set." + exit 0 +fi -#SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) -#cd "$SCRIPT_DIR"/.. || exit 1 -# -#PYTHONPATH=. ./unstructured/ingest/main.py \ -# --local-input-path example-docs \ -# --local-file-glob "*.pdf" \ -# --structured-output-dir api-ingest-output \ -# --partition-by-api \ -# --partition-strategy hi_res \ -# --verbose \ -# --reprocess -# -#set +e -# -#if [ "$(find 'api-ingest-output' -type f -printf '.' | wc -c)" != 8 ]; then -# echo -# echo "8 files should have been created." -# exit 1 -#fi +PYTHONPATH=. ./unstructured/ingest/main.py \ + --api-key "$UNS_API_KEY" \ + --local-input-path example-docs \ + --local-file-glob "*.pdf" \ + --structured-output-dir api-ingest-output \ + --partition-by-api \ + --partition-strategy hi_res \ + --verbose \ + --reprocess + +set +e + +if [ "$(find 'api-ingest-output' -type f -printf '.' | wc -c)" != 8 ]; then + echo + echo "8 files should have been created." + exit 1 +fi diff --git a/unstructured/__version__.py b/unstructured/__version__.py index ba1667f0e..3d76feb0a 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.7.11-dev0" # pragma: no cover +__version__ = "0.7.11-dev1" # pragma: no cover diff --git a/unstructured/partition/api.py b/unstructured/partition/api.py index 0e3216e72..a479f874f 100644 --- a/unstructured/partition/api.py +++ b/unstructured/partition/api.py @@ -110,7 +110,7 @@ def partition_multiple_via_api( Parameters ---------- - filename + filenames A list of strings defining the target filename paths. content_types A list of strings defining the file contents in MIME types.