fix: use api key where needed in tests (#843)

* passes api key for unstructured-api to unit and ingest tests as needed.
* adds check for env var CI to otherwise skip tests that require an api key
This commit is contained in:
ryannikolaidis 2023-06-29 10:31:01 -07:00 committed by GitHub
parent 9882c2b83f
commit 60fe231f08
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 57 additions and 38 deletions

View File

@ -133,6 +133,8 @@ jobs:
mkdir "$NLTK_DATA"
make install-ci
- name: Test
env:
UNS_API_KEY: ${{ secrets.UNS_API_KEY }}
run: |
source .venv/bin/activate
sudo apt-get update
@ -144,7 +146,7 @@ jobs:
# NOTE(robinson) - Installing weaviate-client separately here because the requests
# version conflicts with label_studio_sdk
pip install weaviate-client
make test
make test CI=true
make check-coverage
test_ingest:
@ -185,6 +187,7 @@ jobs:
SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
DISCORD_TOKEN: ${{ secrets.DISCORD_TOKEN }}
GCP_INGEST_SERVICE_KEY: ${{ secrets.GCP_INGEST_SERVICE_KEY }}
UNS_API_KEY: ${{ secrets.UNS_API_KEY }}
run: |
source .venv/bin/activate
sudo apt-get update
@ -242,5 +245,6 @@ jobs:
- name: Test Dockerfile
run: |
source .venv/bin/activate
echo "UNS_API_KEY=${{ secrets.UNS_API_KEY }}" > uns_test_env_file
make docker-build
make docker-test
make docker-test CI=true

View File

@ -62,6 +62,7 @@ jobs:
SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
DISCORD_TOKEN: ${{ secrets.DISCORD_TOKEN }}
GCP_INGEST_SERVICE_KEY: ${{ secrets.GCP_INGEST_SERVICE_KEY }}
UNS_API_KEY: ${{ secrets.UNS_API_KEY }}
OVERWRITE_FIXTURES: "true"
run: |
source .venv/bin/activate
@ -80,7 +81,6 @@ jobs:
make install-ingest-slack
make install-ingest-wikipedia
./test_unstructured_ingest/test-ingest.sh
make install
- name: Save branch name to environment file
id: branch

View File

@ -1,4 +1,4 @@
## 0.7.11-dev0
## 0.7.11-dev1
### Enhancements
@ -6,6 +6,7 @@
### Fixes
* Fix tests that call unstructured-api by passing through an api-key
* Fixed page breaks being given (incorrect) page numbers
## 0.7.10

View File

@ -146,10 +146,12 @@ uninstall-project-local:
# Test and Lint #
#################
export CI ?= false
## test: runs all unittests
.PHONY: test
test:
PYTHONPATH=. pytest test_${PACKAGE_NAME} --cov=${PACKAGE_NAME} --cov-report term-missing
PYTHONPATH=. CI=$(CI) pytest test_${PACKAGE_NAME} --cov=${PACKAGE_NAME} --cov-report term-missing
## check: runs linters (includes tests)
.PHONY: check
@ -224,8 +226,9 @@ docker-test:
docker run --rm \
-v ${CURRENT_DIR}/test_unstructured:/home/test_unstructured \
-v ${CURRENT_DIR}/test_unstructured_ingest:/home/test_unstructured_ingest \
$(if $(wildcard uns_test_env_file),--env-file uns_test_env_file,) \
$(DOCKER_IMAGE) \
bash -c "pytest $(if $(TEST_NAME),-k $(TEST_NAME),) test_unstructured"
bash -c "CI=$(CI) pytest $(if $(TEST_NAME),-k $(TEST_NAME),) test_unstructured"
.PHONY: docker-smoke-test
docker-smoke-test:

View File

@ -13,6 +13,8 @@ DIRECTORY = pathlib.Path(__file__).parent.resolve()
EML_TEST_FILE = "eml/fake-email.eml"
skip_outside_ci = os.getenv("CI", "").lower() in {"", "false", "f", "0"}
class MockResponse:
def __init__(self, status_code):
@ -94,19 +96,18 @@ def test_partition_via_api_raises_with_bad_response(monkeypatch):
partition_via_api(filename=filename)
@pytest.mark.skip(reason="Temporary skip until since API key is now required")
def test_partition_via_api_valid_request_data_kwargs():
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.pdf")
elements = partition_via_api(filename=filename, strategy="fast")
elements = partition_via_api(filename=filename, strategy="fast", api_key=get_api_key())
assert isinstance(elements, list)
@pytest.mark.skip(reason="Temporary skip until since API key is now required")
def test_partition_via_api_invalid_request_data_kwargs():
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.pdf")
with pytest.raises(ValueError):
partition_via_api(filename=filename, strategy="not_a_strategy")
partition_via_api(filename=filename, strategy="not_a_strategy", api_key=get_api_key())
class MockMultipleResponse:
@ -291,26 +292,37 @@ def test_partition_multiple_via_api_from_files_raises_without_filenames(monkeypa
)
@pytest.mark.skip(reason="Temporary skip until since API key is now required")
def get_api_key():
api_key = os.getenv("UNS_API_KEY")
if api_key is None:
raise ValueError("UNS_API_KEY environment variable not set")
return api_key
@pytest.mark.skipif(skip_outside_ci, reason="Skipping test run outside of CI")
def test_partition_multiple_via_api_valid_request_data_kwargs():
filenames = [
os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.pdf"),
os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.jpg"),
]
elements = partition_multiple_via_api(filenames=filenames, strategy="fast")
elements = partition_multiple_via_api(
filenames=filenames,
strategy="fast",
api_key=get_api_key(),
)
assert isinstance(elements, list)
@pytest.mark.skip(reason="Temporary skip until since API key is now required")
@pytest.mark.skipif(skip_outside_ci, reason="Skipping test run outside of CI")
def test_partition_multiple_via_api_invalid_request_data_kwargs():
filenames = [
os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.pdf"),
os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.jpg"),
]
with pytest.raises(ValueError):
partition_multiple_via_api(
filenames=filenames,
strategy="not_a_strategy",
api_key=get_api_key(),
)

View File

@ -2,26 +2,25 @@
set -e
# TODO(crag): do not exit 0 but proceed with the test if an API key env var is defined
# shellcheck disable=SC2317
exit 0
if [ -z "$UNS_API_KEY" ]; then
echo "Skipping ingest test against api because the UNS_API_KEY env var is not set."
exit 0
fi
#SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
#cd "$SCRIPT_DIR"/.. || exit 1
#
#PYTHONPATH=. ./unstructured/ingest/main.py \
# --local-input-path example-docs \
# --local-file-glob "*.pdf" \
# --structured-output-dir api-ingest-output \
# --partition-by-api \
# --partition-strategy hi_res \
# --verbose \
# --reprocess
#
#set +e
#
#if [ "$(find 'api-ingest-output' -type f -printf '.' | wc -c)" != 8 ]; then
# echo
# echo "8 files should have been created."
# exit 1
#fi
PYTHONPATH=. ./unstructured/ingest/main.py \
--api-key "$UNS_API_KEY" \
--local-input-path example-docs \
--local-file-glob "*.pdf" \
--structured-output-dir api-ingest-output \
--partition-by-api \
--partition-strategy hi_res \
--verbose \
--reprocess
set +e
if [ "$(find 'api-ingest-output' -type f -printf '.' | wc -c)" != 8 ]; then
echo
echo "8 files should have been created."
exit 1
fi

View File

@ -1 +1 @@
__version__ = "0.7.11-dev0" # pragma: no cover
__version__ = "0.7.11-dev1" # pragma: no cover

View File

@ -110,7 +110,7 @@ def partition_multiple_via_api(
Parameters
----------
filename
filenames
A list of strings defining the target filename paths.
content_types
A list of strings defining the file contents in MIME types.