mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-12-02 18:19:51 +00:00
fix: use api key where needed in tests (#843)
* passes api key for unstructured-api to unit and ingest tests as needed. * adds check for env var CI to otherwise skip tests that require an api key
This commit is contained in:
parent
9882c2b83f
commit
60fe231f08
8
.github/workflows/ci.yml
vendored
8
.github/workflows/ci.yml
vendored
@ -133,6 +133,8 @@ jobs:
|
||||
mkdir "$NLTK_DATA"
|
||||
make install-ci
|
||||
- name: Test
|
||||
env:
|
||||
UNS_API_KEY: ${{ secrets.UNS_API_KEY }}
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
sudo apt-get update
|
||||
@ -144,7 +146,7 @@ jobs:
|
||||
# NOTE(robinson) - Installing weaviate-client separately here because the requests
|
||||
# version conflicts with label_studio_sdk
|
||||
pip install weaviate-client
|
||||
make test
|
||||
make test CI=true
|
||||
make check-coverage
|
||||
|
||||
test_ingest:
|
||||
@ -185,6 +187,7 @@ jobs:
|
||||
SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
|
||||
DISCORD_TOKEN: ${{ secrets.DISCORD_TOKEN }}
|
||||
GCP_INGEST_SERVICE_KEY: ${{ secrets.GCP_INGEST_SERVICE_KEY }}
|
||||
UNS_API_KEY: ${{ secrets.UNS_API_KEY }}
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
sudo apt-get update
|
||||
@ -242,5 +245,6 @@ jobs:
|
||||
- name: Test Dockerfile
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
echo "UNS_API_KEY=${{ secrets.UNS_API_KEY }}" > uns_test_env_file
|
||||
make docker-build
|
||||
make docker-test
|
||||
make docker-test CI=true
|
||||
|
||||
@ -62,6 +62,7 @@ jobs:
|
||||
SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
|
||||
DISCORD_TOKEN: ${{ secrets.DISCORD_TOKEN }}
|
||||
GCP_INGEST_SERVICE_KEY: ${{ secrets.GCP_INGEST_SERVICE_KEY }}
|
||||
UNS_API_KEY: ${{ secrets.UNS_API_KEY }}
|
||||
OVERWRITE_FIXTURES: "true"
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
@ -80,7 +81,6 @@ jobs:
|
||||
make install-ingest-slack
|
||||
make install-ingest-wikipedia
|
||||
./test_unstructured_ingest/test-ingest.sh
|
||||
make install
|
||||
|
||||
- name: Save branch name to environment file
|
||||
id: branch
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
## 0.7.11-dev0
|
||||
## 0.7.11-dev1
|
||||
|
||||
### Enhancements
|
||||
|
||||
@ -6,6 +6,7 @@
|
||||
|
||||
### Fixes
|
||||
|
||||
* Fix tests that call unstructured-api by passing through an api-key
|
||||
* Fixed page breaks being given (incorrect) page numbers
|
||||
|
||||
## 0.7.10
|
||||
|
||||
7
Makefile
7
Makefile
@ -146,10 +146,12 @@ uninstall-project-local:
|
||||
# Test and Lint #
|
||||
#################
|
||||
|
||||
export CI ?= false
|
||||
|
||||
## test: runs all unittests
|
||||
.PHONY: test
|
||||
test:
|
||||
PYTHONPATH=. pytest test_${PACKAGE_NAME} --cov=${PACKAGE_NAME} --cov-report term-missing
|
||||
PYTHONPATH=. CI=$(CI) pytest test_${PACKAGE_NAME} --cov=${PACKAGE_NAME} --cov-report term-missing
|
||||
|
||||
## check: runs linters (includes tests)
|
||||
.PHONY: check
|
||||
@ -224,8 +226,9 @@ docker-test:
|
||||
docker run --rm \
|
||||
-v ${CURRENT_DIR}/test_unstructured:/home/test_unstructured \
|
||||
-v ${CURRENT_DIR}/test_unstructured_ingest:/home/test_unstructured_ingest \
|
||||
$(if $(wildcard uns_test_env_file),--env-file uns_test_env_file,) \
|
||||
$(DOCKER_IMAGE) \
|
||||
bash -c "pytest $(if $(TEST_NAME),-k $(TEST_NAME),) test_unstructured"
|
||||
bash -c "CI=$(CI) pytest $(if $(TEST_NAME),-k $(TEST_NAME),) test_unstructured"
|
||||
|
||||
.PHONY: docker-smoke-test
|
||||
docker-smoke-test:
|
||||
|
||||
@ -13,6 +13,8 @@ DIRECTORY = pathlib.Path(__file__).parent.resolve()
|
||||
|
||||
EML_TEST_FILE = "eml/fake-email.eml"
|
||||
|
||||
skip_outside_ci = os.getenv("CI", "").lower() in {"", "false", "f", "0"}
|
||||
|
||||
|
||||
class MockResponse:
|
||||
def __init__(self, status_code):
|
||||
@ -94,19 +96,18 @@ def test_partition_via_api_raises_with_bad_response(monkeypatch):
|
||||
partition_via_api(filename=filename)
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Temporary skip until since API key is now required")
|
||||
def test_partition_via_api_valid_request_data_kwargs():
|
||||
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.pdf")
|
||||
|
||||
elements = partition_via_api(filename=filename, strategy="fast")
|
||||
elements = partition_via_api(filename=filename, strategy="fast", api_key=get_api_key())
|
||||
|
||||
assert isinstance(elements, list)
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Temporary skip until since API key is now required")
|
||||
def test_partition_via_api_invalid_request_data_kwargs():
|
||||
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.pdf")
|
||||
with pytest.raises(ValueError):
|
||||
partition_via_api(filename=filename, strategy="not_a_strategy")
|
||||
partition_via_api(filename=filename, strategy="not_a_strategy", api_key=get_api_key())
|
||||
|
||||
|
||||
class MockMultipleResponse:
|
||||
@ -291,26 +292,37 @@ def test_partition_multiple_via_api_from_files_raises_without_filenames(monkeypa
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Temporary skip until since API key is now required")
|
||||
def get_api_key():
|
||||
api_key = os.getenv("UNS_API_KEY")
|
||||
if api_key is None:
|
||||
raise ValueError("UNS_API_KEY environment variable not set")
|
||||
return api_key
|
||||
|
||||
|
||||
@pytest.mark.skipif(skip_outside_ci, reason="Skipping test run outside of CI")
|
||||
def test_partition_multiple_via_api_valid_request_data_kwargs():
|
||||
filenames = [
|
||||
os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.pdf"),
|
||||
os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.jpg"),
|
||||
]
|
||||
|
||||
elements = partition_multiple_via_api(filenames=filenames, strategy="fast")
|
||||
elements = partition_multiple_via_api(
|
||||
filenames=filenames,
|
||||
strategy="fast",
|
||||
api_key=get_api_key(),
|
||||
)
|
||||
assert isinstance(elements, list)
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Temporary skip until since API key is now required")
|
||||
@pytest.mark.skipif(skip_outside_ci, reason="Skipping test run outside of CI")
|
||||
def test_partition_multiple_via_api_invalid_request_data_kwargs():
|
||||
filenames = [
|
||||
os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.pdf"),
|
||||
os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.jpg"),
|
||||
]
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
partition_multiple_via_api(
|
||||
filenames=filenames,
|
||||
strategy="not_a_strategy",
|
||||
api_key=get_api_key(),
|
||||
)
|
||||
|
||||
@ -2,26 +2,25 @@
|
||||
|
||||
set -e
|
||||
|
||||
# TODO(crag): do not exit 0 but proceed with the test if an API key env var is defined
|
||||
# shellcheck disable=SC2317
|
||||
exit 0
|
||||
if [ -z "$UNS_API_KEY" ]; then
|
||||
echo "Skipping ingest test against api because the UNS_API_KEY env var is not set."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
#SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
|
||||
#cd "$SCRIPT_DIR"/.. || exit 1
|
||||
#
|
||||
#PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
# --local-input-path example-docs \
|
||||
# --local-file-glob "*.pdf" \
|
||||
# --structured-output-dir api-ingest-output \
|
||||
# --partition-by-api \
|
||||
# --partition-strategy hi_res \
|
||||
# --verbose \
|
||||
# --reprocess
|
||||
#
|
||||
#set +e
|
||||
#
|
||||
#if [ "$(find 'api-ingest-output' -type f -printf '.' | wc -c)" != 8 ]; then
|
||||
# echo
|
||||
# echo "8 files should have been created."
|
||||
# exit 1
|
||||
#fi
|
||||
PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
--api-key "$UNS_API_KEY" \
|
||||
--local-input-path example-docs \
|
||||
--local-file-glob "*.pdf" \
|
||||
--structured-output-dir api-ingest-output \
|
||||
--partition-by-api \
|
||||
--partition-strategy hi_res \
|
||||
--verbose \
|
||||
--reprocess
|
||||
|
||||
set +e
|
||||
|
||||
if [ "$(find 'api-ingest-output' -type f -printf '.' | wc -c)" != 8 ]; then
|
||||
echo
|
||||
echo "8 files should have been created."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@ -1 +1 @@
|
||||
__version__ = "0.7.11-dev0" # pragma: no cover
|
||||
__version__ = "0.7.11-dev1" # pragma: no cover
|
||||
|
||||
@ -110,7 +110,7 @@ def partition_multiple_via_api(
|
||||
|
||||
Parameters
|
||||
----------
|
||||
filename
|
||||
filenames
|
||||
A list of strings defining the target filename paths.
|
||||
content_types
|
||||
A list of strings defining the file contents in MIME types.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user