chore: small edits (#3480)

Add comments and fix decorators on some tests.
This commit is contained in:
John 2024-08-06 15:21:43 -04:00 committed by GitHub
parent 73bef27ef1
commit 24a1f298e5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 10 additions and 17 deletions

View File

@ -18,7 +18,7 @@ from ..unit_utils import ANY, FixtureRequest, example_doc_path, method_mock
DIRECTORY = pathlib.Path(__file__).parent.resolve()
skip_outside_ci = os.getenv("CI", "").lower() in {"", "false", "f", "0"}
is_in_ci = os.getenv("CI", "").lower() not in {"", "false", "f", "0"}
skip_not_on_main = os.getenv("GITHUB_REF_NAME", "").lower() != "main"
@ -102,7 +102,7 @@ def test_partition_via_api_raises_with_bad_response(request: FixtureRequest):
partition_mock_.assert_called_once()
@pytest.mark.skipif(skip_outside_ci, reason="Skipping test run outside of CI")
@pytest.mark.skipif(not is_in_ci, reason="Skipping test run outside of CI")
@pytest.mark.skipif(skip_not_on_main, reason="Skipping test run outside of main branch")
def test_partition_via_api_with_no_strategy():
elements_no_strategy = partition_via_api(
@ -129,7 +129,7 @@ def test_partition_via_api_with_no_strategy():
assert elements_no_strategy[3].text != elements_hi_res[3].text
@pytest.mark.skipif(skip_outside_ci, reason="Skipping test run outside of CI")
@pytest.mark.skipif(not is_in_ci, reason="Skipping test run outside of CI")
@pytest.mark.skipif(skip_not_on_main, reason="Skipping test run outside of main branch")
def test_partition_via_api_with_image_hi_res_strategy_includes_coordinates():
# coordinates not included by default to limit payload size
@ -145,7 +145,7 @@ def test_partition_via_api_with_image_hi_res_strategy_includes_coordinates():
assert elements[0].metadata.coordinates is not None
@pytest.mark.skipif(skip_outside_ci, reason="Skipping test run outside of CI")
@pytest.mark.skipif(not is_in_ci, reason="Skipping test run outside of CI")
@pytest.mark.skipif(skip_not_on_main, reason="Skipping test run outside of main branch")
def test_partition_via_api_valid_request_data_kwargs():
elements = partition_via_api(
@ -159,7 +159,7 @@ def test_partition_via_api_valid_request_data_kwargs():
assert isinstance(elements, list)
@pytest.mark.skipif(skip_outside_ci, reason="Skipping test run outside of CI")
@pytest.mark.skipif(not is_in_ci, reason="Skipping test run outside of CI")
@pytest.mark.skipif(skip_not_on_main, reason="Skipping test run outside of main branch")
def test_partition_via_api_image_block_extraction():
elements = partition_via_api(
@ -353,7 +353,7 @@ def get_api_key():
return api_key
@pytest.mark.skipif(skip_outside_ci, reason="Skipping test run outside of CI")
@pytest.mark.skipif(not is_in_ci, reason="Skipping test run outside of CI")
@pytest.mark.skipif(skip_not_on_main, reason="Skipping test run outside of main branch")
def test_partition_multiple_via_api_valid_request_data_kwargs():
filenames = [
@ -371,7 +371,7 @@ def test_partition_multiple_via_api_valid_request_data_kwargs():
assert isinstance(elements, list)
@pytest.mark.skipif(skip_outside_ci, reason="Skipping test run outside of CI")
@pytest.mark.skipif(not is_in_ci, reason="Skipping test run outside of CI")
def test_partition_multiple_via_api_invalid_request_data_kwargs():
filenames = [
example_doc_path("pdf/layout-parser-paper-fast.pdf"),

View File

@ -106,16 +106,7 @@ def test_auto_partition_doc_from_filename(
@pytest.mark.skipif(is_in_docker, reason="Passes in CI but not Docker. Remove skip on #3364 fix.")
@pytest.mark.xfail(sys.platform == "darwin", reason="#3364", raises=KeyError, strict=True)
def test_auto_partition_doc_from_file(expected_docx_elements: list[Element]):
# -- NOTE(scanny): https://github.com/Unstructured-IO/unstructured/issues/3364
# -- detect_filetype() identifies .doc as `application/x-ole-storage` which is true but not
# -- specific enough. The `FileType.MSG` file-type is assigned (which is also an OLE file)
# -- and `partition()` routes the document to `partition_msg` which is where the `KeyError`
# -- comes from.
# -- For some reason, this xfail problem only occurs locally, not in CI, possibly because we
# -- use two different `libmagic` sources (`libmagic` on CI and `libmagic1` on Mac). Doesn't
# -- matter much though because when we add disambiguation they'll both get it right.
with open(example_doc_path("simple.doc"), "rb") as f:
elements = partition(file=f)

View File

@ -17,9 +17,10 @@ from unstructured.staging.weaviate import (
)
is_in_docker = os.path.exists("/.dockerenv")
is_in_ci = os.getenv("CI", "").lower() not in {"", "false", "f", "0"}
def test_stage_for_weaviate(filename="example-docs/layout-parser-paper-fast.pdf"):
def test_stage_for_weaviate():
element_dict = {
"element_id": "015301d4f56aa4b20ec10ac889d2343f",
"text": "LayoutParser: A Unified Toolkit for Deep Learning Based Document Image Analysis",
@ -53,6 +54,7 @@ def test_stage_for_weaviate(filename="example-docs/layout-parser-paper-fast.pdf"
}
@pytest.mark.skipif(not is_in_ci, reason="Integration test that depends on having secret keys")
@pytest.mark.skipif(is_in_docker, reason="Skipping this test in Docker container")
def test_weaviate_schema_is_valid():
unstructured_class = create_unstructured_weaviate_class()