From acda4d07073ad104793f6e435ee9c4150faf87d8 Mon Sep 17 00:00:00 2001 From: Matt Robinson Date: Mon, 20 May 2024 19:05:13 -0400 Subject: [PATCH] fix: set `skip_infer_tables` explicitly in `test_partition_via_api_with_no_strategy` (#3057) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Summary A `partition_via_api` test that only runs on `main` was [failing](https://github.com/Unstructured-IO/unstructured/actions/runs/9159429513/job/25181600959) with the following output, likely due to the change in the default behavior for `skip_infer_table_types`. This PR explicitly sets the `skip_infer_table_types` param to avoid the failure. ```python =========================== short test summary info ============================ FAILED test_unstructured/partition/test_api.py::test_partition_via_api_with_no_strategy - AssertionError: assert 'Zejiang Shen® (<), Ruochen Zhang?, Melissa Dell®, Benjamin Charles Germain Lee?, Jacob Carlson®, and Weining Li®' != 'Zejiang Shen® (<), Ruochen Zhang?, Melissa Dell®, Benjamin Charles Germain Lee?, Jacob Carlson®, and Weining Li®' + where 'Zejiang Shen® (<), Ruochen Zhang?, Melissa Dell®, Benjamin Charles Germain Lee?, Jacob Carlson®, and Weining Li®' = .text + and 'Zejiang Shen® (<), Ruochen Zhang?, Melissa Dell®, Benjamin Charles Germain Lee?, Jacob Carlson®, and Weining Li®' = .text = 1 failed, 2299 passed, 9 skipped, 2 deselected, 2 xfailed, 9 xpassed, 14 warnings in 1241.64s (0:20:41) = make: *** [Makefile:302: test] Error 1 ``` ### Testing After temporarily removing the "skip if not on `main`" `pytest` mark, the [unit tests pass](https://github.com/Unstructured-IO/unstructured/actions/runs/9163268381/job/25192040902?pr=3057) on the feature branch. 
--- test_unstructured/partition/test_api.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test_unstructured/partition/test_api.py b/test_unstructured/partition/test_api.py index cf61f837c..8d535bf86 100644 --- a/test_unstructured/partition/test_api.py +++ b/test_unstructured/partition/test_api.py @@ -175,8 +175,11 @@ def test_partition_via_api_with_no_strategy(): filename=filename, strategy="auto", api_key=get_api_key(), + skip_infer_table_types=["pdf"], + ) + elements_hi_res = partition_via_api( + filename=filename, strategy="hi_res", api_key=get_api_key(), skip_infer_table_types=["pdf"] ) - elements_hi_res = partition_via_api(filename=filename, strategy="hi_res", api_key=get_api_key()) # confirm that hi_res strategy was not passed as default to partition by comparing outputs # elements_hi_res[3].text =