diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a7f2539d..834055102 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ ### Fixes +* **Fix table structure metric script** Update the call to the table agent so that it now provides the required OCR tokens. * **Fix element extraction not working when using "auto" strategy for pdf and image** If element extraction is specified, the "auto" strategy falls back to the "hi_res" strategy. ## 0.11.6 diff --git a/unstructured/metrics/table_structure.py b/unstructured/metrics/table_structure.py index ff79114f9..53f9171ad 100644 --- a/unstructured/metrics/table_structure.py +++ b/unstructured/metrics/table_structure.py @@ -3,6 +3,7 @@ import pandas as pd from PIL import Image from unstructured.partition.pdf import convert_pdf_to_images +from unstructured.partition.pdf_image.ocr import get_table_tokens from unstructured.utils import requires_dependencies @@ -20,7 +21,9 @@ def image_or_pdf_to_dataframe(filename: str) -> pd.DataFrame: else: image = Image.open(filename).convert("RGB") - return tables_agent.run_prediction(image, result_format="dataframe") + return tables_agent.run_prediction( + image, ocr_tokens=get_table_tokens(image), result_format="dataframe" + ) @requires_dependencies("unstructured_inference")