mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-11-02 02:53:31 +00:00
fix: update table structure eval to use new table inference interface (#2306)
Provide OCR tokens for the table eval script. Right now
`unstructured-inference` can compute OCR components when they are not
passed in, but in a future release we will be required to pass OCR
results into the table structure extraction model:
d3b2981313/CHANGELOG.md (0719)
This PR prepares for the upcoming change by passing OCR tokens into the
table structure extraction process.
## test
Create a new virtual env that follows the setup in the README, then upgrade
`inference` with `pip install unstructured-inference --upgrade`.
Run the test with `PYTHONPATH=. pytest
test_unstructured/metrics/test_table_structure.py`; it fails on the main
branch but passes with this PR.
---------
Co-authored-by: Austin Walker <awalk89@gmail.com>
This commit is contained in:
parent
dd1443ab6f
commit
1b70ea86b3
@ -12,6 +12,7 @@
|
||||
|
||||
### Fixes
|
||||
|
||||
* **Fix table structure metric script** Update the call to the table agent to provide OCR tokens, as is now required
|
||||
* **Fix element extraction not working when using "auto" strategy for pdf and image** If element extraction is specified, the "auto" strategy falls back to the "hi_res" strategy.
|
||||
|
||||
## 0.11.6
|
||||
|
||||
@ -3,6 +3,7 @@ import pandas as pd
|
||||
from PIL import Image
|
||||
|
||||
from unstructured.partition.pdf import convert_pdf_to_images
|
||||
from unstructured.partition.pdf_image.ocr import get_table_tokens
|
||||
from unstructured.utils import requires_dependencies
|
||||
|
||||
|
||||
@ -20,7 +21,9 @@ def image_or_pdf_to_dataframe(filename: str) -> pd.DataFrame:
|
||||
else:
|
||||
image = Image.open(filename).convert("RGB")
|
||||
|
||||
return tables_agent.run_prediction(image, result_format="dataframe")
|
||||
return tables_agent.run_prediction(
|
||||
image, ocr_tokens=get_table_tokens(image), result_format="dataframe"
|
||||
)
|
||||
|
||||
|
||||
@requires_dependencies("unstructured_inference")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user