mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-12-28 07:33:36 +00:00
fix: return ocr coordinates points as tuple (#1219)
The `add_pytesseract_bbox_to_elements` function returned
`metadata.coordinates.points` as a `List`, whereas the other strategies
return it as a `Tuple`. Return a `Tuple` here as well for consistency.
Previously:
```
element.metadata.coordinates.points = [
(x1, y1),
(x2, y2),
(x3, y3),
(x4, y4),
]
```
Currently:
```
element.metadata.coordinates.points = (
(x1, y1),
(x2, y2),
(x3, y3),
(x4, y4),
)
```
This commit is contained in:
parent
64b4287308
commit
4b830e3b05
@ -8,6 +8,8 @@
|
||||
|
||||
### Fixes
|
||||
|
||||
* Change the `metadata.coordinates.points` return type of `add_pytesseract_bbox_to_elements` (`ocr_only` strategy) from `List` to `Tuple` for consistency with the other strategies.
|
||||
|
||||
## 0.10.7
|
||||
|
||||
### Enhancements
|
||||
|
||||
@ -774,12 +774,12 @@ def test_partition_pdf_with_ocr_has_coordinates_from_filename(
|
||||
filename="example-docs/chevron-page.pdf",
|
||||
):
|
||||
elements = pdf.partition_pdf(filename=filename, strategy="ocr_only")
|
||||
assert elements[0].metadata.coordinates.points == [
|
||||
assert elements[0].metadata.coordinates.points == (
|
||||
(657.0, 2144.0),
|
||||
(657.0, 2106.0),
|
||||
(1043.0, 2106.0),
|
||||
(1043.0, 2144.0),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def test_partition_pdf_with_ocr_has_coordinates_from_file(
|
||||
@ -790,9 +790,9 @@ def test_partition_pdf_with_ocr_has_coordinates_from_file(
|
||||
file=f,
|
||||
strategy="ocr_only",
|
||||
)
|
||||
assert elements[0].metadata.coordinates.points == [
|
||||
assert elements[0].metadata.coordinates.points == (
|
||||
(657.0, 2144.0),
|
||||
(657.0, 2106.0),
|
||||
(1043.0, 2106.0),
|
||||
(1043.0, 2144.0),
|
||||
]
|
||||
)
|
||||
|
||||
@ -503,11 +503,12 @@ def add_pytesseract_bbox_to_elements(elements, bboxes, width, height):
|
||||
max_y = max(max_y, y2)
|
||||
|
||||
points = ((min_x, min_y), (min_x, max_y), (max_x, max_y), (max_x, min_y))
|
||||
converted_points = []
|
||||
for point in points:
|
||||
x, y = point
|
||||
new_x, new_y = point_space.convert_coordinates_to_new_system(pixel_space, x, y)
|
||||
converted_points.append((new_x, new_y))
|
||||
converted_points = tuple(
|
||||
[
|
||||
point_space.convert_coordinates_to_new_system(pixel_space, *point)
|
||||
for point in points
|
||||
],
|
||||
)
|
||||
|
||||
element.metadata.coordinates = CoordinatesMetadata(
|
||||
points=converted_points,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user