mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-08-18 21:57:58 +00:00

### Summary

Duplicate PR of #1259 because of issues with checks.

Closes #1227, which found that `nan` values were present in the coordinates being generated for some elements.

This breaks logic out from `add_pytesseract_bbox_to_elements` into the new functions `_get_element_box` and `convert_multiple_coordinates_to_new_system`. It also updates the logic to check that the current bounding box matches the first character of the element's text (so as to avoid the `~` characters that `pytesseract.image_to_boxes` includes but that are not present in the output of `pytesseract.image_to_string`).

### Testing

```
from unstructured.partition.image import partition_image
from PIL import Image, ImageDraw

filename = "example-docs/layout-parser-paper-with-table.jpg"
elements = partition_image(filename=filename, strategy="ocr_only")

image = Image.open(filename)
draw = ImageDraw.Draw(image)
for i, element in enumerate(elements):
    print(i, element.metadata.coordinates)
    if element.metadata.coordinates:
        draw.polygon(element.metadata.coordinates.points, outline="red", width=2)

output = "example-docs/box-layout-parser-paper-with-table.jpg"
image.save(output)
image.close()
```

---------

Co-authored-by: qued <64741807+qued@users.noreply.github.com>
Co-authored-by: cragwolfe <crag@unstructured.io>
Co-authored-by: Yao You <theyaoyou@gmail.com>
114 lines
3.8 KiB
Python
114 lines
3.8 KiB
Python
from __future__ import annotations
|
|
|
|
from enum import Enum
|
|
from typing import Any, Dict, Sequence, Tuple, Union
|
|
|
|
|
|
class Orientation(Enum):
    """Axis orientations of a coordinate system, stored as an ``(x, y)`` pair of signs."""

    # Origin in the top left; y increases in the downward direction.
    SCREEN = (1, -1)
    # Origin in the bottom left; y increases in the upward direction.
    CARTESIAN = (1, 1)
|
|
|
|
|
|
def convert_coordinate(old_t, old_t_max, new_t_max, t_orientation):
    """Linearly map a coordinate along one axis into another system.

    ``old_t`` is first expressed as a fraction of ``old_t_max``. With an orientation of
    ``1`` that fraction is kept as is; with ``-1`` it is mirrored (``1 - fraction``).
    The result is then scaled by ``new_t_max``.
    """
    fraction = old_t / old_t_max
    # Only one of the two terms is non-zero when the orientation is +1 or -1.
    mirrored_term = (1 - fraction) * (1 - t_orientation) / 2
    direct_term = fraction * (1 + t_orientation) / 2
    return (mirrored_term + direct_term) * new_t_max
|
|
|
|
|
|
class CoordinateSystem:
    """A finite coordinate plane with given width and height.

    ``orientation`` is only declared here, not assigned; subclasses provide the value.
    The conversion methods read ``self.orientation`` and therefore raise
    ``AttributeError`` on an instance whose class does not define it.

    Because the class defines ``__eq__`` without ``__hash__``, instances are unhashable.
    """

    orientation: Orientation

    def __init__(self, width: Union[int, float], height: Union[int, float]):
        self.width = width
        self.height = height

    def __eq__(self, other: object) -> bool:
        """Return True when ``other`` has the same class name, size and orientation.

        Anything that is not a ``CoordinateSystem`` compares unequal.
        """
        if not isinstance(other, CoordinateSystem):
            return False
        return (
            self.__class__.__name__ == other.__class__.__name__
            and self.width == other.width
            and self.height == other.height
            # The base class never assigns ``orientation``, so fall back to None
            # instead of raising AttributeError when comparing plain instances.
            and getattr(self, "orientation", None) == getattr(other, "orientation", None)
        )

    def convert_from_relative(
        self,
        x: Union[float, int],
        y: Union[float, int],
    ) -> Tuple[Union[float, int], Union[float, int]]:
        """Convert to this coordinate system from a relative coordinate system.

        ``x`` and ``y`` are treated as fractions of a unit extent and scaled to this
        system's ``width`` and ``height`` according to its orientation.
        """
        x_orientation, y_orientation = self.orientation.value
        new_x = convert_coordinate(x, 1, self.width, x_orientation)
        new_y = convert_coordinate(y, 1, self.height, y_orientation)
        return new_x, new_y

    def convert_to_relative(
        self,
        x: Union[float, int],
        y: Union[float, int],
    ) -> Tuple[Union[float, int], Union[float, int]]:
        """Convert from this coordinate system to a relative coordinate system.

        ``x`` and ``y`` are divided by this system's ``width`` and ``height`` (and
        mirrored where the orientation is ``-1``) to give values on a unit extent.
        """
        x_orientation, y_orientation = self.orientation.value
        new_x = convert_coordinate(x, self.width, 1, x_orientation)
        new_y = convert_coordinate(y, self.height, 1, y_orientation)
        return new_x, new_y

    def convert_coordinates_to_new_system(
        self,
        new_system: CoordinateSystem,
        x: Union[float, int],
        y: Union[float, int],
    ) -> Tuple[Union[float, int], Union[float, int]]:
        """Convert from this coordinate system to another given coordinate system.

        The point is first made relative to this system, then projected into
        ``new_system``.
        """
        rel_x, rel_y = self.convert_to_relative(x, y)
        return new_system.convert_from_relative(rel_x, rel_y)

    def convert_multiple_coordinates_to_new_system(
        self,
        new_system: CoordinateSystem,
        coordinates: Sequence[Tuple[Union[float, int], Union[float, int]]],
    ) -> Tuple[Tuple[Union[float, int], Union[float, int]], ...]:
        """Convert (x, y) coordinates from current system to another coordinate system.

        Returns a tuple of converted ``(x, y)`` pairs in the same order as
        ``coordinates``; an empty input gives an empty tuple.
        """
        return tuple(
            self.convert_coordinates_to_new_system(new_system=new_system, x=x, y=y)
            for x, y in coordinates
        )
|
|
|
|
|
|
class RelativeCoordinateSystem(CoordinateSystem):
    """Relative coordinate system in which both x and y run on a scale from 0 to 1."""

    orientation = Orientation.CARTESIAN

    def __init__(self):
        # The extents are fixed at 1, so the constructor takes no dimensions.
        super().__init__(width=1, height=1)
|
|
|
|
|
|
class PixelSpace(CoordinateSystem):
    """Coordinate system for a pixel space, such as an image.

    The origin is at the top left (screen orientation).
    """

    orientation = Orientation.SCREEN
|
|
|
|
|
|
class PointSpace(CoordinateSystem):
    """Coordinate system for a point space, such as a pdf.

    The origin is at the bottom left (cartesian orientation).
    """

    orientation = Orientation.CARTESIAN
|
|
|
|
|
|
# Lookup from a coordinate system's class name to the class itself.
# NOTE(review): RelativeCoordinateSystem is not listed; its constructor takes no
# width/height, so confirm with callers before adding it.
TYPE_TO_COORDINATE_SYSTEM_MAP: Dict[str, Any] = {
    "PixelSpace": PixelSpace,
    "PointSpace": PointSpace,
    "CoordinateSystem": CoordinateSystem,
}
|