diff --git a/CHANGELOG.md b/CHANGELOG.md
index e7a0b5eff..fd0ee7f15 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,13 @@
+## 0.10.23-dev0
+
+### Enhancements
+
+* **Add functionality to limit precision when serializing to json** Precision for `points` is limited to 1 decimal place if coordinates["system"] == "PixelSpace" (otherwise 2 decimal places). Precision for `detection_class_prob` is limited to 5 decimal places.
+
+### Features
+
+### Fixes
+
 ## 0.10.22
 
 ### Enhancements
diff --git a/unstructured/__version__.py b/unstructured/__version__.py
index c42cfa17f..debea5071 100644
--- a/unstructured/__version__.py
+++ b/unstructured/__version__.py
@@ -1 +1 @@
-__version__ = "0.10.22"  # pragma: no cover
+__version__ = "0.10.23-dev0"  # pragma: no cover
diff --git a/unstructured/staging/base.py b/unstructured/staging/base.py
index e0aeda79c..f8904016a 100644
--- a/unstructured/staging/base.py
+++ b/unstructured/staging/base.py
@@ -1,8 +1,10 @@
 import csv
 import io
 import json
+from copy import deepcopy
 from typing import Any, Dict, List, Optional
 
+from unstructured.documents.coordinates import PixelSpace
 from unstructured.documents.elements import (
     TYPE_TO_TEXT_ELEMENT_MAP,
     CheckBox,
@@ -77,6 +79,33 @@ def convert_to_dict(elements: List[Element]) -> List[Dict[str, Any]]:
     return convert_to_isd(elements)
 
 
+def _fix_metadata_field_precision(elements: List[Element]) -> List[Element]:
+    """Returns deep copies of `elements` with float metadata rounded for serialization.
+
+    Coordinate points are rounded to 1 decimal place when the coordinate system is a
+    `PixelSpace` and to 2 decimal places otherwise; `detection_class_prob` is rounded
+    to 5 decimal places. The input elements are not modified.
+    """
+    out_elements = []
+    for element in elements:
+        # Round a copy so the caller's elements keep their full precision.
+        el = deepcopy(element)
+        coordinates = el.metadata.coordinates
+        # Defensive: skip rounding when there are no points to iterate over.
+        if coordinates is not None and coordinates.points is not None:
+            precision = 1 if isinstance(coordinates.system, PixelSpace) else 2
+            coordinates.points = tuple(
+                (round(x, precision), round(y, precision)) for x, y in coordinates.points
+            )
+
+        if el.metadata.detection_class_prob is not None:
+            el.metadata.detection_class_prob = round(el.metadata.detection_class_prob, 5)
+
+        out_elements.append(el)
+
+    return out_elements
+
+
 def elements_to_json(
     elements: List[Element],
     filename: Optional[str] = None,
@@ -87,7 +116,9 @@ def elements_to_json(
     Saves a list of elements to a JSON file if filename is specified.
     Otherwise, return the list of elements as a string.
     """
-    element_dict = convert_to_dict(elements)
+
+    pre_processed_elements = _fix_metadata_field_precision(elements)
+    element_dict = convert_to_dict(pre_processed_elements)
     if filename is not None:
         with open(filename, "w", encoding=encoding) as f:
             json.dump(element_dict, f, indent=indent)