mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-09-03 05:39:47 +00:00
feat: less precision in json floats (#1718)
Closes #1340. ### Summary - add functionality to limit precision when serializing to JSON ### Testing ``` elements = partition(raw_doc.<extension>) output_json = elements_to_json(elements) print(output_json) ```
This commit is contained in:
parent
ad1b93dbaa
commit
ef391e1a3e
10
CHANGELOG.md
10
CHANGELOG.md
@ -1,3 +1,13 @@
|
||||
## 0.10.23-dev0
|
||||
|
||||
### Enhancements
|
||||
|
||||
* **Add functionality to limit precision when serializing to json** Precision for `points` is limited to 1 decimal place if coordinates["system"] == "PixelSpace" (otherwise 2 decimal places). Precision for `detection_class_prob` is limited to 5 decimal places.
|
||||
|
||||
### Features
|
||||
|
||||
### Fixes
|
||||
|
||||
## 0.10.22
|
||||
|
||||
### Enhancements
|
||||
|
@ -1 +1 @@
|
||||
__version__ = "0.10.22" # pragma: no cover
|
||||
__version__ = "0.10.23-dev0" # pragma: no cover
|
||||
|
@ -1,8 +1,10 @@
|
||||
import csv
|
||||
import io
|
||||
import json
|
||||
from copy import deepcopy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from unstructured.documents.coordinates import PixelSpace
|
||||
from unstructured.documents.elements import (
|
||||
TYPE_TO_TEXT_ELEMENT_MAP,
|
||||
CheckBox,
|
||||
@ -77,6 +79,27 @@ def convert_to_dict(elements: List[Element]) -> List[Dict[str, Any]]:
|
||||
return convert_to_isd(elements)
|
||||
|
||||
|
||||
def _fix_metadata_field_precision(elements: List[Element]) -> List[Element]:
    """Return copies of `elements` with float metadata rounded for serialization.

    Each element is deep-copied, so the elements passed in are never modified.
    On each copy:

    * every (x, y) pair in `metadata.coordinates.points` is rounded to 1 decimal
      place when `metadata.coordinates.system` is a `PixelSpace` instance, and to
      2 decimal places otherwise;
    * `metadata.detection_class_prob` is rounded to 5 decimal places.

    Args:
        elements: The elements whose metadata precision should be limited.

    Returns:
        A new list holding the rounded copies, in the same order as `elements`.
    """
    out_elements = []
    for element in elements:
        # Work on a copy so rounding never leaks back into the caller's elements.
        el = deepcopy(element)

        coordinates = el.metadata.coordinates
        # NOTE(review): the `points is not None` guard avoids a TypeError when a
        # coordinates object exists but carries no points -- confirm whether the
        # coordinates metadata type permits that state.
        if coordinates and coordinates.points is not None:
            precision = 1 if isinstance(coordinates.system, PixelSpace) else 2
            coordinates.points = tuple(
                (round(x, precision), round(y, precision)) for x, y in coordinates.points
            )

        # A falsy probability (None or 0.0) is left untouched; rounding 0.0 would
        # be a no-op anyway.
        if el.metadata.detection_class_prob:
            el.metadata.detection_class_prob = round(el.metadata.detection_class_prob, 5)

        out_elements.append(el)
    return out_elements
|
||||
|
||||
|
||||
def elements_to_json(
|
||||
elements: List[Element],
|
||||
filename: Optional[str] = None,
|
||||
@ -87,7 +110,9 @@ def elements_to_json(
|
||||
Saves a list of elements to a JSON file if filename is specified.
|
||||
Otherwise, return the list of elements as a string.
|
||||
"""
|
||||
element_dict = convert_to_dict(elements)
|
||||
|
||||
pre_processed_elements = _fix_metadata_field_precision(elements)
|
||||
element_dict = convert_to_dict(pre_processed_elements)
|
||||
if filename is not None:
|
||||
with open(filename, "w", encoding=encoding) as f:
|
||||
json.dump(element_dict, f, indent=indent)
|
||||
|
Loading…
x
Reference in New Issue
Block a user