feat: less precision in json floats (#1718)

Closes #1340.
### Summary
- add functionality to limit precision when serializing to JSON
### Testing
```
elements = partition(raw_doc.<extension>)
output_json = elements_to_json(elements)
print(output_json)
```
This commit is contained in:
Christine Straub 2023-10-13 11:06:36 -07:00 committed by GitHub
parent ad1b93dbaa
commit ef391e1a3e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 37 additions and 2 deletions

View File

@ -1,3 +1,13 @@
## 0.10.23-dev0
### Enhancements
* **Add functionality to limit precision when serializing to JSON** Precision for `points` is limited to 1 decimal place if coordinates["system"] == "PixelSpace" (otherwise 2 decimal places). Precision for `detection_class_prob` is limited to 5 decimal places.
### Features
### Fixes
## 0.10.22
### Enhancements

View File

@ -1 +1 @@
__version__ = "0.10.22" # pragma: no cover
__version__ = "0.10.23-dev0" # pragma: no cover

View File

@ -1,8 +1,10 @@
import csv
import io
import json
from copy import deepcopy
from typing import Any, Dict, List, Optional
from unstructured.documents.coordinates import PixelSpace
from unstructured.documents.elements import (
TYPE_TO_TEXT_ELEMENT_MAP,
CheckBox,
@ -77,6 +79,27 @@ def convert_to_dict(elements: List[Element]) -> List[Dict[str, Any]]:
return convert_to_isd(elements)
def _fix_metadata_field_precision(elements: List[Element]) -> List[Element]:
    """Return copies of `elements` with float metadata rounded for serialization.

    Each element is deep-copied, so the elements passed in are never modified.
    On each copy:

    - coordinate points are rounded to 1 decimal place when the coordinate
      system is a `PixelSpace` instance, and to 2 decimal places otherwise;
    - `detection_class_prob` is rounded to 5 decimal places.

    Args:
        elements: Elements whose metadata should be rounded.

    Returns:
        A new list holding the rounded copies, in the same order as `elements`.
    """
    out_elements = []
    for element in elements:
        el = deepcopy(element)

        coordinates = el.metadata.coordinates
        points = coordinates.points if coordinates else None
        # If `points` is None there is nothing to round; skip the block instead
        # of raising TypeError while trying to iterate over None.
        if points is not None:
            precision = 1 if isinstance(coordinates.system, PixelSpace) else 2
            coordinates.points = tuple(
                (round(x, precision), round(y, precision)) for x, y in points
            )

        # Explicit None check: "no probability recorded" is distinct from a
        # probability that happens to be 0.0 (rounding 0.0 is a no-op anyway).
        prob = el.metadata.detection_class_prob
        if prob is not None:
            el.metadata.detection_class_prob = round(prob, 5)

        out_elements.append(el)
    return out_elements
def elements_to_json(
elements: List[Element],
filename: Optional[str] = None,
@ -87,7 +110,9 @@ def elements_to_json(
Saves a list of elements to a JSON file if filename is specified.
Otherwise, return the list of elements as a string.
"""
element_dict = convert_to_dict(elements)
pre_processed_elements = _fix_metadata_field_precision(elements)
element_dict = convert_to_dict(pre_processed_elements)
if filename is not None:
with open(filename, "w", encoding=encoding) as f:
json.dump(element_dict, f, indent=indent)