fix: remove .max_characters from ElementMetadata (#2032)

This metadata field is presumably vestigial; no code in the repo uses it.
`max_characters` is an optional argument to `chunk_by_title()` and has
meaning in that context, but its value is never written to the metadata.

Remove this unused field.
This commit is contained in:
Steve Canny 2023-11-08 11:56:31 -08:00 committed by GitHub
parent 0e2c21e5a2
commit c688216b38
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 0 additions and 2 deletions

View File

@@ -157,7 +157,6 @@ def test_default_pandas_dtypes():
emphasized_text_tags=["emphasized", "text", "tags"],
text_as_html="text_as_html",
regex_metadata={"key": [RegexMetadata(text="text", start=0, end=4)]},
max_characters=2,
is_continuation=True,
detection_class_prob=0.5,
),

View File

@@ -188,7 +188,6 @@ class ElementMetadata:
regex_metadata: Optional[Dict[str, List[RegexMetadata]]] = None
# Chunking metadata fields
max_characters: Optional[int] = None
is_continuation: Optional[bool] = None
# Detection Model Class Probabilities from Unstructured-Inference Hi-Res