'
diff --git a/unstructured/chunking/base.py b/unstructured/chunking/base.py
index 90057d11d..b91c3982e 100644
--- a/unstructured/chunking/base.py
+++ b/unstructured/chunking/base.py
@@ -774,6 +774,8 @@ class TextPreChunk:
# -- Python 3.7+ maintains dict insertion order --
ordered_unique_keys = {key: None for val_list in values for key in val_list}
yield field_name, list(ordered_unique_keys.keys())
+ elif strategy is CS.STRING_CONCATENATE:
+ yield field_name, " ".join(val.strip() for val in values)
elif strategy is CS.DROP:
continue
else: # pragma: no cover
diff --git a/unstructured/documents/elements.py b/unstructured/documents/elements.py
index d6f4c3fc3..a9636b5d6 100644
--- a/unstructured/documents/elements.py
+++ b/unstructured/documents/elements.py
@@ -458,6 +458,9 @@ class ConsolidationStrategy(enum.Enum):
FIRST = "first"
"""Use the first value encountered, omit if not present in any elements."""
+ STRING_CONCATENATE = "string_concatenate"
+ """Join each element's stripped value with a space. Only suitable for fields of `str` type."""
+
LIST_CONCATENATE = "LIST_CONCATENATE"
"""Concatenate the list values across elements. Only suitable for fields of `List` type."""
@@ -507,7 +510,7 @@ class ConsolidationStrategy(enum.Enum):
"sent_to": cls.FIRST,
"signature": cls.FIRST,
"subject": cls.FIRST,
- "text_as_html": cls.FIRST, # -- only occurs in Table --
+ "text_as_html": cls.STRING_CONCATENATE,
"table_as_cells": cls.FIRST, # -- only occurs in Table --
"url": cls.FIRST,
"key_value_pairs": cls.DROP, # -- only occurs in FormKeysValues --