From 51cf6bf7161d56eee4e108ada5dfa3c1c314f2cc Mon Sep 17 00:00:00 2001
From: Steve Canny <stcanny@gmail.com>
Date: Fri, 23 Feb 2024 10:22:44 -0800
Subject: [PATCH] rfctr(chunking): extract strategy-specific chunking options
 (#2556)

**Summary**
A pluggable chunking strategy needs its own local set of chunking
options that subclasses a base-class in `unstructured`.

Extract distinct `_ByTitleChunkingOptions` and `_BasicChunkingOptions`
for the existing two chunking strategies and move their
strategy-specific option setting and validation to the respective
subclass.

This was also a good opportunity for us to clean up a few odds and ends
we'd been meaning to.

Might be worth looking at the commits individually as they are cohesive
incremental steps toward the goal.
---
 test_unstructured/chunking/test_base.py  | 187 ++++++++++-------------
 test_unstructured/chunking/test_basic.py |   2 +-
 test_unstructured/chunking/test_title.py | 105 ++++++++++++-
 unstructured/chunking/base.py            |  91 +++--------
 unstructured/chunking/basic.py           |  36 ++++-
 unstructured/chunking/title.py           | 124 +++++++++++++--
 6 files changed, 340 insertions(+), 205 deletions(-)

diff --git a/test_unstructured/chunking/test_base.py b/test_unstructured/chunking/test_base.py
index 22ebf68e7..3a3674ced 100644
--- a/test_unstructured/chunking/test_base.py
+++ b/test_unstructured/chunking/test_base.py
@@ -4,15 +4,15 @@
 
 from __future__ import annotations
 
-from typing import Sequence
+from typing import Optional, Sequence
 
 import pytest
 
 from unstructured.chunking.base import (
-    BasePreChunker,
     ChunkingOptions,
     PreChunkBuilder,
     PreChunkCombiner,
+    PreChunker,
     TablePreChunk,
     TextPreChunk,
     TextPreChunkAccumulator,
@@ -48,7 +48,7 @@ class DescribeChunkingOptions:
             ValueError,
             match=f"'max_characters' argument must be > 0, got {max_characters}",
         ):
-            ChunkingOptions.new(max_characters=max_characters)
+            ChunkingOptions(max_characters=max_characters)._validate()
 
     def it_does_not_complain_when_specifying_max_characters_by_itself(self):
         """Caller can specify `max_characters` arg without specifying any others.
@@ -58,44 +58,25 @@ class DescribeChunkingOptions:
         and trigger an exception.
         """
         try:
-            ChunkingOptions.new(max_characters=50)
+            ChunkingOptions(max_characters=50)._validate()
         except ValueError:
             pytest.fail("did not accept `max_characters` as option by itself")
 
-    @pytest.mark.parametrize("n_chars", [-1, -42])
-    def it_rejects_combine_text_under_n_chars_for_n_less_than_zero(self, n_chars: int):
-        with pytest.raises(
-            ValueError,
-            match=f"'combine_text_under_n_chars' argument must be >= 0, got {n_chars}",
-        ):
-            ChunkingOptions.new(combine_text_under_n_chars=n_chars)
+    @pytest.mark.parametrize(
+        ("combine_text_under_n_chars", "expected_value"), [(None, 0), (42, 42)]
+    )
+    def it_accepts_combine_text_under_n_chars_in_constructor_but_defaults_to_no_combining(
+        self, combine_text_under_n_chars: Optional[int], expected_value: int
+    ):
+        """Subclasses can store `combine_text_under_n_chars` but must validate and enable it.
 
-    def it_accepts_0_for_combine_text_under_n_chars_to_disable_chunk_combining(self):
-        """Specifying `combine_text_under_n_chars=0` is how a caller disables chunk-combining."""
-        opts = ChunkingOptions.new(combine_text_under_n_chars=0)
-        assert opts.combine_text_under_n_chars == 0
-
-    def it_does_not_complain_when_specifying_combine_text_under_n_chars_by_itself(self):
-        """Caller can specify `combine_text_under_n_chars` arg without specifying other options."""
-        try:
-            opts = ChunkingOptions.new(combine_text_under_n_chars=50)
-        except ValueError:
-            pytest.fail("did not accept `combine_text_under_n_chars` as option by itself")
-
-        assert opts.combine_text_under_n_chars == 50
-
-    def it_silently_accepts_combine_text_under_n_chars_greater_than_maxchars(self):
-        """`combine_text_under_n_chars` > `max_characters` doesn't affect chunking behavior.
-
-        So rather than raising an exception or warning, we just cap that value at `max_characters`
-        which is the behavioral equivalent.
+        The `combine_text_under_n_chars` option is not used by all chunkers and its behavior can
+        differ between subtypes. It is present in and stored by the contructur but it defaults to
+        `0` (no pre-chunk combining) and must be overridden by subclasses to give it the desired
+        behavior.
         """
-        try:
-            opts = ChunkingOptions.new(max_characters=500, combine_text_under_n_chars=600)
-        except ValueError:
-            pytest.fail("did not accept `combine_text_under_n_chars` greater than `max_characters`")
-
-        assert opts.combine_text_under_n_chars == 500
+        opts = ChunkingOptions(combine_text_under_n_chars=combine_text_under_n_chars)
+        assert opts.combine_text_under_n_chars == expected_value
 
     @pytest.mark.parametrize("n_chars", [-1, -42])
     def it_rejects_new_after_n_chars_for_n_less_than_zero(self, n_chars: int):
@@ -103,7 +84,7 @@ class DescribeChunkingOptions:
             ValueError,
             match=f"'new_after_n_chars' argument must be >= 0, got {n_chars}",
         ):
-            ChunkingOptions.new(new_after_n_chars=n_chars)
+            ChunkingOptions(new_after_n_chars=n_chars)._validate()
 
     def it_rejects_overlap_not_less_than_max_characters(self):
         with pytest.raises(
@@ -113,26 +94,23 @@ class DescribeChunkingOptions:
             ChunkingOptions(max_characters=200, overlap=300)._validate()
 
     def it_does_not_complain_when_specifying_new_after_n_chars_by_itself(self):
-        """Caller can specify `new_after_n_chars` arg without specifying any other options.
-
-        In particular, `combine_text_under_n_chars` value is adjusted down to the
-        `new_after_n_chars` value when the default for `combine_text_under_n_chars` exceeds the
-        value of `new_after_n_chars`.
-        """
+        """Caller can specify `new_after_n_chars` arg without specifying any other options."""
+        opts = ChunkingOptions(new_after_n_chars=200)
         try:
-            opts = ChunkingOptions.new(new_after_n_chars=200)
+            opts._validate()
         except ValueError:
             pytest.fail("did not accept `new_after_n_chars` as option by itself")
 
         assert opts.soft_max == 200
-        assert opts.combine_text_under_n_chars == 200
 
     def it_accepts_0_for_new_after_n_chars_to_put_each_element_into_its_own_chunk(self):
         """Specifying `new_after_n_chars=0` places each element into its own pre-chunk.
 
         This puts each element into its own chunk, although long chunks are still split.
         """
-        opts = ChunkingOptions.new(new_after_n_chars=0)
+        opts = ChunkingOptions(new_after_n_chars=0)
+        opts._validate()
+
         assert opts.soft_max == 0
 
     def it_silently_accepts_new_after_n_chars_greater_than_maxchars(self):
@@ -141,33 +119,32 @@ class DescribeChunkingOptions:
         So rather than raising an exception or warning, we just cap that value at `max_characters`
         which is the behavioral equivalent.
         """
+        opts = ChunkingOptions(max_characters=444, new_after_n_chars=555)
         try:
-            opts = ChunkingOptions.new(max_characters=444, new_after_n_chars=555)
+            opts._validate()
         except ValueError:
             pytest.fail("did not accept `new_after_n_chars` greater than `max_characters`")
 
         assert opts.soft_max == 444
 
     def it_knows_how_much_overlap_to_apply_to_split_chunks(self):
-        assert ChunkingOptions.new(overlap=10).overlap == 10
+        assert ChunkingOptions(overlap=10).overlap == 10
 
     def and_it_uses_the_same_value_for_inter_chunk_overlap_when_asked_to_overlap_all_chunks(self):
-        assert ChunkingOptions.new(overlap=10, overlap_all=True).inter_chunk_overlap == 10
+        assert ChunkingOptions(overlap=10, overlap_all=True).inter_chunk_overlap == 10
 
     def but_it_does_not_overlap_pre_chunks_by_default(self):
-        assert ChunkingOptions.new(overlap=10).inter_chunk_overlap == 0
+        assert ChunkingOptions(overlap=10).inter_chunk_overlap == 0
 
     def it_knows_the_text_separator_string(self):
-        assert ChunkingOptions.new().text_separator == "\n\n"
+        assert ChunkingOptions().text_separator == "\n\n"
 
 
 class Describe_TextSplitter:
     """Unit-test suite for `unstructured.chunking.base._TextSplitter` objects."""
 
     def it_splits_on_a_preferred_separator_when_it_can(self):
-        opts = ChunkingOptions.new(
-            max_characters=50, text_splitting_separators=("\n", " "), overlap=10
-        )
+        opts = ChunkingOptions(max_characters=50, text_splitting_separators=("\n", " "), overlap=10)
         split = _TextSplitter(opts)
         text = (
             "Lorem ipsum dolor amet consectetur adipiscing.  \n  "
@@ -189,9 +166,7 @@ class Describe_TextSplitter:
         assert remainder == ""
 
     def and_it_splits_on_the_next_available_separator_when_the_first_is_not_available(self):
-        opts = ChunkingOptions.new(
-            max_characters=40, text_splitting_separators=("\n", " "), overlap=10
-        )
+        opts = ChunkingOptions(max_characters=40, text_splitting_separators=("\n", " "), overlap=10)
         split = _TextSplitter(opts)
         text = (
             "Lorem ipsum dolor amet consectetur adipiscing. In rhoncus ipsum sed lectus porta"
@@ -211,9 +186,7 @@ class Describe_TextSplitter:
         assert remainder == ""
 
     def and_it_splits_on_an_arbitrary_character_as_a_last_resort(self):
-        opts = ChunkingOptions.new(
-            max_characters=30, text_splitting_separators=("\n", " "), overlap=10
-        )
+        opts = ChunkingOptions(max_characters=30, text_splitting_separators=("\n", " "), overlap=10)
         split = _TextSplitter(opts)
         text = "Loremipsumdolorametconsecteturadipiscingelit. In rhoncus ipsum sed lectus porta."
 
@@ -237,7 +210,7 @@ class Describe_TextSplitter:
         ],
     )
     def it_does_not_split_a_string_that_is_not_longer_than_maxlen(self, text: str):
-        opts = ChunkingOptions.new(max_characters=46, overlap=10)
+        opts = ChunkingOptions(max_characters=46, overlap=10)
         split = _TextSplitter(opts)
 
         s, remainder = split(text)
@@ -246,7 +219,7 @@ class Describe_TextSplitter:
         assert remainder == ""
 
     def it_fills_the_window_when_falling_back_to_an_arbitrary_character_split(self):
-        opts = ChunkingOptions.new(max_characters=38, overlap=10)
+        opts = ChunkingOptions(max_characters=38, overlap=10)
         split = _TextSplitter(opts)
         text = "Loremipsumdolorametconsecteturadipiscingelit. In rhoncus ipsum sed lectus porta."
 
@@ -257,9 +230,7 @@ class Describe_TextSplitter:
 
     @pytest.mark.parametrize("separators", [("\n", " "), (" ",)])
     def it_strips_whitespace_around_the_split(self, separators: Sequence[str]):
-        opts = ChunkingOptions.new(
-            max_characters=50, text_splitting_separators=separators, overlap=10
-        )
+        opts = ChunkingOptions(max_characters=50, text_splitting_separators=separators, overlap=10)
         split = _TextSplitter(opts)
         text = "Lorem ipsum dolor amet consectetur adipiscing.   \n\n In rhoncus ipsum sed lectus."
         #       |-------------------------------------------------^  50-chars
@@ -271,12 +242,12 @@ class Describe_TextSplitter:
 
 
 # ================================================================================================
-# BASE PRE-CHUNKER
+# PRE-CHUNKER
 # ================================================================================================
 
 
-class DescribeBasePreChunker:
-    """Unit-test suite for `unstructured.chunking.base.BasePreChunker` objects."""
+class DescribePreChunker:
+    """Unit-test suite for `unstructured.chunking.base.PreChunker` objects."""
 
     def it_gathers_elements_into_pre_chunks_respecting_the_specified_chunk_size(self):
         elements = [
@@ -289,9 +260,9 @@ class DescribeBasePreChunker:
             CheckBox(),
         ]
 
-        opts = ChunkingOptions.new(max_characters=150, new_after_n_chars=65)
+        opts = ChunkingOptions(max_characters=150, new_after_n_chars=65)
 
-        pre_chunk_iter = BasePreChunker.iter_pre_chunks(elements, opts=opts)
+        pre_chunk_iter = PreChunker.iter_pre_chunks(elements, opts=opts)
 
         pre_chunk = next(pre_chunk_iter)
         assert isinstance(pre_chunk, TextPreChunk)
@@ -344,7 +315,7 @@ class DescribeTablePreChunk:
         pre_chunk = TablePreChunk(
             Table(text_table, metadata=ElementMetadata(text_as_html=html_table)),
             overlap_prefix="ctus porta volutpat.",
-            opts=ChunkingOptions.new(max_characters=175),
+            opts=ChunkingOptions(max_characters=175),
         )
 
         chunk_iter = pre_chunk.iter_chunks()
@@ -393,7 +364,7 @@ class DescribeTablePreChunk:
         pre_chunk = TablePreChunk(
             Table(text_table, metadata=ElementMetadata(text_as_html=html_table)),
             overlap_prefix="",
-            opts=ChunkingOptions.new(max_characters=100, text_splitting_separators=("\n", " ")),
+            opts=ChunkingOptions(max_characters=100, text_splitting_separators=("\n", " ")),
         )
 
         chunk_iter = pre_chunk.iter_chunks()
@@ -457,7 +428,7 @@ class DescribeTablePreChunk:
         self, text: str, expected_value: str
     ):
         pre_chunk = TablePreChunk(
-            Table(text), overlap_prefix="", opts=ChunkingOptions.new(overlap=20, overlap_all=True)
+            Table(text), overlap_prefix="", opts=ChunkingOptions(overlap=20, overlap_all=True)
         )
         assert pre_chunk.overlap_tail == expected_value
 
@@ -480,7 +451,7 @@ class DescribeTablePreChunk:
         self, text: str, overlap_prefix: str, expected_value: str
     ):
         pre_chunk = TablePreChunk(
-            Table(text), overlap_prefix=overlap_prefix, opts=ChunkingOptions.new()
+            Table(text), overlap_prefix=overlap_prefix, opts=ChunkingOptions()
         )
         assert pre_chunk._text == expected_value
 
@@ -536,7 +507,7 @@ class DescribeTextPreChunk:
         self, max_characters: int, combine_text_under_n_chars: int, expected_value: bool
     ):
         """This allows `PreChunkCombiner` to operate without knowing `TextPreChunk` internals."""
-        opts = ChunkingOptions.new(
+        opts = ChunkingOptions(
             max_characters=max_characters,
             combine_text_under_n_chars=combine_text_under_n_chars,
             overlap=20,
@@ -560,7 +531,7 @@ class DescribeTextPreChunk:
 
         Note that neither the original or other pre_chunk are mutated.
         """
-        opts = ChunkingOptions.new()
+        opts = ChunkingOptions()
         pre_chunk = TextPreChunk(
             [
                 Text("Lorem ipsum dolor sit amet consectetur adipiscing elit."),
@@ -623,7 +594,7 @@ class DescribeTextPreChunk:
                 ),
             ],
             overlap_prefix="e feugiat efficitur.",
-            opts=ChunkingOptions.new(max_characters=200),
+            opts=ChunkingOptions(max_characters=200),
         )
 
         chunk_iter = pre_chunk.iter_chunks()
@@ -648,7 +619,7 @@ class DescribeTextPreChunk:
                 ),
             ],
             overlap_prefix="",
-            opts=ChunkingOptions.new(max_characters=200, text_splitting_separators=("\n", " ")),
+            opts=ChunkingOptions(max_characters=200, text_splitting_separators=("\n", " ")),
         )
 
         chunk_iter = pre_chunk.iter_chunks()
@@ -681,7 +652,7 @@ class DescribeTextPreChunk:
         self, text: str, expected_value: str
     ):
         pre_chunk = TextPreChunk(
-            [Text(text)], overlap_prefix="", opts=ChunkingOptions.new(overlap=20, overlap_all=True)
+            [Text(text)], overlap_prefix="", opts=ChunkingOptions(overlap=20, overlap_all=True)
         )
         assert pre_chunk.overlap_tail == expected_value
 
@@ -708,7 +679,7 @@ class DescribeTextPreChunk:
                 ),
             ],
             overlap_prefix="",
-            opts=ChunkingOptions.new(),
+            opts=ChunkingOptions(),
         )
 
         assert pre_chunk._all_metadata_values == {
@@ -746,7 +717,7 @@ class DescribeTextPreChunk:
                 Text("'Lorem ipsum dolor' means 'Thank you very much'.", metadata=metadata_2),
             ],
             overlap_prefix="",
-            opts=ChunkingOptions.new(),
+            opts=ChunkingOptions(),
         )
 
         # -- ad-hoc fields "coefficient" and "quotient" do not appear --
@@ -789,7 +760,7 @@ class DescribeTextPreChunk:
                 ),
             ],
             overlap_prefix="ficitur.",  # len == 8
-            opts=ChunkingOptions.new(),
+            opts=ChunkingOptions(),
         )
 
         regex_metadata = pre_chunk._consolidated_regex_meta
@@ -845,7 +816,7 @@ class DescribeTextPreChunk:
                 ),
             ],
             overlap_prefix="",
-            opts=ChunkingOptions.new(),
+            opts=ChunkingOptions(),
         )
 
         meta_kwargs = pre_chunk._meta_kwargs
@@ -881,9 +852,7 @@ class DescribeTextPreChunk:
         The text-segment contributed by each element is separated from the next by a blank line
         ("\n\n"). An element that contributes no text does not give rise to a separator.
         """
-        pre_chunk = TextPreChunk(
-            elements, overlap_prefix=overlap_prefix, opts=ChunkingOptions.new()
-        )
+        pre_chunk = TextPreChunk(elements, overlap_prefix=overlap_prefix, opts=ChunkingOptions())
         assert pre_chunk._text == expected_value
 
 
@@ -896,13 +865,13 @@ class DescribePreChunkBuilder:
     """Unit-test suite for `unstructured.chunking.base.PreChunkBuilder`."""
 
     def it_is_empty_on_construction(self):
-        builder = PreChunkBuilder(opts=ChunkingOptions.new(max_characters=50))
+        builder = PreChunkBuilder(opts=ChunkingOptions(max_characters=50))
 
         assert builder._text_length == 0
         assert builder._remaining_space == 50
 
     def it_accumulates_elements_added_to_it(self):
-        builder = PreChunkBuilder(opts=ChunkingOptions.new(max_characters=150))
+        builder = PreChunkBuilder(opts=ChunkingOptions(max_characters=150))
 
         builder.add_element(Title("Introduction"))
         assert builder._text_length == 12
@@ -919,7 +888,7 @@ class DescribePreChunkBuilder:
 
     @pytest.mark.parametrize("element", [Table("Heading\nCell text"), Text("abcd " * 200)])
     def it_will_fit_a_Table_or_oversized_element_when_empty(self, element: Element):
-        builder = PreChunkBuilder(opts=ChunkingOptions.new())
+        builder = PreChunkBuilder(opts=ChunkingOptions())
         assert builder.will_fit(element)
 
     @pytest.mark.parametrize(
@@ -934,22 +903,20 @@ class DescribePreChunkBuilder:
     def but_not_when_it_already_contains_an_element_of_any_kind(
         self, existing_element: Element, next_element: Element
     ):
-        builder = PreChunkBuilder(opts=ChunkingOptions.new())
+        builder = PreChunkBuilder(opts=ChunkingOptions())
         builder.add_element(existing_element)
 
         assert not builder.will_fit(next_element)
 
     @pytest.mark.parametrize("element", [Text("abcd"), Table("Fruits\nMango")])
     def it_will_not_fit_any_element_when_it_already_contains_a_table(self, element: Element):
-        builder = PreChunkBuilder(opts=ChunkingOptions.new())
+        builder = PreChunkBuilder(opts=ChunkingOptions())
         builder.add_element(Table("Heading\nCell text"))
 
         assert not builder.will_fit(element)
 
     def it_will_not_fit_an_element_when_it_already_exceeds_the_soft_maxlen(self):
-        builder = PreChunkBuilder(
-            opts=ChunkingOptions.new(max_characters=100, new_after_n_chars=50)
-        )
+        builder = PreChunkBuilder(opts=ChunkingOptions(max_characters=100, new_after_n_chars=50))
         builder.add_element(
             Text("Lorem ipsum dolor sit amet consectetur adipiscing elit.")  # 55-chars
         )
@@ -957,7 +924,7 @@ class DescribePreChunkBuilder:
         assert not builder.will_fit(Text("In rhoncus ipsum."))
 
     def and_it_will_not_fit_an_element_when_that_would_cause_it_to_exceed_the_hard_maxlen(self):
-        builder = PreChunkBuilder(opts=ChunkingOptions.new(max_characters=100))
+        builder = PreChunkBuilder(opts=ChunkingOptions(max_characters=100))
         builder.add_element(
             Text("Lorem ipsum dolor sit amet consectetur adipiscing elit.")  # 55-chars
         )
@@ -968,7 +935,7 @@ class DescribePreChunkBuilder:
         )
 
     def but_it_will_fit_an_element_that_fits(self):
-        builder = PreChunkBuilder(opts=ChunkingOptions.new(max_characters=100))
+        builder = PreChunkBuilder(opts=ChunkingOptions(max_characters=100))
         builder.add_element(
             Text("Lorem ipsum dolor sit amet consectetur adipiscing elit.")  # 55-chars
         )
@@ -977,7 +944,7 @@ class DescribePreChunkBuilder:
         assert builder.will_fit(Text("In rhoncus ipsum sed lectus porto volutpat."))  # 43-chars
 
     def it_generates_a_TextPreChunk_when_flushed_and_resets_itself_to_empty(self):
-        builder = PreChunkBuilder(opts=ChunkingOptions.new(max_characters=150))
+        builder = PreChunkBuilder(opts=ChunkingOptions(max_characters=150))
         builder.add_element(Title("Introduction"))
         builder.add_element(
             Text(
@@ -1000,7 +967,7 @@ class DescribePreChunkBuilder:
         assert builder._remaining_space == 150
 
     def and_it_generates_a_TablePreChunk_when_it_contains_a_Table_element(self):
-        builder = PreChunkBuilder(opts=ChunkingOptions.new(max_characters=150))
+        builder = PreChunkBuilder(opts=ChunkingOptions(max_characters=150))
         builder.add_element(Table("Heading\nCell text"))
 
         pre_chunk = next(builder.flush())
@@ -1016,7 +983,7 @@ class DescribePreChunkBuilder:
         assert pre_chunk._table == Table("Heading\nCell text")
 
     def but_it_does_not_generate_a_pre_chunk_on_flush_when_empty(self):
-        builder = PreChunkBuilder(opts=ChunkingOptions.new(max_characters=150))
+        builder = PreChunkBuilder(opts=ChunkingOptions(max_characters=150))
 
         pre_chunks = list(builder.flush())
 
@@ -1025,7 +992,7 @@ class DescribePreChunkBuilder:
         assert builder._remaining_space == 150
 
     def it_computes_overlap_from_each_pre_chunk_and_applies_it_to_the_next(self):
-        opts = ChunkingOptions.new(overlap=15, overlap_all=True)
+        opts = ChunkingOptions(overlap=15, overlap_all=True)
         builder = PreChunkBuilder(opts=opts)
 
         builder.add_element(Text("Lorem ipsum dolor sit amet consectetur adipiscing elit."))
@@ -1044,7 +1011,7 @@ class DescribePreChunkBuilder:
         assert pre_chunk._text == "porta volutpat.\n\nDonec semper facilisis metus finibus."
 
     def it_considers_separator_length_when_computing_text_length_and_remaining_space(self):
-        builder = PreChunkBuilder(opts=ChunkingOptions.new(max_characters=50))
+        builder = PreChunkBuilder(opts=ChunkingOptions(max_characters=50))
         builder.add_element(Text("abcde"))
         builder.add_element(Text("fghij"))
 
@@ -1061,7 +1028,7 @@ class DescribePreChunkCombiner:
     """Unit-test suite for `unstructured.chunking.base.PreChunkCombiner`."""
 
     def it_combines_sequential_small_text_pre_chunks(self):
-        opts = ChunkingOptions.new(max_characters=250, combine_text_under_n_chars=250)
+        opts = ChunkingOptions(max_characters=250, combine_text_under_n_chars=250)
         pre_chunks = [
             TextPreChunk(
                 [
@@ -1105,7 +1072,7 @@ class DescribePreChunkCombiner:
             next(pre_chunk_iter)
 
     def but_it_does_not_combine_table_pre_chunks(self):
-        opts = ChunkingOptions.new(max_characters=250, combine_text_under_n_chars=250)
+        opts = ChunkingOptions(max_characters=250, combine_text_under_n_chars=250)
         pre_chunks = [
             TextPreChunk(
                 [
@@ -1127,7 +1094,7 @@ class DescribePreChunkCombiner:
         ]
 
         pre_chunk_iter = PreChunkCombiner(
-            pre_chunks, ChunkingOptions.new(max_characters=250, combine_text_under_n_chars=250)
+            pre_chunks, ChunkingOptions(max_characters=250, combine_text_under_n_chars=250)
         ).iter_combined_pre_chunks()
 
         pre_chunk = next(pre_chunk_iter)
@@ -1152,7 +1119,7 @@ class DescribePreChunkCombiner:
             next(pre_chunk_iter)
 
     def it_respects_the_specified_combination_threshold(self):
-        opts = ChunkingOptions.new(max_characters=250, combine_text_under_n_chars=80)
+        opts = ChunkingOptions(max_characters=250, combine_text_under_n_chars=80)
         pre_chunks = [
             TextPreChunk(  # 68
                 [
@@ -1203,7 +1170,7 @@ class DescribePreChunkCombiner:
             next(pre_chunk_iter)
 
     def it_respects_the_hard_maximum_window_length(self):
-        opts = ChunkingOptions.new(max_characters=200, combine_text_under_n_chars=200)
+        opts = ChunkingOptions(max_characters=200, combine_text_under_n_chars=200)
         pre_chunks = [
             TextPreChunk(  # 68
                 [
@@ -1256,7 +1223,7 @@ class DescribePreChunkCombiner:
 
     def it_accommodates_and_isolates_an_oversized_pre_chunk(self):
         """Such as occurs when a single element exceeds the window size."""
-        opts = ChunkingOptions.new(max_characters=150, combine_text_under_n_chars=150)
+        opts = ChunkingOptions(max_characters=150, combine_text_under_n_chars=150)
         pre_chunks = [
             TextPreChunk([Title("Lorem Ipsum")], overlap_prefix="", opts=opts),
             TextPreChunk(  # 179
@@ -1274,7 +1241,7 @@ class DescribePreChunkCombiner:
         ]
 
         pre_chunk_iter = PreChunkCombiner(
-            pre_chunks, ChunkingOptions.new(max_characters=150, combine_text_under_n_chars=150)
+            pre_chunks, ChunkingOptions(max_characters=150, combine_text_under_n_chars=150)
         ).iter_combined_pre_chunks()
 
         pre_chunk = next(pre_chunk_iter)
@@ -1303,7 +1270,7 @@ class DescribeTextPreChunkAccumulator:
     """Unit-test suite for `unstructured.chunking.base.TextPreChunkAccumulator`."""
 
     def it_generates_a_combined_TextPreChunk_when_flushed_and_resets_itself_to_empty(self):
-        opts = ChunkingOptions.new()
+        opts = ChunkingOptions(combine_text_under_n_chars=500)
         accum = TextPreChunkAccumulator(opts=opts)
 
         pre_chunk = TextPreChunk(
@@ -1362,7 +1329,7 @@ class DescribeTextPreChunkAccumulator:
             next(accum.flush())
 
     def but_it_does_not_generate_a_TextPreChunk_on_flush_when_empty(self):
-        accum = TextPreChunkAccumulator(opts=ChunkingOptions.new(max_characters=150))
+        accum = TextPreChunkAccumulator(opts=ChunkingOptions(max_characters=150))
         assert list(accum.flush()) == []
 
 
diff --git a/test_unstructured/chunking/test_basic.py b/test_unstructured/chunking/test_basic.py
index b74f28c53..acc6b049d 100644
--- a/test_unstructured/chunking/test_basic.py
+++ b/test_unstructured/chunking/test_basic.py
@@ -1,4 +1,4 @@
-"""Unit-test suite for the `unstructured.chunking.basic` module.
+"""Test suite for the `unstructured.chunking.basic` module.
 
 That module implements the baseline chunking strategy. The baseline strategy has all behaviors
 shared by all chunking strategies and no extra rules like perserve section or page boundaries.
diff --git a/test_unstructured/chunking/test_title.py b/test_unstructured/chunking/test_title.py
index 319d31d01..54db22a58 100644
--- a/test_unstructured/chunking/test_title.py
+++ b/test_unstructured/chunking/test_title.py
@@ -1,13 +1,20 @@
 # pyright: reportPrivateUsage=false
 
-"""Unit-test suite for the `unstructured.chunking.title` module."""
+"""Test suite for the `unstructured.chunking.title` module."""
 
 from __future__ import annotations
 
+from typing import Optional
+
 import pytest
 
-from unstructured.chunking.base import ChunkingOptions, TablePreChunk, TextPreChunk
-from unstructured.chunking.title import _ByTitlePreChunker, chunk_by_title
+from unstructured.chunking.base import (
+    CHUNK_MULTI_PAGE_DEFAULT,
+    PreChunker,
+    TablePreChunk,
+    TextPreChunk,
+)
+from unstructured.chunking.title import _ByTitleChunkingOptions, chunk_by_title
 from unstructured.documents.coordinates import CoordinateSystem
 from unstructured.documents.elements import (
     CheckBox,
@@ -23,6 +30,13 @@ from unstructured.documents.elements import (
 )
 from unstructured.partition.html import partition_html
 
+# ================================================================================================
+# INTEGRATION-TESTS
+# ================================================================================================
+# These test `chunk_by_title()` as an integrated whole, calling `chunk_by_title()` and inspecting
+# the outputs.
+# ================================================================================================
+
 
 def test_it_splits_a_large_element_into_multiple_chunks():
     elements: list[Element] = [
@@ -57,7 +71,7 @@ def test_split_elements_by_title_and_table():
         CheckBox(),
     ]
 
-    pre_chunks = _ByTitlePreChunker.iter_pre_chunks(elements, opts=ChunkingOptions.new())
+    pre_chunks = PreChunker.iter_pre_chunks(elements, opts=_ByTitleChunkingOptions.new())
 
     pre_chunk = next(pre_chunks)
     assert isinstance(pre_chunk, TextPreChunk)
@@ -544,3 +558,86 @@ def test_it_considers_separator_length_when_pre_chunking():
         ),
         CompositeElement("Minimize mid-text chunk-splitting"),
     ]
+
+
+# ================================================================================================
+# UNIT-TESTS
+# ================================================================================================
+# These test individual components in isolation so can exercise all edge cases while still
+# performing well.
+# ================================================================================================
+
+
+class Describe_ByTitleChunkingOptions:
+    """Unit-test suite for `unstructured.chunking.title._ByTitleChunkingOptions` objects."""
+
+    @pytest.mark.parametrize("n_chars", [-1, -42])
+    def it_rejects_combine_text_under_n_chars_for_n_less_than_zero(self, n_chars: int):
+        with pytest.raises(
+            ValueError,
+            match=f"'combine_text_under_n_chars' argument must be >= 0, got {n_chars}",
+        ):
+            _ByTitleChunkingOptions.new(combine_text_under_n_chars=n_chars)
+
+    def it_accepts_0_for_combine_text_under_n_chars_to_disable_chunk_combining(self):
+        """Specifying `combine_text_under_n_chars=0` is how a caller disables chunk-combining."""
+        opts = _ByTitleChunkingOptions(combine_text_under_n_chars=0)
+        assert opts.combine_text_under_n_chars == 0
+
+    def it_does_not_complain_when_specifying_combine_text_under_n_chars_by_itself(self):
+        """Caller can specify `combine_text_under_n_chars` arg without specifying other options."""
+        try:
+            opts = _ByTitleChunkingOptions(combine_text_under_n_chars=50)
+        except ValueError:
+            pytest.fail("did not accept `combine_text_under_n_chars` as option by itself")
+
+        assert opts.combine_text_under_n_chars == 50
+
+    @pytest.mark.parametrize(
+        ("combine_text_under_n_chars", "max_characters", "expected_hard_max"),
+        [(600, None, 500), (600, 450, 450)],
+    )
+    def it_rejects_combine_text_under_n_chars_greater_than_maxchars(
+        self, combine_text_under_n_chars: int, max_characters: Optional[int], expected_hard_max: int
+    ):
+        """`combine_text_under_n_chars` > `max_characters` can produce behavior confusing to users.
+
+        The behavior is no different from `combine_text_under_n_chars == max_characters`, but if
+        `max_characters` is left to default (500) and `combine_text_under_n_chars` is set to a
+        larger number like 1500 then it can look like chunk-combining isn't working.
+        """
+        with pytest.raises(
+            ValueError,
+            match=(
+                "'combine_text_under_n_chars' argument must not exceed `max_characters` value,"
+                f" got {combine_text_under_n_chars} > {expected_hard_max}"
+            ),
+        ):
+            _ByTitleChunkingOptions.new(
+                max_characters=max_characters, combine_text_under_n_chars=combine_text_under_n_chars
+            )
+
+    def it_does_not_complain_when_specifying_new_after_n_chars_by_itself(self):
+        """Caller can specify `new_after_n_chars` arg without specifying any other options.
+
+        In particular, `combine_text_under_n_chars` value is adjusted down to the
+        `new_after_n_chars` value when the default for `combine_text_under_n_chars` exceeds the
+        value of `new_after_n_chars`.
+        """
+        try:
+            opts = _ByTitleChunkingOptions(new_after_n_chars=200)
+        except ValueError:
+            pytest.fail("did not accept `new_after_n_chars` as option by itself")
+
+        assert opts.soft_max == 200
+        assert opts.combine_text_under_n_chars == 200
+
+    @pytest.mark.parametrize(
+        ("multipage_sections", "expected_value"),
+        [(True, True), (False, False), (None, CHUNK_MULTI_PAGE_DEFAULT)],
+    )
+    def it_knows_whether_to_break_chunks_on_page_boundaries(
+        self, multipage_sections: bool, expected_value: bool
+    ):
+        opts = _ByTitleChunkingOptions(multipage_sections=multipage_sections)
+        assert opts.multipage_sections is expected_value
diff --git a/unstructured/chunking/base.py b/unstructured/chunking/base.py
index c9958b461..66be1ce96 100644
--- a/unstructured/chunking/base.py
+++ b/unstructured/chunking/base.py
@@ -7,7 +7,7 @@ import copy
 from typing import Any, Callable, DefaultDict, Iterable, Iterator, Optional, Sequence, cast
 
 import regex
-from typing_extensions import Self, TypeAlias
+from typing_extensions import TypeAlias
 
 from unstructured.documents.elements import (
     CompositeElement,
@@ -68,9 +68,6 @@ class ChunkingOptions:
         when not specified, which effectively disables this behavior. Specifying 0 for this
         argument causes each element to appear in a chunk by itself (although an element with text
         longer than `max_characters` will be still be split into two or more chunks).
-    multipage_sections
-        Indicates that page-boundaries should not be respected while chunking, i.e. elements
-        appearing on two different pages can appear in the same chunk.
     combine_text_under_n_chars
         Provides a way to "recombine" small chunks formed by breaking on a semantic boundary. Only
         relevant for a chunking strategy that specifies higher-level semantic boundaries to be
@@ -101,9 +98,9 @@ class ChunkingOptions:
 
     def __init__(
         self,
+        *,
         combine_text_under_n_chars: Optional[int] = None,
         max_characters: Optional[int] = None,
-        multipage_sections: Optional[bool] = None,
         new_after_n_chars: Optional[int] = None,
         overlap: Optional[int] = None,
         overlap_all: Optional[bool] = None,
@@ -111,59 +108,28 @@ class ChunkingOptions:
     ):
         self._combine_text_under_n_chars_arg = combine_text_under_n_chars
         self._max_characters_arg = max_characters
-        self._multipage_sections_arg = multipage_sections
         self._new_after_n_chars_arg = new_after_n_chars
         self._overlap_arg = overlap
         self._overlap_all_arg = overlap_all
         self._text_splitting_separators = text_splitting_separators
 
-    @classmethod
-    def new(
-        cls,
-        combine_text_under_n_chars: Optional[int] = None,
-        max_characters: Optional[int] = None,
-        multipage_sections: Optional[bool] = None,
-        new_after_n_chars: Optional[int] = None,
-        overlap: Optional[int] = None,
-        overlap_all: Optional[bool] = None,
-        text_splitting_separators: Sequence[str] = ("\n", " "),
-    ) -> Self:
-        """Construct validated instance.
+    @lazyproperty
+    def boundary_predicates(self) -> tuple[BoundaryPredicate, ...]:
+        """The semantic-boundary detectors to be applied to break pre-chunks.
 
-        Raises `ValueError` on invalid arguments like overlap > max_chars.
+        Overridden by sub-typs to provide semantic-boundary isolation behaviors.
         """
-        self = cls(
-            combine_text_under_n_chars,
-            max_characters,
-            multipage_sections,
-            new_after_n_chars,
-            overlap,
-            overlap_all,
-            text_splitting_separators,
-        )
-        self._validate()
-        return self
+        return ()
 
     @lazyproperty
     def combine_text_under_n_chars(self) -> int:
-        """Combine consecutive text pre-chunks if former is smaller than this and both will fit.
+        """Combine two consecutive text pre-chunks if first is smaller than this and both will fit.
 
-        - Does not combine table chunks with text chunks even if they would both fit in the
-          chunking window.
-        - Does not combine text chunks if together they would exceed the chunking window.
-        - Defaults to `max_characters` when not specified.
-        - Is reduced to `new_after_n_chars` when it exceeds that value.
+        Default applied here is `0` which essentially disables chunk combining. Must be overridden
+        by subclass where combining behavior is supported.
         """
-        max_characters = self.hard_max
-        soft_max = self.soft_max
-        arg = self._combine_text_under_n_chars_arg
-
-        # -- `combine_text_under_n_chars` defaults to `max_characters` when not specified and is
-        # -- capped at max-chars
-        combine_text_under_n_chars = max_characters if arg is None or arg > max_characters else arg
-
-        # -- `new_after_n_chars` takes precendence on conflict with `combine_text_under_n_chars` --
-        return soft_max if combine_text_under_n_chars > soft_max else combine_text_under_n_chars
+        arg_value = self._combine_text_under_n_chars_arg
+        return arg_value if arg_value is not None else 0
 
     @lazyproperty
     def hard_max(self) -> int:
@@ -185,12 +151,6 @@ class ChunkingOptions:
         """
         return self.overlap if self._overlap_all_arg else 0
 
-    @lazyproperty
-    def multipage_sections(self) -> bool:
-        """When False, break pre-chunks on page-boundaries."""
-        arg_value = self._multipage_sections_arg
-        return CHUNK_MULTI_PAGE_DEFAULT if arg_value is None else bool(arg_value)
-
     @lazyproperty
     def overlap(self) -> int:
         """The number of characters to overlap text when splitting chunks mid-text.
@@ -256,28 +216,15 @@ class ChunkingOptions:
         if max_characters <= 0:
             raise ValueError(f"'max_characters' argument must be > 0," f" got {max_characters}")
 
-        # -- `combine_text_under_n_chars == 0` is valid (suppresses chunk combination)
-        # -- but a negative value is not
-        combine_text_under_n_chars = self._combine_text_under_n_chars_arg
-        if combine_text_under_n_chars is not None and combine_text_under_n_chars < 0:
-            raise ValueError(
-                f"'combine_text_under_n_chars' argument must be >= 0,"
-                f" got {combine_text_under_n_chars}"
-            )
-
-        # -- a negative value for `new_after_n_chars` is assumed to
-        # -- be a mistake the caller will want to know about
+        # -- a negative value for `new_after_n_chars` is assumed to be a mistake the caller will
+        # -- want to know about
         new_after_n_chars = self._new_after_n_chars_arg
         if new_after_n_chars is not None and new_after_n_chars < 0:
             raise ValueError(
                 f"'new_after_n_chars' argument must be >= 0," f" got {new_after_n_chars}"
             )
 
-        # -- overlap must be less than max-chars or the chunk text will
-        # -- never be consumed
-        # TODO: consider a heuristic like never overlap more than half,
-        # otherwise there could be corner cases leading to an infinite
-        # loop (I think).
+        # -- overlap must be less than max-chars or the chunk text will never be consumed --
         if self.overlap >= max_characters:
             raise ValueError(
                 f"'overlap' argument must be less than `max_characters`,"
@@ -402,12 +349,12 @@ class _TextSplitter:
 
 
 # ================================================================================================
-# BASE PRE-CHUNKER
+# PRE-CHUNKER
 # ================================================================================================
 
 
-class BasePreChunker:
-    """Base-class for per-strategy pre-chunkers.
+class PreChunker:
+    """Gathers sequential elements into pre-chunks as length constraints allow.
 
     The pre-chunker's responsibilities are:
 
@@ -465,7 +412,7 @@ class BasePreChunker:
     @lazyproperty
     def _boundary_predicates(self) -> tuple[BoundaryPredicate, ...]:
         """The semantic-boundary detectors to be applied to break pre-chunks."""
-        return ()
+        return self._opts.boundary_predicates
 
     def _is_in_new_semantic_unit(self, element: Element) -> bool:
         """True when `element` begins a new semantic unit such as a section or page."""
diff --git a/unstructured/chunking/basic.py b/unstructured/chunking/basic.py
index d15e7eac2..6f4ae49de 100644
--- a/unstructured/chunking/basic.py
+++ b/unstructured/chunking/basic.py
@@ -17,7 +17,9 @@ from __future__ import annotations
 
 from typing import Iterable, Optional
 
-from unstructured.chunking.base import BasePreChunker, ChunkingOptions
+from typing_extensions import Self
+
+from unstructured.chunking.base import ChunkingOptions, PreChunker
 from unstructured.documents.elements import Element
 
 
@@ -58,7 +60,7 @@ def chunk_elements(
         level of "pollution" of otherwise clean semantic chunk boundaries.
     """
     # -- raises ValueError on invalid parameters --
-    opts = ChunkingOptions.new(
+    opts = _BasicChunkingOptions.new(
         max_characters=max_characters,
         new_after_n_chars=new_after_n_chars,
         overlap=overlap,
@@ -67,14 +69,32 @@ def chunk_elements(
 
     return [
         chunk
-        for pre_chunk in BasicPreChunker.iter_pre_chunks(elements, opts)
+        for pre_chunk in PreChunker.iter_pre_chunks(elements, opts)
         for chunk in pre_chunk.iter_chunks()
     ]
 
 
-class BasicPreChunker(BasePreChunker):
-    """Produces pre-chunks from a sequence of document-elements using the "basic" rule-set.
+class _BasicChunkingOptions(ChunkingOptions):
+    """Options for `basic` chunking."""
 
-    The "basic" rule-set is essentially "no-rules" other than `Table` is segregated into its own
-    pre-chunk.
-    """
+    @classmethod
+    def new(
+        cls,
+        *,
+        max_characters: Optional[int] = None,
+        new_after_n_chars: Optional[int] = None,
+        overlap: Optional[int] = None,
+        overlap_all: Optional[bool] = None,
+    ) -> Self:
+        """Construct validated instance.
+
+        Raises `ValueError` on invalid arguments like overlap > max_chars.
+        """
+        self = cls(
+            max_characters=max_characters,
+            new_after_n_chars=new_after_n_chars,
+            overlap=overlap,
+            overlap_all=overlap_all,
+        )
+        self._validate()
+        return self
diff --git a/unstructured/chunking/title.py b/unstructured/chunking/title.py
index 432019c25..a26dd5ebb 100644
--- a/unstructured/chunking/title.py
+++ b/unstructured/chunking/title.py
@@ -7,11 +7,14 @@ from __future__ import annotations
 
 from typing import Iterable, Iterator, Optional
 
+from typing_extensions import Self
+
 from unstructured.chunking.base import (
-    BasePreChunker,
+    CHUNK_MULTI_PAGE_DEFAULT,
     BoundaryPredicate,
     ChunkingOptions,
     PreChunkCombiner,
+    PreChunker,
     is_in_next_section,
     is_on_next_page,
     is_title,
@@ -22,6 +25,7 @@ from unstructured.utils import lazyproperty
 
 def chunk_by_title(
     elements: Iterable[Element],
+    *,
     max_characters: Optional[int] = None,
     multipage_sections: Optional[bool] = None,
     combine_text_under_n_chars: Optional[int] = None,
@@ -65,7 +69,7 @@ def chunk_by_title(
         elements and not subject to text-splitting. Use this with caution as it entails a certain
         level of "pollution" of otherwise clean semantic chunk boundaries.
     """
-    opts = ChunkingOptions.new(
+    opts = _ByTitleChunkingOptions.new(
         combine_text_under_n_chars=combine_text_under_n_chars,
         max_characters=max_characters,
         multipage_sections=multipage_sections,
@@ -75,27 +79,127 @@ def chunk_by_title(
     )
 
     pre_chunks = PreChunkCombiner(
-        _ByTitlePreChunker.iter_pre_chunks(elements, opts), opts=opts
+        PreChunker.iter_pre_chunks(elements, opts), opts=opts
     ).iter_combined_pre_chunks()
 
     return [chunk for pre_chunk in pre_chunks for chunk in pre_chunk.iter_chunks()]
 
 
-class _ByTitlePreChunker(BasePreChunker):
-    """Pre-chunker for the "by_title" chunking strategy.
+class _ByTitleChunkingOptions(ChunkingOptions):
+    """Adds the by-title-specific chunking options to the base case.
 
-    The "by-title" strategy specifies breaking on section boundaries; a `Title` element indicates a
-    new "section", hence the "by-title" designation.
+    `by_title`-specific options:
+
+    multipage_sections
+        Indicates that page-boundaries should not be respected while chunking, i.e. elements
+        appearing on two different pages can appear in the same chunk.
     """
 
+    def __init__(
+        self,
+        *,
+        max_characters: Optional[int] = None,
+        combine_text_under_n_chars: Optional[int] = None,
+        multipage_sections: Optional[bool] = None,
+        new_after_n_chars: Optional[int] = None,
+        overlap: Optional[int] = None,
+        overlap_all: Optional[bool] = None,
+    ):
+        super().__init__(
+            combine_text_under_n_chars=combine_text_under_n_chars,
+            max_characters=max_characters,
+            new_after_n_chars=new_after_n_chars,
+            overlap=overlap,
+            overlap_all=overlap_all,
+        )
+        self._multipage_sections_arg = multipage_sections
+
+    @classmethod
+    def new(
+        cls,
+        *,
+        max_characters: Optional[int] = None,
+        combine_text_under_n_chars: Optional[int] = None,
+        multipage_sections: Optional[bool] = None,
+        new_after_n_chars: Optional[int] = None,
+        overlap: Optional[int] = None,
+        overlap_all: Optional[bool] = None,
+    ) -> Self:
+        """Return instance or raises `ValueError` on invalid arguments like overlap > max_chars."""
+        self = cls(
+            max_characters=max_characters,
+            combine_text_under_n_chars=combine_text_under_n_chars,
+            multipage_sections=multipage_sections,
+            new_after_n_chars=new_after_n_chars,
+            overlap=overlap,
+            overlap_all=overlap_all,
+        )
+        self._validate()
+        return self
+
     @lazyproperty
-    def _boundary_predicates(self) -> tuple[BoundaryPredicate, ...]:
-        """The semantic-boundary detectors to be applied to break pre-chunks."""
+    def boundary_predicates(self) -> tuple[BoundaryPredicate, ...]:
+        """The semantic-boundary detectors to be applied to break pre-chunks.
+
+        For the `by_title` strategy these are sections indicated by a title (section-heading), an
+        explicit section metadata item (only present for certain document types), and optionally
+        page boundaries.
+        """
 
         def iter_boundary_predicates() -> Iterator[BoundaryPredicate]:
             yield is_title
             yield is_in_next_section()
-            if not self._opts.multipage_sections:
+            if not self.multipage_sections:
                 yield is_on_next_page()
 
         return tuple(iter_boundary_predicates())
+
+    @lazyproperty
+    def combine_text_under_n_chars(self) -> int:
+        """Combine consecutive text pre-chunks if former is smaller than this and both will fit.
+
+        - Does not combine table chunks with text chunks even if they would both fit in the
+          chunking window.
+        - Does not combine text chunks if together they would exceed the chunking window.
+        - Defaults to `max_characters` when not specified.
+        - Is reduced to `new_after_n_chars` when it exceeds that value.
+        """
+        max_characters = self.hard_max
+        soft_max = self.soft_max
+        arg_value = self._combine_text_under_n_chars_arg
+
+        # -- `combine_text_under_n_chars` defaults to `max_characters` when not specified --
+        combine_text_under_n_chars = max_characters if arg_value is None else arg_value
+
+        # -- `new_after_n_chars` takes precendence on conflict with `combine_text_under_n_chars` --
+        return soft_max if combine_text_under_n_chars > soft_max else combine_text_under_n_chars
+
+    @lazyproperty
+    def multipage_sections(self) -> bool:
+        """When False, break pre-chunks on page-boundaries."""
+        arg_value = self._multipage_sections_arg
+        return CHUNK_MULTI_PAGE_DEFAULT if arg_value is None else bool(arg_value)
+
+    def _validate(self) -> None:
+        """Raise ValueError if request option-set is invalid."""
+        # -- start with base-class validations --
+        super()._validate()
+
+        if (combine_text_under_n_chars_arg := self._combine_text_under_n_chars_arg) is not None:
+            # -- `combine_text_under_n_chars == 0` is valid (suppresses chunk combination)
+            # -- but a negative value is not
+            if combine_text_under_n_chars_arg < 0:
+                raise ValueError(
+                    f"'combine_text_under_n_chars' argument must be >= 0,"
+                    f" got {combine_text_under_n_chars_arg}"
+                )
+
+            # -- `combine_text_under_n_chars` > `max_characters` can produce behavior confusing to
+            # -- users. The chunking behavior would be no different than when
+            # -- `combine_text_under_n_chars == max_characters`, but if `max_characters` is left to
+            # -- default (500) then it can look like chunk-combining isn't working.
+            if combine_text_under_n_chars_arg > self.hard_max:
+                raise ValueError(
+                    f"'combine_text_under_n_chars' argument must not exceed `max_characters`"
+                    f" value, got {combine_text_under_n_chars_arg} > {self.hard_max}"
+                )