unstructured/setup.cfg
Steve Canny eb1b022ff8
feat(chunking): add overlap on chunk-splits (#2305)
There are two distinct overlap operations with completely different
implementations. This is "intra-chunk" overlap, applying overlap to
chunks resulting from text-splitting an oversized element.

So if an oversized element had text "abcd efgh ijkl mnop qrst" and was
split at 15 chars with overlap of 5, it would produce "abcd efgh ijkl"
and "ijkl mnop qrst". Any inter-chunk overlap taken from the prior chunk and
applied at the beginning of the string (before "abcd") is handled in a
separate operation in the next PR.
2023-12-22 20:35:18 +00:00

31 lines
618 B
INI

# Package metadata (the [metadata] section of setup.cfg is read by setuptools).
[metadata]
# License file for the project.
license_files = LICENSE.md
# flake8 linter settings.
[flake8]
# Globally disabled checks: E203 (whitespace before ':') and W503 (line break
# before binary operator) -- presumably because they conflict with the code
# formatter's output; confirm before re-enabling.
ignore = E203,W503
max-line-length = 100
# Paths skipped by the linter. Entries of a multi-line value must be indented,
# otherwise the parser treats each one as a new (malformed) key.
exclude =
    .venv
    unstructured-inference
# "<file pattern>: <codes>" pairs; "*" matches every file. T20 is presumably
# the flake8-print family of checks -- verify against the installed plugins.
per-file-ignores =
    *: T20
# pytest settings (pytest reads the [tool:pytest] section of setup.cfg).
# Entries of every multi-line value below must stay indented; an unindented
# entry is parsed as a separate key or rejected as an unexpected line.
[tool:pytest]
# Silence DeprecationWarning during test runs.
filterwarnings =
    ignore::DeprecationWarning
# Name prefixes used to collect test classes and test functions, in addition
# to the conventional "Test" / "test_" prefixes.
python_classes = Test Describe
python_functions = test_ it_ they_ but_ and_
# Custom markers, registered as "<name>: <description>".
markers =
    chipper: mark a test as running chipper, which tends to be slow and compute-heavy.
# Directories searched for tests when pytest is run without explicit paths.
testpaths =
    test_unstructured
    test_unstructured_ingest
# autoflake settings (removal of unused imports and variables).
# NOTE(review): each key is expected to mirror the autoflake command-line
# option of the same name -- confirm against the installed autoflake version.
[autoflake]
expand_star_imports = true
ignore_pass_statements = false
recursive = true
quiet = true
remove_all_unused_imports = true
remove_unused_variables = true