mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-28 19:35:24 +00:00

There are two distinct overlap operations with completely different implementations. This is "intra-chunk" overlap, which applies overlap to the chunks produced by text-splitting an oversized element. So if an oversized element had text "abcd efgh ijkl mnop qrst" and was split at 15 chars with an overlap of 5, it would produce "abcd efgh ijkl" and "ijkl mnop qrst". Any inter-chunk overlap taken from the prior chunk and applied at the beginning of the string (before "abcd") is handled by a separate operation in the next PR.
31 lines
618 B
INI
31 lines
618 B
INI
# Packaging metadata (setuptools reads this section when the file is setup.cfg).
[metadata]
# License file to bundle with built distributions.
license_files = LICENSE.md

# flake8 linter settings.
[flake8]
# Codes suppressed everywhere: E203 (whitespace before punctuation) and
# W503 (line break before a binary operator).
ignore = E203,W503
max-line-length = 100
# Paths flake8 skips; one entry per indented continuation line.
exclude =
    .venv
    unstructured-inference
# Additionally ignore the T20 code family in every file ("*" matches all paths).
# NOTE(review): T20x codes presumably come from the flake8-print plugin -- confirm.
per-file-ignores =
    *: T20

# pytest settings ([tool:pytest] is the section name pytest reads from setup.cfg).
[tool:pytest]
# Silence DeprecationWarning during test runs.
filterwarnings =
    ignore::DeprecationWarning
# Name prefixes pytest uses to collect test classes and test functions.
python_classes = Test Describe
python_functions = test_ it_ they_ but_ and_
# Custom markers, declared as "name: description".
markers =
    chipper: mark a test as running chipper, which tends to be slow and compute-heavy.
# Directories searched for tests when none are given on the command line.
testpaths =
    test_unstructured
    test_unstructured_ingest

# autoflake settings (unused-import / unused-variable cleanup).
[autoflake]
# Replace "from x import *" with the names actually used.
expand_star_imports = true
# false: redundant "pass" statements are still eligible for removal.
ignore_pass_statements = false
# Descend into directories given on the command line.
recursive = true
quiet = true
# Remove every unused import, not only standard-library ones.
remove_all_unused_imports = true
remove_unused_variables = true