mirror of
				https://github.com/Unstructured-IO/unstructured.git
				synced 2025-11-03 19:43:24 +00:00 
			
		
		
		
	There are two distinct overlap operations with completely different implementations. This one is "intra-chunk" overlap: applying overlap to the chunks that result from text-splitting an oversized element. So if an oversized element had the text "abcd efgh ijkl mnop qrst" and was split at 15 chars with an overlap of 5, it would produce "abcd efgh ijkl" and "ijkl mnop qrst". Any inter-chunk overlap, taken from the prior chunk and applied at the beginning of the string (before "abcd"), is handled by a separate operation in the next PR.
		
			
				
	
	
		
			31 lines
		
	
	
		
			618 B
		
	
	
	
		
			INI
		
	
	
	
	
	
			
		
		
	
	
			31 lines
		
	
	
		
			618 B
		
	
	
	
		
			INI
		
	
	
	
	
	
# Package metadata (setup.cfg-style section; presumably read by setuptools -- confirm).
[metadata]
# License file(s) shipped with the distribution.
license_files = LICENSE.md

# flake8 linter settings.
[flake8]
# Check codes disabled for the whole project.
ignore = E203,W503
max-line-length = 100
# Paths the linter skips, one per indented continuation line.
exclude =
    .venv
    unstructured-inference
# "<pattern>: <codes>" entries; the "*" pattern is meant to apply the T20
# ignore to every file.
per-file-ignores =
    *: T20

# pytest settings.
[tool:pytest]
# Warning filters, one per indented continuation line.
filterwarnings =
    ignore::DeprecationWarning
# Name prefixes pytest uses to collect test classes and test functions.
python_classes = Test Describe
python_functions = test_ it_ they_ but_ and_
# Custom markers, declared as "<name>: <description>".
markers =
    chipper: mark a test as running chipper, which tends to be slow and compute-heavy.
# Directories searched for tests, one per indented continuation line.
testpaths =
    test_unstructured
    test_unstructured_ingest

# autoflake settings (unused-import / unused-variable cleanup).
[autoflake]
expand_star_imports = true
ignore_pass_statements = false
recursive = true
quiet = true
remove_all_unused_imports = true
remove_unused_variables = true