mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-18 22:46:44 +00:00

**Summary** Pagination of HTML documents is currently unused. The `Page` class and concept were deeply embedded in the legacy organization of the HTML partitioning code due to the legacy `Document` (= pages of elements) domain model. Remove this concept from the code so that elements are available directly from the partitioner. **Additional Context** - Pagination can be re-added later if we decide we want it again. A re-implementation would be much simpler, have a much lower impact on the structure of the code, and introduce much less additional complexity, similar to the approach we take in `partition_docx()`.
31 lines
623 B
INI
31 lines
623 B
INI
# Package metadata (presumably consumed by setuptools from setup.cfg -- confirm).
[metadata]
license_files = LICENSE.md

# flake8 linter settings.
[flake8]
# Error codes suppressed for the whole project.
ignore = E203,E704,W503
max-line-length = 100
# One excluded path per indented continuation line.
exclude =
    .venv
    unstructured-inference
# Entries are "<file glob>: <codes>"; here T20 is ignored for every file.
per-file-ignores =
    *: T20

# pytest settings.
[tool:pytest]
# One warning filter per indented continuation line.
filterwarnings =
    ignore::DeprecationWarning
# Name prefixes used for test classes and test functions.
python_classes = Test Describe
python_functions = test_ it_ they_ but_ and_
# Custom markers, one "<name>: <description>" entry per line.
markers =
    chipper: mark a test as running chipper, which tends to be slow and compute-heavy.
# Test directories, one per indented continuation line.
testpaths =
    test_unstructured
    test_unstructured_ingest

# autoflake settings (unused-import / unused-variable cleanup).
[autoflake]
expand_star_imports=true
ignore_pass_statements=false
recursive=true
quiet=true
remove_all_unused_imports=true
remove_unused_variables=true