mirror of
https://github.com/docling-project/docling.git
synced 2025-07-03 23:28:14 +00:00
31 lines
946 B
Python
31 lines
946 B
Python
from pathlib import Path
|
|
|
|
from docling.backend.html_backend import HTMLDocumentBackend
|
|
from docling.datamodel.base_models import InputFormat
|
|
from docling.datamodel.document import InputDocument, SectionHeaderItem
|
|
|
|
|
|
def test_heading_levels():
    """Check section-header levels produced by the HTML backend.

    Converts the ``tests/data/wiki_duck.html`` fixture with
    ``HTMLDocumentBackend`` and verifies that every section header whose
    text is "Etymology" has level 2 and every one whose text is "Feeding"
    has level 3. The test also fails if either header is absent from the
    converted document.
    """
    html_file = Path("tests/data/wiki_duck.html")
    input_document = InputDocument(
        path_or_stream=html_file,
        format=InputFormat.HTML,
        backend=HTMLDocumentBackend,
    )
    converted = HTMLDocumentBackend(
        in_doc=input_document,
        path_or_stream=html_file,
    ).convert()

    # Header text -> level the backend is expected to assign to it.
    expected_levels = {"Etymology": 2, "Feeding": 3}
    seen_headers = set()

    for node, _ in converted.iterate_items():
        if not isinstance(node, SectionHeaderItem):
            continue
        wanted_level = expected_levels.get(node.text)
        if wanted_level is None:
            # Not one of the headers this test tracks.
            continue
        # Record the hit before asserting, mirroring flag-then-check order.
        seen_headers.add(node.text)
        assert node.level == wanted_level

    # Both tracked headers must have been encountered at least once.
    assert "Etymology" in seen_headers and "Feeding" in seen_headers
|