from pathlib import Path from docling.backend.msword_backend import MsWordDocumentBackend from docling.datamodel.base_models import InputFormat from docling.datamodel.document import InputDocument, SectionHeaderItem def test_heading_levels(): in_path = Path("tests/data/word_sample.docx") in_doc = InputDocument( path_or_stream=in_path, format=InputFormat.DOCX, backend=MsWordDocumentBackend, ) backend = MsWordDocumentBackend( in_doc=in_doc, path_or_stream=in_path, ) doc = backend.convert() found_lvl_1 = found_lvl_2 = False for item, _ in doc.iterate_items(): if isinstance(item, SectionHeaderItem): if item.text == "Let\u2019s swim!": found_lvl_1 = True assert item.level == 1 elif item.text == "Let\u2019s eat": found_lvl_2 = True assert item.level == 2 assert found_lvl_1 and found_lvl_2