mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-09-06 07:12:37 +00:00

*Reviewer:* It may be quicker to review commit by commit, as the commits are quite distinct and each is groomed to focus on a single clean-up task. Clean up odds-and-ends in the docx partitioner in preparation for adding nested-tables support in a closely following PR. 1. Remove obsolete TODOs that are now tracked as GitHub issues, which is probably where they belong in the future anyway. 2. Remove local DOCX "workaround" code that has been implemented upstream and is now obsolete. 3. "Clean" the docx tests, introducing strict typing, extracting a fixture or two, and generally tightening things up. 4. Extract docx-local versions of `unstructured.partition.common.convert_ms_office_table_to_text()`, which will be the base for adding nested-table support. More information on why this is required is in that commit.
37 lines
1.0 KiB
Python
37 lines
1.0 KiB
Python
from typing import Iterator, Sequence
|
|
|
|
from docx.blkcntnr import BlockItemContainer
|
|
from docx.enum.section import WD_SECTION
|
|
from docx.oxml.section import CT_SectPr
|
|
from docx.table import Table
|
|
from docx.text.paragraph import Paragraph
|
|
|
|
# Type-stub declaration of `Section`. Only the members listed here are typed;
# every body is `...` because this block declares signatures, not behavior.
# `_Footer` and `_Header` are declared further down in this file; the forward
# references are left unquoted, as is conventional for stub files.
class Section:
    # Private attribute typed as `CT_SectPr` (imported from `docx.oxml.section`).
    _sectPr: CT_SectPr
    # Boolean flag; only a getter is declared here.
    @property
    def different_first_page_header_footer(self) -> bool: ...
    # Header/footer accessors: each footer property is typed `_Footer` and each
    # header property `_Header`, in even-page, first-page and default variants.
    @property
    def even_page_footer(self) -> _Footer: ...
    @property
    def even_page_header(self) -> _Header: ...
    @property
    def first_page_footer(self) -> _Footer: ...
    @property
    def first_page_header(self) -> _Header: ...
    @property
    def footer(self) -> _Footer: ...
    @property
    def header(self) -> _Header: ...
    # Plain method (not a property): yields items typed `Paragraph | Table`.
    def iter_inner_content(self) -> Iterator[Paragraph | Table]: ...
    # Typed as a `WD_SECTION` member (imported from `docx.enum.section`).
    @property
    def start_type(self) -> WD_SECTION: ...
|
|
|
|
# Collection type for sections: declared as a `Sequence` of `Section` with no
# additional members of its own.
class Sections(Sequence[Section]):
    ...
|
|
|
|
# Common base for the `_Footer` and `_Header` stubs in this file. It extends
# `BlockItemContainer` (imported from `docx.blkcntnr`) and declares one extra
# boolean property; only a getter is declared here.
class _BaseHeaderFooter(BlockItemContainer):
    @property
    def is_linked_to_previous(self) -> bool: ...
|
|
|
|
# Footer type: a `_BaseHeaderFooter` subclass that declares no members of its
# own; it exists so footer properties have a distinct return type.
class _Footer(_BaseHeaderFooter):
    ...
|
|
# Header type: a `_BaseHeaderFooter` subclass that declares no members of its
# own; it exists so header properties have a distinct return type.
class _Header(_BaseHeaderFooter):
    ...
|