# pyright: reportPrivateUsage=false import os import pathlib from typing import Dict, List import pytest from lxml import etree from lxml import html as lxml_html from unstructured.documents import html from unstructured.documents.base import Page from unstructured.documents.elements import ( ListItem, NarrativeText, Table, Text, Title, ) from unstructured.documents.html import ( HEADING_TAGS, LIST_ITEM_TAGS, SECTION_TAGS, TABLE_TAGS, TEXT_TAGS, HTMLAddress, HTMLDocument, HTMLNarrativeText, HTMLTable, HTMLText, HTMLTitle, TagsMixin, _parse_HTMLTable_from_table_elem, ) DIRECTORY = pathlib.Path(__file__).parent.resolve() TAGS = ( ( "