import os from lxml import etree import pytest from unstructured.documents.base import Page from unstructured.documents.elements import Address, ListItem, NarrativeText, Text, Title from unstructured.documents.html import ( LIST_ITEM_TAGS, HTMLDocument, HTMLNarrativeText, HTMLTitle, TEXT_TAGS, TABLE_TAGS, HEADING_TAGS, TagsMixin, ) import unstructured.documents.html as html TAGS = ( "