from unstructured.partition.html import partition_html


def _partition_with_v2_parser(html, image_alt_mode):
    """Call ``partition_html`` on *html* with ``html_parser_version="v2"``.

    ``image_alt_mode`` is forwarded unchanged and the call's result is
    returned as-is.
    """
    return partition_html(
        text=html,
        image_alt_mode=image_alt_mode,
        html_parser_version="v2",
    )


def test_alternative_image_text_can_be_included():
    """Alt text is in the second element's text only for ``image_alt_mode="to_text"``."""
    # NOTE(review): the fixture below contains no markup as seen here --
    # presumably the HTML tags were stripped during extraction; confirm
    # against the upstream file before relying on this test.
    # language=HTML
    html = """
ALT TEXT Logo
"""

    _, with_alt_text = _partition_with_v2_parser(html, "to_text")
    assert "ALT TEXT Logo" in with_alt_text.text

    _, without_alt_text = _partition_with_v2_parser(html, None)
    assert "ALT TEXT Logo" not in without_alt_text.text


def test_alternative_image_text_can_be_included_when_nested_in_paragraph():
    """Same alt-text check as above, for the fixture named as paragraph-nested."""
    # NOTE(review): no paragraph or image markup is visible in this fixture --
    # presumably stripped during extraction; confirm against the upstream file.
    # language=HTML
    html = """

ALT TEXT Logo

"""

    _, with_alt_text = _partition_with_v2_parser(html, "to_text")
    assert "ALT TEXT Logo" in with_alt_text.text

    _, without_alt_text = _partition_with_v2_parser(html, None)
    assert "ALT TEXT Logo" not in without_alt_text.text


def test_attr_and_html_inside_table_cell_is_kept():
    """Check the ``text_as_html`` metadata of the second returned element."""
    # NOTE(review): no table markup is visible in this fixture -- presumably
    # stripped during extraction; confirm against the upstream file.
    # language=HTML
    html = """
Some text
"""

    _, table_element = _partition_with_v2_parser(html, "to_text")
    html_snapshot = table_element.metadata.text_as_html

    # NOTE(review): an empty-string containment check passes for any str, so
    # this assertion looks vacuous -- the expected snippet was presumably lost
    # together with the fixture markup; restore it from the upstream file.
    assert "" in html_snapshot
    # class is removed
    assert 'colspan="2"' in html_snapshot