mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-11-02 02:53:31 +00:00
feat: table evaluations for fixed html table generation (#3196)
Update the evaluation script to handle correct HTML syntax for tables. See https://github.com/Unstructured-IO/unstructured-inference/pull/355 for details. This change: - modifies how HTML tables are transformed into the evaluation's internal `cells` format - fixes the indexing of the output (internal-format cells) when HTML cells use spans
This commit is contained in:
parent
dadc9c6d0b
commit
29e64eb281
@ -1,4 +1,4 @@
|
||||
## 0.14.6-dev5
|
||||
## 0.14.6-dev6
|
||||
|
||||
### Enhancements
|
||||
|
||||
@ -13,6 +13,7 @@
|
||||
* **table metric bug fix** `get_element_level_alignment()` now finds all the matched indices in the predicted table data instead of returning only the first match when there are multiple matches for the same gt string.
|
||||
* **fsspec connector path/permissions bug** V2 fsspec connectors were failing when defined relative filepaths had a leading slash. This strips that slash to guarantee the relative path never has it.
|
||||
* **Dropbox connector internal file path bugs** Dropbox source connector currently raises exceptions when indexing files due to two issues: a path formatting idiosyncrasy of the Dropbox library and a divergence in the definition of the Dropbox library's fs.info method, which expects a 'url' parameter rather than 'path'.
|
||||
* **update table metric evaluation to handle corrected HTML syntax for tables** This change is connected to the update in [unstructured-inference change](https://github.com/Unstructured-IO/unstructured-inference/pull/355) and fixes the transformation of HTML tables into the Deckerd and internal cells formats.
|
||||
|
||||
## 0.14.5
|
||||
|
||||
|
||||
@ -33,7 +33,7 @@ def test_table_eval_processor_simple():
|
||||
{
|
||||
"type": "Table",
|
||||
"metadata": {
|
||||
"text_as_html": """<table><thead><th>r1c1</th><th>r1c2</th></thead>
|
||||
"text_as_html": """<table><thead><tr><th>r1c1</th><th>r1c2</th></tr></thead>
|
||||
<tbody><tr><td>r2c1</td><td>r2c2</td></tr></tbody></table>"""
|
||||
},
|
||||
}
|
||||
@ -201,14 +201,62 @@ def test_table_eval_processor_when_wrong_source_type():
|
||||
@pytest.mark.parametrize(
|
||||
"text_as_html",
|
||||
[
|
||||
"""<table><thead><th>r1c1</th><th>r1c2</th></thead>
|
||||
<tbody><tr><td>r2c1</td><td>r2c2</td></tr><tr><td>r3c1</td>
|
||||
<td>r3c2</td></tr></tbody></table>""",
|
||||
"""<table><tr><th>r1c1</th><th>r1c2</th></tr>
|
||||
<tbody><tr><td>r2c1</td><td>r2c2</td></tr><tr><td>r3c1</td>
|
||||
<td>r3c2</td></tr></tbody></table>""",
|
||||
"""<table><tr><td>r1c1</td><td>r1c2</td></tr><tr><td>r2c1</td>
|
||||
<td>r2c2</td></tr><tr><td>r3c1</td><td>r3c2</td></tr></tbody></table>""",
|
||||
"""
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>r1c1</th>
|
||||
<th>r1c2</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>r2c1</td>
|
||||
<td>r2c2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>r3c1</td>
|
||||
<td>r3c2</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
""",
|
||||
"""
|
||||
<table>
|
||||
<tr>
|
||||
<th>r1c1</th>
|
||||
<th>r1c2</th>
|
||||
</tr>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>r2c1</td>
|
||||
<td>r2c2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>r3c1</td>
|
||||
<td>r3c2</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
""",
|
||||
"""
|
||||
<table>
|
||||
</tbody>
|
||||
<tr>
|
||||
<td>r1c1</td>
|
||||
<td>r1c2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>r2c1</td>
|
||||
<td>r2c2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>r3c1</td>
|
||||
<td>r3c2</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
""",
|
||||
],
|
||||
)
|
||||
def test_table_eval_processor_various_table_html_structures(text_as_html):
|
||||
@ -285,8 +333,21 @@ def test_table_eval_processor_non_str_values_in_table():
|
||||
{
|
||||
"type": "Table",
|
||||
"metadata": {
|
||||
"text_as_html": """<table><thead><th>11</th><th>12</th></thead>
|
||||
<tbody><tr><td>21</td><td>22</td></tr></tbody></table>"""
|
||||
"text_as_html": """
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>11</th>
|
||||
<th>12</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>21</td>
|
||||
<td>22</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>"""
|
||||
},
|
||||
}
|
||||
]
|
||||
@ -341,19 +402,38 @@ def test_table_eval_processor_non_str_values_in_table():
|
||||
assert result.element_col_level_content_acc == 1.0
|
||||
|
||||
|
||||
@pytest.mark.xfail(
|
||||
reason="This is expected to fail as table eval metrics does not cover merged cells"
|
||||
)
|
||||
def test_table_eval_processor_merged_cells():
|
||||
prediction = [
|
||||
{
|
||||
"type": "Table",
|
||||
"metadata": {
|
||||
"text_as_html": """
|
||||
<table><thead><th rowspan="2">r1c1</th><th>r1c2</th><th colspan="2">r1c3</th></tr>
|
||||
<tr><th>r2c2</th><th>r2c3</th><th>r2c4</th><</thead>
|
||||
<tbody><tr><td>r3c1</td><td>r3c2</td><td colspan="2" rowspan="2">r3c3</td></tr>
|
||||
<tr><td>r4c1</td><td>r4c2</td></tr></tbody></table>"""
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th rowspan="2">r1c1</th>
|
||||
<th>r1c2</th>
|
||||
<th colspan="2">r1c3</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>r2c2</th>
|
||||
<th>r2c3</th>
|
||||
<th>r2c4</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>r3c1</td>
|
||||
<td>r3c2</td>
|
||||
<td colspan="2" rowspan="2">r3c3</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>r4c1</td>
|
||||
<td>r4c2</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
"""
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
@ -159,7 +159,7 @@ def test_calculate_edit_distance_with_filename(filename, expected_score, expecte
|
||||
),
|
||||
(
|
||||
"""Sometimes sentences have a dash - like this one!
|
||||
A hyphen connects 2 words with no gap: easy-peasy.""",
|
||||
A hyphen connects 2 words with no gap: easy-peasy.""",
|
||||
{
|
||||
"sometimes": 1,
|
||||
"sentences": 1,
|
||||
@ -222,24 +222,334 @@ def test_calculate_percent_missing_text(output_text, source_text, expected_perce
|
||||
)
|
||||
|
||||
|
||||
def test_cells_extraction_from_prediction_when_simple_example():
|
||||
example_element = {
|
||||
"type": "Table",
|
||||
"metadata": {
|
||||
"text_as_html": "<table><thead><th>Month A.</th></thead><tr><td>22</td></tr></table>",
|
||||
"table_as_cells": [
|
||||
@pytest.mark.parametrize(
    ("table_as_cells", "expected_extraction"),
    [
        pytest.param(
            [
                {"x": 0, "y": 0, "w": 1, "h": 1, "content": "Month A."},
                {"x": 0, "y": 1, "w": 1, "h": 1, "content": "22"},
            ],
            [
                {"row_index": 0, "col_index": 0, "content": "Month A."},
                {"row_index": 1, "col_index": 0, "content": "22"},
            ],
            id="Simple table, 1 head cell, 1 body cell, no spans",
        ),
        pytest.param(
            [
                {"x": 0, "y": 0, "w": 1, "h": 1, "content": "Month A."},
                {"x": 1, "y": 0, "w": 1, "h": 1, "content": "Month B."},
                {"x": 2, "y": 0, "w": 1, "h": 1, "content": "Month C."},
                {"x": 0, "y": 1, "w": 1, "h": 1, "content": "11"},
                {"x": 1, "y": 1, "w": 1, "h": 1, "content": "12"},
                {"x": 2, "y": 1, "w": 1, "h": 1, "content": "13"},
                {"x": 0, "y": 2, "w": 1, "h": 1, "content": "21"},
                {"x": 1, "y": 2, "w": 1, "h": 1, "content": "22"},
                {"x": 2, "y": 2, "w": 1, "h": 1, "content": "23"},
            ],
            [
                {"row_index": 0, "col_index": 0, "content": "Month A."},
                {"row_index": 0, "col_index": 1, "content": "Month B."},
                {"row_index": 0, "col_index": 2, "content": "Month C."},
                {"row_index": 1, "col_index": 0, "content": "11"},
                {"row_index": 1, "col_index": 1, "content": "12"},
                {"row_index": 1, "col_index": 2, "content": "13"},
                {"row_index": 2, "col_index": 0, "content": "21"},
                {"row_index": 2, "col_index": 1, "content": "22"},
                {"row_index": 2, "col_index": 2, "content": "23"},
            ],
            id="Simple table, 3 head cell, 5 body cell, no spans",
        ),
        # +----------+---------------------+----------+
        # |          |       h1col23       |  h1col4  |
        # | h12col1  |----------+----------+----------|
        # |          |  h2col2  |       h2col34       |
        # |----------|----------+----------+----------+
        # |  r3col1  |  r3col2  |                     |
        # |----------+----------|      r34col34       |
        # |       r4col12       |                     |
        # +----------+----------+----------+----------+
        #
        # "w" is the number of columns a cell spans and "h" the number of rows,
        # so e.g. h12col1 (two rows tall, one column wide) is w=1, h=2 and
        # h1col23 (one row tall, two columns wide) is w=2, h=1.
        pytest.param(
            [
                {"y": 0, "x": 0, "w": 1, "h": 2, "content": "h12col1"},
                {"y": 0, "x": 1, "w": 2, "h": 1, "content": "h1col23"},
                {"y": 0, "x": 3, "w": 1, "h": 1, "content": "h1col4"},
                {"y": 1, "x": 1, "w": 1, "h": 1, "content": "h2col2"},
                {"y": 1, "x": 2, "w": 2, "h": 1, "content": "h2col34"},
                {"y": 2, "x": 0, "w": 1, "h": 1, "content": "r3col1"},
                {"y": 2, "x": 1, "w": 1, "h": 1, "content": "r3col2"},
                {"y": 2, "x": 2, "w": 2, "h": 2, "content": "r34col34"},
                {"y": 3, "x": 0, "w": 2, "h": 1, "content": "r4col12"},
            ],
            [
                {"row_index": 0, "col_index": 0, "content": "h12col1"},
                {"row_index": 0, "col_index": 1, "content": "h1col23"},
                {"row_index": 0, "col_index": 3, "content": "h1col4"},
                {"row_index": 1, "col_index": 1, "content": "h2col2"},
                {"row_index": 1, "col_index": 2, "content": "h2col34"},
                {"row_index": 2, "col_index": 0, "content": "r3col1"},
                {"row_index": 2, "col_index": 1, "content": "r3col2"},
                {"row_index": 2, "col_index": 2, "content": "r34col34"},
                {"row_index": 3, "col_index": 0, "content": "r4col12"},
            ],
            id="various spans, with 2 row header",
        ),
    ],
)
def test_cells_table_extraction_from_prediction(table_as_cells, expected_extraction):
    """Check that Deckerd-format `table_as_cells` metadata is converted to internal cells.

    Each case pairs a list of Deckerd cells (`x`, `y`, `w`, `h`, `content`) with the
    list of internal cells (`row_index`, `col_index`, `content`) expected from
    `extract_cells_from_table_as_cells`.
    """
    example_element = {
        "type": "Table",
        "metadata": {"table_as_cells": table_as_cells},
    }
    assert extract_cells_from_table_as_cells(example_element) == expected_extraction
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("text_as_html", "expected_extraction"),
    [
        pytest.param(
            """
<table>
  <thead>
    <tr>
      <th>Month A.</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>22</td>
    </tr>
  </tbody>
</table>
""",
            [
                {"row_index": 0, "col_index": 0, "content": "Month A."},
                {"row_index": 1, "col_index": 0, "content": "22"},
            ],
            id="Simple table, 1 head cell, 1 body cell, no spans",
        ),
        pytest.param(
            """
<table>
  <thead>
    <tr>
      <th>Month A.</th>
      <th>Month B.</th>
      <th>Month C.</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>11</td>
      <td>12</td>
      <td>13</td>
    </tr>
    <tr>
      <td>21</td>
      <td>22</td>
      <td>23</td>
    </tr>
  </tbody>
</table>
""",
            [
                {"row_index": 0, "col_index": 0, "content": "Month A."},
                {"row_index": 0, "col_index": 1, "content": "Month B."},
                {"row_index": 0, "col_index": 2, "content": "Month C."},
                {"row_index": 1, "col_index": 0, "content": "11"},
                {"row_index": 1, "col_index": 1, "content": "12"},
                {"row_index": 1, "col_index": 2, "content": "13"},
                {"row_index": 2, "col_index": 0, "content": "21"},
                {"row_index": 2, "col_index": 1, "content": "22"},
                {"row_index": 2, "col_index": 2, "content": "23"},
            ],
            id="Simple table, 3 head cell, 5 body cell, no spans",
        ),
        # +----------+---------------------+----------+
        # |          |       h1col23       |  h1col4  |
        # | h12col1  |----------+----------+----------|
        # |          |  h2col2  |       h2col34       |
        # |----------|----------+----------+----------+
        # |  r3col1  |  r3col2  |                     |
        # |----------+----------|      r34col34       |
        # |       r4col12       |                     |
        # +----------+----------+----------+----------+
        pytest.param(
            """
<table>
  <thead>
    <tr>
      <th rowspan="2">h12col1</th>
      <th colspan="2">h1col23</th>
      <th>h1col4</th>
    </tr>
    <tr>
      <th>h2col2</th>
      <th colspan="2">h2col34</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>r3col1</td>
      <td>r3col2</td>
      <td colspan="2" rowspan="2">r34col34</td>
    </tr>
    <tr>
      <td colspan="2">r4col12</td>
    </tr>
  </tbody>
</table>
""",
            [
                {"row_index": 0, "col_index": 0, "content": "h12col1"},
                {"row_index": 0, "col_index": 1, "content": "h1col23"},
                {"row_index": 0, "col_index": 3, "content": "h1col4"},
                {"row_index": 1, "col_index": 1, "content": "h2col2"},
                {"row_index": 1, "col_index": 2, "content": "h2col34"},
                {"row_index": 2, "col_index": 0, "content": "r3col1"},
                {"row_index": 2, "col_index": 1, "content": "r3col2"},
                {"row_index": 2, "col_index": 2, "content": "r34col34"},
                {"row_index": 3, "col_index": 0, "content": "r4col12"},
            ],
            id="various spans, with 2 row header",
        ),
    ],
)
def test_html_table_extraction_from_prediction(text_as_html, expected_extraction):
    """Check that `text_as_html` metadata is converted to internal cells.

    Each case pairs an HTML table with the list of internal cells (`row_index`,
    `col_index`, `content`) expected from `extract_cells_from_text_as_html`,
    including a case where rowspan/colspan shift the column index of later cells.
    """
    example_element = {
        "type": "Table",
        "metadata": {
            "text_as_html": text_as_html,
        },
    }
    assert extract_cells_from_text_as_html(example_element) == expected_extraction
|
||||
|
||||
|
||||
def test_cells_extraction_from_prediction_when_missing_prediction():
|
||||
|
||||
@ -1 +1 @@
|
||||
__version__ = "0.14.6-dev5" # pragma: no cover
|
||||
__version__ = "0.14.6-dev6" # pragma: no cover
|
||||
|
||||
@ -11,8 +11,34 @@ EMPTY_CELL = {
|
||||
}
|
||||
|
||||
|
||||
def _convert_table_from_html(content: str) -> List[Dict[str, Any]]:
|
||||
"""Convert html format to table structure.
|
||||
def _move_cells_for_spanned_cells(cells: List[Dict[str, Any]]):
|
||||
"""Move cells to the right if spanned cells have an influence on the rendering.
|
||||
|
||||
Args:
|
||||
cells: List of cells in the table in Deckerd format.
|
||||
|
||||
Returns:
|
||||
List of cells in the table in Deckerd format with cells moved to the right if spanned.
|
||||
"""
|
||||
sorted_cells = sorted(cells, key=lambda x: (x["y"], x["x"]))
|
||||
cells_occupied_by_spanned = set()
|
||||
for cell in sorted_cells:
|
||||
if cell["w"] > 1 or cell["h"] > 1:
|
||||
for i in range(cell["y"], cell["y"] + cell["h"]):
|
||||
for j in range(cell["x"], cell["x"] + cell["w"]):
|
||||
if (i, j) != (cell["y"], cell["x"]):
|
||||
cells_occupied_by_spanned.add((i, j))
|
||||
while (cell["y"], cell["x"]) in cells_occupied_by_spanned:
|
||||
cell_y, cell_x = cell["y"], cell["x"]
|
||||
cells_to_the_right = [c for c in sorted_cells if c["y"] == cell_y and c["x"] >= cell_x]
|
||||
for cell_to_move in cells_to_the_right:
|
||||
cell_to_move["x"] += 1
|
||||
cells_occupied_by_spanned.remove((cell_y, cell_x))
|
||||
return sorted_cells
|
||||
|
||||
|
||||
def _html_table_to_deckerd(content: str) -> List[Dict[str, Any]]:
    """Convert html format to Deckerd table structure.

    Every `<tr>` under the first `<table>` becomes one row and every `<th>`/`<td>`
    inside it one cell. Cells first get their position within the row as column
    index; `_move_cells_for_spanned_cells` then shifts them to account for
    `rowspan`/`colspan` of other cells.

    Args:
        content: The html content with a table to extract.

    Returns:
        A list of dictionaries where each dictionary represents a cell in the table,
        with the keys "y" (row), "x" (column), "w" (colspan), "h" (rowspan) and
        "content" (cell text). Empty if the content holds no `<table>`.
    """
    soup = BeautifulSoup(content, "html.parser")
    table = soup.find("table")
    if table is None:
        # No table in the markup: nothing to extract.
        return []

    table_data = []
    for i, row in enumerate(table.find_all("tr")):
        for j, cell_data in enumerate(row.find_all(["th", "td"])):
            table_data.append(
                {
                    "y": i,
                    "x": j,
                    # Missing span attributes mean the cell covers a single slot.
                    "w": int(cell_data.attrs.get("colspan", 1)),
                    "h": int(cell_data.attrs.get("rowspan", 1)),
                    "content": cell_data.text,
                }
            )
    return _move_cells_for_spanned_cells(table_data)
|
||||
|
||||
if headers:
|
||||
for j, header in enumerate(headers):
|
||||
cell = {
|
||||
"row_index": i,
|
||||
"col_index": j,
|
||||
"content": header.text,
|
||||
}
|
||||
table_data.append(cell)
|
||||
|
||||
if data_row:
|
||||
for k, data in enumerate(data_row):
|
||||
cell = {
|
||||
"row_index": i,
|
||||
"col_index": k,
|
||||
"content": data.text,
|
||||
}
|
||||
table_data.append(cell)
|
||||
return table_data
|
||||
def _convert_table_from_html(content: str) -> List[Dict[str, Any]]:
    """Turn an HTML table into the internal table structure.

    The HTML is not converted directly: it is first translated to the Deckerd
    cell format, which is easier to work with, and that intermediate result is
    then converted to the final structure.

    Args:
        content: The html content with a table to extract.

    Returns:
        A list of dictionaries where each dictionary represents a cell in the table.
    """
    return _convert_table_from_deckerd(_html_table_to_deckerd(content))
|
||||
|
||||
|
||||
def _convert_table_from_deckerd(content: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
@ -151,11 +182,15 @@ def extract_cells_from_text_as_html(element: Dict[str, Any]) -> List[Dict[str, A
|
||||
"metadata": {
|
||||
"text_as_html": "<table>
|
||||
<thead>
|
||||
<th>Month A.</th>
|
||||
<tr>
|
||||
<th>Month A.</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tr>
|
||||
<td>22</td><
|
||||
/tr>
|
||||
</tbody>
|
||||
<tr>
|
||||
<td>22</td><
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>"
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user