mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-12-27 15:13:35 +00:00
fix: disable table_as_cells output by default (#3093)
This PR changes the output of table elements: now by default the table
elements' `metadata.table_as_cells` is `None`. The data will only be
populated when the env `EXTRACT_TABLE_AS_CELLS` is set to `true`.
The original design of `table_as_cells` is for evaluating table
extraction performance. The format itself is not as readable as the
`table_as_html` metadata for human or RAG consumption. Therefore by
default this data is not needed.
Since this output is meant for evaluation use, this PR chooses to use an
environment variable to control if it should be present in the
partitioned results. This approach avoids adding parameters to the
`partition` function call. Adding a new parameter to the `partition`
interface increases the complexity of the interface and adds more
maintenance cost since there is a long chain of function calls to pass
down this parameter to where it is needed.
## test
running the following code snippet on main vs. this PR
```python
from unstructured.partition.auto import partition
elements = partition("example-docs/layout-parser-paper-with-table.pdf", strategy="hi_res", skip_infer_table_types=[])
table_cells = [getattr(element.metadata, "table_as_cells", None) for element in elements if element.category == "Table"]
```
on main branch `table_cells` contains cell structured data but on this
branch it is a list of `None`
However if we first set in terminal:
```bash
export EXTRACT_TABLE_AS_CELLS=true
```
then run the same code again with this PR the `table_cells` would
contain actual data, the same as on main branch.
---------
Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com>
Co-authored-by: badGarnet <badGarnet@users.noreply.github.com>
This commit is contained in:
parent
809c7e515a
commit
32df4ee1c6
@ -9,11 +9,13 @@
|
||||
|
||||
### Fixes
|
||||
|
||||
* **Add backward compatibility for the deprecated pdf_infer_table_structure parameter**.
|
||||
* **Add the missing `form_extraction_skip_tables` argument to the `partition_pdf_or_image` call**.
|
||||
* **Turn off XML resolve entities** Sets `resolve_entities=False` for XML parsing with `lxml`
|
||||
to avoid text being dynamically injected into the XML document.
|
||||
* **Add backward compatibility for the deprecated pdf_infer_table_structure parameter**.
|
||||
* **Add the missing `form_extraction_skip_tables` argument to the `partition_pdf_or_image` call**.
|
||||
to avoid text being dynamically injected into the XML document.
|
||||
* **Chromadb change from Add to Upsert using element_id to make idempotent**
|
||||
* **Disable `table_as_cells` output by default** to reduce overhead in partition; now `table_as_cells` is only produced when the env `EXTRACT_TABLE_AS_CELLS` is `true`
|
||||
* **Reduce excessive logging** Change per page ocr info level logging into detail level trace logging
|
||||
* **Replace try block in `document_to_element_list` for handling HTMLDocument** Use `getattr(element, "type", "")` to get the `type` attribute of an element when it exists. This is a more explicit way to handle the special case for HTML documents, and it prevents other types of attribute errors from being silenced by the try block
|
||||
|
||||
|
||||
@ -49,176 +49,6 @@
|
||||
"text": "Dataset | Base Model\" Large Model | Notes PubLayNet [38] P/M M Layouts of modern scientific documents PRImA [3) M - Layouts of scanned modern magazines and scientific reports Newspaper [17] P - Layouts of scanned US newspapers from the 20th century \u2018TableBank (18) P P Table region on modern scientific and business document HJDataset (31) | F/M - Layouts of history Japanese documents",
|
||||
"metadata": {
|
||||
"text_as_html": "<table><thead><th>Dataset</th><th>| Base Model!|</th><th>Large Model</th><th>| Notes</th></thead><tr><td>PubLayNet [33]</td><td>P/M</td><td>M</td><td>Layouts of modern scientific documents</td></tr><tr><td>PRImA [3]</td><td>M</td><td></td><td>Layouts of scanned modern magazines and scientific reports</td></tr><tr><td>Newspaper [17]</td><td>P</td><td></td><td>Layouts of scanned US newspapers from the 20th century</td></tr><tr><td>TableBank [18]</td><td>P</td><td></td><td>Table region on modern scientific and business document</td></tr><tr><td>HIDataset [31]</td><td>P/M</td><td></td><td>Layouts of history Japanese documents</td></tr></table>",
|
||||
"table_as_cells": [
|
||||
{
|
||||
"x": 0,
|
||||
"y": 0,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Dataset"
|
||||
},
|
||||
{
|
||||
"x": 0,
|
||||
"y": 1,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "PubLayNet [33]"
|
||||
},
|
||||
{
|
||||
"x": 0,
|
||||
"y": 2,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "PRImA [3]"
|
||||
},
|
||||
{
|
||||
"x": 0,
|
||||
"y": 3,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Newspaper [17]"
|
||||
},
|
||||
{
|
||||
"x": 0,
|
||||
"y": 4,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "TableBank [18]"
|
||||
},
|
||||
{
|
||||
"x": 0,
|
||||
"y": 5,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "HIDataset [31]"
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 0,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "| Base Model!|"
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 1,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "P/M"
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 2,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "M"
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 3,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "P"
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 4,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "P"
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 5,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "P/M"
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 0,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Large Model"
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 1,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "M"
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 2,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 3,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 4,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 5,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 3,
|
||||
"y": 0,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "| Notes"
|
||||
},
|
||||
{
|
||||
"x": 3,
|
||||
"y": 1,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Layouts of modern scientific documents"
|
||||
},
|
||||
{
|
||||
"x": 3,
|
||||
"y": 2,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Layouts of scanned modern magazines and scientific reports"
|
||||
},
|
||||
{
|
||||
"x": 3,
|
||||
"y": 3,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Layouts of scanned US newspapers from the 20th century"
|
||||
},
|
||||
{
|
||||
"x": 3,
|
||||
"y": 4,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Table region on modern scientific and business document"
|
||||
},
|
||||
{
|
||||
"x": 3,
|
||||
"y": 5,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Layouts of history Japanese documents"
|
||||
}
|
||||
],
|
||||
"filetype": "image/jpeg",
|
||||
"languages": [
|
||||
"eng"
|
||||
|
||||
@ -841,134 +841,6 @@
|
||||
"text": "Dataset Base Model1 Large Model Notes PubLayNet [38] PRImA [3] Newspaper [17] TableBank [18] HJDataset [31] F / M M F F F / M M - - F - Layouts of modern scienti\ufb01c documents Layouts of scanned modern magazines and scienti\ufb01c reports Layouts of scanned US newspapers from the 20th century Table region on modern scienti\ufb01c and business document Layouts of history Japanese documents",
|
||||
"metadata": {
|
||||
"text_as_html": "<table><thead><th>Dataset</th><th>| Base Model'|</th><th>| Notes</th></thead><tr><td>PubLayNet B8]|</td><td>F/M</td><td>Layouts of modern scientific documents</td></tr><tr><td>PRImA</td><td>M</td><td>Layouts of scanned modern magazines and scientific report</td></tr><tr><td>Newspaper</td><td>F</td><td>Layouts of scanned US newspapers from the 20th century</td></tr><tr><td>TableBank</td><td>F</td><td>Table region on modern scientific and business document</td></tr><tr><td>HJDataset</td><td>F/M</td><td>Layouts of history Japanese documents</td></tr></table>",
|
||||
"table_as_cells": [
|
||||
{
|
||||
"x": 0,
|
||||
"y": 0,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Dataset"
|
||||
},
|
||||
{
|
||||
"x": 0,
|
||||
"y": 1,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "PubLayNet B8]|"
|
||||
},
|
||||
{
|
||||
"x": 0,
|
||||
"y": 2,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "PRImA"
|
||||
},
|
||||
{
|
||||
"x": 0,
|
||||
"y": 3,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Newspaper"
|
||||
},
|
||||
{
|
||||
"x": 0,
|
||||
"y": 4,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "TableBank"
|
||||
},
|
||||
{
|
||||
"x": 0,
|
||||
"y": 5,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "HJDataset"
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 0,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "| Base Model'|"
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 1,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "F/M"
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 2,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "M"
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 3,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "F"
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 4,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "F"
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 5,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "F/M"
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 0,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "| Notes"
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 1,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Layouts of modern scientific documents"
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 2,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Layouts of scanned modern magazines and scientific report"
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 3,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Layouts of scanned US newspapers from the 20th century"
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 4,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Table region on modern scientific and business document"
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 5,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Layouts of history Japanese documents"
|
||||
}
|
||||
],
|
||||
"filetype": "application/pdf",
|
||||
"languages": [
|
||||
"eng"
|
||||
@ -1520,260 +1392,6 @@
|
||||
"text": "Operation Name Description block.pad(top, bottom, right, left) Enlarge the current block according to the input block.scale(fx, fy) Scale the current block given the ratio in x and y direction block.shift(dx, dy) Move the current block with the shift distances in x and y direction block1.is in(block2) Whether block1 is inside of block2 block1.intersect(block2) Return the intersection region of block1 and block2. Coordinate type to be determined based on the inputs. block1.union(block2) Return the union region of block1 and block2. Coordinate type to be determined based on the inputs. block1.relative to(block2) Convert the absolute coordinates of block1 to relative coordinates to block2 block1.condition on(block2) Calculate the absolute coordinates of block1 given the canvas block2\u2019s absolute coordinates block.crop image(image) Obtain the image segments in the block region",
|
||||
"metadata": {
|
||||
"text_as_html": "<table><thead><th>block.pad(top, bottom,</th><th>right,</th><th>left)</th><th>Enlarge the current block according to the input</th></thead><tr><td>block.scale(fx, fy)</td><td></td><td></td><td>Scale the current block given the ratio in x and y direction</td></tr><tr><td>block.shift(dx, dy)</td><td></td><td></td><td>Move the current block with the shift distances in x and y direction</td></tr><tr><td>block1.is_in(block2)</td><td></td><td></td><td>Whether block] is inside of block2</td></tr><tr><td>block1. intersect (block2)</td><td></td><td></td><td>Return the intersection region of blockl and block2. Coordinate type to be determined based on the inputs</td></tr><tr><td>block1.union(block2)</td><td></td><td></td><td>Return the union region of blockl and block2. Coordinate type to be determined based on the inputs</td></tr><tr><td>block1.relative_to(block2)</td><td></td><td></td><td>Convert the absolute coordinates of block to relative coordinates to block2</td></tr><tr><td>block1.condition_on(block2)</td><td></td><td></td><td>Calculate the absolute coordinates of blockl given the canvas block2\u2019s absolute coordinates</td></tr><tr><td>block. crop_image (image)</td><td></td><td></td><td>Obtain the image segments in the block region</td></tr></table>",
|
||||
"table_as_cells": [
|
||||
{
|
||||
"x": 0,
|
||||
"y": 0,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "block.pad(top, bottom,"
|
||||
},
|
||||
{
|
||||
"x": 0,
|
||||
"y": 1,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "block.scale(fx, fy)"
|
||||
},
|
||||
{
|
||||
"x": 0,
|
||||
"y": 2,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "block.shift(dx, dy)"
|
||||
},
|
||||
{
|
||||
"x": 0,
|
||||
"y": 3,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "block1.is_in(block2)"
|
||||
},
|
||||
{
|
||||
"x": 0,
|
||||
"y": 4,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "block1. intersect (block2)"
|
||||
},
|
||||
{
|
||||
"x": 0,
|
||||
"y": 5,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "block1.union(block2)"
|
||||
},
|
||||
{
|
||||
"x": 0,
|
||||
"y": 6,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "block1.relative_to(block2)"
|
||||
},
|
||||
{
|
||||
"x": 0,
|
||||
"y": 7,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "block1.condition_on(block2)"
|
||||
},
|
||||
{
|
||||
"x": 0,
|
||||
"y": 8,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "block. crop_image (image)"
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 0,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "right,"
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 1,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 2,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 3,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 4,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 5,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 6,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 7,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 1,
|
||||
"y": 8,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 0,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "left)"
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 1,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 2,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 3,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 4,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 5,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 6,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 7,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 2,
|
||||
"y": 8,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": ""
|
||||
},
|
||||
{
|
||||
"x": 3,
|
||||
"y": 0,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Enlarge the current block according to the input"
|
||||
},
|
||||
{
|
||||
"x": 3,
|
||||
"y": 1,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Scale the current block given the ratio in x and y direction"
|
||||
},
|
||||
{
|
||||
"x": 3,
|
||||
"y": 2,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Move the current block with the shift distances in x and y direction"
|
||||
},
|
||||
{
|
||||
"x": 3,
|
||||
"y": 3,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Whether block] is inside of block2"
|
||||
},
|
||||
{
|
||||
"x": 3,
|
||||
"y": 4,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Return the intersection region of blockl and block2. Coordinate type to be determined based on the inputs"
|
||||
},
|
||||
{
|
||||
"x": 3,
|
||||
"y": 5,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Return the union region of blockl and block2. Coordinate type to be determined based on the inputs"
|
||||
},
|
||||
{
|
||||
"x": 3,
|
||||
"y": 6,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Convert the absolute coordinates of block to relative coordinates to block2"
|
||||
},
|
||||
{
|
||||
"x": 3,
|
||||
"y": 7,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Calculate the absolute coordinates of blockl given the canvas block2\u2019s absolute coordinates"
|
||||
},
|
||||
{
|
||||
"x": 3,
|
||||
"y": 8,
|
||||
"w": 1,
|
||||
"h": 1,
|
||||
"content": "Obtain the image segments in the block region"
|
||||
}
|
||||
],
|
||||
"filetype": "application/pdf",
|
||||
"languages": [
|
||||
"eng"
|
||||
|
||||
@ -253,7 +253,7 @@ def supplement_element_with_table_extraction(
|
||||
"""Supplement the existing layout with table extraction. Any Table elements
|
||||
that are extracted will have a metadata fields "text_as_html" where
|
||||
the table's text content is rendered into a html string and "table_as_cells"
|
||||
with the raw table cells output from table agent
|
||||
with the raw table cells output from table agent if env_config.EXTRACT_TABLE_AS_CELLS is True
|
||||
"""
|
||||
from unstructured_inference.models.tables import cells_to_html
|
||||
|
||||
@ -279,13 +279,15 @@ def supplement_element_with_table_extraction(
|
||||
tatr_cells = tables_agent.predict(
|
||||
cropped_image, ocr_tokens=table_tokens, result_format="cells"
|
||||
)
|
||||
text_as_html = cells_to_html(tatr_cells)
|
||||
simple_table_cells = [
|
||||
SimpleTableCell.from_table_transformer_cell(cell).to_dict() for cell in tatr_cells
|
||||
]
|
||||
|
||||
text_as_html = cells_to_html(tatr_cells)
|
||||
element.text_as_html = text_as_html
|
||||
element.table_as_cells = simple_table_cells
|
||||
|
||||
if env_config.EXTRACT_TABLE_AS_CELLS:
|
||||
simple_table_cells = [
|
||||
SimpleTableCell.from_table_transformer_cell(cell).to_dict() for cell in tatr_cells
|
||||
]
|
||||
element.table_as_cells = simple_table_cells
|
||||
|
||||
return elements
|
||||
|
||||
|
||||
@ -116,6 +116,11 @@ class ENVConfig:
|
||||
"""
|
||||
return self._get_int("EXTRACT_IMAGE_BLOCK_CROP_VERTICAL_PAD", 0)
|
||||
|
||||
@property
|
||||
def EXTRACT_TABLE_AS_CELLS(self) -> bool:
|
||||
"""adds `table_as_cells` to a Table element's metadata when it is True"""
|
||||
return self._get_bool("EXTRACT_TABLE_AS_CELLS", False)
|
||||
|
||||
@property
|
||||
def OCR_LAYOUT_SUBREGION_THRESHOLD(self) -> float:
|
||||
"""threshold to determine if an OCR region is a sub-region of a given block
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user