mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-08-04 23:03:11 +00:00
122 lines
5.3 KiB
JSON
122 lines
5.3 KiB
JSON
![]() |
[
|
||
|
{
|
||
|
"element_id": "3a6b156a81764e17be128264241f8136",
|
||
|
"metadata": {
|
||
|
"category_depth": 0,
|
||
|
"filename": "example.pdf",
|
||
|
"page_number": 1,
|
||
|
"parent_id": "897a8a47377c4ad6aab839a929879537",
|
||
|
"text_as_html": "<div class=\"Page\" data-page-number=\"1\" id=\"3a6b156a81764e17be128264241f8136\" />"
|
||
|
},
|
||
|
"text": "",
|
||
|
"type": "UncategorizedText"
|
||
|
},
|
||
|
{
|
||
|
"element_id": "45b3d0053468484ba1c7b53998115412",
|
||
|
"metadata": {
|
||
|
"category_depth": 1,
|
||
|
"filename": "example.pdf",
|
||
|
"page_number": 1,
|
||
|
"parent_id": "3a6b156a81764e17be128264241f8136",
|
||
|
"text_as_html": "<header class=\"Header\" id=\"45b3d0053468484ba1c7b53998115412\" />"
|
||
|
},
|
||
|
"text": "",
|
||
|
"type": "UncategorizedText"
|
||
|
},
|
||
|
{
|
||
|
"element_id": "c95473e8a3704fc2b418697f9fddb27b",
|
||
|
"metadata": {
|
||
|
"category_depth": 2,
|
||
|
"filename": "example.pdf",
|
||
|
"page_number": 1,
|
||
|
"parent_id": "45b3d0053468484ba1c7b53998115412",
|
||
|
"text_as_html": "<h1 class=\"Title\" id=\"c95473e8a3704fc2b418697f9fddb27b\">Header </h1>"
|
||
|
},
|
||
|
"text": "Header",
|
||
|
"type": "Title"
|
||
|
},
|
||
|
{
|
||
|
"element_id": "379cbfdc16d44bd6a59e6cfabe6438d5",
|
||
|
"metadata": {
|
||
|
"category_depth": 2,
|
||
|
"filename": "example.pdf",
|
||
|
"page_number": 1,
|
||
|
"parent_id": "45b3d0053468484ba1c7b53998115412",
|
||
|
"text_as_html": "<time class=\"CalendarDate\" id=\"379cbfdc16d44bd6a59e6cfabe6438d5\">Date: October 30, 2023 </time>"
|
||
|
},
|
||
|
"text": "Date: October 30, 2023",
|
||
|
"type": "UncategorizedText"
|
||
|
},
|
||
|
{
|
||
|
"element_id": "637c2f6935fb4353a5f73025ce04619d",
|
||
|
"metadata": {
|
||
|
"category_depth": 1,
|
||
|
"filename": "example.pdf",
|
||
|
"page_number": 1,
|
||
|
"parent_id": "3a6b156a81764e17be128264241f8136",
|
||
|
"text_as_html": "<form class=\"Form\" id=\"637c2f6935fb4353a5f73025ce04619d\"> <label class=\"FormField\" for=\"company-name\" id=\"50027cccbe1948c9853ce0de37b635c2\">From field name </label><input class=\"FormFieldValue\" id=\"0032242af75c4b37984ea7fea9aac74c\" value=\"Example value\" /></form>"
|
||
|
},
|
||
|
"text": "From field name",
|
||
|
"type": "UncategorizedText"
|
||
|
},
|
||
|
{
|
||
|
"element_id": "592422373ed741b68a077e2003f8ed81",
|
||
|
"metadata": {
|
||
|
"category_depth": 1,
|
||
|
"filename": "example.pdf",
|
||
|
"page_number": 1,
|
||
|
"parent_id": "3a6b156a81764e17be128264241f8136",
|
||
|
"text_as_html": "<section class=\"Section\" id=\"592422373ed741b68a077e2003f8ed81\" />"
|
||
|
},
|
||
|
"text": "",
|
||
|
"type": "UncategorizedText"
|
||
|
},
|
||
|
{
|
||
|
"element_id": "dc3792d4422e444f90876b56d0cfb20d",
|
||
|
"metadata": {
|
||
|
"category_depth": 2,
|
||
|
"filename": "example.pdf",
|
||
|
"page_number": 1,
|
||
|
"parent_id": "592422373ed741b68a077e2003f8ed81",
|
||
|
"text_as_html": "<table class=\"Table\" id=\"dc3792d4422e444f90876b56d0cfb20d\"> <thead class=\"TableHeader\" id=\"50a5548a87e84024af590b3d2830d140\"> <tr class=\"TableRow\" id=\"5e473d7742474412be72dc4e2c45bd4a\"> <th class=\"TableCellHeader\" id=\"01800309aa42411c98ae30f85b23f399\">Description </th><th class=\"TableCellHeader\" id=\"c2765b63d08946a2851955e79e301de4\">Row header </th></tr></thead><tbody class=\"TableBody\" id=\"e0a9a8ffdd7148ad8b4a274b073d340a\"> <tr class=\"TableRow\" id=\"77e829974632455191330b0b8545d1e3\"> <td class=\"TableCell\" id=\"7fee12d4c5554b7da778d6f8fdec8a57\">Value description </td><td class=\"TableCell\" id=\"5a7a33b0c57b4eb881a35bce9f87c831\"> <span class=\"Currency\" id=\"87220f9d62c3482e92e7de72a26869cd\">50 $ </span><span class=\"Measurement\" id=\"0095b9efb90a4cca991e73547c7165f1\">(1.32 %) </span></td></tr></tbody></table>"
|
||
|
},
|
||
|
"text": "Description Row header Value description 50 $ (1.32 %)",
|
||
|
"type": "Table"
|
||
|
},
|
||
|
{
|
||
|
"element_id": "1032242af75c4b37984ea7fea9aac74c",
|
||
|
"metadata": {
|
||
|
"category_depth": 1,
|
||
|
"filename": "example.pdf",
|
||
|
"page_number": 1,
|
||
|
"parent_id": "3a6b156a81764e17be128264241f8136",
|
||
|
"text_as_html": "<section class=\"Section\" id=\"1032242af75c4b37984ea7fea9aac74c\" />"
|
||
|
},
|
||
|
"text": "",
|
||
|
"type": "UncategorizedText"
|
||
|
},
|
||
|
{
|
||
|
"element_id": "2a4e2c4a689f4f9a8c180b6b521e45c3",
|
||
|
"metadata": {
|
||
|
"category_depth": 2,
|
||
|
"filename": "example.pdf",
|
||
|
"page_number": 1,
|
||
|
"parent_id": "1032242af75c4b37984ea7fea9aac74c",
|
||
|
"text_as_html": "<h2 class=\"Subtitle\" id=\"2a4e2c4a689f4f9a8c180b6b521e45c3\">2. Subtitle </h2>"
|
||
|
},
|
||
|
"text": "2. Subtitle",
|
||
|
"type": "Title"
|
||
|
},
|
||
|
{
|
||
|
"element_id": "5591f7a4df01447e82515ce45f686fbe",
|
||
|
"metadata": {
|
||
|
"category_depth": 2,
|
||
|
"filename": "example.pdf",
|
||
|
"page_number": 1,
|
||
|
"parent_id": "1032242af75c4b37984ea7fea9aac74c",
|
||
|
"text_as_html": "<p class=\"NarrativeText\" id=\"5591f7a4df01447e82515ce45f686fbe\">Paragraph text </p>"
|
||
|
},
|
||
|
"text": "Paragraph text",
|
||
|
"type": "NarrativeText"
|
||
|
}
|
||
|
]
|