mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2026-01-07 12:50:54 +00:00
92 lines
2.2 KiB
JSON
92 lines
2.2 KiB
JSON
[
|
||
{
|
||
"type": "Title",
|
||
"element_id": "0540311f6c077fe8f797080918b8d74b",
|
||
"metadata": {
|
||
"data_source": {},
|
||
"filetype": "text/html",
|
||
"page_number": 1
|
||
},
|
||
"text": "My First Heading"
|
||
},
|
||
{
|
||
"type": "Title",
|
||
"element_id": "399af454cb1368b8257ed406b430de84",
|
||
"metadata": {
|
||
"data_source": {},
|
||
"filetype": "text/html",
|
||
"page_number": 1
|
||
},
|
||
"text": "My first paragraph."
|
||
},
|
||
{
|
||
"type": "Title",
|
||
"element_id": "b4cf0d13edfa976816649971bd640a66",
|
||
"metadata": {
|
||
"data_source": {},
|
||
"filetype": "text/html",
|
||
"page_number": 1
|
||
},
|
||
"text": "Some CP1252-specific characters:"
|
||
},
|
||
{
|
||
"type": "UncategorizedText",
|
||
"element_id": "ada7c3084f437d31d297f85da3941a55",
|
||
"metadata": {
|
||
"data_source": {},
|
||
"filetype": "text/html",
|
||
"page_number": 2
|
||
},
|
||
"text": "¡\t¢\t£\t¤\t¥\t¦\t§\t¨\t©\tª\t«\t¬\tSHY\t®\t¯"
|
||
},
|
||
{
|
||
"type": "UncategorizedText",
|
||
"element_id": "dda5e8c4d245c1954ecb64e5dfea598d",
|
||
"metadata": {
|
||
"data_source": {},
|
||
"filetype": "text/html",
|
||
"page_number": 3
|
||
},
|
||
"text": "°\t±\t²\t³\t´\tµ\t¶\t·\t¸\t¹\tº\t»\t¼\t½\t¾\t¿"
|
||
},
|
||
{
|
||
"type": "Title",
|
||
"element_id": "85df09b375e5813aefa3b5f30c8ddff8",
|
||
"metadata": {
|
||
"data_source": {},
|
||
"filetype": "text/html",
|
||
"page_number": 4
|
||
},
|
||
"text": "À\tÁ\tÂ\tÃ\tÄ\tÅ\tÆ\tÇ\tÈ\tÉ\tÊ\tË\tÌ\tÍ\tÎ\tÏ"
|
||
},
|
||
{
|
||
"type": "Title",
|
||
"element_id": "2726d2569cd7a6cecb79a6e46bb0b2b3",
|
||
"metadata": {
|
||
"data_source": {},
|
||
"filetype": "text/html",
|
||
"page_number": 5
|
||
},
|
||
"text": "Ð\tÑ\tÒ\tÓ\tÔ\tÕ\tÖ\t×\tØ\tÙ\tÚ\tÛ\tÜ\tÝ\tÞ\tß"
|
||
},
|
||
{
|
||
"type": "Title",
|
||
"element_id": "2b01f3e428520f6e47d8513292688cf6",
|
||
"metadata": {
|
||
"data_source": {},
|
||
"filetype": "text/html",
|
||
"page_number": 6
|
||
},
|
||
"text": "à\tá\tâ\tã\tä\tå\tæ\tç\tè\té\tê\të\tì\tí\tî\tï"
|
||
},
|
||
{
|
||
"type": "Title",
|
||
"element_id": "5ed256e41bfb169af5f50524b9593a16",
|
||
"metadata": {
|
||
"data_source": {},
|
||
"filetype": "text/html",
|
||
"page_number": 7
|
||
},
|
||
"text": "ð\tñ\tò\tó\tô\tõ\tö\t÷\tø\tù\tú\tû\tü\tý\tþ\tÿ"
|
||
}
|
||
] |