mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-13 03:55:55 +00:00

### Description Replacing PR [1383](https://github.com/Unstructured-IO/unstructured/pull/1383) --------- Co-authored-by: Trevor Bossert <alanboss@gmail.com> Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com> Co-authored-by: rbiseck3 <rbiseck3@users.noreply.github.com>
94 lines
2.2 KiB
JSON
94 lines
2.2 KiB
JSON
[
|
||
{
|
||
"type": "Title",
|
||
"element_id": "0540311f6c077fe8f797080918b8d74b",
|
||
"metadata": {
|
||
"data_source": {
|
||
"url": "example-docs/fake-html-cp1252.html",
|
||
"permissions_data": [
|
||
{
|
||
"mode": 33188
|
||
}
|
||
]
|
||
},
|
||
"filetype": "text/html",
|
||
"languages": [
|
||
"por",
|
||
"cat",
|
||
"eng",
|
||
"vie"
|
||
],
|
||
"page_number": 1
|
||
},
|
||
"text": "My First Heading"
|
||
},
|
||
{
|
||
"type": "Title",
|
||
"element_id": "399af454cb1368b8257ed406b430de84",
|
||
"metadata": {
|
||
"data_source": {
|
||
"url": "example-docs/fake-html-cp1252.html",
|
||
"permissions_data": [
|
||
{
|
||
"mode": 33188
|
||
}
|
||
]
|
||
},
|
||
"filetype": "text/html",
|
||
"languages": [
|
||
"por",
|
||
"cat",
|
||
"eng",
|
||
"vie"
|
||
],
|
||
"page_number": 1
|
||
},
|
||
"text": "My first paragraph."
|
||
},
|
||
{
|
||
"type": "Title",
|
||
"element_id": "b4cf0d13edfa976816649971bd640a66",
|
||
"metadata": {
|
||
"data_source": {
|
||
"url": "example-docs/fake-html-cp1252.html",
|
||
"permissions_data": [
|
||
{
|
||
"mode": 33188
|
||
}
|
||
]
|
||
},
|
||
"filetype": "text/html",
|
||
"languages": [
|
||
"por",
|
||
"cat",
|
||
"eng",
|
||
"vie"
|
||
],
|
||
"page_number": 1
|
||
},
|
||
"text": "Some CP1252-specific characters:"
|
||
},
|
||
{
|
||
"type": "NarrativeText",
|
||
"element_id": "24dc2ef4b79651e2c0434d2724b48fc1",
|
||
"metadata": {
|
||
"data_source": {
|
||
"url": "example-docs/fake-html-cp1252.html",
|
||
"permissions_data": [
|
||
{
|
||
"mode": 33188
|
||
}
|
||
]
|
||
},
|
||
"filetype": "text/html",
|
||
"languages": [
|
||
"por",
|
||
"cat",
|
||
"eng",
|
||
"vie"
|
||
],
|
||
"page_number": 1
|
||
},
|
||
"text": "¡\t¢\t£\t¤\t¥\t¦\t§\t¨\t©\tª\t«\t¬\tSHY\t®\t¯\n°\t±\t²\t³\t´\tµ\t¶\t·\t¸\t¹\tº\t»\t¼\t½\t¾\t¿\nÀ\tÁ\tÂ\tÃ\tÄ\tÅ\tÆ\tÇ\tÈ\tÉ\tÊ\tË\tÌ\tÍ\tÎ\tÏ\nÐ\tÑ\tÒ\tÓ\tÔ\tÕ\tÖ\t×\tØ\tÙ\tÚ\tÛ\tÜ\tÝ\tÞ\tß\nà\tá\tâ\tã\tä\tå\tæ\tç\tè\té\tê\të\tì\tí\tî\tï\nð\tñ\tò\tó\tô\tõ\tö\t÷\tø\tù\tú\tû\tü\tý\tþ\tÿ"
|
||
}
|
||
] |