2023-07-24 10:06:13 -07:00
|
|
|
|
[
|
|
|
|
|
{
|
|
|
|
|
"type": "Title",
|
|
|
|
|
"element_id": "0540311f6c077fe8f797080918b8d74b",
|
|
|
|
|
"metadata": {
|
2023-10-23 11:51:52 -04:00
|
|
|
|
"data_source": {
|
|
|
|
|
"url": "example-docs/fake-html-cp1252.html",
|
|
|
|
|
"permissions_data": [
|
|
|
|
|
{
|
|
|
|
|
"mode": 33188
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
2023-07-24 10:06:13 -07:00
|
|
|
|
"filetype": "text/html",
|
2023-10-10 20:47:56 -05:00
|
|
|
|
"languages": [
|
2023-10-12 13:31:23 -04:00
|
|
|
|
"por",
|
|
|
|
|
"cat",
|
|
|
|
|
"eng",
|
|
|
|
|
"vie"
|
2023-10-10 20:47:56 -05:00
|
|
|
|
],
|
2023-07-24 10:06:13 -07:00
|
|
|
|
"page_number": 1
|
|
|
|
|
},
|
|
|
|
|
"text": "My First Heading"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"type": "Title",
|
|
|
|
|
"element_id": "399af454cb1368b8257ed406b430de84",
|
|
|
|
|
"metadata": {
|
2023-10-23 11:51:52 -04:00
|
|
|
|
"data_source": {
|
|
|
|
|
"url": "example-docs/fake-html-cp1252.html",
|
|
|
|
|
"permissions_data": [
|
|
|
|
|
{
|
|
|
|
|
"mode": 33188
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
2023-07-24 10:06:13 -07:00
|
|
|
|
"filetype": "text/html",
|
2023-10-10 20:47:56 -05:00
|
|
|
|
"languages": [
|
2023-10-12 13:31:23 -04:00
|
|
|
|
"por",
|
|
|
|
|
"cat",
|
|
|
|
|
"eng",
|
|
|
|
|
"vie"
|
2023-10-10 20:47:56 -05:00
|
|
|
|
],
|
2023-07-24 10:06:13 -07:00
|
|
|
|
"page_number": 1
|
|
|
|
|
},
|
|
|
|
|
"text": "My first paragraph."
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"type": "Title",
|
|
|
|
|
"element_id": "b4cf0d13edfa976816649971bd640a66",
|
|
|
|
|
"metadata": {
|
2023-10-23 11:51:52 -04:00
|
|
|
|
"data_source": {
|
|
|
|
|
"url": "example-docs/fake-html-cp1252.html",
|
|
|
|
|
"permissions_data": [
|
|
|
|
|
{
|
|
|
|
|
"mode": 33188
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
2023-07-24 10:06:13 -07:00
|
|
|
|
"filetype": "text/html",
|
2023-10-10 20:47:56 -05:00
|
|
|
|
"languages": [
|
2023-10-12 13:31:23 -04:00
|
|
|
|
"por",
|
|
|
|
|
"cat",
|
|
|
|
|
"eng",
|
|
|
|
|
"vie"
|
2023-10-10 20:47:56 -05:00
|
|
|
|
],
|
2023-07-24 10:06:13 -07:00
|
|
|
|
"page_number": 1
|
|
|
|
|
},
|
|
|
|
|
"text": "Some CP1252-specific characters:"
|
|
|
|
|
},
|
|
|
|
|
{
|
2023-08-25 00:14:48 -04:00
|
|
|
|
"type": "NarrativeText",
|
|
|
|
|
"element_id": "24dc2ef4b79651e2c0434d2724b48fc1",
|
2023-07-24 10:06:13 -07:00
|
|
|
|
"metadata": {
|
2023-10-23 11:51:52 -04:00
|
|
|
|
"data_source": {
|
|
|
|
|
"url": "example-docs/fake-html-cp1252.html",
|
|
|
|
|
"permissions_data": [
|
|
|
|
|
{
|
|
|
|
|
"mode": 33188
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
2023-07-24 10:06:13 -07:00
|
|
|
|
"filetype": "text/html",
|
2023-10-10 20:47:56 -05:00
|
|
|
|
"languages": [
|
2023-10-12 13:31:23 -04:00
|
|
|
|
"por",
|
|
|
|
|
"cat",
|
|
|
|
|
"eng",
|
|
|
|
|
"vie"
|
2023-10-10 20:47:56 -05:00
|
|
|
|
],
|
2023-08-25 00:14:48 -04:00
|
|
|
|
"page_number": 1
|
2023-07-24 10:06:13 -07:00
|
|
|
|
},
|
2023-08-25 00:14:48 -04:00
|
|
|
|
"text": "¡\t¢\t£\t¤\t¥\t¦\t§\t¨\t©\tª\t«\t¬\tSHY\t®\t¯\n°\t±\t²\t³\t´\tµ\t¶\t·\t¸\t¹\tº\t»\t¼\t½\t¾\t¿\nÀ\tÁ\tÂ\tÃ\tÄ\tÅ\tÆ\tÇ\tÈ\tÉ\tÊ\tË\tÌ\tÍ\tÎ\tÏ\nÐ\tÑ\tÒ\tÓ\tÔ\tÕ\tÖ\t×\tØ\tÙ\tÚ\tÛ\tÜ\tÝ\tÞ\tß\nà\tá\tâ\tã\tä\tå\tæ\tç\tè\té\tê\të\tì\tí\tî\tï\nð\tñ\tò\tó\tô\tõ\tö\t÷\tø\tù\tú\tû\tü\tý\tþ\tÿ"
|
2023-07-24 10:06:13 -07:00
|
|
|
|
}
|
|
|
|
|
]
|