mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00

also removes mimetype when base64 is not included in image metadata --------- Co-authored-by: ryannikolaidis <ryannikolaidis@users.noreply.github.com>
631 lines
16 KiB
JSON
631 lines
16 KiB
JSON
[
|
|
{
|
|
"element_id": "cd153f73463db45ea02bd9ba6ce4168e",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Page with every block",
|
|
"type": "UncategorizedText"
|
|
},
|
|
{
|
|
"element_id": "098442d39ccc8a9731627be8a843d02a",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Notion Tip: Tag pages to let collaborators know what they can expect to use the page for. You can add one or many tags to any page in a wiki.",
|
|
"type": "NarrativeText"
|
|
},
|
|
{
|
|
"element_id": "868a2b2294814990d664cf13ffd1e2a7",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Heading 2",
|
|
"type": "UncategorizedText"
|
|
},
|
|
{
|
|
"element_id": "af888c9a9a14c9c6616cf54ac230c20a",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "This is some new text",
|
|
"type": "NarrativeText"
|
|
},
|
|
{
|
|
"element_id": "99388232115e119009419bd8b07c93b9",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"emphasized_text_contents": [
|
|
"formatted"
|
|
],
|
|
"emphasized_text_tags": [
|
|
"b"
|
|
],
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
],
|
|
"link_texts": [
|
|
"text"
|
|
],
|
|
"link_urls": [
|
|
"/9ba4d6da8a574cfc81ebceac1fde52bd"
|
|
]
|
|
},
|
|
"text": "Some/less → more formatted text with other content and stuff 2023-08-07 : @Roman Isecke",
|
|
"type": "UncategorizedText"
|
|
},
|
|
{
|
|
"element_id": "91b9abcc226cbe676d827950030c6702",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
],
|
|
"text_as_html": "<table><tr><td>column 1</td><td>column 2</td><td>pages</td></tr><tr><td>c1r1 content</td><td>c2r1 table <br/> 2023-08-08T09:00:00.000-04:00<br/> cell</td><td>Page with every block</td></tr><tr><td>c1r2 more content</td><td>c2r2 table cell</td><td>Untitled</td></tr><tr><td>this is some green text</td><td>this is an equation</td><td>Untitled</td></tr><tr><td>text1 text2 Multiline cell</td><td>Another cell</td><td>Untitled</td></tr></table>"
|
|
},
|
|
"text": "column 1 column 2 pages c1r1 content c2r1 table \n 2023-08-08T09:00:00.000-04:00\n cell Page with every block c1r2 more content c2r2 table cell Untitled this is some green text this is an equation Untitled text1 text2 Multiline cell Another cell Untitled",
|
|
"type": "Table"
|
|
},
|
|
{
|
|
"element_id": "0b73b1397f01db39dc98a983bd3aeb3d",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "E = {mc^2}",
|
|
"type": "UncategorizedText"
|
|
},
|
|
{
|
|
"element_id": "7535c23e3c0bda50ea38df65f7a64bca",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Numbered list",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "155061ede32096c81085eabf421f9fe0",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "A number child",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "1ff4a64dcc74b4cbdf4270776c2adab0",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "A number grandchild",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "9e0342a8c3a010f7802d874fa447f72b",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "great",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "240e4a3a9b5843192b03086325da2169",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "super great",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "d1e6a3da60ba834365b2230689c4d8a6",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "with test text",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "db78c6b732dc265e380889e394c6354f",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Bullet one",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "f31b201c44870108f395a238bff36413",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "A child bullet",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "5929608d0a4d2f055635bbab72df26ec",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "A grandchild bullet",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "1e93d6f8cf7c8af51ddf222be77b4882",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "great",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "c53244024b7b1e86b20bcc1489d9dc4a",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "super great",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "3602b0a8a126be064654623590163f49",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Bullet two",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "27d5b17e90250d77a76da1f6d93f8e8b",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "I quote myself testings Notion",
|
|
"type": "NarrativeText"
|
|
},
|
|
{
|
|
"element_id": "8831856d3670d91d6fa2121af0694022",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
],
|
|
"link_texts": [
|
|
"https://www.notion.so/icons/airplane_brown.svg"
|
|
],
|
|
"link_urls": [
|
|
"https://www.notion.so/icons/airplane_brown.svg"
|
|
]
|
|
},
|
|
"text": "https://www.notion.so/icons/airplane_brown.svg I call this out",
|
|
"type": "NarrativeText"
|
|
},
|
|
{
|
|
"element_id": "df59e087da5910b2cb1c98801bb24c85",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
],
|
|
"link_texts": [
|
|
"https://www.wikipedia.org/"
|
|
],
|
|
"link_urls": [
|
|
"https://www.wikipedia.org/"
|
|
]
|
|
},
|
|
"text": "https://www.wikipedia.org/",
|
|
"type": "UncategorizedText"
|
|
},
|
|
{
|
|
"element_id": "0f215d56b4a1fc900dc2dad40b7df66f",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
],
|
|
"link_texts": [
|
|
"https://play-lh.googleusercontent.com/KwUBNPbMTk9jDXYS2AeX3illtVRTkrKVh5xR1Mg4WHd0CG2tV4mrh1z3kXi5z_warlk"
|
|
],
|
|
"link_urls": [
|
|
"https://play-lh.googleusercontent.com/KwUBNPbMTk9jDXYS2AeX3illtVRTkrKVh5xR1Mg4WHd0CG2tV4mrh1z3kXi5z_warlk"
|
|
]
|
|
},
|
|
"text": "https://play-lh.googleusercontent.com/KwUBNPbMTk9jDXYS2AeX3illtVRTkrKVh5xR1Mg4WHd0CG2tV4mrh1z3kXi5z_warlk",
|
|
"type": "UncategorizedText"
|
|
},
|
|
{
|
|
"element_id": "5da75c186c36d3117e60f08d49e66085",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Child Database:",
|
|
"type": "UncategorizedText"
|
|
},
|
|
{
|
|
"element_id": "a82757a2b9004569ab1761d061847bd3",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
],
|
|
"link_texts": [
|
|
"Analytics"
|
|
],
|
|
"link_urls": [
|
|
"https://www.notion.so/d1fad658f1cf4eedb0b5ee72b9f0b530"
|
|
]
|
|
},
|
|
"text": "Analytics",
|
|
"type": "UncategorizedText"
|
|
},
|
|
{
|
|
"element_id": "29a6be22a8770f106f54f4abcdc1de68",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Child Page:",
|
|
"type": "UncategorizedText"
|
|
},
|
|
{
|
|
"element_id": "d07d54a1ce286a7679952d4e4ce82c8e",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
],
|
|
"link_texts": [
|
|
"Untitled"
|
|
],
|
|
"link_urls": [
|
|
"https://www.notion.so/9ba4d6da8a574cfc81ebceac1fde52bd"
|
|
]
|
|
},
|
|
"text": "Untitled",
|
|
"type": "UncategorizedText"
|
|
},
|
|
{
|
|
"element_id": "d4c02f5b35a00e87ef7be603d82c5df3",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "s = \"this is some code\"",
|
|
"type": "NarrativeText"
|
|
},
|
|
{
|
|
"element_id": "59aab31c8b60641b906a81db51c596a6",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "This is my code caption",
|
|
"type": "NarrativeText"
|
|
},
|
|
{
|
|
"element_id": "7fc741d4226b15a910af95ff3fde6253",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "This is some text",
|
|
"type": "NarrativeText"
|
|
},
|
|
{
|
|
"element_id": "f67f0aef4f1ceb0fa98491872aa741ac",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "This is text in next column",
|
|
"type": "NarrativeText"
|
|
},
|
|
{
|
|
"element_id": "f08a88064f2c33164502652db93fad32",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Final text in column",
|
|
"type": "UncategorizedText"
|
|
},
|
|
{
|
|
"element_id": "fa3e9d761730605036aaf854d9edd5b4",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Heading 1 content",
|
|
"type": "NarrativeText"
|
|
},
|
|
{
|
|
"element_id": "387c4d334f8e9650a56b3b444b2ad5f6",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"image_url": "https://media4.giphy.com/media/26FPsOhR3tyQRTc2Y/giphy.gif?cid=7941fdc68sl3vdqajgosqug9hfhg3zq3t5yoflyy9p7y66q0&ep=v1_gifs_trending&rid=giphy.gif&ct=g",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "",
|
|
"type": "Image"
|
|
},
|
|
{
|
|
"element_id": "60d9f47b086264ea72277b741e3b2bdd",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "d3d87fc6-61cc-4bb5-89ed-e9dff0df1526",
|
|
"type": "UncategorizedText"
|
|
},
|
|
{
|
|
"element_id": "b39f61345657ccc5e201c20a6a90fad7",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Stuff todo",
|
|
"type": "UncategorizedText"
|
|
},
|
|
{
|
|
"element_id": "b95452fe8c6616a1ce1311457526c302",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "more stuff todo",
|
|
"type": "UncategorizedText"
|
|
},
|
|
{
|
|
"element_id": "a7c3ee9360b2020e28aa31835ef5283c",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "More things to do",
|
|
"type": "NarrativeText"
|
|
},
|
|
{
|
|
"element_id": "349f058fcce7e32bb68b620841f40c9e",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-04T18:31:00.000Z",
|
|
"date_modified": "2023-08-17T18:48:00.000Z"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Something to do",
|
|
"type": "NarrativeText"
|
|
}
|
|
] |