mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-11-13 08:57:34 +00:00
Part two of: https://github.com/Unstructured-IO/unstructured/pull/2842 Main changes compared to part one: * hash computation includes element's sequence number on page, page number, document filename and its text * there are more test for deterministic behavior of IDs returned by partitioning functions + their uniqueness (guaranteed at the document level, and high probability across multiple documents) This PR addresses the following issue: https://github.com/Unstructured-IO/unstructured/issues/2461
778 lines
23 KiB
JSON
778 lines
23 KiB
JSON
[
|
|
{
|
|
"element_id": "bbf79449efd468940e9d4dec0a1bd616",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "a24a2601a0154646b662032959e18fcb",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "48723ccd2d681ec7065ffe86f60d2eb1",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "5681402f3d53987a356c23f900ee843f",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "2677a40f217655faa8328d5172002cf6",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "63e0681ab46ba97fc9094da6c04bfb03",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "74b84618bb90b661d60eef562242c09f",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "192b49818c9f63966481782cee6098ee",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "3152368b27dd71fd69107ed92b118095",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "379279d638c2311dd960c82ed43892d4",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "5e8a2c39eb484d18f982b3a972ac8e63",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "988151a26f5792af834d1ef75972a000",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "efbed9a2ef407ce6f01f2c0b19649c61",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "fbfa660cc333ebf76ea4e428d4f9b2c7",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "c04cb8aec98e23a9d76ee495b1ee0cf3",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 List Item 1Testdoc2 List Item 1 Nested Item ATestdoc2 List Item 1 Nested Item B",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "ffeca29c21a76f85df30ba3c33934884",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 List Item 2",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "51da759c3236af8ba8db78bf3073eb48",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 List Item 3",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "2debb77da92a8aed2b77daff6a1426f0",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 List Item 4",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "420d0c6f47e22d2f00fd6ca7f204bb8c",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 List Item 5",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "1f1406229cf15df139a62c1298b92c4b",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"link_start_indexes": [
|
|
0
|
|
],
|
|
"link_texts": [
|
|
"This is the link for unstructured . io."
|
|
],
|
|
"link_urls": [
|
|
"https://www.unstructured.io/"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "This is the link for unstructured . io.",
|
|
"type": "NarrativeText"
|
|
},
|
|
{
|
|
"element_id": "72fbf216913c586ac1285c07446e55e1",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"emphasized_text_contents": [
|
|
"Testdoc2 Checklist Item 1"
|
|
],
|
|
"emphasized_text_tags": [
|
|
"span"
|
|
],
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 Checklist Item 1",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "a095687b9b0c3eb008b40836f06c90a9",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"emphasized_text_contents": [
|
|
"Testdoc2 Checklist Item 2 (checked)"
|
|
],
|
|
"emphasized_text_tags": [
|
|
"span"
|
|
],
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 Checklist Item 2 (checked)",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "311e476c48960b926b12a07222ee4b55",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"emphasized_text_contents": [
|
|
"Testdoc2 Checklist Item 3"
|
|
],
|
|
"emphasized_text_tags": [
|
|
"span"
|
|
],
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 Checklist Item 3",
|
|
"type": "ListItem"
|
|
},
|
|
{
|
|
"element_id": "707db83e91a01d5406d7690ff64672a6",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "😃 😃 😃 😃 😃 😃 😃 😃 😃 😃 😃 😃",
|
|
"type": "UncategorizedText"
|
|
},
|
|
{
|
|
"element_id": "00a741394474c8951363eaac7d3c9c9a",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"emphasized_text_contents": [
|
|
"Testdoc2 bold text"
|
|
],
|
|
"emphasized_text_tags": [
|
|
"strong"
|
|
],
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 bold text",
|
|
"type": "NarrativeText"
|
|
},
|
|
{
|
|
"element_id": "f513bfdce18c859eca131221854e8241",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"emphasized_text_contents": [
|
|
"Testdoc2 italic text"
|
|
],
|
|
"emphasized_text_tags": [
|
|
"em"
|
|
],
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 italic text",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "ddd01d57e091c1b0c476c5474a761c0f",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 Heading 1 Sized Text",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "cb3c9031e973f35fcc6306ef80514c68",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 Heading 2 Sized Text",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "0009184908aef19595b65219a242eaea",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 Heading 3 Sized Text",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "5402f9dd0ca7da2db3e2e75fd3bc2c72",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 Heading 4 Sized Text",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "0a17c30db07459d93002bdfea7fee6aa",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 Heading 5 Sized Text",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "24a342664a77bae50ab8b4a58f37d21f",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000",
|
|
"record_locator": {
|
|
"page_id": "1802252",
|
|
"url": "https://unstructured-ingest-test.atlassian.net"
|
|
},
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1,
|
|
"text_as_html": "<table><tr><td>Testdoc2 Table: Column 1 Row 0</td><td>Testdoc2 Table: Column 2 Row 0</td><td>Testdoc2 Table: Column 3 Row 0</td></tr><tr><td>Testdoc2 Table: Column 1 Row 1</td><td>Testdoc2 Table: Column 2 Row 1</td><td>Testdoc2 Table: Column 3 Row 1</td></tr><tr><td>Testdoc2 Table: Column 1 Row 2</td><td>Testdoc2 Table: Column 2 Row 2</td><td>Testdoc2 Table: Column 3 Row 2</td></tr></table>"
|
|
},
|
|
"text": "Testdoc2 Table: Column 1 Row 0 Testdoc2 Table: Column 2 Row 0 Testdoc2 Table: Column 3 Row 0 Testdoc2 Table: Column 1 Row 1 Testdoc2 Table: Column 2 Row 1 Testdoc2 Table: Column 3 Row 1 Testdoc2 Table: Column 1 Row 2 Testdoc2 Table: Column 2 Row 2 Testdoc2 Table: Column 3 Row 2",
|
|
"type": "Table"
|
|
}
|
|
] |