mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-19 07:02:38 +00:00

### Description * If the contents of a doc were updated by the process of reading/downloading it, this was not being persisted. To fix this, the data being passed around was updated to use a multiprocessing safe dict rather than the json string. Now that dict is updated after the `get_file` method is called. * Wikipedia connector was updated to use a static filename rather than one requiring a call to fetch data. * The read config param `re_download` was not being leveraged by the source node, this was fixed. * Added fix: chunking and embedding order reversed so chunking runs before embeddings --------- Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com> Co-authored-by: rbiseck3 <rbiseck3@users.noreply.github.com>
775 lines
23 KiB
JSON
775 lines
23 KiB
JSON
[
|
|
{
|
|
"type": "Title",
|
|
"element_id": "e7f29a76338c0f4d24e30d18e6f336ee",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "e7f29a76338c0f4d24e30d18e6f336ee",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "e7f29a76338c0f4d24e30d18e6f336ee",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "e7f29a76338c0f4d24e30d18e6f336ee",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "e7f29a76338c0f4d24e30d18e6f336ee",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "e7f29a76338c0f4d24e30d18e6f336ee",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "e7f29a76338c0f4d24e30d18e6f336ee",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "e7f29a76338c0f4d24e30d18e6f336ee",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "e7f29a76338c0f4d24e30d18e6f336ee",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "e7f29a76338c0f4d24e30d18e6f336ee",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "e7f29a76338c0f4d24e30d18e6f336ee",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "e7f29a76338c0f4d24e30d18e6f336ee",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "e7f29a76338c0f4d24e30d18e6f336ee",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "e7f29a76338c0f4d24e30d18e6f336ee",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2"
|
|
},
|
|
{
|
|
"type": "ListItem",
|
|
"element_id": "83cd16582f7c6143822c0954f7f00350",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 List Item 1Testdoc2 List Item 1 Nested Item ATestdoc2 List Item 1 Nested Item B"
|
|
},
|
|
{
|
|
"type": "ListItem",
|
|
"element_id": "4e97e8a96031986042c3bec526dd273f",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 List Item 2"
|
|
},
|
|
{
|
|
"type": "ListItem",
|
|
"element_id": "629e4dee8b4acc5b39782b4d012ab83c",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 List Item 3"
|
|
},
|
|
{
|
|
"type": "ListItem",
|
|
"element_id": "d177691364d515d6eaa385205a64664c",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 List Item 4"
|
|
},
|
|
{
|
|
"type": "ListItem",
|
|
"element_id": "f7a3306959cba883aca00f29ca138b6a",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 List Item 5"
|
|
},
|
|
{
|
|
"type": "NarrativeText",
|
|
"element_id": "3d15d9222ffb2770ceede5b6532e842a",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1,
|
|
"link_urls": [
|
|
"https://www.unstructured.io/"
|
|
],
|
|
"link_texts": [
|
|
"This is the link for unstructured . io."
|
|
]
|
|
},
|
|
"text": "This is the link for unstructured . io."
|
|
},
|
|
{
|
|
"type": "ListItem",
|
|
"element_id": "fcb6283714c2b7640835964291e59ebd",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1,
|
|
"emphasized_text_contents": [
|
|
"Testdoc2 Checklist Item 1"
|
|
],
|
|
"emphasized_text_tags": [
|
|
"span"
|
|
]
|
|
},
|
|
"text": "Testdoc2 Checklist Item 1"
|
|
},
|
|
{
|
|
"type": "ListItem",
|
|
"element_id": "899f082b917d5e6e380f15705db5923a",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1,
|
|
"emphasized_text_contents": [
|
|
"Testdoc2 Checklist Item 2 (checked)"
|
|
],
|
|
"emphasized_text_tags": [
|
|
"span"
|
|
]
|
|
},
|
|
"text": "Testdoc2 Checklist Item 2 (checked)"
|
|
},
|
|
{
|
|
"type": "ListItem",
|
|
"element_id": "07005b9f1774ceabf978b2cc8afb7183",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1,
|
|
"emphasized_text_contents": [
|
|
"Testdoc2 Checklist Item 3"
|
|
],
|
|
"emphasized_text_tags": [
|
|
"span"
|
|
]
|
|
},
|
|
"text": "Testdoc2 Checklist Item 3"
|
|
},
|
|
{
|
|
"type": "UncategorizedText",
|
|
"element_id": "86e0e9ce51725074a3dc41ed3f10c13c",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "😃 😃 😃 😃 😃 😃 😃 😃 😃 😃 😃 😃"
|
|
},
|
|
{
|
|
"type": "NarrativeText",
|
|
"element_id": "72fa9ecafbf0df76a3b307485ce4c98b",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1,
|
|
"emphasized_text_contents": [
|
|
"Testdoc2 bold text"
|
|
],
|
|
"emphasized_text_tags": [
|
|
"strong"
|
|
]
|
|
},
|
|
"text": "Testdoc2 bold text"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "552ed5322965c5e1e8b235b0373f2470",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1,
|
|
"emphasized_text_contents": [
|
|
"Testdoc2 italic text"
|
|
],
|
|
"emphasized_text_tags": [
|
|
"em"
|
|
]
|
|
},
|
|
"text": "Testdoc2 italic text"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "6e11265369ab068c620fc2a2b7b858f5",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 Heading 1 Sized Text"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "4da0847af03e9440530539d6da679e85",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 Heading 2 Sized Text"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "2003a49a54c45aae0cd7640f47ea3f41",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 Heading 3 Sized Text"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "ab09ddfa08278cd006e9ea2201c8989d",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 Heading 4 Sized Text"
|
|
},
|
|
{
|
|
"type": "Title",
|
|
"element_id": "4ad754b028bec050fbff5ce2f50c60d1",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1
|
|
},
|
|
"text": "Testdoc2 Heading 5 Sized Text"
|
|
},
|
|
{
|
|
"type": "Table",
|
|
"element_id": "a164cd72991a3856b7bbc6d52d8b04bf",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net/wiki/rest/api/content/1802252",
|
|
"version": "1",
|
|
"record_locator": {
|
|
"url": "https://unstructured-ingest-test.atlassian.net",
|
|
"page_id": "1802252"
|
|
},
|
|
"date_created": "2023-07-11T17:01:39.240000",
|
|
"date_modified": "2023-07-11T17:01:47.340000"
|
|
},
|
|
"filetype": "text/html",
|
|
"languages": [
|
|
"eng",
|
|
"fra"
|
|
],
|
|
"page_number": 1,
|
|
"text_as_html": "<table><br><tbody><br><tr><td>Testdoc2 Table: Column 1 Row 0</td><td>Testdoc2 Table: Column 2 Row 0</td><td>Testdoc2 Table: Column 3 Row 0</td></tr><br><tr><td>Testdoc2 Table: Column 1 Row 1</td><td>Testdoc2 Table: Column 2 Row 1</td><td>Testdoc2 Table: Column 3 Row 1</td></tr><br><tr><td>Testdoc2 Table: Column 1 Row 2</td><td>Testdoc2 Table: Column 2 Row 2</td><td>Testdoc2 Table: Column 3 Row 2</td></tr><br></tbody><br></table>"
|
|
},
|
|
"text": "Testdoc2 Table: Column 1 Row 0 Testdoc2 Table: Column 2 Row 0 Testdoc2 Table: Column 3 Row 0 Testdoc2 Table: Column 1 Row 1 Testdoc2 Table: Column 2 Row 1 Testdoc2 Table: Column 3 Row 1 Testdoc2 Table: Column 1 Row 2 Testdoc2 Table: Column 2 Row 2 Testdoc2 Table: Column 3 Row 2"
|
|
}
|
|
] |