Roman Isecke 9049e4e2be
feat/remove ingest code, use new dep for tests (#3595)
### Description
Alternative to https://github.com/Unstructured-IO/unstructured/pull/3572
but maintaining all ingest tests, running them by pulling in the latest
version of unstructured-ingest.

---------

Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com>
Co-authored-by: rbiseck3 <rbiseck3@users.noreply.github.com>
Co-authored-by: Christine Straub <christinemstraub@gmail.com>
Co-authored-by: christinestraub <christinestraub@users.noreply.github.com>
2024-10-15 10:01:34 -05:00

134 lines
4.0 KiB
JSON

[
{
"type": "NarrativeText",
"element_id": "fc01503614e0f12f585427cccf81cf86",
"text": "This is a test document to use for unit tests.",
"metadata": {
"languages": [
"eng"
],
"filetype": "text/plain",
"data_source": {
"url": "gs://utic-test-ingest-fixtures/nested-1/fake-text.txt",
"version": "CKyIrMaE0/8CEAE=",
"record_locator": {
"protocol": "gs",
"remote_file_path": "gs://utic-test-ingest-fixtures/",
"file_id": "utic-test-ingest-fixtures/nested-1/fake-text.txt/1687304893301804"
},
"date_created": "1687304893.303",
"date_modified": "1687304893.303"
}
}
},
{
"type": "Address",
"element_id": "d06d10c0722ac08a2488076a48e858d5",
"text": "Doylestown, PA 18901",
"metadata": {
"languages": [
"eng"
],
"filetype": "text/plain",
"data_source": {
"url": "gs://utic-test-ingest-fixtures/nested-1/fake-text.txt",
"version": "CKyIrMaE0/8CEAE=",
"record_locator": {
"protocol": "gs",
"remote_file_path": "gs://utic-test-ingest-fixtures/",
"file_id": "utic-test-ingest-fixtures/nested-1/fake-text.txt/1687304893301804"
},
"date_created": "1687304893.303",
"date_modified": "1687304893.303"
}
}
},
{
"type": "Title",
"element_id": "a190164de573571375ecf759a5027a3a",
"text": "Important points:",
"metadata": {
"languages": [
"eng"
],
"filetype": "text/plain",
"data_source": {
"url": "gs://utic-test-ingest-fixtures/nested-1/fake-text.txt",
"version": "CKyIrMaE0/8CEAE=",
"record_locator": {
"protocol": "gs",
"remote_file_path": "gs://utic-test-ingest-fixtures/",
"file_id": "utic-test-ingest-fixtures/nested-1/fake-text.txt/1687304893301804"
},
"date_created": "1687304893.303",
"date_modified": "1687304893.303"
}
}
},
{
"type": "ListItem",
"element_id": "64c58eb106608ad05424e47bbcdef7dc",
"text": "Hamburgers are delicious",
"metadata": {
"languages": [
"eng"
],
"filetype": "text/plain",
"data_source": {
"url": "gs://utic-test-ingest-fixtures/nested-1/fake-text.txt",
"version": "CKyIrMaE0/8CEAE=",
"record_locator": {
"protocol": "gs",
"remote_file_path": "gs://utic-test-ingest-fixtures/",
"file_id": "utic-test-ingest-fixtures/nested-1/fake-text.txt/1687304893301804"
},
"date_created": "1687304893.303",
"date_modified": "1687304893.303"
}
}
},
{
"type": "ListItem",
"element_id": "9210d7882755a60fc82272a0e93df94f",
"text": "Dogs are the best",
"metadata": {
"languages": [
"eng"
],
"filetype": "text/plain",
"data_source": {
"url": "gs://utic-test-ingest-fixtures/nested-1/fake-text.txt",
"version": "CKyIrMaE0/8CEAE=",
"record_locator": {
"protocol": "gs",
"remote_file_path": "gs://utic-test-ingest-fixtures/",
"file_id": "utic-test-ingest-fixtures/nested-1/fake-text.txt/1687304893301804"
},
"date_created": "1687304893.303",
"date_modified": "1687304893.303"
}
}
},
{
"type": "ListItem",
"element_id": "970dd4ac687529e2d2126e0a51cb7c27",
"text": "I love fuzzy blankets",
"metadata": {
"languages": [
"eng"
],
"filetype": "text/plain",
"data_source": {
"url": "gs://utic-test-ingest-fixtures/nested-1/fake-text.txt",
"version": "CKyIrMaE0/8CEAE=",
"record_locator": {
"protocol": "gs",
"remote_file_path": "gs://utic-test-ingest-fixtures/",
"file_id": "utic-test-ingest-fixtures/nested-1/fake-text.txt/1687304893301804"
},
"date_created": "1687304893.303",
"date_modified": "1687304893.303"
}
}
}
]