mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-19 15:06:21 +00:00

Part two of: https://github.com/Unstructured-IO/unstructured/pull/2842 Main changes compared to part one: * hash computation includes element's sequence number on page, page number, document filename and its text * there are more test for deterministic behavior of IDs returned by partitioning functions + their uniqueness (guaranteed at the document level, and high probability across multiple documents) This PR addresses the following issue: https://github.com/Unstructured-IO/unstructured/issues/2461
222 lines
6.9 KiB
JSON
222 lines
6.9 KiB
JSON
[
|
|
{
|
|
"element_id": "7bfcdc3e216dc1d2e573b4efab455d18",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T13:38:35.057000+00:00",
|
|
"date_modified": "2023-08-24T13:39:02.055000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP3-1"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP3-1"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "IssueID_IssueKey:10014 JCTP3-1",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "9d9103d0314b4c9bb51a097653ae1d07",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T13:38:35.057000+00:00",
|
|
"date_modified": "2023-08-24T13:39:02.055000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP3-1"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP3-1"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "ProjectID_Key:JCTP3 Jira Connector Test Project 3 - Company Managed Project",
|
|
"type": "UncategorizedText"
|
|
},
|
|
{
|
|
"element_id": "29e3e8926b9e151df8fc78973d94b0ba",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T13:38:35.057000+00:00",
|
|
"date_modified": "2023-08-24T13:39:02.055000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP3-1"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP3-1"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "IssueType:Epic",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "15f297e77c283726bb77539728da8614",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T13:38:35.057000+00:00",
|
|
"date_modified": "2023-08-24T13:39:02.055000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP3-1"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP3-1"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Status:Backlog",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "df3875010dfdb0ea72f1af7c5c14650c",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T13:38:35.057000+00:00",
|
|
"date_modified": "2023-08-24T13:39:02.055000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP3-1"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP3-1"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Priority:{'self': 'https://unstructured-jira-connector-test.atlassian.net/rest/api/2/priority/3', 'iconUrl': 'https://unstructured-jira-connector-test.atlassian.net/images/icons/priorities/medium.svg', 'name': 'Medium', 'id': '3'}",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "53c212837b2e9aa5b69b4e4c94b54889",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T13:38:35.057000+00:00",
|
|
"date_modified": "2023-08-24T13:39:02.055000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP3-1"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP3-1"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "AssigneeID_Name:712020:7bc7fdcb-67e7-435d-b4a2-128aee12820c Unstructured Devops",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "a18ff655d0a8ba1b38022a949ed77f8b",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T13:38:35.057000+00:00",
|
|
"date_modified": "2023-08-24T13:39:02.055000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP3-1"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP3-1"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "ReporterAdr_Name:devops+jira-connector@unstructured.io Unstructured Devops",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "996a97c2df19e101c0aff95f12028fc5",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T13:38:35.057000+00:00",
|
|
"date_modified": "2023-08-24T13:39:02.055000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP3-1"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP3-1"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Labels:",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "33f1a531773e625dcb27e464cdc976a7",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T13:38:35.057000+00:00",
|
|
"date_modified": "2023-08-24T13:39:02.055000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP3-1"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP3-1"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Components:Component 1",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "7f2076f66f218e7fbd8120f84075d9cf",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T13:38:35.057000+00:00",
|
|
"date_modified": "2023-08-24T13:39:02.055000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP3-1"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP3-1"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Epic Summary 1",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "3c0409b9ad95c938a902f0dd8e070952",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T13:38:35.057000+00:00",
|
|
"date_modified": "2023-08-24T13:39:02.055000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP3-1"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP3-1"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "{}",
|
|
"type": "UncategorizedText"
|
|
}
|
|
] |