mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-12 11:35:53 +00:00

Part two of: https://github.com/Unstructured-IO/unstructured/pull/2842 Main changes compared to part one: * hash computation includes element's sequence number on page, page number, document filename and its text * there are more test for deterministic behavior of IDs returned by partitioning functions + their uniqueness (guaranteed at the document level, and high probability across multiple documents) This PR addresses the following issue: https://github.com/Unstructured-IO/unstructured/issues/2461
222 lines
6.8 KiB
JSON
222 lines
6.8 KiB
JSON
[
|
|
{
|
|
"element_id": "a7ec2ee1af7f114ff9e2a00b66b5ef55",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:35:48.407000+00:00",
|
|
"date_modified": "2023-09-29T05:55:11.066000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP2-8"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP2-8"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "IssueID_IssueKey:10010 JCTP2-8",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "9989f2fa4c34258ae238f13f1dd877e6",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:35:48.407000+00:00",
|
|
"date_modified": "2023-09-29T05:55:11.066000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP2-8"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP2-8"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "ProjectID_Key:JCTP2 Jira Connector Test Project 2",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "96b6d9962c05dc8115558f7dc7384dd1",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:35:48.407000+00:00",
|
|
"date_modified": "2023-09-29T05:55:11.066000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP2-8"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP2-8"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "IssueType:Story",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "e1de451a9a2af1fb582036b631f9d830",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:35:48.407000+00:00",
|
|
"date_modified": "2023-09-29T05:55:11.066000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP2-8"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP2-8"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Status:To Do",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "5ab1f246aabc5fb85e209645f6d09bae",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:35:48.407000+00:00",
|
|
"date_modified": "2023-09-29T05:55:11.066000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP2-8"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP2-8"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Priority:{'self': 'https://unstructured-jira-connector-test.atlassian.net/rest/api/2/priority/3', 'iconUrl': 'https://unstructured-jira-connector-test.atlassian.net/images/icons/priorities/medium.svg', 'name': 'Medium', 'id': '3'}",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "8e58807bf939580d4b7888414ea28b4b",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:35:48.407000+00:00",
|
|
"date_modified": "2023-09-29T05:55:11.066000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP2-8"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP2-8"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "AssigneeID_Name:{} {}",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "709467dfca0d665e9a81af2bf38db803",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:35:48.407000+00:00",
|
|
"date_modified": "2023-09-29T05:55:11.066000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP2-8"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP2-8"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "ReporterAdr_Name:devops+jira-connector@unstructured.io Unstructured Devops",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "4619102ac43045d2e715359456ce66d6",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:35:48.407000+00:00",
|
|
"date_modified": "2023-09-29T05:55:11.066000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP2-8"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP2-8"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Labels:",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "37f7917e6e4c3e4dbefe4440dd9e06a3",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:35:48.407000+00:00",
|
|
"date_modified": "2023-09-29T05:55:11.066000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP2-8"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP2-8"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Components:",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "5094f5369052df365e73f7d0ff98eac8",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:35:48.407000+00:00",
|
|
"date_modified": "2023-09-29T05:55:11.066000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP2-8"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP2-8"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Test Epic 1 Story 1",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "6cedd793183c3dc9bdfbe4b4b3e8bf63",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:35:48.407000+00:00",
|
|
"date_modified": "2023-09-29T05:55:11.066000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP2-8"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP2-8"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "{}",
|
|
"type": "UncategorizedText"
|
|
}
|
|
] |