mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-09 01:55:55 +00:00

Part two of: https://github.com/Unstructured-IO/unstructured/pull/2842 Main changes compared to part one: * hash computation includes element's sequence number on page, page number, document filename and its text * there are more test for deterministic behavior of IDs returned by partitioning functions + their uniqueness (guaranteed at the document level, and high probability across multiple documents) This PR addresses the following issue: https://github.com/Unstructured-IO/unstructured/issues/2461
242 lines
7.5 KiB
JSON
242 lines
7.5 KiB
JSON
[
|
|
{
|
|
"element_id": "86b863df493bfe1a395588309b1ab645",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T12:04:35.328000+00:00",
|
|
"date_modified": "2023-08-24T12:04:47.543000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-4"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-4"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "IssueID_IssueKey:10013 JCTP1-4",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "f6259739e42defb34d8473ee8b3401f6",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T12:04:35.328000+00:00",
|
|
"date_modified": "2023-08-24T12:04:47.543000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-4"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-4"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "ProjectID_Key:JCTP1 Jira Connector Test Project 1",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "91aa3ff14a38d4bb194ab2d9387535ad",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T12:04:35.328000+00:00",
|
|
"date_modified": "2023-08-24T12:04:47.543000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-4"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-4"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "IssueType:Subtask",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "8664d519c8ace2f9573d0c6e717c23cc",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T12:04:35.328000+00:00",
|
|
"date_modified": "2023-08-24T12:04:47.543000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-4"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-4"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Status:To Do",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "4d3d6d44e4557c0134fd2cd3e8983034",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T12:04:35.328000+00:00",
|
|
"date_modified": "2023-08-24T12:04:47.543000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-4"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-4"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Priority:{'self': 'https://unstructured-jira-connector-test.atlassian.net/rest/api/2/priority/3', 'iconUrl': 'https://unstructured-jira-connector-test.atlassian.net/images/icons/priorities/medium.svg', 'name': 'Medium', 'id': '3'}",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "c3bb975235167773d7c38a0d984a73b0",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T12:04:35.328000+00:00",
|
|
"date_modified": "2023-08-24T12:04:47.543000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-4"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-4"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "AssigneeID_Name:{} {}",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "a3041f5689bd5679c836bcb9664aa175",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T12:04:35.328000+00:00",
|
|
"date_modified": "2023-08-24T12:04:47.543000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-4"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-4"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "ReporterAdr_Name:devops+jira-connector@unstructured.io Unstructured Devops",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "7fe2e4d345ccec1f879a55f7026e6145",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T12:04:35.328000+00:00",
|
|
"date_modified": "2023-08-24T12:04:47.543000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-4"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-4"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Labels:",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "fe96efda353a99b334b0ceb6e1589ce9",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T12:04:35.328000+00:00",
|
|
"date_modified": "2023-08-24T12:04:47.543000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-4"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-4"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Components:",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "5d25089f1fb81076d611c094dc149f7b",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T12:04:35.328000+00:00",
|
|
"date_modified": "2023-08-24T12:04:47.543000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-4"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-4"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "Unstructured Devops This is a Child Issue",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "9b5f42a9303d62ca994cb12f6c59c438",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T12:04:35.328000+00:00",
|
|
"date_modified": "2023-08-24T12:04:47.543000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-4"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-4"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "ChildIssue1",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "5b31a14b000fc0d60e306836fc3e1fc3",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-24T12:04:35.328000+00:00",
|
|
"date_modified": "2023-08-24T12:04:47.543000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-4"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-4"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"eng"
|
|
]
|
|
},
|
|
"text": "{}",
|
|
"type": "UncategorizedText"
|
|
}
|
|
] |