mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-08 17:46:54 +00:00

Part two of: https://github.com/Unstructured-IO/unstructured/pull/2842 Main changes compared to part one: * hash computation includes element's sequence number on page, page number, document filename and its text * there are more test for deterministic behavior of IDs returned by partitioning functions + their uniqueness (guaranteed at the document level, and high probability across multiple documents) This PR addresses the following issue: https://github.com/Unstructured-IO/unstructured/issues/2461
310 lines
10 KiB
JSON
310 lines
10 KiB
JSON
[
|
|
{
|
|
"element_id": "8bb27565515a5fc96b49682bd7cff1e2",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:29:43.033000+00:00",
|
|
"date_modified": "2023-08-24T12:03:31.591000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-2"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-2"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"fra",
|
|
"cat",
|
|
"ita"
|
|
]
|
|
},
|
|
"text": "IssueID_IssueKey:10001 JCTP1-2",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "a9bd78b873bae6ac64df530a7cffa847",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:29:43.033000+00:00",
|
|
"date_modified": "2023-08-24T12:03:31.591000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-2"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-2"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"fra",
|
|
"cat",
|
|
"ita"
|
|
]
|
|
},
|
|
"text": "ProjectID_Key:JCTP1 Jira Connector Test Project 1",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "7e4b9f5cca704edcb437d564006f0672",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:29:43.033000+00:00",
|
|
"date_modified": "2023-08-24T12:03:31.591000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-2"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-2"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"fra",
|
|
"cat",
|
|
"ita"
|
|
]
|
|
},
|
|
"text": "IssueType:Task",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "e0db0f809761aa86049061a174b6c218",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:29:43.033000+00:00",
|
|
"date_modified": "2023-08-24T12:03:31.591000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-2"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-2"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"fra",
|
|
"cat",
|
|
"ita"
|
|
]
|
|
},
|
|
"text": "Status:In Progress",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "a2302cb75b3b9ab7e7a6409c1e36f1b3",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:29:43.033000+00:00",
|
|
"date_modified": "2023-08-24T12:03:31.591000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-2"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-2"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"fra",
|
|
"cat",
|
|
"ita"
|
|
]
|
|
},
|
|
"text": "Priority:{'self': 'https://unstructured-jira-connector-test.atlassian.net/rest/api/2/priority/3', 'iconUrl': 'https://unstructured-jira-connector-test.atlassian.net/images/icons/priorities/medium.svg', 'name': 'Medium', 'id': '3'}",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "f83f7034bb3a16046b21327ba819ead1",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:29:43.033000+00:00",
|
|
"date_modified": "2023-08-24T12:03:31.591000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-2"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-2"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"fra",
|
|
"cat",
|
|
"ita"
|
|
]
|
|
},
|
|
"text": "AssigneeID_Name:{} {}",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "1fdb66948435c930ceacb65e0e2459bc",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:29:43.033000+00:00",
|
|
"date_modified": "2023-08-24T12:03:31.591000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-2"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-2"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"fra",
|
|
"cat",
|
|
"ita"
|
|
]
|
|
},
|
|
"text": "ReporterAdr_Name:devops+jira-connector@unstructured.io Unstructured Devops",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "93c8d0267249689d76efb5a9b394e809",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:29:43.033000+00:00",
|
|
"date_modified": "2023-08-24T12:03:31.591000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-2"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-2"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"fra",
|
|
"cat",
|
|
"ita"
|
|
]
|
|
},
|
|
"text": "Labels:",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "3a205f821288f860556b1acdcc372a87",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:29:43.033000+00:00",
|
|
"date_modified": "2023-08-24T12:03:31.591000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-2"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-2"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"fra",
|
|
"cat",
|
|
"ita"
|
|
]
|
|
},
|
|
"text": "Components:",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "e81b58a64832db53a5488054d11935af",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:29:43.033000+00:00",
|
|
"date_modified": "2023-08-24T12:03:31.591000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-2"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-2"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"fra",
|
|
"cat",
|
|
"ita"
|
|
]
|
|
},
|
|
"text": "Unstructured Devops My comment 1",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "1bd3a8edcb04f3f959278fa5739a8516",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:29:43.033000+00:00",
|
|
"date_modified": "2023-08-24T12:03:31.591000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-2"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-2"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"fra",
|
|
"cat",
|
|
"ita"
|
|
]
|
|
},
|
|
"text": "Test in progress 1",
|
|
"type": "Title"
|
|
},
|
|
{
|
|
"element_id": "995db311c130ad79db9deaf9b5eac361",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:29:43.033000+00:00",
|
|
"date_modified": "2023-08-24T12:03:31.591000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-2"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-2"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"fra",
|
|
"cat",
|
|
"ita"
|
|
]
|
|
},
|
|
"text": "Cur deinde Metrodori liberos commendas? Quicquid porro animo cernimus, id omne oritur a sensibus; Ergo ita: non posse honeste vivi, nisi honeste vivatur? [Quis Aristidem non mortuum diligit?|http://loripsum.net/] Quid, de quo nulla dissensio est? [Bork|http://loripsum.net/]",
|
|
"type": "NarrativeText"
|
|
},
|
|
{
|
|
"element_id": "b012df8eb64d2a25fa58803cd43c0a0a",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:29:43.033000+00:00",
|
|
"date_modified": "2023-08-24T12:03:31.591000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-2"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-2"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"fra",
|
|
"cat",
|
|
"ita"
|
|
]
|
|
},
|
|
"text": "Atqui pugnantibus et contrariis studiis consiliisque semper utens nihil quieti videre, nihil tranquilli potest. Habes, inquam, Cato, formam eorum, de quibus loquor, philosophorum. [Ecce aliud simile dissimile.|http://loripsum.net/] Ad quorum et cognitionem et usum iam corroborati natura ipsa praeeunte deducimur. Itaque et manendi in vita et migrandi ratio omnis iis rebus, quas supra dixi, metienda. Quicquid enim a sapientia proficiscitur, id continuo debet expletum esse omnibus suis partibus; At ille non pertimuit saneque fidenter: Istis quidem ipsis verbis, inquit; Utrum igitur tibi litteram videor an totas paginas commovere?",
|
|
"type": "NarrativeText"
|
|
},
|
|
{
|
|
"element_id": "772033b76bcae6b73affe86b9ae514ea",
|
|
"metadata": {
|
|
"data_source": {
|
|
"date_created": "2023-08-22T11:29:43.033000+00:00",
|
|
"date_modified": "2023-08-24T12:03:31.591000+00:00",
|
|
"record_locator": {
|
|
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
|
"issue_key": "JCTP1-2"
|
|
},
|
|
"url": "https://unstructured-jira-connector-test.atlassian.net/browse/JCTP1-2"
|
|
},
|
|
"filetype": "text/plain",
|
|
"languages": [
|
|
"fra",
|
|
"cat",
|
|
"ita"
|
|
]
|
|
},
|
|
"text": "Duo Reges: constructio interrete. Nunc omni virtuti vitium contrario nomine opponitur. Non autem hoc: igitur ne illud quidem. Ergo hoc quidem apparet, nos ad agendum esse natos. Quo tandem modo? Quod si ita se habeat, non possit beatam praestare vitam sapientia. Propter nos enim illam, non propter eam nosmet ipsos diligimus.",
|
|
"type": "NarrativeText"
|
|
}
|
|
] |