mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-09-25 08:19:22 +00:00
feat: allow extraction of camel cased element type names (#3938)
This PR allows element types with CamelCase names to be extracted using the `extract_image_block_types` parameter. Before: specifying `extract_image_block_types=["NarrativeText"]` (or any casing of `NarrativeText`) would raise a warning that it doesn't match any available type, and no image would be extracted for this element type. Now: specifying `extract_image_block_types=["NarrativeText"]` extracts images for this element type. ## testing ```python from unstructured.partition.auto import partition f = "example-docs/pdf/embedded-images-tables.pdf" elements = partition(f, strategy="hi_res", extract_image_block_types=["narrativetext"]) ``` Without this PR no figures would be extracted. With this PR a local folder is created to contain images of the narrative text elements, at paths like `./figures/figure-1-1.jpg`. --------- Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com>
This commit is contained in:
parent
2addb19473
commit
43b682ad3f
@ -1,7 +1,9 @@
|
||||
## 0.16.24-dev1
|
||||
## 0.16.24-dev2
|
||||
|
||||
### Enhancements
|
||||
|
||||
- **`extract_image_block_types` now also works for CamelCase element type names**. Previously, `NarrativeText` and similar CamelCase element types couldn't be extracted using this parameter in `partition`. Now figures for those elements can be extracted just like for `Image` and `Table` elements.
|
||||
|
||||
### Features
|
||||
|
||||
### Fixes
|
||||
|
@ -231,6 +231,7 @@ def test_pad_bbox():
|
||||
(["table", "image"], ["Table", "Image"]),
|
||||
(["unknown"], ["Unknown"]),
|
||||
(["Table", "image", "UnknOwn"], ["Table", "Image", "Unknown"]),
|
||||
(["NarrativeText", "narrativetext"], ["NarrativeText", "NarrativeText"]),
|
||||
],
|
||||
)
|
||||
def test_check_element_types_to_extract(input_types, expected):
|
||||
|
@ -24,6 +24,37 @@
|
||||
"role": "reader",
|
||||
"allowFileDiscovery": false
|
||||
},
|
||||
{
|
||||
"id": "10619079449796831495",
|
||||
"displayName": "fuse-team",
|
||||
"type": "group",
|
||||
"kind": "drive#permission",
|
||||
"emailAddress": "fuse-team@unstructured.io",
|
||||
"role": "writer",
|
||||
"deleted": false
|
||||
},
|
||||
{
|
||||
"id": "03887347926440898356",
|
||||
"displayName": "michal.martyniak",
|
||||
"type": "user",
|
||||
"kind": "drive#permission",
|
||||
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjWWmJCSvduWur55hl36IxwKs5FJ1FWMoK6KtFlNUHDBU-McvlI=s64",
|
||||
"emailAddress": "michal.martyniak@unstructured.io",
|
||||
"role": "writer",
|
||||
"deleted": false,
|
||||
"pendingOwner": false
|
||||
},
|
||||
{
|
||||
"id": "13662041828528429192",
|
||||
"displayName": "rob",
|
||||
"type": "user",
|
||||
"kind": "drive#permission",
|
||||
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjV31Wb5kVmEk66cog1KN-N_twpHHoDttcCQ9pRvE1cz1-FLHQ=s64",
|
||||
"emailAddress": "rob@unstructured.io",
|
||||
"role": "writer",
|
||||
"deleted": false,
|
||||
"pendingOwner": false
|
||||
},
|
||||
{
|
||||
"id": "18298851591250030956",
|
||||
"displayName": "ingest@unstructured-ingest-test.iam.gserviceaccount.com",
|
||||
|
@ -24,6 +24,37 @@
|
||||
"role": "reader",
|
||||
"allowFileDiscovery": false
|
||||
},
|
||||
{
|
||||
"id": "10619079449796831495",
|
||||
"displayName": "fuse-team",
|
||||
"type": "group",
|
||||
"kind": "drive#permission",
|
||||
"emailAddress": "fuse-team@unstructured.io",
|
||||
"role": "writer",
|
||||
"deleted": false
|
||||
},
|
||||
{
|
||||
"id": "03887347926440898356",
|
||||
"displayName": "michal.martyniak",
|
||||
"type": "user",
|
||||
"kind": "drive#permission",
|
||||
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjWWmJCSvduWur55hl36IxwKs5FJ1FWMoK6KtFlNUHDBU-McvlI=s64",
|
||||
"emailAddress": "michal.martyniak@unstructured.io",
|
||||
"role": "writer",
|
||||
"deleted": false,
|
||||
"pendingOwner": false
|
||||
},
|
||||
{
|
||||
"id": "13662041828528429192",
|
||||
"displayName": "rob",
|
||||
"type": "user",
|
||||
"kind": "drive#permission",
|
||||
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjV31Wb5kVmEk66cog1KN-N_twpHHoDttcCQ9pRvE1cz1-FLHQ=s64",
|
||||
"emailAddress": "rob@unstructured.io",
|
||||
"role": "writer",
|
||||
"deleted": false,
|
||||
"pendingOwner": false
|
||||
},
|
||||
{
|
||||
"id": "18298851591250030956",
|
||||
"displayName": "ingest@unstructured-ingest-test.iam.gserviceaccount.com",
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -29,6 +29,37 @@
|
||||
"role": "reader",
|
||||
"allowFileDiscovery": false
|
||||
},
|
||||
{
|
||||
"id": "10619079449796831495",
|
||||
"displayName": "fuse-team",
|
||||
"type": "group",
|
||||
"kind": "drive#permission",
|
||||
"emailAddress": "fuse-team@unstructured.io",
|
||||
"role": "writer",
|
||||
"deleted": false
|
||||
},
|
||||
{
|
||||
"id": "03887347926440898356",
|
||||
"displayName": "michal.martyniak",
|
||||
"type": "user",
|
||||
"kind": "drive#permission",
|
||||
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjWWmJCSvduWur55hl36IxwKs5FJ1FWMoK6KtFlNUHDBU-McvlI=s64",
|
||||
"emailAddress": "michal.martyniak@unstructured.io",
|
||||
"role": "writer",
|
||||
"deleted": false,
|
||||
"pendingOwner": false
|
||||
},
|
||||
{
|
||||
"id": "13662041828528429192",
|
||||
"displayName": "rob",
|
||||
"type": "user",
|
||||
"kind": "drive#permission",
|
||||
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjV31Wb5kVmEk66cog1KN-N_twpHHoDttcCQ9pRvE1cz1-FLHQ=s64",
|
||||
"emailAddress": "rob@unstructured.io",
|
||||
"role": "writer",
|
||||
"deleted": false,
|
||||
"pendingOwner": false
|
||||
},
|
||||
{
|
||||
"id": "18298851591250030956",
|
||||
"displayName": "ingest@unstructured-ingest-test.iam.gserviceaccount.com",
|
||||
@ -96,6 +127,37 @@
|
||||
"role": "reader",
|
||||
"allowFileDiscovery": false
|
||||
},
|
||||
{
|
||||
"id": "10619079449796831495",
|
||||
"displayName": "fuse-team",
|
||||
"type": "group",
|
||||
"kind": "drive#permission",
|
||||
"emailAddress": "fuse-team@unstructured.io",
|
||||
"role": "writer",
|
||||
"deleted": false
|
||||
},
|
||||
{
|
||||
"id": "03887347926440898356",
|
||||
"displayName": "michal.martyniak",
|
||||
"type": "user",
|
||||
"kind": "drive#permission",
|
||||
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjWWmJCSvduWur55hl36IxwKs5FJ1FWMoK6KtFlNUHDBU-McvlI=s64",
|
||||
"emailAddress": "michal.martyniak@unstructured.io",
|
||||
"role": "writer",
|
||||
"deleted": false,
|
||||
"pendingOwner": false
|
||||
},
|
||||
{
|
||||
"id": "13662041828528429192",
|
||||
"displayName": "rob",
|
||||
"type": "user",
|
||||
"kind": "drive#permission",
|
||||
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjV31Wb5kVmEk66cog1KN-N_twpHHoDttcCQ9pRvE1cz1-FLHQ=s64",
|
||||
"emailAddress": "rob@unstructured.io",
|
||||
"role": "writer",
|
||||
"deleted": false,
|
||||
"pendingOwner": false
|
||||
},
|
||||
{
|
||||
"id": "18298851591250030956",
|
||||
"displayName": "ingest@unstructured-ingest-test.iam.gserviceaccount.com",
|
||||
|
@ -4,7 +4,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -14,7 +14,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "IssueID_IssueKey:10000 JCTP1-1",
|
||||
@ -25,7 +26,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -35,7 +36,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "ProjectID_Key:JCTP1 Jira Connector Test Project 1",
|
||||
@ -46,7 +48,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -56,18 +58,19 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "IssueType:Task",
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
"element_id": "a1a11be1a987330ca2f6c979a0d40eec",
|
||||
"element_id": "d4f56c4a7b3b451828f77bf4be193b91",
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -77,10 +80,11 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "Status:In Progress",
|
||||
"text": "Status:To Do",
|
||||
"type": "Title"
|
||||
},
|
||||
{
|
||||
@ -88,7 +92,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -98,7 +102,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "Priority:{'self': 'https://unstructured-jira-connector-test.atlassian.net/rest/api/2/priority/3', 'iconUrl': 'https://unstructured-jira-connector-test.atlassian.net/images/icons/priorities/medium.svg', 'name': 'Medium', 'id': '3'}",
|
||||
@ -109,7 +114,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -119,7 +124,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "AssigneeID_Name:712020:7bc7fdcb-67e7-435d-b4a2-128aee12820c Unstructured Devops",
|
||||
@ -130,7 +136,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -140,7 +146,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "ReporterAdr_Name:devops+jira-connector@unstructured.io Unstructured Devops",
|
||||
@ -151,7 +158,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -161,7 +168,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "Labels:Label1 Label2",
|
||||
@ -172,7 +180,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -182,7 +190,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "Components:",
|
||||
@ -193,7 +202,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -203,7 +212,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "Unstructured Devops My comment 1 Unstructured Devops My attachment image lorem ipsum:",
|
||||
@ -214,7 +224,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -224,7 +234,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "!image",
|
||||
@ -235,7 +246,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -245,7 +256,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "20230823",
|
||||
@ -256,7 +268,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -266,7 +278,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "143650.png|width=83.33333333333333%!",
|
||||
@ -277,7 +290,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -287,7 +300,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "Test todo 1",
|
||||
@ -298,7 +312,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -308,7 +322,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. [Nam quid possumus facere melius?|http://loripsum.net/] Ita relinquet duas, de quibus etiam atque etiam consideret. Quo modo autem philosophus loquitur? Quid est enim aliud esse versutum? His enim rebus detractis negat se reperire in asotorum vita quod reprehendat. Non est ista, inquam, Piso, magna dissensio. Duo Reges: constructio interrete. In eo enim positum est id, quod dicimus esse expetendum. Traditur, inquit, ab Epicuro ratio neglegendi doloris. Negat enim summo bono afferre incrementum diem. Aberat omnis dolor, qui si adesset, nec molliter ferret et tamen medicis plus quam philosophis uteretur.",
|
||||
@ -319,7 +334,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -329,7 +344,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "Sedulo, inquam, faciam. Ergo, si semel tristior effectus est, hilara vita amissa est? Quamquam tu hanc copiosiorem etiam soles dicere. An eum locum libenter invisit, ubi Demosthenes et Aeschines inter se decertare soliti sunt? _Quippe: habes enim a rhetoribus;_ Non minor, inquit, voluptas percipitur ex vilissimis rebus quam ex pretiosissimis. Ut in geometria, prima si dederis, danda sunt omnia. Negat enim summo bono afferre incrementum diem.",
|
||||
@ -340,7 +356,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -350,7 +366,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "[Et nemo nimium beatus est;|http://loripsum.net/] Nam et complectitur verbis, quod vult, et dicit plane, quod intellegam; Ab his oratores, ab his imperatores ac rerum publicarum principes extiterunt. Ergo adhuc, quantum equidem intellego, causa non videtur fuisse mutandi nominis. Quis enim redargueret? Ita fit cum gravior, tum etiam splendidior oratio. Sed ut iis bonis erigimur, quae expectamus, sic laetamur iis, quae recordamur. _Bork_ Tubulum fuisse, qua illum, cuius is condemnatus est rogatione, P. [Eiuro, inquit adridens, iniquum, hac quidem de re;|http://loripsum.net/] Si quae forte-possumus.",
|
||||
@ -361,7 +378,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -371,7 +388,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "https://unstructured",
|
||||
@ -382,7 +400,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -392,7 +410,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "jira",
|
||||
@ -403,7 +422,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -413,7 +432,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "connector",
|
||||
@ -424,7 +444,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:29:37.774000+00:00",
|
||||
"date_modified": "2023-08-24T12:05:04.690000+00:00",
|
||||
"date_modified": "2025-02-21T13:25:46.017000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP1-1"
|
||||
@ -434,7 +454,8 @@
|
||||
"filetype": "text/plain",
|
||||
"languages": [
|
||||
"cat",
|
||||
"eng"
|
||||
"eng",
|
||||
"fra"
|
||||
]
|
||||
},
|
||||
"text": "test.atlassian.net/rest/api/2/attachment/10000",
|
||||
|
@ -4,7 +4,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:35:25.467000+00:00",
|
||||
"date_modified": "2023-08-22T11:35:30.285000+00:00",
|
||||
"date_modified": "2025-02-24T10:44:56.243000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP2-7"
|
||||
@ -24,7 +24,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:35:25.467000+00:00",
|
||||
"date_modified": "2023-08-22T11:35:30.285000+00:00",
|
||||
"date_modified": "2025-02-24T10:44:56.243000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP2-7"
|
||||
@ -44,7 +44,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:35:25.467000+00:00",
|
||||
"date_modified": "2023-08-22T11:35:30.285000+00:00",
|
||||
"date_modified": "2025-02-24T10:44:56.243000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP2-7"
|
||||
@ -64,7 +64,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:35:25.467000+00:00",
|
||||
"date_modified": "2023-08-22T11:35:30.285000+00:00",
|
||||
"date_modified": "2025-02-24T10:44:56.243000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP2-7"
|
||||
@ -84,7 +84,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:35:25.467000+00:00",
|
||||
"date_modified": "2023-08-22T11:35:30.285000+00:00",
|
||||
"date_modified": "2025-02-24T10:44:56.243000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP2-7"
|
||||
@ -104,7 +104,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:35:25.467000+00:00",
|
||||
"date_modified": "2023-08-22T11:35:30.285000+00:00",
|
||||
"date_modified": "2025-02-24T10:44:56.243000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP2-7"
|
||||
@ -124,7 +124,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:35:25.467000+00:00",
|
||||
"date_modified": "2023-08-22T11:35:30.285000+00:00",
|
||||
"date_modified": "2025-02-24T10:44:56.243000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP2-7"
|
||||
@ -144,7 +144,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:35:25.467000+00:00",
|
||||
"date_modified": "2023-08-22T11:35:30.285000+00:00",
|
||||
"date_modified": "2025-02-24T10:44:56.243000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP2-7"
|
||||
@ -164,7 +164,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:35:25.467000+00:00",
|
||||
"date_modified": "2023-08-22T11:35:30.285000+00:00",
|
||||
"date_modified": "2025-02-24T10:44:56.243000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP2-7"
|
||||
@ -184,7 +184,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:35:25.467000+00:00",
|
||||
"date_modified": "2023-08-22T11:35:30.285000+00:00",
|
||||
"date_modified": "2025-02-24T10:44:56.243000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP2-7"
|
||||
@ -204,7 +204,7 @@
|
||||
"metadata": {
|
||||
"data_source": {
|
||||
"date_created": "2023-08-22T11:35:25.467000+00:00",
|
||||
"date_modified": "2023-08-22T11:35:30.285000+00:00",
|
||||
"date_modified": "2025-02-24T10:44:56.243000+00:00",
|
||||
"record_locator": {
|
||||
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
|
||||
"issue_key": "JCTP2-7"
|
||||
|
@ -1 +1 @@
|
||||
__version__ = "0.16.24-dev1" # pragma: no cover
|
||||
__version__ = "0.16.24-dev2" # pragma: no cover
|
||||
|
@ -239,11 +239,13 @@ def check_element_types_to_extract(
|
||||
"ex. ['Table', 'Image']",
|
||||
)
|
||||
|
||||
available_element_types = list(ElementType.to_dict().values())
|
||||
available_element_types = {e_type.lower(): e_type for e_type in ElementType.to_dict().values()}
|
||||
normalized_extract_image_block_types = []
|
||||
for el_type in extract_image_block_types:
|
||||
normalized_el_type = el_type.lower().capitalize()
|
||||
if normalized_el_type not in available_element_types:
|
||||
normalized_el_type = available_element_types.get(
|
||||
el_type.lower(), el_type.lower().capitalize()
|
||||
)
|
||||
if normalized_el_type not in available_element_types.values():
|
||||
logger.warning(f"The requested type ({el_type}) doesn't match any available type")
|
||||
normalized_extract_image_block_types.append(normalized_el_type)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user