feat: allow extraction of camel cased element type names (#3938)

This PR allows element types with CamelCase names to be extracted
using the `extract_image_block_types` parameter.

Before: specifying `extract_image_block_types=["NarrativeText"]` (or any
casing of `NarrativeText`) would raise a warning that it doesn't match
any available type, and no image would be extracted for this element
type.

Now: specifying `extract_image_block_types=["NarrativeText"]` extracts
images for this element type.

## testing

```python
from unstructured.partition.auto import partition
f = "example-docs/pdf/embedded-images-tables.pdf"
elements = partition(f, strategy="hi_res", extract_image_block_types=["narrativetext"])
```

Without this PR, no figures are extracted. With this PR, a local
folder is created containing images of the narrative text elements,
at paths like `./figures/figure-1-1.jpg`.

---------

Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com>
This commit is contained in:
Yao You 2025-03-03 19:33:05 -06:00 committed by GitHub
parent 2addb19473
commit 43b682ad3f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 3496 additions and 60 deletions

View File

@ -1,7 +1,9 @@
## 0.16.24-dev1
## 0.16.24-dev2
### Enhancements
- **`extract_image_block_types` now also works for CamelCase element type names**. Previously, `NarrativeText` and similar CamelCase element types could not be extracted using this parameter in `partition`. Now figures for those elements can be extracted just like `Image` and `Table` elements.
### Features
### Fixes

View File

@ -231,6 +231,7 @@ def test_pad_bbox():
(["table", "image"], ["Table", "Image"]),
(["unknown"], ["Unknown"]),
(["Table", "image", "UnknOwn"], ["Table", "Image", "Unknown"]),
(["NarrativeText", "narrativetext"], ["NarrativeText", "NarrativeText"]),
],
)
def test_check_element_types_to_extract(input_types, expected):

View File

@ -24,6 +24,37 @@
"role": "reader",
"allowFileDiscovery": false
},
{
"id": "10619079449796831495",
"displayName": "fuse-team",
"type": "group",
"kind": "drive#permission",
"emailAddress": "fuse-team@unstructured.io",
"role": "writer",
"deleted": false
},
{
"id": "03887347926440898356",
"displayName": "michal.martyniak",
"type": "user",
"kind": "drive#permission",
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjWWmJCSvduWur55hl36IxwKs5FJ1FWMoK6KtFlNUHDBU-McvlI=s64",
"emailAddress": "michal.martyniak@unstructured.io",
"role": "writer",
"deleted": false,
"pendingOwner": false
},
{
"id": "13662041828528429192",
"displayName": "rob",
"type": "user",
"kind": "drive#permission",
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjV31Wb5kVmEk66cog1KN-N_twpHHoDttcCQ9pRvE1cz1-FLHQ=s64",
"emailAddress": "rob@unstructured.io",
"role": "writer",
"deleted": false,
"pendingOwner": false
},
{
"id": "18298851591250030956",
"displayName": "ingest@unstructured-ingest-test.iam.gserviceaccount.com",

View File

@ -24,6 +24,37 @@
"role": "reader",
"allowFileDiscovery": false
},
{
"id": "10619079449796831495",
"displayName": "fuse-team",
"type": "group",
"kind": "drive#permission",
"emailAddress": "fuse-team@unstructured.io",
"role": "writer",
"deleted": false
},
{
"id": "03887347926440898356",
"displayName": "michal.martyniak",
"type": "user",
"kind": "drive#permission",
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjWWmJCSvduWur55hl36IxwKs5FJ1FWMoK6KtFlNUHDBU-McvlI=s64",
"emailAddress": "michal.martyniak@unstructured.io",
"role": "writer",
"deleted": false,
"pendingOwner": false
},
{
"id": "13662041828528429192",
"displayName": "rob",
"type": "user",
"kind": "drive#permission",
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjV31Wb5kVmEk66cog1KN-N_twpHHoDttcCQ9pRvE1cz1-FLHQ=s64",
"emailAddress": "rob@unstructured.io",
"role": "writer",
"deleted": false,
"pendingOwner": false
},
{
"id": "18298851591250030956",
"displayName": "ingest@unstructured-ingest-test.iam.gserviceaccount.com",

View File

@ -29,6 +29,37 @@
"role": "reader",
"allowFileDiscovery": false
},
{
"id": "10619079449796831495",
"displayName": "fuse-team",
"type": "group",
"kind": "drive#permission",
"emailAddress": "fuse-team@unstructured.io",
"role": "writer",
"deleted": false
},
{
"id": "03887347926440898356",
"displayName": "michal.martyniak",
"type": "user",
"kind": "drive#permission",
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjWWmJCSvduWur55hl36IxwKs5FJ1FWMoK6KtFlNUHDBU-McvlI=s64",
"emailAddress": "michal.martyniak@unstructured.io",
"role": "writer",
"deleted": false,
"pendingOwner": false
},
{
"id": "13662041828528429192",
"displayName": "rob",
"type": "user",
"kind": "drive#permission",
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjV31Wb5kVmEk66cog1KN-N_twpHHoDttcCQ9pRvE1cz1-FLHQ=s64",
"emailAddress": "rob@unstructured.io",
"role": "writer",
"deleted": false,
"pendingOwner": false
},
{
"id": "18298851591250030956",
"displayName": "ingest@unstructured-ingest-test.iam.gserviceaccount.com",
@ -96,6 +127,37 @@
"role": "reader",
"allowFileDiscovery": false
},
{
"id": "10619079449796831495",
"displayName": "fuse-team",
"type": "group",
"kind": "drive#permission",
"emailAddress": "fuse-team@unstructured.io",
"role": "writer",
"deleted": false
},
{
"id": "03887347926440898356",
"displayName": "michal.martyniak",
"type": "user",
"kind": "drive#permission",
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjWWmJCSvduWur55hl36IxwKs5FJ1FWMoK6KtFlNUHDBU-McvlI=s64",
"emailAddress": "michal.martyniak@unstructured.io",
"role": "writer",
"deleted": false,
"pendingOwner": false
},
{
"id": "13662041828528429192",
"displayName": "rob",
"type": "user",
"kind": "drive#permission",
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjV31Wb5kVmEk66cog1KN-N_twpHHoDttcCQ9pRvE1cz1-FLHQ=s64",
"emailAddress": "rob@unstructured.io",
"role": "writer",
"deleted": false,
"pendingOwner": false
},
{
"id": "18298851591250030956",
"displayName": "ingest@unstructured-ingest-test.iam.gserviceaccount.com",

View File

@ -4,7 +4,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -14,7 +14,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "IssueID_IssueKey:10000 JCTP1-1",
@ -25,7 +26,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -35,7 +36,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "ProjectID_Key:JCTP1 Jira Connector Test Project 1",
@ -46,7 +48,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -56,18 +58,19 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "IssueType:Task",
"type": "Title"
},
{
"element_id": "a1a11be1a987330ca2f6c979a0d40eec",
"element_id": "d4f56c4a7b3b451828f77bf4be193b91",
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -77,10 +80,11 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "Status:In Progress",
"text": "Status:To Do",
"type": "Title"
},
{
@ -88,7 +92,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -98,7 +102,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "Priority:{'self': 'https://unstructured-jira-connector-test.atlassian.net/rest/api/2/priority/3', 'iconUrl': 'https://unstructured-jira-connector-test.atlassian.net/images/icons/priorities/medium.svg', 'name': 'Medium', 'id': '3'}",
@ -109,7 +114,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -119,7 +124,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "AssigneeID_Name:712020:7bc7fdcb-67e7-435d-b4a2-128aee12820c Unstructured Devops",
@ -130,7 +136,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -140,7 +146,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "ReporterAdr_Name:devops+jira-connector@unstructured.io Unstructured Devops",
@ -151,7 +158,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -161,7 +168,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "Labels:Label1 Label2",
@ -172,7 +180,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -182,7 +190,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "Components:",
@ -193,7 +202,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -203,7 +212,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "Unstructured Devops My comment 1 Unstructured Devops My attachment image lorem ipsum:",
@ -214,7 +224,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -224,7 +234,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "!image",
@ -235,7 +246,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -245,7 +256,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "20230823",
@ -256,7 +268,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -266,7 +278,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "143650.png|width=83.33333333333333%!",
@ -277,7 +290,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -287,7 +300,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "Test todo 1",
@ -298,7 +312,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -308,7 +322,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. [Nam quid possumus facere melius?|http://loripsum.net/] Ita relinquet duas, de quibus etiam atque etiam consideret. Quo modo autem philosophus loquitur? Quid est enim aliud esse versutum? His enim rebus detractis negat se reperire in asotorum vita quod reprehendat. Non est ista, inquam, Piso, magna dissensio. Duo Reges: constructio interrete. In eo enim positum est id, quod dicimus esse expetendum. Traditur, inquit, ab Epicuro ratio neglegendi doloris. Negat enim summo bono afferre incrementum diem. Aberat omnis dolor, qui si adesset, nec molliter ferret et tamen medicis plus quam philosophis uteretur.",
@ -319,7 +334,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -329,7 +344,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "Sedulo, inquam, faciam. Ergo, si semel tristior effectus est, hilara vita amissa est? Quamquam tu hanc copiosiorem etiam soles dicere. An eum locum libenter invisit, ubi Demosthenes et Aeschines inter se decertare soliti sunt? _Quippe: habes enim a rhetoribus;_ Non minor, inquit, voluptas percipitur ex vilissimis rebus quam ex pretiosissimis. Ut in geometria, prima si dederis, danda sunt omnia. Negat enim summo bono afferre incrementum diem.",
@ -340,7 +356,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -350,7 +366,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "[Et nemo nimium beatus est;|http://loripsum.net/] Nam et complectitur verbis, quod vult, et dicit plane, quod intellegam; Ab his oratores, ab his imperatores ac rerum publicarum principes extiterunt. Ergo adhuc, quantum equidem intellego, causa non videtur fuisse mutandi nominis. Quis enim redargueret? Ita fit cum gravior, tum etiam splendidior oratio. Sed ut iis bonis erigimur, quae expectamus, sic laetamur iis, quae recordamur. _Bork_ Tubulum fuisse, qua illum, cuius is condemnatus est rogatione, P. [Eiuro, inquit adridens, iniquum, hac quidem de re;|http://loripsum.net/] Si quae forte-possumus.",
@ -361,7 +378,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -371,7 +388,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "https://unstructured",
@ -382,7 +400,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -392,7 +410,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "jira",
@ -403,7 +422,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -413,7 +432,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "connector",
@ -424,7 +444,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:29:37.774000+00:00",
"date_modified": "2023-08-24T12:05:04.690000+00:00",
"date_modified": "2025-02-21T13:25:46.017000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP1-1"
@ -434,7 +454,8 @@
"filetype": "text/plain",
"languages": [
"cat",
"eng"
"eng",
"fra"
]
},
"text": "test.atlassian.net/rest/api/2/attachment/10000",

View File

@ -4,7 +4,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:35:25.467000+00:00",
"date_modified": "2023-08-22T11:35:30.285000+00:00",
"date_modified": "2025-02-24T10:44:56.243000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP2-7"
@ -24,7 +24,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:35:25.467000+00:00",
"date_modified": "2023-08-22T11:35:30.285000+00:00",
"date_modified": "2025-02-24T10:44:56.243000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP2-7"
@ -44,7 +44,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:35:25.467000+00:00",
"date_modified": "2023-08-22T11:35:30.285000+00:00",
"date_modified": "2025-02-24T10:44:56.243000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP2-7"
@ -64,7 +64,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:35:25.467000+00:00",
"date_modified": "2023-08-22T11:35:30.285000+00:00",
"date_modified": "2025-02-24T10:44:56.243000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP2-7"
@ -84,7 +84,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:35:25.467000+00:00",
"date_modified": "2023-08-22T11:35:30.285000+00:00",
"date_modified": "2025-02-24T10:44:56.243000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP2-7"
@ -104,7 +104,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:35:25.467000+00:00",
"date_modified": "2023-08-22T11:35:30.285000+00:00",
"date_modified": "2025-02-24T10:44:56.243000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP2-7"
@ -124,7 +124,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:35:25.467000+00:00",
"date_modified": "2023-08-22T11:35:30.285000+00:00",
"date_modified": "2025-02-24T10:44:56.243000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP2-7"
@ -144,7 +144,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:35:25.467000+00:00",
"date_modified": "2023-08-22T11:35:30.285000+00:00",
"date_modified": "2025-02-24T10:44:56.243000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP2-7"
@ -164,7 +164,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:35:25.467000+00:00",
"date_modified": "2023-08-22T11:35:30.285000+00:00",
"date_modified": "2025-02-24T10:44:56.243000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP2-7"
@ -184,7 +184,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:35:25.467000+00:00",
"date_modified": "2023-08-22T11:35:30.285000+00:00",
"date_modified": "2025-02-24T10:44:56.243000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP2-7"
@ -204,7 +204,7 @@
"metadata": {
"data_source": {
"date_created": "2023-08-22T11:35:25.467000+00:00",
"date_modified": "2023-08-22T11:35:30.285000+00:00",
"date_modified": "2025-02-24T10:44:56.243000+00:00",
"record_locator": {
"base_url": "https://unstructured-jira-connector-test.atlassian.net",
"issue_key": "JCTP2-7"

View File

@ -1 +1 @@
__version__ = "0.16.24-dev1" # pragma: no cover
__version__ = "0.16.24-dev2" # pragma: no cover

View File

@ -239,11 +239,13 @@ def check_element_types_to_extract(
"ex. ['Table', 'Image']",
)
available_element_types = list(ElementType.to_dict().values())
available_element_types = {e_type.lower(): e_type for e_type in ElementType.to_dict().values()}
normalized_extract_image_block_types = []
for el_type in extract_image_block_types:
normalized_el_type = el_type.lower().capitalize()
if normalized_el_type not in available_element_types:
normalized_el_type = available_element_types.get(
el_type.lower(), el_type.lower().capitalize()
)
if normalized_el_type not in available_element_types.values():
logger.warning(f"The requested type ({el_type}) doesn't match any available type")
normalized_extract_image_block_types.append(normalized_el_type)