mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2026-01-06 12:21:30 +00:00
### Summary Closes #1534 and #1535 Detects document language using `langdetect` package. Creates new kwargs for user to set the document language (`languages`) or detect the language at the element level instead of the default document level (`detect_language_per_element`) --------- Co-authored-by: shreyanid <42684285+shreyanid@users.noreply.github.com> Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com> Co-authored-by: Coniferish <Coniferish@users.noreply.github.com> Co-authored-by: cragwolfe <crag@unstructured.io> Co-authored-by: Austin Walker <austin@unstructured.io>
32 lines
1.2 KiB
JSON
32 lines
1.2 KiB
JSON
[
|
|
{
|
|
"type": "NarrativeText",
|
|
"element_id": "007ec3bff83ee17497e490b86a36e0dd",
|
|
"metadata": {
|
|
"data_source": {
|
|
"url": "https://outlook.office365.com/owa/?ItemID=AAMkAGE2MmEwNzFlLWVjYzAtNDNhZS04ZGM1LTFjYmMzZDhiMmI0MABGAAAAAADc1MfJYetSQ6QZntYrI9k4BwDZYn%2FlfnvLSqIcW%2FYsN8ebAAATzq5sAADZYn%2FlfnvLSqIcW%2FYsN8ebAAATzrolAAA%3D&exvsurl=1&viewmodel=ReadMessageItem",
|
|
"version": "CQAAABYAAADZYn/lfnvLSqIcW/YsN8ebAAATxicu",
|
|
"record_locator": {
|
|
"message_id": "AAMkAGE2MmEwNzFlLWVjYzAtNDNhZS04ZGM1LTFjYmMzZDhiMmI0MABGAAAAAADc1MfJYetSQ6QZntYrI9k4BwDZYn-lfnvLSqIcW-YsN8ebAAATzq5sAADZYn-lfnvLSqIcW-YsN8ebAAATzrolAAA=",
|
|
"user_email": "devops@unstructuredio.onmicrosoft.com"
|
|
},
|
|
"date_created": "2023-07-10T03:39:04",
|
|
"date_modified": "2023-07-15T22:36:12"
|
|
},
|
|
"filename": "4a16a411f162ebbb.eml",
|
|
"last_modified": "2023-07-09T20:38:47-07:00",
|
|
"filetype": "message/rfc822",
|
|
"languages": [
|
|
"eng"
|
|
],
|
|
"sent_from": [
|
|
"David Potter <potterdavidm@gmail.com>"
|
|
],
|
|
"sent_to": [
|
|
"devops@unstructuredio.onmicrosoft.com"
|
|
],
|
|
"subject": "message for subfolder"
|
|
},
|
|
"text": "this is a message for the subfolder"
|
|
}
|
|
] |