mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-11-08 14:39:27 +00:00
fix: handling for emails without datetimes (#724)
* add empty filetype
* add empty handling to partition
* changelog and version
* handling for when there is no datetime
* changelog and version
This commit is contained in:
parent
b354e8eec6
commit
3f80301964
10
CHANGELOG.md
10
CHANGELOG.md
@ -1,3 +1,13 @@
|
|||||||
|
## 0.7.5-dev0
|
||||||
|
|
||||||
|
### Enhancements
|
||||||
|
|
||||||
|
### Features
|
||||||
|
|
||||||
|
### Fixes
|
||||||
|
|
||||||
|
* Adds handling for emails that do not have a datetime to extract.
|
||||||
|
|
||||||
## 0.7.4
|
## 0.7.4
|
||||||
|
|
||||||
### Enhancements
|
### Enhancements
|
||||||
|
|||||||
@ -64,6 +64,10 @@ def test_extract_datetimetz():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_datetimetz_works_with_no_date():
|
||||||
|
assert extract.extract_datetimetz("NO DATE HERE") is None
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
("text", "expected"),
|
("text", "expected"),
|
||||||
[
|
[
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
__version__ = "0.7.4" # pragma: no cover
|
__version__ = "0.7.5-dev0" # pragma: no cover
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
import datetime
|
import datetime
|
||||||
import re
|
import re
|
||||||
from typing import List
|
from typing import List, Optional
|
||||||
|
|
||||||
from unstructured.nlp.patterns import (
|
from unstructured.nlp.patterns import (
|
||||||
EMAIL_ADDRESS_PATTERN,
|
EMAIL_ADDRESS_PATTERN,
|
||||||
@ -75,9 +75,12 @@ def extract_mapi_id(text: str) -> List[str]:
|
|||||||
return mapi_ids
|
return mapi_ids
|
||||||
|
|
||||||
|
|
||||||
def extract_datetimetz(text: str) -> datetime.datetime:
|
def extract_datetimetz(text: str) -> Optional[datetime.datetime]:
|
||||||
date_string = re.findall(EMAIL_DATETIMETZ_PATTERN, text)
|
date_extractions = re.findall(EMAIL_DATETIMETZ_PATTERN, text)
|
||||||
return datetime.datetime.strptime(date_string[0], "%a, %d %b %Y %H:%M:%S %z")
|
if len(date_extractions) > 0:
|
||||||
|
return datetime.datetime.strptime(date_extractions[0], "%a, %d %b %Y %H:%M:%S %z")
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def extract_us_phone_number(text: str):
|
def extract_us_phone_number(text: str):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user