fix: handling for emails without datetimes (#724)

* add empty filetype

* add empty handling to partition

* changelog and version

* handling for when there is no datetime

* changelog and version
This commit is contained in:
Matt Robinson 2023-06-12 13:11:04 -04:00 committed by GitHub
parent b354e8eec6
commit 3f80301964
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 22 additions and 5 deletions

View File

@@ -1,3 +1,13 @@
## 0.7.5-dev0
### Enhancements
### Features
### Fixes
* Adds handling for emails that do not have a datetime to extract.
## 0.7.4
### Enhancements

View File

@@ -64,6 +64,10 @@ def test_extract_datetimetz():
)
def test_extract_datetimetz_works_with_no_date():
    """Text containing no datetime must produce ``None``."""
    extracted = extract.extract_datetimetz("NO DATE HERE")
    assert extracted is None
@pytest.mark.parametrize(
("text", "expected"),
[

View File

@@ -1 +1 @@
__version__ = "0.7.4" # pragma: no cover
__version__ = "0.7.5-dev0" # pragma: no cover

View File

@@ -1,6 +1,6 @@
import datetime
import re
from typing import List
from typing import List, Optional
from unstructured.nlp.patterns import (
EMAIL_ADDRESS_PATTERN,
@@ -75,9 +75,12 @@ def extract_mapi_id(text: str) -> List[str]:
return mapi_ids
def extract_datetimetz(text: str) -> datetime.datetime:
    """Parse the first ``EMAIL_DATETIMETZ_PATTERN`` match in ``text`` into a datetime.

    The matched string is parsed with the format ``"%a, %d %b %Y %H:%M:%S %z"``.

    NOTE: ``re.findall`` returns an empty list when nothing matches, so
    ``date_string[0]`` raises ``IndexError`` for text that contains no datetime.
    """
    date_string = re.findall(EMAIL_DATETIMETZ_PATTERN, text)
    return datetime.datetime.strptime(date_string[0], "%a, %d %b %Y %H:%M:%S %z")
def extract_datetimetz(text: str) -> Optional[datetime.datetime]:
    """Return the first datetime found in ``text``, or ``None`` if there is none.

    Every match of ``EMAIL_DATETIMETZ_PATTERN`` is collected and the first one
    is parsed with the format ``"%a, %d %b %Y %H:%M:%S %z"``.
    """
    matches = re.findall(EMAIL_DATETIMETZ_PATTERN, text)
    if not matches:
        # Nothing in the text matched the pattern, so there is no date to parse.
        return None
    return datetime.datetime.strptime(matches[0], "%a, %d %b %Y %H:%M:%S %z")
def extract_us_phone_number(text: str):