mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-11-13 00:47:32 +00:00
fix: handling for emails without datetimes (#724)
* add empty filetype
* add empty handling to partition
* changelog and version
* handling for when there is no datetime
* changelog and version
This commit is contained in:
parent
b354e8eec6
commit
3f80301964
10
CHANGELOG.md
10
CHANGELOG.md
@@ -1,3 +1,13 @@
|
||||
## 0.7.5-dev0
|
||||
|
||||
### Enhancements
|
||||
|
||||
### Features
|
||||
|
||||
### Fixes
|
||||
|
||||
* Adds handling for emails that do not have a datetime to extract.
|
||||
|
||||
## 0.7.4
|
||||
|
||||
### Enhancements
|
||||
|
||||
@@ -64,6 +64,10 @@ def test_extract_datetimetz():
|
||||
)
|
||||
|
||||
|
||||
def test_extract_datetimetz_works_with_no_date():
|
||||
assert extract.extract_datetimetz("NO DATE HERE") is None
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("text", "expected"),
|
||||
[
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = "0.7.4" # pragma: no cover
|
||||
__version__ = "0.7.5-dev0" # pragma: no cover
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import datetime
|
||||
import re
|
||||
from typing import List
|
||||
from typing import List, Optional
|
||||
|
||||
from unstructured.nlp.patterns import (
|
||||
EMAIL_ADDRESS_PATTERN,
|
||||
@@ -75,9 +75,12 @@ def extract_mapi_id(text: str) -> List[str]:
|
||||
return mapi_ids
|
||||
|
||||
|
||||
def extract_datetimetz(text: str) -> datetime.datetime:
|
||||
date_string = re.findall(EMAIL_DATETIMETZ_PATTERN, text)
|
||||
return datetime.datetime.strptime(date_string[0], "%a, %d %b %Y %H:%M:%S %z")
|
||||
def extract_datetimetz(text: str) -> Optional[datetime.datetime]:
|
||||
date_extractions = re.findall(EMAIL_DATETIMETZ_PATTERN, text)
|
||||
if len(date_extractions) > 0:
|
||||
return datetime.datetime.strptime(date_extractions[0], "%a, %d %b %Y %H:%M:%S %z")
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def extract_us_phone_number(text: str):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user