mirror of
				https://github.com/Unstructured-IO/unstructured.git
				synced 2025-11-04 03:53:45 +00:00 
			
		
		
		
	fix: handling for emails without datetimes (#724)
* add empty filetype
* add empty handling to partition
* changelog and version
* handling for when there is no datetime
* changelog and version
This commit is contained in:
		
							parent
							
								
									b354e8eec6
								
							
						
					
					
						commit
						3f80301964
					
				
							
								
								
									
										10
									
								
								CHANGELOG.md
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								CHANGELOG.md
									
									
									
									
									
								
							@ -1,3 +1,13 @@
 | 
			
		||||
## 0.7.5-dev0
 | 
			
		||||
 | 
			
		||||
### Enhancements
 | 
			
		||||
 | 
			
		||||
### Features
 | 
			
		||||
 | 
			
		||||
### Fixes
 | 
			
		||||
 | 
			
		||||
* Adds handling for emails that do not have a datetime to extract.
 | 
			
		||||
 | 
			
		||||
## 0.7.4
 | 
			
		||||
 | 
			
		||||
### Enhancements
 | 
			
		||||
 | 
			
		||||
@ -64,6 +64,10 @@ def test_extract_datetimetz():
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_extract_datetimetz_works_with_no_date():
 | 
			
		||||
    assert extract.extract_datetimetz("NO DATE HERE") is None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    ("text", "expected"),
 | 
			
		||||
    [
 | 
			
		||||
 | 
			
		||||
@ -1 +1 @@
 | 
			
		||||
__version__ = "0.7.4"  # pragma: no cover
 | 
			
		||||
__version__ = "0.7.5-dev0"  # pragma: no cover
 | 
			
		||||
 | 
			
		||||
@ -1,6 +1,6 @@
 | 
			
		||||
import datetime
 | 
			
		||||
import re
 | 
			
		||||
from typing import List
 | 
			
		||||
from typing import List, Optional
 | 
			
		||||
 | 
			
		||||
from unstructured.nlp.patterns import (
 | 
			
		||||
    EMAIL_ADDRESS_PATTERN,
 | 
			
		||||
@ -75,9 +75,12 @@ def extract_mapi_id(text: str) -> List[str]:
 | 
			
		||||
    return mapi_ids
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def extract_datetimetz(text: str) -> datetime.datetime:
 | 
			
		||||
    date_string = re.findall(EMAIL_DATETIMETZ_PATTERN, text)
 | 
			
		||||
    return datetime.datetime.strptime(date_string[0], "%a, %d %b %Y %H:%M:%S %z")
 | 
			
		||||
def extract_datetimetz(text: str) -> Optional[datetime.datetime]:
 | 
			
		||||
    date_extractions = re.findall(EMAIL_DATETIMETZ_PATTERN, text)
 | 
			
		||||
    if len(date_extractions) > 0:
 | 
			
		||||
        return datetime.datetime.strptime(date_extractions[0], "%a, %d %b %Y %H:%M:%S %z")
 | 
			
		||||
    else:
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def extract_us_phone_number(text: str):
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user