mirror of
				https://github.com/Unstructured-IO/unstructured.git
				synced 2025-10-31 18:14:51 +00:00 
			
		
		
		
	 ef5091f276
			
		
	
	
		ef5091f276
		
			
		
	
	
	
	
		
			
			* added UUID option for element_id arg in element constructor and updated unit tests * updated CHANGELOG and bumped to dev2
		
			
				
	
	
		
			53 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			53 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import uuid
 | |
| from functools import partial
 | |
| 
 | |
| import pytest
 | |
| 
 | |
| from unstructured.cleaners.core import clean_prefix
 | |
| from unstructured.cleaners.translate import translate_text
 | |
| from unstructured.documents.email_elements import UUID, EmailElement, Name, NoID
 | |
| 
 | |
| 
 | |
def test_text_id():
    """A Name built without an explicit element_id exposes the expected fixed id."""
    expected_id = "c69509590d81db2f37f9d75480c8efed"
    element = Name(name="Example", text="hello there!")
    assert element.id == expected_id
 | |
| 
 | |
| 
 | |
def test_text_uuid():
    """Passing UUID() as element_id gives the element an id that is a uuid.UUID."""
    uuid_marker = UUID()
    element = Name(name="Example", text="hello there!", element_id=uuid_marker)
    assigned_id = element.id
    assert isinstance(assigned_id, uuid.UUID)
 | |
| 
 | |
| 
 | |
def test_element_defaults_to_blank_id():
    """An EmailElement constructed with no arguments has a NoID instance as its id."""
    default_id = EmailElement().id
    assert isinstance(default_id, NoID)
 | |
| 
 | |
| 
 | |
def test_text_element_apply_cleaners():
    """Applying one prefix cleaner strips the bracketed number from the rendered string."""
    # Cleaner under test: clean_prefix bound to a "[N]" / "[NN]" pattern.
    strip_bracketed_number = partial(clean_prefix, pattern=r"\[\d{1,2}\]")
    element = Name(name="[2] Example docs", text="[1] A Textbook on Crocodile Habitats")

    element.apply(strip_bracketed_number)

    rendered = str(element)
    assert rendered == "Example docs: A Textbook on Crocodile Habitats"
 | |
| 
 | |
| 
 | |
def test_name_element_apply_multiple_cleaners():
    """Applying a prefix cleaner followed by a Russian translation yields the expected string."""
    source_text = "[1] A Textbook on Crocodile Habitats"
    element = Name(name=source_text, text=source_text)

    # Order matters to the expectation below: strip the "[1]" prefix first, then translate.
    strip_bracketed_number = partial(clean_prefix, pattern=r"\[\d{1,2}\]")
    to_russian = partial(translate_text, target_lang="ru")
    element.apply(strip_bracketed_number, to_russian)

    expected = "Учебник по крокодильным средам обитания: Учебник по крокодильным средам обитания"
    assert str(element) == expected
 | |
| 
 | |
| 
 | |
def test_apply_raises_if_func_does_not_produce_string():
    """apply() is expected to raise ValueError for a callable that returns the int 1."""

    def returns_int(s):
        # Deliberately ignores its input and returns a non-string value.
        return 1

    element = Name(name="Example docs", text="[1] A Textbook on Crocodile Habitats")
    with pytest.raises(ValueError):
        element.apply(returns_int)
 |