| 
									
										
										
										
											2022-06-29 14:35:19 -04:00
										 |  |  | import pytest | 
					
						
							| 
									
										
										
										
											2023-02-27 17:30:54 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-06-29 14:35:19 -04:00
										 |  |  | from unstructured.documents.base import Document, Page | 
					
						
							|  |  |  | from unstructured.documents.elements import NarrativeText, Title | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class MockDocument(Document): | 
					
						
							| 
									
										
										
										
											2022-11-14 13:34:42 -06:00
										 |  |  |     def __init__(self): | 
					
						
							|  |  |  |         super().__init__() | 
					
						
							| 
									
										
										
										
											2022-06-29 14:35:19 -04:00
										 |  |  |         elements = [ | 
					
						
							|  |  |  |             Title(text="This is a narrative."), | 
					
						
							|  |  |  |             NarrativeText(text="This is a narrative."), | 
					
						
							|  |  |  |             NarrativeText(text="This is a narrative."), | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  |         page = Page(number=0) | 
					
						
							|  |  |  |         page.elements = elements | 
					
						
							| 
									
										
										
										
											2022-11-14 13:34:42 -06:00
										 |  |  |         self._pages = [page] | 
					
						
							| 
									
										
										
										
											2022-06-29 14:35:19 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_get_narrative(): | 
					
						
							|  |  |  |     document = MockDocument() | 
					
						
							|  |  |  |     narrative = document.get_narrative() | 
					
						
							|  |  |  |     for element in narrative: | 
					
						
							|  |  |  |         assert isinstance(element, NarrativeText) | 
					
						
							|  |  |  |     document.print_narrative() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @pytest.mark.parametrize("index", [0, 1, 2]) | 
					
						
							|  |  |  | def test_split(index): | 
					
						
							|  |  |  |     document = MockDocument() | 
					
						
							|  |  |  |     elements = document.pages[0].elements | 
					
						
							|  |  |  |     split_before_doc = document.before_element(elements[index]) | 
					
						
							|  |  |  |     before_elements = split_before_doc.pages[0].elements if split_before_doc.pages else [] | 
					
						
							|  |  |  |     split_after_doc = document.after_element(elements[index]) | 
					
						
							|  |  |  |     after_elements = split_after_doc.pages[0].elements if split_after_doc.pages else [] | 
					
						
							|  |  |  |     expected_before_elements = document.pages[0].elements[:index] | 
					
						
							|  |  |  |     next_index = index + 1 | 
					
						
							|  |  |  |     expected_after_elements = document.pages[0].elements[next_index:] | 
					
						
							|  |  |  |     assert all(a.id == b.id for a, b in zip(before_elements, expected_before_elements)) | 
					
						
							|  |  |  |     assert all(a.id == b.id for a, b in zip(after_elements, expected_after_elements)) |