| 
									
										
										
										
											2023-10-23 11:51:52 -04:00
										 |  |  | from unstructured.ingest.connector.local import LocalIngestDoc, SimpleLocalConfig | 
					
						
							|  |  |  | from unstructured.ingest.connector.registry import ( | 
					
						
							|  |  |  |     create_ingest_doc_from_dict, | 
					
						
							|  |  |  |     create_ingest_doc_from_json, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | from unstructured.ingest.interfaces import ProcessorConfig, ReadConfig | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | doc = LocalIngestDoc( | 
					
						
							|  |  |  |     path="test_unstructured_ingest/example-docs/layout-parser-paper.pdf", | 
					
						
							|  |  |  |     connector_config=SimpleLocalConfig(input_path="test_unstructured_ingest/example-docs/"), | 
					
						
							|  |  |  |     processor_config=ProcessorConfig(), | 
					
						
							|  |  |  |     read_config=ReadConfig(), | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | doc.update_source_metadata() | 
					
						
							|  |  |  | serialized_json = doc.to_json() | 
					
						
							| 
									
										
										
										
											2023-10-25 18:04:27 -04:00
										 |  |  | serialized_dict = doc.to_dict() | 
					
						
							| 
									
										
										
										
											2023-10-23 11:51:52 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_manual_deserialization(): | 
					
						
							|  |  |  |     deserialized_doc = LocalIngestDoc.from_json(serialized_json) | 
					
						
							|  |  |  |     assert doc == deserialized_doc | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_registry_from_json(): | 
					
						
							|  |  |  |     deserialized_doc = create_ingest_doc_from_json(serialized_json) | 
					
						
							|  |  |  |     assert doc == deserialized_doc | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_registry_from_dict(): | 
					
						
							|  |  |  |     deserialized_doc = create_ingest_doc_from_dict(serialized_dict) | 
					
						
							|  |  |  |     assert doc == deserialized_doc | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_source_metadata_serialization(): | 
					
						
							|  |  |  |     doc = LocalIngestDoc( | 
					
						
							|  |  |  |         path="test_unstructured_ingest/example-docs/layout-parser-paper.pdf", | 
					
						
							|  |  |  |         connector_config=SimpleLocalConfig(input_path="test_unstructured_ingest/example-docs/"), | 
					
						
							|  |  |  |         processor_config=ProcessorConfig(), | 
					
						
							|  |  |  |         read_config=ReadConfig(), | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     serialized_json = doc.to_dict() | 
					
						
							|  |  |  |     assert not serialized_json["_source_metadata"] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     doc.update_source_metadata() | 
					
						
							|  |  |  |     serialized_json_w_meta = doc.to_dict() | 
					
						
							|  |  |  |     assert serialized_json_w_meta["_source_metadata"] |