mirror of
				https://github.com/Unstructured-IO/unstructured.git
				synced 2025-10-30 17:38:13 +00:00 
			
		
		
		
	 2e97494613
			
		
	
	
		2e97494613
		
			
		
	
	
	
	
		
			
			Connector data source versions should always be string values; however, we were using the integer checksum value as the version for fsspec connectors. This change casts that value to a string. ## Changes * Cast the checksum value to a string when assigning the version value for fsspec connectors. * Add a test to validate that these connectors assign a string value when an integer checksum is fetched. ## Testing Unit test added.
		
			
				
	
	
		
			26 lines
		
	
	
		
			960 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			26 lines
		
	
	
		
			960 B
		
	
	
	
		
			Python
		
	
	
	
	
	
| from unittest.mock import MagicMock, patch
 | |
| 
 | |
| from fsspec import AbstractFileSystem
 | |
| 
 | |
| from unstructured.ingest.connector.fsspec.fsspec import FsspecIngestDoc, SimpleFsspecConfig
 | |
| from unstructured.ingest.interfaces import ProcessorConfig, ReadConfig
 | |
| 
 | |
| 
 | |
@patch("fsspec.get_filesystem_class")
def test_version_is_string(mock_get_filesystem_class):
    """Check that the source metadata version is a ``str`` for an integer checksum.

    ``fsspec.get_filesystem_class`` is patched to hand back a factory that always
    returns one mocked filesystem. That mock reports an empty ``etag`` from
    ``info()`` and an integer from ``checksum()``.
    """
    fake_filesystem = MagicMock(spec=AbstractFileSystem)
    fake_filesystem.info.return_value = {"etag": ""}
    fake_filesystem.checksum.return_value = 1234567890

    def build_filesystem(**kwargs):
        # Stands in for the filesystem class: any keyword arguments are accepted
        # and ignored, and the same mock instance is returned every time.
        return fake_filesystem

    mock_get_filesystem_class.return_value = build_filesystem

    connector_config = SimpleFsspecConfig("s3://my-bucket", access_config={})
    ingest_doc = FsspecIngestDoc(
        processor_config=ProcessorConfig(),
        read_config=ReadConfig(),
        connector_config=connector_config,
        remote_file_path="test.txt",
    )

    version = ingest_doc.source_metadata.version
    assert isinstance(version, str)
 |