mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-19 15:06:21 +00:00

Connector data source versions should always be string values; however, we were using the integer checksum value as the version for fsspec connectors. This change casts that value to a string.

## Changes

* Cast the checksum value to a string when assigning the version value for fsspec connectors.
* Add a test to validate that these connectors assign a string value when an integer checksum is fetched.

## Testing

Unit test added.
26 lines
960 B
Python
26 lines
960 B
Python
from unittest.mock import MagicMock, patch
|
|
|
|
from fsspec import AbstractFileSystem
|
|
|
|
from unstructured.ingest.connector.fsspec.fsspec import FsspecIngestDoc, SimpleFsspecConfig
|
|
from unstructured.ingest.interfaces import ProcessorConfig, ReadConfig
|
|
|
|
|
|
@patch("fsspec.get_filesystem_class")
def test_version_is_string(mock_get_filesystem_class):
    """Check that the source metadata version is a ``str`` for an integer checksum.

    ``fsspec.get_filesystem_class`` is patched so that it hands back a factory
    producing a mocked filesystem whose ``checksum`` returns an ``int`` and
    whose ``info`` reports an empty ``etag``.
    """
    # Stand-in filesystem restricted to the AbstractFileSystem interface.
    fake_filesystem = MagicMock(spec=AbstractFileSystem)
    fake_filesystem.checksum.return_value = 1234567890
    fake_filesystem.info.return_value = {"etag": ""}

    def build_filesystem(**kwargs):
        # Accept and ignore any keyword arguments; always yield the same mock.
        return fake_filesystem

    mock_get_filesystem_class.return_value = build_filesystem

    connector_config = SimpleFsspecConfig("s3://my-bucket", access_config={})
    ingest_doc = FsspecIngestDoc(
        processor_config=ProcessorConfig(),
        read_config=ReadConfig(),
        connector_config=connector_config,
        remote_file_path="test.txt",
    )

    version = ingest_doc.source_metadata.version
    assert isinstance(version, str)
|