mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-12 19:45:56 +00:00

Update: The CLI shell script works when sending documents to the free API, but the paid API is down, so testing against it is still pending. - The first commit adds docstrings and fixes type hints. - The second commit reorganizes `test_unstructured_ingest` so it matches the structure of `unstructured/ingest`. - The third commit contains the primary changes for this PR. - The `.chunk()` method responsible for sending elements to the correct method is moved from `ChunkingConfig` to `Chunker` so that `ChunkingConfig` acts as a config object instead of containing implementation logic. `Chunker.chunk()` also now takes a JSON file instead of a list of elements. This is done to avoid redundant serialization if the file is to be sent to the API for chunking. --------- Co-authored-by: Ahmet Melek <39141206+ahmetmeleq@users.noreply.github.com>
26 lines
960 B
Python
26 lines
960 B
Python
from unittest.mock import MagicMock, patch
|
|
|
|
from fsspec import AbstractFileSystem
|
|
|
|
from unstructured.ingest.connector.fsspec.fsspec import FsspecIngestDoc, SimpleFsspecConfig
|
|
from unstructured.ingest.interfaces import ProcessorConfig, ReadConfig
|
|
|
|
|
|
@patch("fsspec.get_filesystem_class")
def test_version_is_string(mock_get_filesystem_class):
    """Check that the document version is a ``str`` when the checksum is an int.

    The patched ``fsspec.get_filesystem_class`` hands back a factory producing
    a mocked filesystem whose ``info()`` reports an empty etag and whose
    ``checksum()`` returns an integer. The resulting document's
    ``source_metadata.version`` must still be a string.
    """
    # Stand-in filesystem constrained to the AbstractFileSystem interface.
    fake_fs = MagicMock(spec=AbstractFileSystem)
    fake_fs.checksum.return_value = 1234567890
    fake_fs.info.return_value = {"etag": ""}

    def build_fake_fs(**kwargs):
        # Accept (and ignore) any keyword arguments; always yield the same mock.
        return fake_fs

    mock_get_filesystem_class.return_value = build_fake_fs

    connector_config = SimpleFsspecConfig("s3://my-bucket", access_config={})
    processor_config = ProcessorConfig()
    read_config = ReadConfig()

    ingest_doc = FsspecIngestDoc(
        processor_config=processor_config,
        read_config=read_config,
        connector_config=connector_config,
        remote_file_path="test.txt",
    )

    version = ingest_doc.source_metadata.version
    assert isinstance(version, str)
|