mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-08 17:46:54 +00:00

Update: The CLI shell script works when sending documents to the free API, but the paid API is down, so testing against it is on hold. - The first commit adds docstrings and fixes type hints. - The second commit reorganizes `test_unstructured_ingest` so it matches the structure of `unstructured/ingest`. - The third commit contains the primary changes for this PR. - The `.chunk()` method responsible for sending elements to the correct method is moved from `ChunkingConfig` to `Chunker` so that `ChunkingConfig` acts as a config object instead of containing implementation logic. `Chunker.chunk()` also now takes a JSON file instead of a list of elements. This is done to avoid redundant serialization if the file is to be sent to the API for chunking. --------- Co-authored-by: Ahmet Melek <39141206+ahmetmeleq@users.noreply.github.com>
61 lines
1.8 KiB
Python
61 lines
1.8 KiB
Python
import json
|
|
from dataclasses import Field, dataclass, fields
|
|
|
|
import pytest
|
|
|
|
from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin, enhanced_field
|
|
from unstructured.ingest.enhanced_dataclass.dataclasses import EnhancedField
|
|
|
|
|
|
@dataclass
class AuthData(EnhancedDataClassJsonMixin):
    """Sample credentials record used by the tests in this module.

    ``username`` is declared as a plain annotated attribute, while
    ``password`` and ``date`` are declared through ``enhanced_field`` with
    ``sensitive=True`` and ``overload_name="time"`` respectively.
    """

    # Plain dataclass field: declared without enhanced_field().
    username: str
    # Flagged as sensitive via enhanced_field().
    password: str = enhanced_field(sensitive=True)
    # Declared with the alternate name "time" via overload_name.
    date: int = enhanced_field(overload_name="time")
|
|
|
|
|
|
# Shared module-level instance that the serialization tests read from.
auth = AuthData(
    username="my name",
    password="top secret",
    date=3,
)
|
|
|
|
|
|
def test_enhanced_field():
    """Check the field type and ``sensitive`` flag of every ``AuthData`` field."""
    for field_def in fields(AuthData):
        if field_def.name == "username":
            # Declared without enhanced_field(): expected to be a standard
            # dataclass Field that carries no ``sensitive`` attribute.
            assert isinstance(field_def, Field)
            assert not hasattr(field_def, "sensitive")
            continue

        # Every other field was declared through enhanced_field().
        assert isinstance(field_def, EnhancedField)
        if field_def.name == "password":
            assert field_def.sensitive is True
        else:
            assert not field_def.sensitive
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("apply_name_overload", "expected_dict"),
    [
        (True, {"username": "my name", "password": "THIS IS REDACTED", "time": 3}),
        (False, {"username": "my name", "password": "THIS IS REDACTED", "date": 3}),
    ],
)
def test_to_json(apply_name_overload: bool, expected_dict: dict):
    """Compare ``auth.to_json()`` output against the JSON dump of ``expected_dict``.

    The call requests redaction with a custom ``redacted_text`` and toggles
    ``apply_name_overload`` between the two parametrized cases.
    """
    to_json_kwargs = {
        "redact_sensitive": True,
        "redacted_text": "THIS IS REDACTED",
        "apply_name_overload": apply_name_overload,
    }
    serialized = auth.to_json(**to_json_kwargs)
    # String comparison: the output must match json.dumps() of the expected dict.
    assert serialized == json.dumps(expected_dict)
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("apply_name_overload", "expected_dict"),
    [
        (True, {"username": "my name", "password": "***REDACTED***", "time": 3}),
        (False, {"username": "my name", "password": "***REDACTED***", "date": 3}),
    ],
)
def test_to_dict(apply_name_overload: bool, expected_dict: dict):
    """Compare ``auth.to_dict()`` output against ``expected_dict``.

    No ``redacted_text`` is passed here; the expected dicts use
    ``"***REDACTED***"`` for the password value.
    """
    as_dict = auth.to_dict(
        redact_sensitive=True,
        apply_name_overload=apply_name_overload,
    )
    assert as_dict == expected_dict
|