2023-04-13 09:36:23 +02:00
|
|
|
from pathlib import Path
|
2023-05-11 18:28:56 +02:00
|
|
|
import dataclasses
|
2023-05-15 11:39:04 +02:00
|
|
|
import textwrap
|
2023-09-11 16:40:00 +01:00
|
|
|
import json
|
2023-05-11 18:28:56 +02:00
|
|
|
|
2023-05-10 13:46:13 +02:00
|
|
|
import pytest
|
2023-04-13 09:36:23 +02:00
|
|
|
import pandas as pd
|
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
from haystack.preview import Document
|
2023-09-11 16:40:00 +01:00
|
|
|
from haystack.preview.dataclasses.document import DocumentEncoder, DocumentDecoder
|
2023-04-13 09:36:23 +02:00
|
|
|
|
|
|
|
|
2023-05-11 18:28:56 +02:00
|
|
|
@pytest.mark.unit
def test_document_is_immutable():
    """Assigning to a field of an existing `Document` raises `FrozenInstanceError`."""
    frozen_doc = Document(text="test text")

    with pytest.raises(dataclasses.FrozenInstanceError):
        frozen_doc.text = "won't work"
|
2023-05-11 18:28:56 +02:00
|
|
|
|
|
|
|
|
2023-05-15 11:39:04 +02:00
|
|
|
@pytest.mark.unit
@pytest.mark.parametrize(
    "doc,doc_str",
    [
        # One case per content field, followed by a document carrying all four at once.
        (Document(text="test text"), "text: 'test text'"),
        (Document(array=np.zeros((3, 7))), "array: (3, 7)"),
        (
            Document(dataframe=pd.DataFrame([["John", 25], ["Martha", 34]], columns=["name", "age"])),
            "dataframe: (2, 2)",
        ),
        (Document(blob="hello, test string".encode("utf-8")), "blob: 18 bytes"),
        (
            Document(
                text="test text",
                array=np.zeros((3, 7)),
                dataframe=pd.DataFrame([["John", 25], ["Martha", 34]], columns=["name", "age"]),
                blob="hello, test string".encode("utf-8"),
            ),
            "text: 'test text', array: (3, 7), dataframe: (2, 2), blob: 18 bytes",
        ),
    ],
)
def test_document_str(doc, doc_str):
    """`str(doc)` shows the id, the mimetype and a short summary of each content field that is set."""
    expected = f"Document(id={doc.id}, mimetype: 'text/plain', {doc_str})"
    assert expected == str(doc)
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.unit
@pytest.mark.parametrize(
    "doc1_data,doc2_data",
    [
        # Differences in mime_type, metadata, score or embedding must not affect the id.
        ({"text": "test text"}, {"text": "test text", "mime_type": "text/plain"}),
        ({"text": "test text", "mime_type": "text/html"}, {"text": "test text", "mime_type": "text/plain"}),
        ({"text": "test text"}, {"text": "test text", "metadata": {"path": Path(__file__)}}),
        (
            {"text": "test text", "metadata": {"path": Path(__file__).parent}},
            {"text": "test text", "metadata": {"path": Path(__file__)}},
        ),
        ({"text": "test text"}, {"text": "test text", "score": 200}),
        ({"text": "test text", "score": 0}, {"text": "test text", "score": 200}),
        ({"text": "test text"}, {"text": "test text", "embedding": np.array([1, 2, 3])}),
        (
            {"text": "test text", "embedding": np.array([100, 222, 345])},
            {"text": "test text", "embedding": np.array([1, 2, 3])},
        ),
        # Identical content in the other content fields yields identical ids as well.
        ({"array": np.array(range(10))}, {"array": np.array(range(10))}),
        ({"dataframe": pd.DataFrame([1, 2, 3])}, {"dataframe": pd.DataFrame([1, 2, 3])}),
        ({"blob": b"some bytes"}, {"blob": b"some bytes"}),
    ],
)
def test_id_hash_keys_default_fields_equal_id(doc1_data, doc2_data):
    """Documents built from each pair of dictionaries end up with the same id."""
    first = Document.from_dict(doc1_data)
    second = Document.from_dict(doc2_data)

    assert first.id == second.id
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.unit
@pytest.mark.parametrize(
    "doc1_data,doc2_data",
    [
        # Each pair differs in exactly one content field.
        ({"text": "test text"}, {"text": "test text "}),
        ({"array": np.array(range(10))}, {"array": np.array(range(11))}),
        ({"dataframe": pd.DataFrame([1, 2, 3])}, {"dataframe": pd.DataFrame([1, 2, 3, 4])}),
        ({"blob": b"some bytes"}, {"blob": "something else".encode()}),
    ],
)
def test_id_hash_keys_default_fields_different_ids(doc1_data, doc2_data):
    """Documents built from each pair of dictionaries end up with different ids."""
    first = Document.from_dict(doc1_data)
    second = Document.from_dict(doc2_data)

    assert first.id != second.id
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.unit
def test_id_hash_keys_changes_id():
    """Adding a metadata key to `id_hash_keys` produces a different id for otherwise identical documents."""
    shared_meta = {"some-value": "value"}

    default_keys_doc = Document(text="test text", metadata=dict(shared_meta))
    custom_keys_doc = Document(text="test text", metadata=dict(shared_meta), id_hash_keys=["text", "some-value"])

    assert default_keys_doc.id != custom_keys_doc.id
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.unit
def test_id_hash_keys_field_may_be_missing(caplog):
    """Unknown `id_hash_keys` do not change the id, but each one is reported in the captured log."""
    first = Document(text="test text", id_hash_keys=["something"])
    second = Document(text="test text", id_hash_keys=["something else"])

    assert first.id == second.id

    expected_messages = (
        "is missing the following id_hash_keys: ['something'].",
        "is missing the following id_hash_keys: ['something else'].",
    )
    for message in expected_messages:
        assert message in caplog.text
|
2023-05-15 11:39:04 +02:00
|
|
|
|
|
|
|
|
2023-05-10 16:33:47 +02:00
|
|
|
@pytest.mark.unit
def test_init_document_same_meta_as_main_fields():
    """
    A metadata key named like a main field (here `score`) is rejected with a `ValueError`.

    Such keys are forbidden to prevent later issues with `Document.flatten()`.
    """
    clashing_meta = {"score": "10/10"}

    with pytest.raises(ValueError, match="score"):
        Document(text="test text", metadata=clashing_meta)
|
2023-05-10 16:33:47 +02:00
|
|
|
|
|
|
|
|
2023-05-11 18:28:56 +02:00
|
|
|
@pytest.mark.unit
def test_basic_equality_type_mismatch():
    """A `Document` does not compare equal to a plain string holding the same text."""
    content = "test text"
    document = Document(text=content)

    assert document != content
|
2023-05-11 18:28:56 +02:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.unit
def test_basic_equality_id():
    """Documents with the same text compare equal until their ids are forced to differ."""
    left = Document(text="test text")
    right = Document(text="test text")
    assert left == right

    # Go through object.__setattr__ to overwrite the ids directly on the instances.
    for document, forced_id in ((left, "1234"), (right, "5678")):
        object.__setattr__(document, "id", forced_id)

    assert left != right
|
|
|
|
|
|
|
|
|
2023-05-11 18:28:56 +02:00
|
|
|
@pytest.mark.unit
def test_equality_with_metadata_with_objects():
    """Documents whose metadata hold equal arrays, paths and custom objects compare equal."""

    class TestObject:
        """Helper whose instances are all equal to each other and unequal to anything else."""

        def __eq__(self, other):
            # Always return a bool. The previous implementation only returned
            # True for matching types and implicitly returned None otherwise.
            return type(self) is type(other)

    doc1 = Document(
        text="test text", metadata={"value": np.array([0, 1, 2]), "path": Path(__file__), "obj": TestObject()}
    )
    doc2 = Document(
        text="test text", metadata={"value": np.array([0, 1, 2]), "path": Path(__file__), "obj": TestObject()}
    )

    assert doc1 == doc2
|
|
|
|
|
|
|
|
|
2023-05-10 13:46:13 +02:00
|
|
|
@pytest.mark.unit
def test_empty_document_to_dict():
    """`to_dict()` on a default-constructed document lists every field with its default value."""
    empty_doc = Document()
    serialized = empty_doc.to_dict()

    assert serialized == {
        "id": empty_doc._create_id(),
        "text": None,
        "array": None,
        "dataframe": None,
        "blob": None,
        "mime_type": "text/plain",
        "metadata": {},
        "id_hash_keys": ["text", "array", "dataframe", "blob"],
        "score": None,
        "embedding": None,
    }
|
|
|
|
|
|
|
|
|
2023-05-10 13:46:13 +02:00
|
|
|
@pytest.mark.unit
def test_empty_document_from_dict():
    """Building a document from an empty dictionary is equivalent to calling `Document()`."""
    from_empty_dict = Document.from_dict({})
    default_doc = Document()

    assert from_empty_dict == default_doc
|
2023-04-13 09:36:23 +02:00
|
|
|
|
|
|
|
|
2023-05-10 13:46:13 +02:00
|
|
|
@pytest.mark.unit
def test_full_document_to_dict():
    """`to_dict()` on a fully populated document returns every field with the value it was given."""
    doc = Document(
        text="test text",
        array=np.array([1, 2, 3]),
        dataframe=pd.DataFrame([10, 20, 30]),
        blob=b"some bytes",
        mime_type="application/pdf",
        metadata={"some": "values", "test": 10},
        id_hash_keys=["test"],
        score=0.99,
        embedding=np.zeros([10, 10]),
    )
    dictionary = doc.to_dict()

    # Arrays and dataframes cannot be compared with a plain `==` inside a dict
    # comparison, so they are popped and checked one by one first.
    array = dictionary.pop("array")
    assert array.shape == doc.array.shape and (array == doc.array).all()

    dataframe = dictionary.pop("dataframe")
    assert dataframe.equals(doc.dataframe)

    blob = dictionary.pop("blob")
    assert blob == doc.blob

    # np.array_equal also checks the shape. A bare `(a == b).all()` would accept
    # any value that broadcasts against the (10, 10) embedding, e.g. a scalar 0.
    embedding = dictionary.pop("embedding")
    assert np.array_equal(embedding, doc.embedding)

    assert dictionary == {
        "id": doc.id,
        "text": "test text",
        "mime_type": "application/pdf",
        "metadata": {"some": "values", "test": 10},
        "id_hash_keys": ["test"],
        "score": 0.99,
    }
|
|
|
|
|
|
|
|
|
2023-05-10 13:46:13 +02:00
|
|
|
@pytest.mark.unit
def test_document_with_most_attributes_from_dict():
    """`from_dict()` with every field given equals a `Document` constructed with the same keyword arguments."""
    # The same embedding object is deliberately shared by both sides of the comparison.
    embedding = np.zeros([10, 10])

    loaded = Document.from_dict(
        {
            "text": "test text",
            "array": np.array([1, 2, 3]),
            "dataframe": pd.DataFrame([10, 20, 30]),
            "blob": b"some bytes",
            "mime_type": "application/pdf",
            "metadata": {"some": "values", "test": 10},
            "id_hash_keys": ["test"],
            "score": 0.99,
            "embedding": embedding,
        }
    )
    expected = Document(
        text="test text",
        array=np.array([1, 2, 3]),
        dataframe=pd.DataFrame([10, 20, 30]),
        blob=b"some bytes",
        mime_type="application/pdf",
        metadata={"some": "values", "test": 10},
        id_hash_keys=["test"],
        score=0.99,
        embedding=embedding,
    )

    assert loaded == expected
|
2023-05-10 16:33:47 +02:00
|
|
|
|
|
|
|
|
2023-05-15 11:39:04 +02:00
|
|
|
@pytest.mark.unit
def test_empty_document_to_json():
    """`to_json()` on a default-constructed document matches the JSON dump of its default field values."""
    empty_doc = Document()
    serialized = empty_doc.to_json()

    # Note: the expected payload has no "blob" entry.
    expected = json.dumps(
        {
            "id": empty_doc.id,
            "text": None,
            "array": None,
            "dataframe": None,
            "mime_type": "text/plain",
            "metadata": {},
            "id_hash_keys": ["text", "array", "dataframe", "blob"],
            "score": None,
            "embedding": None,
        }
    )
    assert serialized == expected
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.unit
def test_empty_document_from_json():
    """Loading the empty JSON object `{}` is equivalent to calling `Document()`."""
    from_empty_json = Document.from_json("{}")
    default_doc = Document()

    assert from_empty_json == default_doc
|
2023-05-15 11:39:04 +02:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.unit
def test_full_document_to_json(tmp_path):
    """`to_json()` on a fully populated document matches the expected JSON, including non-JSON metadata values."""

    class TestClass:
        """Arbitrary object whose repr is expected to show up in the serialized metadata."""

        def __repr__(self):
            return "<the object>"

    file_path = tmp_path / "test.txt"
    doc_1 = Document(
        text="test text",
        array=np.array([1, 2, 3]),
        dataframe=pd.DataFrame([10, 20, 30]),
        blob=b"some bytes",
        mime_type="application/pdf",
        metadata={"some object": TestClass(), "a path": file_path},
        id_hash_keys=["test"],
        score=0.5,
        embedding=np.array([1, 2, 3, 4]),
    )
    serialized = doc_1.to_json()

    # Note: the expected payload has no "blob" entry.
    expected = json.dumps(
        {
            "id": doc_1.id,
            "text": "test text",
            "array": [1, 2, 3],
            "dataframe": '{"0":{"0":10,"1":20,"2":30}}',
            "mime_type": "application/pdf",
            "metadata": {"some object": "<the object>", "a path": str((tmp_path / "test.txt").absolute())},
            "id_hash_keys": ["test"],
            "score": 0.5,
            "embedding": [1, 2, 3, 4],
        }
    )
    assert serialized == expected
|
2023-05-15 11:39:04 +02:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.unit
def test_full_document_from_json(tmp_path):
    """`from_json()` on a fully populated payload equals a `Document` built from the same values."""
    # The unused local helper class that used to be defined here was dead code
    # and has been removed; this test only deals with already-serialized values.
    doc = Document.from_json(
        json.dumps(
            {
                "text": "test text",
                "array": [1, 2, 3],
                "dataframe": '{"0":{"0":10,"1":20,"2":30}}',
                "mime_type": "application/pdf",
                "metadata": {"some object": "<the object>", "a path": str((tmp_path / "test.txt").absolute())},
                "id_hash_keys": ["test"],
                "score": 0.5,
                "embedding": [1, 2, 3, 4],
            }
        )
    )
    assert doc == Document(
        text="test text",
        array=np.array([1, 2, 3]),
        dataframe=pd.DataFrame([10, 20, 30]),
        blob=None,
        mime_type="application/pdf",
        # Note the object serialization: the metadata values remain plain strings after loading.
        metadata={"some object": "<the object>", "a path": str((tmp_path / "test.txt").absolute())},
        id_hash_keys=["test"],
        score=0.5,
        embedding=np.array([1, 2, 3, 4]),
    )
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.unit
def test_to_json_custom_encoder(tmp_path):
    """`to_json()` uses the encoder class passed as `json_encoder` for values it cannot serialize itself."""

    class SerializableTestClass:
        """Marker type that only the custom encoder below knows how to serialize."""

    class TestEncoder(DocumentEncoder):
        def default(self, obj):
            # Defer to the stock encoder for anything but the marker type.
            if not isinstance(obj, SerializableTestClass):
                return super().default(obj)
            return "<<CUSTOM ENCODING>>"

    doc = Document(text="test text", metadata={"some object": SerializableTestClass()})
    doc_json = doc.to_json(indent=4, json_encoder=TestEncoder).strip()

    expected = json.dumps(
        {
            "id": doc.id,
            "text": "test text",
            "array": None,
            "dataframe": None,
            "mime_type": "text/plain",
            "metadata": {"some object": "<<CUSTOM ENCODING>>"},
            "id_hash_keys": ["text", "array", "dataframe", "blob"],
            "score": None,
            "embedding": None,
        },
        indent=4,
    )
    assert doc_json == expected
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.unit
def test_from_json_custom_decoder():
    """`from_json()` uses the decoder class passed as `json_decoder` to rebuild custom metadata objects."""

    class TestClass:
        """Helper whose instances are all equal to each other."""

        def __eq__(self, other):
            return type(self) == type(other)

    class TestDecoder(DocumentDecoder):
        def __init__(self, *args, **kwargs):
            # Incoming arguments are ignored; only the custom hook is forwarded.
            super().__init__(object_hook=self.object_hook)

        def object_hook(self, dictionary):
            if "metadata" not in dictionary:
                return dictionary
            metadata = dictionary["metadata"]
            # Only existing keys are reassigned, so the dict size does not change while iterating.
            for key, value in metadata.items():
                if value == "<<CUSTOM ENCODING>>":
                    metadata[key] = TestClass()
            return dictionary

    doc = Document(text="test text", metadata={"some object": TestClass()})

    payload = json.dumps(
        {
            "id": doc.id,
            "text": "test text",
            "array": None,
            "dataframe": None,
            "mime_type": "text/plain",
            "metadata": {"some object": "<<CUSTOM ENCODING>>"},
            "id_hash_keys": ["text", "array", "dataframe", "blob"],
            "score": None,
            "embedding": None,
        }
    )
    restored = Document.from_json(payload, json_decoder=TestDecoder)

    assert doc == restored
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.unit
def test_flatten_document_no_meta():
    """`flatten()` on a document without metadata returns just the main fields."""
    document = Document(text="test text")
    flattened = document.flatten()

    assert flattened == {
        "id": document.id,
        "text": "test text",
        "array": None,
        "dataframe": None,
        "blob": None,
        "mime_type": "text/plain",
        "id_hash_keys": ["text", "array", "dataframe", "blob"],
        "score": None,
        "embedding": None,
    }
|
|
|
|
|
|
|
|
|
2023-05-15 11:39:04 +02:00
|
|
|
@pytest.mark.unit
def test_flatten_document_with_flat_meta():
    """`flatten()` puts flat metadata keys at the same level as the main fields."""
    document = Document(text="test text", metadata={"some-key": "a value", "another-key": "another value!"})
    flattened = document.flatten()

    assert flattened == {
        "id": document.id,
        "text": "test text",
        "array": None,
        "dataframe": None,
        "blob": None,
        "mime_type": "text/plain",
        "id_hash_keys": ["text", "array", "dataframe", "blob"],
        "score": None,
        "embedding": None,
        # Metadata entries appear alongside the main fields.
        "some-key": "a value",
        "another-key": "another value!",
    }
|
|
|
|
|
|
|
|
|
2023-05-15 11:39:04 +02:00
|
|
|
@pytest.mark.unit
def test_flatten_document_with_nested_meta():
    """`flatten()` lifts only the top level of the metadata; nested dictionaries are kept as they are."""
    document = Document(text="test text", metadata={"some-key": "a value", "nested": {"key": 10, "key2": 50}})
    flattened = document.flatten()

    assert flattened == {
        "id": document.id,
        "text": "test text",
        "array": None,
        "dataframe": None,
        "blob": None,
        "mime_type": "text/plain",
        "id_hash_keys": ["text", "array", "dataframe", "blob"],
        "score": None,
        "embedding": None,
        "some-key": "a value",
        # The nested dictionary stays a dictionary in the flattened output.
        "nested": {"key": 10, "key2": 50},
    }
|