mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-07-22 00:11:14 +00:00
153 lines
4.3 KiB
Python
153 lines
4.3 KiB
Python
![]() |
from pathlib import Path
|
||
|
import hashlib
|
||
|
import pandas as pd
|
||
|
import numpy as np
|
||
|
|
||
|
from haystack.preview import Document
|
||
|
from haystack.preview.dataclasses.document import _create_id
|
||
|
|
||
|
|
||
|
def test_default_text_document_to_dict():
    """A text Document created from content alone serializes with default values for every other field."""
    text = "test content"
    serialized = Document(content=text).to_dict()

    expected = {
        "id": _create_id(classname=Document.__name__, content=text),
        "content": text,
        "content_type": "text",
        "metadata": {},
        "id_hash_keys": [],
        "score": None,
        "embedding": None,
    }
    assert serialized == expected
|
||
|
|
||
|
|
||
|
def test_default_text_document_from_dict():
    """A dict holding the default text fields deserializes to a Document equal to one built from content alone."""
    text = "test content"
    payload = {
        "id": _create_id(classname=Document.__name__, content=text),
        "content": text,
        "content_type": "text",
        "metadata": {},
        "id_hash_keys": [],
        "score": None,
        "embedding": None,
    }

    rebuilt = Document.from_dict(payload)
    assert rebuilt == Document(content=text)
|
||
|
|
||
|
|
||
|
def test_default_table_document_to_dict():
    """A table Document serializes its DataFrame content unchanged, with defaults for the remaining fields."""
    table = pd.DataFrame([1, 2])
    serialized = Document(content=table, content_type="table").to_dict()

    # `DataFrame == DataFrame` is element-wise, so the content is taken out
    # and checked with `.equals` before comparing the rest of the dict.
    content = serialized.pop("content")
    assert content.equals(table)

    expected_rest = {
        "id": _create_id(classname=Document.__name__, content=table),
        "content_type": "table",
        "metadata": {},
        "id_hash_keys": [],
        "score": None,
        "embedding": None,
    }
    assert serialized == expected_rest
|
||
|
|
||
|
|
||
|
def test_default_table_document_from_dict():
    """A dict holding the default table fields deserializes to a Document equal to one built from the same DataFrame."""
    table = pd.DataFrame([1, 2])
    payload = {
        "id": _create_id(classname=Document.__name__, content=table),
        "content": table,
        "content_type": "table",
        "metadata": {},
        "id_hash_keys": [],
        "score": None,
        "embedding": None,
    }

    rebuilt = Document.from_dict(payload)
    assert rebuilt == Document(content=table, content_type="table")
|
||
|
|
||
|
|
||
|
def test_default_image_document_to_dict():
    """An image Document serializes its path content as-is, with defaults for the remaining fields."""
    fixtures_dir = Path(__file__).parent / "test_files"
    image_path = fixtures_dir / "apple.jpg"
    serialized = Document(content=image_path, content_type="image").to_dict()

    expected = {
        "id": _create_id(classname=Document.__name__, content=image_path),
        "content": image_path,
        "content_type": "image",
        "metadata": {},
        "id_hash_keys": [],
        "score": None,
        "embedding": None,
    }
    assert serialized == expected
|
||
|
|
||
|
|
||
|
def test_default_image_document_from_dict():
    """A dict holding the default image fields deserializes to a Document equal to one built from the same path."""
    fixtures_dir = Path(__file__).parent / "test_files"
    image_path = fixtures_dir / "apple.jpg"
    payload = {
        "id": _create_id(classname=Document.__name__, content=image_path),
        "content": image_path,
        "content_type": "image",
        "metadata": {},
        "id_hash_keys": [],
        "score": None,
        "embedding": None,
    }

    rebuilt = Document.from_dict(payload)
    assert rebuilt == Document(content=image_path, content_type="image")
|
||
|
|
||
|
|
||
|
def test_document_with_most_attributes_to_dict():
    """
    Serialize a Document with every optional attribute set.

    Also covers `id_hash_keys`: the expected id is computed by passing the same
    `id_hash_keys` and `metadata` to `_create_id`.
    """
    metadata = {"some": "values", "test": 10}
    doc = Document(
        content="test content",
        content_type="text",
        metadata=metadata,
        id_hash_keys=["test"],
        score=0.99,
        embedding=np.zeros([10, 10]),
    )
    dictionary = doc.to_dict()

    # The embedding is checked on its own because arrays compare element-wise.
    # `np.array_equal` also compares shapes, whereas `(a == b).all()` would
    # broadcast and accept e.g. a scalar 0 or a length-10 zero vector.
    embedding = dictionary.pop("embedding")
    assert np.array_equal(embedding, np.zeros([10, 10]))

    assert dictionary == {
        "id": _create_id(
            classname=Document.__name__,
            content="test content",
            id_hash_keys=["test"],
            metadata={"some": "values", "test": 10},
        ),
        "content": "test content",
        "content_type": "text",
        "metadata": {"some": "values", "test": 10},
        "id_hash_keys": ["test"],
        "score": 0.99,
    }
|
||
|
|
||
|
|
||
|
def test_document_with_most_attributes_from_dict():
    """A dict with every optional attribute set deserializes to a Document equal to one built with the same arguments."""
    embedding = np.zeros([10, 10])
    payload = {
        "id": _create_id(
            classname=Document.__name__,
            content="test content",
            id_hash_keys=["test"],
            metadata={"some": "values", "test": 10},
        ),
        "content": "test content",
        "content_type": "text",
        "metadata": {"some": "values", "test": 10},
        "id_hash_keys": ["test"],
        "score": 0.99,
        "embedding": embedding,
    }

    rebuilt = Document.from_dict(payload)
    reference = Document(
        content="test content",
        content_type="text",
        metadata={"some": "values", "test": 10},
        id_hash_keys=["test"],
        score=0.99,
        embedding=embedding,
    )
    assert rebuilt == reference
|