mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-10-28 00:08:41 +00:00
fix: fix un-flattening of metadata (#6318)
* fix un-flattening of metadata * test should pass * add relnote * change policy: raise an error if both meta and keys are passed * Update document.py * support python 3.8 * adjust wording in the error message
This commit is contained in:
parent
34ecff1d19
commit
ff3165b8b8
@ -137,15 +137,29 @@ class Document(metaclass=_BackwardCompatible):
|
|||||||
data["dataframe"] = pandas.read_json(io.StringIO(dataframe))
|
data["dataframe"] = pandas.read_json(io.StringIO(dataframe))
|
||||||
if blob := data.get("blob"):
|
if blob := data.get("blob"):
|
||||||
data["blob"] = ByteStream(data=bytes(blob["data"]), mime_type=blob["mime_type"])
|
data["blob"] = ByteStream(data=bytes(blob["data"]), mime_type=blob["mime_type"])
|
||||||
# Unflatten metadata if it was flattened
|
# Store metadata for a moment while we try un-flattening allegedly flattened metadata.
|
||||||
meta = {}
|
# We don't expect both a `meta=` keyword and flattened metadata keys, so we'll raise a
|
||||||
|
# ValueError later if this is the case.
|
||||||
|
meta = data.pop("meta", {})
|
||||||
|
# Unflatten metadata if it was flattened. We assume any keyword argument that's not
|
||||||
|
# a document field is a metadata key. We treat legacy fields as document fields
|
||||||
|
# for backward compatibility.
|
||||||
|
flatten_meta = {}
|
||||||
legacy_fields = ["content_type", "id_hash_keys"]
|
legacy_fields = ["content_type", "id_hash_keys"]
|
||||||
field_names = legacy_fields + [f.name for f in fields(cls)]
|
document_fields = legacy_fields + [f.name for f in fields(cls)]
|
||||||
for key in list(data.keys()):
|
for key in list(data.keys()):
|
||||||
if key not in field_names:
|
if key not in document_fields:
|
||||||
meta[key] = data.pop(key)
|
flatten_meta[key] = data.pop(key)
|
||||||
|
|
||||||
return cls(**data, meta=meta)
|
# We don't support passing both flattened keys and the `meta` keyword parameter
|
||||||
|
if meta and flatten_meta:
|
||||||
|
raise ValueError(
|
||||||
|
"You can pass either the 'meta' parameter or flattened metadata keys as keyword arguments, "
|
||||||
|
"but currently you're passing both. Pass either the 'meta' parameter or flattened metadata keys."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Finally put back all the metadata
|
||||||
|
return cls(**data, meta={**meta, **flatten_meta})
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def content_type(self):
|
def content_type(self):
|
||||||
|
|||||||
@ -0,0 +1,8 @@
|
|||||||
|
---
|
||||||
|
|
||||||
|
preview:
|
||||||
|
- |
|
||||||
|
Fix a failure that occurred when creating a document passing the 'meta' keyword
|
||||||
|
to the constructor. Raise a specific ValueError if the 'meta' keyword is passed
|
||||||
|
along with metadata as keyword arguments; the two options are mutually exclusive
|
||||||
|
now.
|
||||||
@ -282,7 +282,7 @@ def test_from_dict_with_flat_meta():
|
|||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_from_dict_with_flat_and_non_flat_meta():
|
def test_from_dict_with_flat_and_non_flat_meta():
|
||||||
with pytest.raises(TypeError):
|
with pytest.raises(ValueError, match="Pass either the 'meta' parameter or flattened metadata keys"):
|
||||||
Document.from_dict(
|
Document.from_dict(
|
||||||
{
|
{
|
||||||
"content": "test text",
|
"content": "test text",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user