From ff3165b8b8bb0abaecdafe847d85648c6efbdaa8 Mon Sep 17 00:00:00 2001 From: Massimiliano Pippi Date: Thu, 16 Nov 2023 17:10:53 +0100 Subject: [PATCH] fix: fix un-flattening of metadata (#6318) * fix un-flattening of metadata * test should pass * add relnote * change policy: raise an error if both meta and keys are passed * Update document.py * support python 3.8 * adjust wording in the error message --- haystack/preview/dataclasses/document.py | 26 ++++++++++++++----- ...ent-flatten-metadata-ef61dbbae08b1db7.yaml | 8 ++++++ test/preview/dataclasses/test_document.py | 2 +- 3 files changed, 29 insertions(+), 7 deletions(-) create mode 100644 releasenotes/notes/fix-document-flatten-metadata-ef61dbbae08b1db7.yaml diff --git a/haystack/preview/dataclasses/document.py b/haystack/preview/dataclasses/document.py index 98db4d944..05e79b6d2 100644 --- a/haystack/preview/dataclasses/document.py +++ b/haystack/preview/dataclasses/document.py @@ -137,15 +137,29 @@ class Document(metaclass=_BackwardCompatible): data["dataframe"] = pandas.read_json(io.StringIO(dataframe)) if blob := data.get("blob"): data["blob"] = ByteStream(data=bytes(blob["data"]), mime_type=blob["mime_type"]) - # Unflatten metadata if it was flattened - meta = {} + # Store metadata for a moment while we try un-flattening allegedly flatten metadata. + # We don't expect both a `meta=` keyword and flatten metadata keys so we'll raise a + # ValueError later if this is the case. + meta = data.pop("meta", {}) + # Unflatten metadata if it was flattened. We assume any keyword argument that's not + # a document field is a metadata key. We treat legacy fields as document fields + # for backward compatibility. + flatten_meta = {} legacy_fields = ["content_type", "id_hash_keys"] - field_names = legacy_fields + [f.name for f in fields(cls)] + document_fields = legacy_fields + [f.name for f in fields(cls)] for key in list(data.keys()): - if key not in field_names: - meta[key] = data.pop(key) + if key not in document_fields: + flatten_meta[key] = data.pop(key) - return cls(**data, meta=meta) + # We don't support passing both flatten keys and the `meta` keyword parameter + if meta and flatten_meta: + raise ValueError( + "You can pass either the 'meta' parameter or flattened metadata keys as keyword arguments, " + "but currently you're passing both. Pass either the 'meta' parameter or flattened metadata keys." + ) + + # Finally put back all the metadata + return cls(**data, meta={**meta, **flatten_meta}) @property def content_type(self): diff --git a/releasenotes/notes/fix-document-flatten-metadata-ef61dbbae08b1db7.yaml b/releasenotes/notes/fix-document-flatten-metadata-ef61dbbae08b1db7.yaml new file mode 100644 index 000000000..ad516ff40 --- /dev/null +++ b/releasenotes/notes/fix-document-flatten-metadata-ef61dbbae08b1db7.yaml @@ -0,0 +1,8 @@ +--- + +preview: + - | + Fix a failure that occurred when creating a document passing the 'meta' keyword + to the constructor. Raise a specific ValueError if the 'meta' keyword is passed + along with metadata as keyword arguments, the two options are mutually exclusive + now. diff --git a/test/preview/dataclasses/test_document.py b/test/preview/dataclasses/test_document.py index 49c61541b..593a3f449 100644 --- a/test/preview/dataclasses/test_document.py +++ b/test/preview/dataclasses/test_document.py @@ -282,7 +282,7 @@ def test_from_dict_with_flat_meta(): @pytest.mark.unit def test_from_dict_with_flat_and_non_flat_meta(): - with pytest.raises(TypeError): + with pytest.raises(ValueError, match="Pass either the 'meta' parameter or flattened metadata keys"): Document.from_dict( { "content": "test text",