changed metadata to meta (#6605)

sahusiddharth authored 2023-12-21 17:09:58 +05:30 · committed by GitHub
parent fc88ef7076
commit 3d17e6ff76
37 changed files with 138 additions and 140 deletions


@@ -134,16 +134,16 @@ class LocalWhisperTranscriber:
         if not isinstance(source, ByteStream):
             path = Path(source)
             source = ByteStream.from_file_path(path)
-            source.metadata["file_path"] = path
+            source.meta["file_path"] = path
         else:
             # If we received a ByteStream instance that doesn't have the "file_path" metadata set,
             # we dump the bytes into a temporary file.
-            path = source.metadata.get("file_path")
+            path = source.meta.get("file_path")
             if path is None:
                 fp = tempfile.NamedTemporaryFile(delete=False)
                 path = Path(fp.name)
                 source.to_file(path)
-                source.metadata["file_path"] = path
+                source.meta["file_path"] = path

         transcription = self._model.transcribe(str(path), **kwargs)
         if not return_segments:


@@ -129,13 +129,13 @@ class RemoteWhisperTranscriber:
         if not isinstance(source, ByteStream):
             path = source
             source = ByteStream.from_file_path(Path(source))
-            source.metadata["file_path"] = path
+            source.meta["file_path"] = path

         file = io.BytesIO(source.data)
-        file.name = str(source.metadata["file_path"]) if "file_path" in source.metadata else "__fallback__.wav"
+        file.name = str(source.meta["file_path"]) if "file_path" in source.meta else "__fallback__.wav"

         content = openai.Audio.transcribe(file=file, model=self.model_name, **self.whisper_params)
-        doc = Document(content=content["text"], meta=source.metadata)
+        doc = Document(content=content["text"], meta=source.meta)
         documents.append(doc)

     return {"documents": documents}


@@ -42,7 +42,7 @@ class AnswerBuilder:
         self,
         query: str,
         replies: List[str],
-        metadata: Optional[List[Dict[str, Any]]] = None,
+        meta: Optional[List[Dict[str, Any]]] = None,
         documents: Optional[List[Document]] = None,
         pattern: Optional[str] = None,
         reference_pattern: Optional[str] = None,
@@ -52,7 +52,7 @@ class AnswerBuilder:
         :param query: The query used in the prompts for the Generator as a string.
         :param replies: The output of the Generator. A list of strings.
-        :param metadata: The metadata returned by the Generator. An optional list of dictionaries. If not specified,
+        :param meta: The metadata returned by the Generator. An optional list of dictionaries. If not specified,
                          the generated answer will contain no metadata.
         :param documents: The documents used as input to the Generator. A list of `Document` objects. If
                           `documents` are specified, they are added to the `Answer` objects.
@@ -74,10 +74,10 @@ class AnswerBuilder:
                           If not specified, no parsing is done, and all documents are referenced.
                           Default: `None`.
         """
-        if not metadata:
-            metadata = [{}] * len(replies)
-        elif len(replies) != len(metadata):
-            raise ValueError(f"Number of replies ({len(replies)}), and metadata ({len(metadata)}) must match.")
+        if not meta:
+            meta = [{}] * len(replies)
+        elif len(replies) != len(meta):
+            raise ValueError(f"Number of replies ({len(replies)}), and metadata ({len(meta)}) must match.")

         if pattern:
             AnswerBuilder._check_num_groups_in_regex(pattern)
@@ -86,7 +86,7 @@ class AnswerBuilder:
         reference_pattern = reference_pattern or self.reference_pattern

         all_answers = []
-        for reply, meta in zip(replies, metadata):
+        for reply, metadata in zip(replies, meta):
             referenced_docs = []
             if documents:
                 reference_idxs = []
@@ -102,7 +102,7 @@ class AnswerBuilder:
                     logger.warning("Document index '%s' referenced in Generator output is out of range. ", idx + 1)

             answer_string = AnswerBuilder._extract_answer_string(reply, pattern)
-            answer = GeneratedAnswer(data=answer_string, query=query, documents=referenced_docs, meta=meta)
+            answer = GeneratedAnswer(data=answer_string, query=query, documents=referenced_docs, meta=metadata)
             all_answers.append(answer)

         return {"answers": all_answers}


@@ -53,7 +53,7 @@ class DynamicPromptBuilder:
    >> {'llm': {'replies': [ChatMessage(content="Berlin is the capital city of Germany and one of the most vibrant
    and diverse cities in Europe. Here are some key things to know...Enjoy your time exploring the vibrant and dynamic
-   capital of Germany!", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, metadata={'model': 'gpt-3.5-turbo-0613',
+   capital of Germany!", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-3.5-turbo-0613',
    'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 27, 'completion_tokens': 681, 'total_tokens': 708}})]}}
@@ -65,7 +65,7 @@ class DynamicPromptBuilder:
    print(res)

    >> {'llm': {'replies': [ChatMessage(content="Here is the weather forecast for Berlin in the next 5
    days:\\n\\nDay 1: Mostly cloudy with a high of 22°C (72°F) and...so it's always a good idea to check for updates
-   closer to your visit.", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, metadata={'model': 'gpt-3.5-turbo-0613',
+   closer to your visit.", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-3.5-turbo-0613',
    'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 37, 'completion_tokens': 201, 'total_tokens': 238}})]}}
    ```
@@ -126,7 +126,7 @@ class DynamicPromptBuilder:
    "template_variables":{"query": "who's making a greeting?"}}})

    >> {'llm': {'replies': [ChatMessage(content='Haystack', role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
-   >> metadata={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage':
+   >> meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage':
    >> {'prompt_tokens': 51, 'completion_tokens': 2, 'total_tokens': 53}})]}}
    ```
@@ -159,7 +159,7 @@ class DynamicPromptBuilder:
    "template_variables":{"query": "Where does the speaker live?"}}})

    >> {'llm': {'replies': ['The speaker lives in Berlin.'],
-   >> 'metadata': [{'model': 'gpt-3.5-turbo-0613',
+   >> 'meta': [{'model': 'gpt-3.5-turbo-0613',
    >>  'index': 0,
    >>  'finish_reason': 'stop',
    >>  'usage': {'prompt_tokens': 28,


@@ -104,11 +104,11 @@ class AzureOCRDocumentConverter:
             azure_output.append(result.to_dict())

             file_suffix = None
-            if "file_path" in bytestream.metadata:
-                file_suffix = Path(bytestream.metadata["file_path"]).suffix
+            if "file_path" in bytestream.meta:
+                file_suffix = Path(bytestream.meta["file_path"]).suffix

             document = AzureOCRDocumentConverter._convert_azure_result_to_document(result, file_suffix)
-            merged_metadata = {**bytestream.metadata, **metadata}
+            merged_metadata = {**bytestream.meta, **metadata}
             document.meta = merged_metadata
             documents.append(document)


@@ -83,7 +83,7 @@ class HTMLToDocument:
                 logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e)
                 continue

-            merged_metadata = {**bytestream.metadata, **metadata}
+            merged_metadata = {**bytestream.meta, **metadata}
             document = Document(content=text, meta=merged_metadata)
             documents.append(document)


@@ -83,7 +83,7 @@ class MarkdownToDocument:
                 logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e)
                 continue

-            merged_metadata = {**bytestream.metadata, **metadata}
+            merged_metadata = {**bytestream.meta, **metadata}
             document = Document(content=text, meta=merged_metadata)
             documents.append(document)


@@ -111,7 +111,7 @@ class PyPDFToDocument:
                 logger.warning("Could not read %s and convert it to Document, skipping. %s", source, e)
                 continue

-            merged_metadata = {**bytestream.metadata, **metadata}
+            merged_metadata = {**bytestream.meta, **metadata}
             document.meta = merged_metadata
             documents.append(document)


@@ -77,7 +77,7 @@ class TikaDocumentConverter:
                 logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e)
                 continue

-            merged_metadata = {**bytestream.metadata, **metadata}
+            merged_metadata = {**bytestream.meta, **metadata}
             document = Document(content=text, meta=merged_metadata)
             documents.append(document)

         return {"documents": documents}


@@ -63,13 +63,13 @@ class TextFileToDocument:
                 logger.warning("Could not read %s. Skipping it. Error: %s", source, e)
                 continue
             try:
-                encoding = bytestream.metadata.get("encoding", self.encoding)
+                encoding = bytestream.meta.get("encoding", self.encoding)
                 text = bytestream.data.decode(encoding)
             except Exception as e:
                 logger.warning("Could not convert file %s. Skipping it. Error message: %s", source, e)
                 continue

-            merged_metadata = {**bytestream.metadata, **metadata}
+            merged_metadata = {**bytestream.meta, **metadata}
             document = Document(content=text, meta=merged_metadata)
             documents.append(document)


@@ -15,6 +15,6 @@ def get_bytestream_from_source(source: Union[str, Path, ByteStream]) -> ByteStream:
         return source
     if isinstance(source, (str, Path)):
         bs = ByteStream.from_file_path(Path(source))
-        bs.metadata["file_path"] = str(source)
+        bs.meta["file_path"] = str(source)
         return bs
     raise ValueError(f"Unsupported source type {type(source)}")


@@ -118,7 +118,7 @@ class LinkContentFetcher:
         # don't use multithreading if there's only one URL
         if len(urls) == 1:
             stream_metadata, stream = self.fetch(urls[0])
-            stream.metadata.update(stream_metadata)
+            stream.meta.update(stream_metadata)
             streams.append(stream)
         else:
             with ThreadPoolExecutor() as executor:
@@ -126,7 +126,7 @@ class LinkContentFetcher:
             for stream_metadata, stream in results:  # type: ignore
                 if stream_metadata is not None and stream is not None:
-                    stream.metadata.update(stream_metadata)
+                    stream.meta.update(stream_metadata)
                     streams.append(stream)

         return {"streams": streams}


@@ -241,7 +241,7 @@ class HuggingFaceTGIChatGenerator:
             self.streaming_callback(stream_chunk)  # type: ignore # streaming_callback is not None (verified in the run method)

         message = ChatMessage.from_assistant(chunk.generated_text)
-        message.metadata.update(
+        message.meta.update(
             {
                 "finish_reason": chunk.details.finish_reason.value,
                 "index": 0,
@@ -264,7 +264,7 @@ class HuggingFaceTGIChatGenerator:
                 prepared_prompt, details=True, **generation_kwargs
             )
             message = ChatMessage.from_assistant(tgr.generated_text)
-            message.metadata.update(
+            message.meta.update(
                 {
                     "finish_reason": tgr.details.finish_reason.value,
                     "index": _i,


@@ -42,7 +42,7 @@ class GPTChatGenerator:
    >>{'replies': [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence
    >>that focuses on enabling computers to understand, interpret, and generate human language in a way that is
    >>meaningful and useful.', role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
-   >>metadata={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
+   >>meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
    >>'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]}
    ```
@@ -218,7 +218,7 @@ class GPTChatGenerator:
         :param chunks: The list of all chunks returned by the OpenAI API.
         """
         complete_response = ChatMessage.from_assistant("".join([chunk.content for chunk in chunks]))
-        complete_response.metadata.update(
+        complete_response.meta.update(
             {
                 "model": chunk.model,
                 "index": 0,
@@ -239,7 +239,7 @@ class GPTChatGenerator:
         # message.content is str but message.function_call is OpenAIObject but JSON in fact, convert to str
         content = str(message.function_call) if choice.finish_reason == "function_call" else message.content
         chat_message = ChatMessage.from_assistant(content)
-        chat_message.metadata.update(
+        chat_message.meta.update(
             {
                 "model": completion.model,
                 "index": choice.index,
@@ -264,9 +264,7 @@ class GPTChatGenerator:
         else:
             content = ""
         chunk_message = StreamingChunk(content)
-        chunk_message.metadata.update(
-            {"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason}
-        )
+        chunk_message.meta.update({"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason})
         return chunk_message

     def _check_finish_reason(self, message: ChatMessage) -> None:
@@ -275,13 +273,13 @@ class GPTChatGenerator:
         If the `finish_reason` is `length` or `content_filter`, log a warning.

         :param message: The message returned by the LLM.
         """
-        if message.metadata["finish_reason"] == "length":
+        if message.meta["finish_reason"] == "length":
             logger.warning(
                 "The completion for index %s has been truncated before reaching a natural stopping point. "
                 "Increase the max_tokens parameter to allow for longer completions.",
-                message.metadata["index"],
+                message.meta["index"],
             )
-        if message.metadata["finish_reason"] == "content_filter":
+        if message.meta["finish_reason"] == "content_filter":
             logger.warning(
-                "The completion for index %s has been truncated due to the content filter.", message.metadata["index"]
+                "The completion for index %s has been truncated due to the content filter.", message.meta["index"]
             )


@@ -157,7 +157,7 @@ class HuggingFaceTGIGenerator:
         # Don't send URL as it is sensitive information
         return {"model": self.model}

-    @component.output_types(replies=List[str], metadata=List[Dict[str, Any]])
+    @component.output_types(replies=List[str], meta=List[Dict[str, Any]])
     def run(self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None):
         """
         Invoke the text generation inference for the given prompt and generation parameters.
@@ -204,15 +204,15 @@ class HuggingFaceTGIGenerator:
             chunks.append(stream_chunk)
             self.streaming_callback(stream_chunk)  # type: ignore # streaming_callback is not None (verified in the run method)

         metadata = {
-            "finish_reason": chunks[-1].metadata.get("finish_reason", None),
+            "finish_reason": chunks[-1].meta.get("finish_reason", None),
             "model": self.client.model,
             "usage": {
-                "completion_tokens": chunks[-1].metadata.get("generated_tokens", 0),
+                "completion_tokens": chunks[-1].meta.get("generated_tokens", 0),
                 "prompt_tokens": prompt_token_count,
-                "total_tokens": prompt_token_count + chunks[-1].metadata.get("generated_tokens", 0),
+                "total_tokens": prompt_token_count + chunks[-1].meta.get("generated_tokens", 0),
             },
         }
-        return {"replies": ["".join([chunk.content for chunk in chunks])], "metadata": [metadata]}
+        return {"replies": ["".join([chunk.content for chunk in chunks])], "meta": [metadata]}

     def _run_non_streaming(
         self, prompt: str, prompt_token_count: int, num_responses: int, generation_kwargs: Dict[str, Any]
@@ -234,4 +234,4 @@ class HuggingFaceTGIGenerator:
                 }
             )
             responses.append(tgr.generated_text)
-        return {"replies": responses, "metadata": all_metadata}
+        return {"replies": responses, "meta": all_metadata}


@@ -37,7 +37,7 @@ class GPTGenerator:
    >> {'replies': ['Natural Language Processing (NLP) is a branch of artificial intelligence that focuses on
    >> the interaction between computers and human language. It involves enabling computers to understand, interpret,
-   >> and respond to natural human language in a way that is both meaningful and useful.'], 'metadata': [{'model':
+   >> and respond to natural human language in a way that is both meaningful and useful.'], 'meta': [{'model':
    >> 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 16,
    >> 'completion_tokens': 49, 'total_tokens': 65}}]}
    ```
@@ -146,7 +146,7 @@ class GPTGenerator:
         data["init_parameters"]["streaming_callback"] = deserialize_callback_handler(serialized_callback_handler)
         return default_from_dict(cls, data)

-    @component.output_types(replies=List[str], metadata=List[Dict[str, Any]])
+    @component.output_types(replies=List[str], meta=List[Dict[str, Any]])
     def run(self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None):
         """
         Invoke the text generation inference based on the provided messages and generation parameters.
@@ -200,7 +200,7 @@ class GPTGenerator:
         return {
             "replies": [message.content for message in completions],
-            "metadata": [message.metadata for message in completions],
+            "meta": [message.meta for message in completions],
         }

     def _convert_to_openai_format(self, messages: List[ChatMessage]) -> List[Dict[str, Any]]:
@@ -222,7 +222,7 @@ class GPTGenerator:
         Connects the streaming chunks into a single ChatMessage.
         """
         complete_response = ChatMessage.from_assistant("".join([chunk.content for chunk in chunks]))
-        complete_response.metadata.update(
+        complete_response.meta.update(
             {
                 "model": chunk.model,
                 "index": 0,
@@ -242,7 +242,7 @@ class GPTGenerator:
         message: OpenAIObject = choice.message
         content = dict(message.function_call) if choice.finish_reason == "function_call" else message.content
         chat_message = ChatMessage.from_assistant(content)
-        chat_message.metadata.update(
+        chat_message.meta.update(
             {
                 "model": completion.model,
                 "index": choice.index,
@@ -267,9 +267,7 @@ class GPTGenerator:
         else:
             content = ""
         chunk_message = StreamingChunk(content)
-        chunk_message.metadata.update(
-            {"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason}
-        )
+        chunk_message.meta.update({"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason})
         return chunk_message

     def _check_finish_reason(self, message: ChatMessage) -> None:
@@ -278,13 +276,13 @@ class GPTGenerator:
         If the `finish_reason` is `length`, log a warning to the user.

         :param message: The message returned by the LLM.
         """
-        if message.metadata["finish_reason"] == "length":
+        if message.meta["finish_reason"] == "length":
             logger.warning(
                 "The completion for index %s has been truncated before reaching a natural stopping point. "
                 "Increase the max_tokens parameter to allow for longer completions.",
-                message.metadata["index"],
+                message.meta["index"],
             )
-        if message.metadata["finish_reason"] == "content_filter":
+        if message.meta["finish_reason"] == "content_filter":
             logger.warning(
-                "The completion for index %s has been truncated due to the content filter.", message.metadata["index"]
+                "The completion for index %s has been truncated due to the content filter.", message.meta["index"]
             )


@@ -58,7 +58,7 @@ class FileTypeRouter:
             if isinstance(source, Path):
                 mime_type = self.get_mime_type(source)
             elif isinstance(source, ByteStream):
-                mime_type = source.metadata.get("content_type")
+                mime_type = source.meta.get("content_type")
             else:
                 raise ValueError(f"Unsupported data source type: {type(source)}")


@@ -10,7 +10,7 @@ class ByteStream:
     """

     data: bytes
-    metadata: Dict[str, Any] = field(default_factory=dict, hash=False)
+    meta: Dict[str, Any] = field(default_factory=dict, hash=False)
     mime_type: Optional[str] = field(default=None)

     def to_file(self, destination_path: Path):


@@ -20,13 +20,13 @@ class ChatMessage:
     :param content: The text content of the message.
     :param role: The role of the entity sending the message.
     :param name: The name of the function being called (only applicable for role FUNCTION).
-    :param metadata: Additional metadata associated with the message.
+    :param meta: Additional metadata associated with the message.
     """

     content: str
     role: ChatRole
     name: Optional[str]
-    metadata: Dict[str, Any] = field(default_factory=dict, hash=False)
+    meta: Dict[str, Any] = field(default_factory=dict, hash=False)

     def is_from(self, role: ChatRole) -> bool:
         """
@@ -38,15 +38,15 @@ class ChatMessage:
         return self.role == role

     @classmethod
-    def from_assistant(cls, content: str, metadata: Optional[Dict[str, Any]] = None) -> "ChatMessage":
+    def from_assistant(cls, content: str, meta: Optional[Dict[str, Any]] = None) -> "ChatMessage":
         """
         Create a message from the assistant.

         :param content: The text content of the message.
-        :param metadata: Additional metadata associated with the message.
+        :param meta: Additional metadata associated with the message.
         :return: A new ChatMessage instance.
         """
-        return cls(content, ChatRole.ASSISTANT, None, metadata or {})
+        return cls(content, ChatRole.ASSISTANT, None, meta or {})

     @classmethod
     def from_user(cls, content: str) -> "ChatMessage":


@@ -10,8 +10,8 @@ class StreamingChunk:
     streamed data in a systematic manner.

     :param content: The content of the message chunk as a string.
-    :param metadata: A dictionary containing metadata related to the message chunk.
+    :param meta: A dictionary containing metadata related to the message chunk.
     """

     content: str
-    metadata: Dict[str, Any] = field(default_factory=dict, hash=False)
+    meta: Dict[str, Any] = field(default_factory=dict, hash=False)


@@ -67,7 +67,7 @@ class _RAGPipeline:
         self.pipeline.connect("retriever", "prompt_builder.documents")
         self.pipeline.connect("prompt_builder.prompt", "llm.prompt")
         self.pipeline.connect("llm.replies", "answer_builder.replies")
-        self.pipeline.connect("llm.metadata", "answer_builder.metadata")
+        self.pipeline.connect("llm.meta", "answer_builder.meta")
         self.pipeline.connect("retriever", "answer_builder.documents")

     def run(self, query: str) -> Answer:


@@ -0,0 +1,4 @@
+---
+enhancements:
+  - |
+    Rename all metadata references to meta.
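
For downstream code the rename is breaking: every `metadata` field, keyword argument, and output socket shown in the hunks above becomes `meta`. A minimal migration sketch, assuming only the renames visible in this diff (the sample bytes, file name, and reply values are illustrative):

```python
from haystack.dataclasses import ByteStream, ChatMessage

# Before this commit:
#   stream.metadata["file_path"] = "notes.txt"
#   msg = ChatMessage.from_assistant("Hi", metadata={"model": "gpt-3.5-turbo-0613"})

# After this commit, the field and the keyword argument are both called `meta`:
stream = ByteStream(data=b"hello", meta={"file_path": "notes.txt"})
msg = ChatMessage.from_assistant("Hi", meta={"model": "gpt-3.5-turbo-0613"})

# Generator components now expose a `meta` output socket instead of `metadata`,
# so pipeline wiring changes accordingly:
#   pipeline.connect("llm.meta", "answer_builder.meta")
```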


@@ -125,7 +125,7 @@ class TestLocalWhisperTranscriber:
         }

         path = SAMPLES_PATH / "audio" / "this is the content of the document.wav"
         bs = ByteStream.from_file_path(path)
-        bs.metadata["file_path"] = path
+        bs.meta["file_path"] = path
         results = comp.transcribe(sources=[bs])
         expected = Document(
             content="test transcription", meta={"audio_file": path, "other_metadata": ["other", "meta", "data"]}


@@ -210,7 +210,7 @@ class TestRemoteWhisperTranscriber:
         transcriber = RemoteWhisperTranscriber(api_key="test_api_key", model_name=model, response_format="json")

         with open(file_path, "rb") as audio_stream:
             byte_stream = audio_stream.read()
-            audio_file = ByteStream(byte_stream, metadata={"file_path": str(file_path.absolute())})
+            audio_file = ByteStream(byte_stream, meta={"file_path": str(file_path.absolute())})

             result = transcriber.run(sources=[audio_file])


@@ -10,7 +10,7 @@ class TestAnswerBuilder:
     def test_run_unmatching_input_len(self):
         component = AnswerBuilder()
         with pytest.raises(ValueError):
-            component.run(query="query", replies=["reply1"], metadata=[{"test": "meta"}, {"test": "meta2"}])
+            component.run(query="query", replies=["reply1"], meta=[{"test": "meta"}, {"test": "meta2"}])

     def test_run_without_meta(self):
         component = AnswerBuilder()
@@ -24,7 +24,7 @@ class TestAnswerBuilder:
     def test_run_meta_is_an_empty_list(self):
         component = AnswerBuilder()
-        output = component.run(query="query", replies=["reply1"], metadata=[])
+        output = component.run(query="query", replies=["reply1"], meta=[])
         answers = output["answers"]
         assert answers[0].data == "reply1"
         assert answers[0].meta == {}
@@ -34,7 +34,7 @@ class TestAnswerBuilder:
     def test_run_without_pattern(self):
         component = AnswerBuilder()
-        output = component.run(query="test query", replies=["Answer: AnswerString"], metadata=[{}])
+        output = component.run(query="test query", replies=["Answer: AnswerString"], meta=[{}])
         answers = output["answers"]
         assert len(answers) == 1
         assert answers[0].data == "Answer: AnswerString"
@@ -45,7 +45,7 @@ class TestAnswerBuilder:
     def test_run_with_pattern_with_capturing_group(self):
         component = AnswerBuilder(pattern=r"Answer: (.*)")
-        output = component.run(query="test query", replies=["Answer: AnswerString"], metadata=[{}])
+        output = component.run(query="test query", replies=["Answer: AnswerString"], meta=[{}])
         answers = output["answers"]
         assert len(answers) == 1
         assert answers[0].data == "AnswerString"
@@ -56,7 +56,7 @@ class TestAnswerBuilder:
     def test_run_with_pattern_without_capturing_group(self):
         component = AnswerBuilder(pattern=r"'.*'")
-        output = component.run(query="test query", replies=["Answer: 'AnswerString'"], metadata=[{}])
+        output = component.run(query="test query", replies=["Answer: 'AnswerString'"], meta=[{}])
         answers = output["answers"]
         assert len(answers) == 1
         assert answers[0].data == "'AnswerString'"
@@ -71,9 +71,7 @@ class TestAnswerBuilder:
     def test_run_with_pattern_set_at_runtime(self):
         component = AnswerBuilder(pattern="unused pattern")
-        output = component.run(
-            query="test query", replies=["Answer: AnswerString"], metadata=[{}], pattern=r"Answer: (.*)"
-        )
+        output = component.run(query="test query", replies=["Answer: AnswerString"], meta=[{}], pattern=r"Answer: (.*)")
         answers = output["answers"]
         assert len(answers) == 1
         assert answers[0].data == "AnswerString"
@@ -87,7 +85,7 @@ class TestAnswerBuilder:
         output = component.run(
             query="test query",
             replies=["Answer: AnswerString"],
-            metadata=[{}],
+            meta=[{}],
             documents=[Document(content="test doc 1"), Document(content="test doc 2")],
         )
         answers = output["answers"]
@@ -104,7 +102,7 @@ class TestAnswerBuilder:
         output = component.run(
             query="test query",
             replies=["Answer: AnswerString[2]"],
-            metadata=[{}],
+            meta=[{}],
             documents=[Document(content="test doc 1"), Document(content="test doc 2")],
         )
         answers = output["answers"]
@@ -121,7 +119,7 @@ class TestAnswerBuilder:
         output = component.run(
             query="test query",
             replies=["Answer: AnswerString[3]"],
-            metadata=[{}],
+            meta=[{}],
             documents=[Document(content="test doc 1"), Document(content="test doc 2")],
         )
         answers = output["answers"]
@@ -137,7 +135,7 @@ class TestAnswerBuilder:
         output = component.run(
             query="test query",
             replies=["Answer: AnswerString[2][3]"],
-            metadata=[{}],
+            meta=[{}],
             documents=[Document(content="test doc 1"), Document(content="test doc 2"), Document(content="test doc 3")],
             reference_pattern="\\[(\\d+)\\]",
         )


@@ -45,7 +45,7 @@ class TestAzureOCRDocumentConverter:
         }

     def test_run_with_meta(self):
-        bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+        bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

         with patch("haystack.components.converters.azure.DocumentAnalysisClient"):
             component = AzureOCRDocumentConverter(endpoint="test_endpoint", api_key="test_credential_key")


@@ -63,7 +63,7 @@ class TestHTMLToDocument:
         converter = HTMLToDocument()

         with open(test_files_path / "html" / "what_is_haystack.html", "rb") as file:
             byte_stream = file.read()
-            stream = ByteStream(byte_stream, metadata={"content_type": "text/html", "url": "test_url"})
+            stream = ByteStream(byte_stream, meta={"content_type": "text/html", "url": "test_url"})

         results = converter.run(sources=[stream])
         docs = results["documents"]
@@ -81,7 +81,7 @@ class TestHTMLToDocument:
         converter = HTMLToDocument()

         with open(test_files_path / "html" / "what_is_haystack.html", "rb") as file:
             byte_stream = file.read()
-            stream = ByteStream(byte_stream, metadata={"content_type": "text/html", "url": "test_url"})
+            stream = ByteStream(byte_stream, meta={"content_type": "text/html", "url": "test_url"})

         metadata = [{"file_name": "what_is_haystack.html"}]
         results = converter.run(sources=[stream], meta=metadata)
@@ -103,7 +103,7 @@ class TestHTMLToDocument:
         with open(test_files_path / "html" / "what_is_haystack.html", "rb") as file:
             byte_stream = file.read()
             # ByteStream has "url" present in metadata
-            stream = ByteStream(byte_stream, metadata={"content_type": "text/html", "url": "test_url_correct"})
+            stream = ByteStream(byte_stream, meta={"content_type": "text/html", "url": "test_url_correct"})

         # "url" supplied by the user overwrites value present in metadata
         metadata = [{"file_name": "what_is_haystack.html", "url": "test_url_new"}]


@@ -32,7 +32,7 @@ class TestMarkdownToDocument:
             assert "# git clone https://github.com/deepset-ai/haystack.git" in doc.content

     def test_run_with_meta(self):
-        bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+        bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

         converter = MarkdownToDocument()


@@ -30,7 +30,7 @@ class TestPyPDFToDocument:
         assert "ReAct" in docs[0].content

     def test_run_with_meta(self):
-        bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+        bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

         converter = PyPDFToDocument()
         with patch("haystack.components.converters.pypdf.PdfReader"):


@@ -14,8 +14,8 @@ class TestTextfileToDocument:
         Test if the component runs correctly.
         """
         bytestream = ByteStream.from_file_path(test_files_path / "txt" / "doc_3.txt")
-        bytestream.metadata["file_path"] = str(test_files_path / "txt" / "doc_3.txt")
-        bytestream.metadata["key"] = "value"
+        bytestream.meta["file_path"] = str(test_files_path / "txt" / "doc_3.txt")
+        bytestream.meta["key"] = "value"
         files = [str(test_files_path / "txt" / "doc_1.txt"), test_files_path / "txt" / "doc_2.txt", bytestream]
         converter = TextFileToDocument()
         output = converter.run(sources=files)
@@ -26,7 +26,7 @@ class TestTextfileToDocument:
         assert "That's yet another file!" in docs[2].content
         assert docs[0].meta["file_path"] == str(files[0])
         assert docs[1].meta["file_path"] == str(files[1])
-        assert docs[2].meta == bytestream.metadata
+        assert docs[2].meta == bytestream.meta

     def test_run_error_handling(self, test_files_path, caplog):
         """
@@ -47,18 +47,18 @@ class TestTextfileToDocument:
         Test if the encoding metadata field is used properly
         """
         bytestream = ByteStream.from_file_path(test_files_path / "txt" / "doc_1.txt")
-        bytestream.metadata["key"] = "value"
+        bytestream.meta["key"] = "value"

         converter = TextFileToDocument(encoding="utf-16")
         output = converter.run(sources=[bytestream])
         assert "Some text for testing." not in output["documents"][0].content

-        bytestream.metadata["encoding"] = "utf-8"
+        bytestream.meta["encoding"] = "utf-8"
         output = converter.run(sources=[bytestream])
         assert "Some text for testing." in output["documents"][0].content

     def test_run_with_meta(self):
-        bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+        bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

         converter = TextFileToDocument()


@@ -19,7 +19,7 @@ class TestTikaDocumentConverter:
         assert documents[0].content == "Content of mock source"

     def test_run_with_meta(self):
-        bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+        bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

         converter = TikaDocumentConverter()
         with patch("haystack.components.converters.tika.tika_parser.from_buffer"):


@@ -67,7 +67,7 @@ class TestLinkContentFetcher:
         streams = fetcher.run(urls=["https://www.example.com"])["streams"]
         first_stream = streams[0]
         assert first_stream.data == correct_response
-        assert first_stream.metadata["content_type"] == "text/plain"
+        assert first_stream.meta["content_type"] == "text/plain"

     def test_run_html(self):
         correct_response = b"<h1>Example test response</h1>"
@@ -79,7 +79,7 @@ class TestLinkContentFetcher:
         streams = fetcher.run(urls=["https://www.example.com"])["streams"]
         first_stream = streams[0]
         assert first_stream.data == correct_response
-        assert first_stream.metadata["content_type"] == "text/html"
+        assert first_stream.meta["content_type"] == "text/html"

     def test_run_binary(self, test_files_path):
         file_bytes = open(test_files_path / "pdf" / "sample_pdf_1.pdf", "rb").read()
@@ -91,7 +91,7 @@ class TestLinkContentFetcher:
         streams = fetcher.run(urls=["https://www.example.com"])["streams"]
         first_stream = streams[0]
         assert first_stream.data == file_bytes
-        assert first_stream.metadata["content_type"] == "application/pdf"
+        assert first_stream.meta["content_type"] == "application/pdf"

     def test_run_bad_status_code(self):
         empty_byte_stream = b""
@@ -105,7 +105,7 @@ class TestLinkContentFetcher:
         assert len(streams) == 1
         first_stream = streams[0]
         assert first_stream.data == empty_byte_stream
-        assert first_stream.metadata["content_type"] == "text/html"
+        assert first_stream.meta["content_type"] == "text/html"

     @pytest.mark.integration
     def test_link_content_fetcher_html(self):
@@ -113,8 +113,8 @@ class TestLinkContentFetcher:
         streams = fetcher.run([HTML_URL])["streams"]
         first_stream = streams[0]
         assert "Haystack" in first_stream.data.decode("utf-8")
-        assert first_stream.metadata["content_type"] == "text/html"
-        assert "url" in first_stream.metadata and first_stream.metadata["url"] == HTML_URL
+        assert first_stream.meta["content_type"] == "text/html"
+        assert "url" in first_stream.meta and first_stream.meta["url"] == HTML_URL

     @pytest.mark.integration
     def test_link_content_fetcher_text(self):
@@ -122,8 +122,8 @@ class TestLinkContentFetcher:
         streams = fetcher.run([TEXT_URL])["streams"]
         first_stream = streams[0]
         assert "Haystack" in first_stream.data.decode("utf-8")
-        assert first_stream.metadata["content_type"] == "text/plain"
-        assert "url" in first_stream.metadata and first_stream.metadata["url"] == TEXT_URL
+        assert first_stream.meta["content_type"] == "text/plain"
+        assert "url" in first_stream.meta and first_stream.meta["url"] == TEXT_URL

     @pytest.mark.integration
     def test_link_content_fetcher_pdf(self):
@@ -131,8 +131,8 @@ class TestLinkContentFetcher:
         streams = fetcher.run([PDF_URL])["streams"]
         assert len(streams) == 1
         first_stream = streams[0]
-        assert first_stream.metadata["content_type"] in ("application/octet-stream", "application/pdf")
-        assert "url" in first_stream.metadata and first_stream.metadata["url"] == PDF_URL
+        assert first_stream.meta["content_type"] in ("application/octet-stream", "application/pdf")
+        assert "url" in first_stream.meta and first_stream.meta["url"] == PDF_URL

     @pytest.mark.integration
     def test_link_content_fetcher_multiple_different_content_types(self):
@@ -143,10 +143,10 @@ class TestLinkContentFetcher:
         streams = fetcher.run([PDF_URL, HTML_URL])["streams"]
         assert len(streams) == 2
         for stream in streams:
-            assert stream.metadata["content_type"] in ("text/html", "application/pdf", "application/octet-stream")
-            if stream.metadata["content_type"] == "text/html":
+            assert stream.meta["content_type"] in ("text/html", "application/pdf", "application/octet-stream")
+            if stream.meta["content_type"] == "text/html":
                 assert "Haystack" in stream.data.decode("utf-8")
-            elif stream.metadata["content_type"] == "application/pdf":
+            elif stream.meta["content_type"] == "application/pdf":
                 assert len(stream.data) > 0

     @pytest.mark.integration
@@ -160,10 +160,10 @@ class TestLinkContentFetcher:
         streams = fetcher.run([PDF_URL, HTML_URL, "https://google.com"])["streams"]
         assert len(streams) == 3
         for stream in streams:
-            assert stream.metadata["content_type"] in ("text/html", "application/pdf", "application/octet-stream")
-            if stream.metadata["content_type"] == "text/html":
+            assert stream.meta["content_type"] in ("text/html", "application/pdf", "application/octet-stream")
+            if stream.meta["content_type"] == "text/html":
                 assert "Haystack" in stream.data.decode("utf-8") or "Google" in stream.data.decode("utf-8")
-            elif stream.metadata["content_type"] == "application/pdf":
+            elif stream.meta["content_type"] == "application/pdf":
                 assert len(stream.data) > 0

     @pytest.mark.integration
@@ -177,7 +177,7 @@ class TestLinkContentFetcher:
         result = fetcher.run(["https://non_existent_website_dot.com/", "https://www.google.com/"])
         assert len(result["streams"]) == 1
         first_stream = result["streams"][0]
-        assert first_stream.metadata["content_type"] == "text/html"
+        assert first_stream.meta["content_type"] == "text/html"

     @pytest.mark.integration
     def test_bad_request_exception_raised(self):


@@ -241,7 +241,7 @@ class TestGPTChatGenerator:
         component = GPTChatGenerator(api_key="test-api-key")
         messages = [
             ChatMessage.from_assistant(
-                "", metadata={"finish_reason": "content_filter" if i % 2 == 0 else "length", "index": i}
+                "", meta={"finish_reason": "content_filter" if i % 2 == 0 else "length", "index": i}
             )
             for i, _ in enumerate(range(4))
         ]


@@ -124,11 +124,11 @@ class TestHuggingFaceTGIGenerator:
         assert isinstance(response, dict)
         assert "replies" in response
-        assert "metadata" in response
+        assert "meta" in response
         assert isinstance(response["replies"], list)
-        assert isinstance(response["metadata"], list)
+        assert isinstance(response["meta"], list)
         assert len(response["replies"]) == 1
-        assert len(response["metadata"]) == 1
+        assert len(response["meta"]) == 1
         assert [isinstance(reply, str) for reply in response["replies"]]

     def test_generate_multiple_text_responses_with_valid_prompt_and_generation_parameters(
@@ -157,14 +157,14 @@ class TestHuggingFaceTGIGenerator:
         assert isinstance(response, dict)
         assert "replies" in response
-        assert "metadata" in response
+        assert "meta" in response
         assert isinstance(response["replies"], list)
         assert [isinstance(reply, str) for reply in response["replies"]]
-        assert isinstance(response["metadata"], list)
+        assert isinstance(response["meta"], list)
         assert len(response["replies"]) == 3
-        assert len(response["metadata"]) == 3
-        assert [isinstance(reply, dict) for reply in response["metadata"]]
+        assert len(response["meta"]) == 3
+        assert [isinstance(reply, dict) for reply in response["meta"]]

     def test_initialize_with_invalid_model(self, mock_check_valid_model):
         model = "invalid_model"
@@ -200,9 +200,9 @@ class TestHuggingFaceTGIGenerator:
         assert [isinstance(reply, str) for reply in response["replies"]]

         # Assert that the response contains the metadata
-        assert "metadata" in response
-        assert isinstance(response["metadata"], list)
-        assert len(response["metadata"]) > 0
+        assert "meta" in response
+        assert isinstance(response["meta"], list)
+        assert len(response["meta"]) > 0
         assert [isinstance(reply, dict) for reply in response["replies"]]

     def test_generate_text_with_custom_generation_parameters(
@@ -226,9 +226,9 @@ class TestHuggingFaceTGIGenerator:
         assert response["replies"][0] == "I'm fine, thanks."

         # Assert that the response contains the metadata
-        assert "metadata" in response
-        assert isinstance(response["metadata"], list)
-        assert len(response["metadata"]) > 0
+        assert "meta" in response
+        assert isinstance(response["meta"], list)
+        assert len(response["meta"]) > 0
         assert [isinstance(reply, str) for reply in response["replies"]]

     def test_generate_text_with_streaming_callback(
@@ -278,7 +278,7 @@ class TestHuggingFaceTGIGenerator:
         assert [isinstance(reply, str) for reply in response["replies"]]

         # Assert that the response contains the metadata
-        assert "metadata" in response
-        assert isinstance(response["metadata"], list)
-        assert len(response["metadata"]) > 0
+        assert "meta" in response
+        assert isinstance(response["meta"], list)
+        assert len(response["meta"]) > 0
         assert [isinstance(reply, dict) for reply in response["replies"]]


@@ -242,7 +242,7 @@ class TestGPTGenerator:
         for i, _ in enumerate(range(4)):
             message = ChatMessage.from_assistant("Hello")
             metadata = {"finish_reason": "content_filter" if i % 2 == 0 else "length", "index": i}
-            message.metadata.update(metadata)
+            message.meta.update(metadata)
             messages.append(message)

         for m in messages:


@@ -46,13 +46,13 @@ class TestFileTypeRouter:
         for path, mime_type in zip(file_paths, mime_types):
             stream = ByteStream(path.read_bytes())
-            stream.metadata["content_type"] = mime_type
+            stream.meta["content_type"] = mime_type

             byte_streams.append(stream)

         # add unclassified ByteStream
         bs = ByteStream(b"unclassified content")
-        bs.metadata["content_type"] = "unknown_type"
+        bs.meta["content_type"] = "unknown_type"
         byte_streams.append(bs)

         router = FileTypeRouter(mime_types=["text/plain", "audio/x-wav", "image/jpeg"])
@@ -75,7 +75,7 @@ class TestFileTypeRouter:
         byte_stream_sources = []
         for path, mime_type in zip(file_paths, mime_types):
             stream = ByteStream(path.read_bytes())
-            stream.metadata["content_type"] = mime_type
+            stream.meta["content_type"] = mime_type
             byte_stream_sources.append(stream)

         mixed_sources = file_paths[:2] + byte_stream_sources[2:]


@@ -4,25 +4,25 @@ from haystack.dataclasses import StreamingChunk


 def test_create_chunk_with_content_and_metadata():
-    chunk = StreamingChunk(content="Test content", metadata={"key": "value"})
+    chunk = StreamingChunk(content="Test content", meta={"key": "value"})

     assert chunk.content == "Test content"
-    assert chunk.metadata == {"key": "value"}
+    assert chunk.meta == {"key": "value"}


 def test_create_chunk_with_only_content():
     chunk = StreamingChunk(content="Test content")

     assert chunk.content == "Test content"
-    assert chunk.metadata == {}
+    assert chunk.meta == {}


 def test_access_content():
-    chunk = StreamingChunk(content="Test content", metadata={"key": "value"})
+    chunk = StreamingChunk(content="Test content", meta={"key": "value"})
     assert chunk.content == "Test content"


 def test_create_chunk_with_empty_content():
     chunk = StreamingChunk(content="")
     assert chunk.content == ""
-    assert chunk.metadata == {}
+    assert chunk.meta == {}