changed metadata to meta (#6605)

sahusiddharth authored 2023-12-21 17:09:58 +05:30 · committed by GitHub
parent fc88ef7076
commit 3d17e6ff76
37 changed files with 138 additions and 140 deletions


@@ -134,16 +134,16 @@ class LocalWhisperTranscriber:
         if not isinstance(source, ByteStream):
             path = Path(source)
             source = ByteStream.from_file_path(path)
-            source.metadata["file_path"] = path
+            source.meta["file_path"] = path
         else:
             # If we received a ByteStream instance that doesn't have the "file_path" metadata set,
             # we dump the bytes into a temporary file.
-            path = source.metadata.get("file_path")
+            path = source.meta.get("file_path")
             if path is None:
                 fp = tempfile.NamedTemporaryFile(delete=False)
                 path = Path(fp.name)
                 source.to_file(path)
-                source.metadata["file_path"] = path
+                source.meta["file_path"] = path

         transcription = self._model.transcribe(str(path), **kwargs)
         if not return_segments:


@@ -129,13 +129,13 @@ class RemoteWhisperTranscriber:
         if not isinstance(source, ByteStream):
             path = source
             source = ByteStream.from_file_path(Path(source))
-            source.metadata["file_path"] = path
+            source.meta["file_path"] = path

         file = io.BytesIO(source.data)
-        file.name = str(source.metadata["file_path"]) if "file_path" in source.metadata else "__fallback__.wav"
+        file.name = str(source.meta["file_path"]) if "file_path" in source.meta else "__fallback__.wav"

         content = openai.Audio.transcribe(file=file, model=self.model_name, **self.whisper_params)
-        doc = Document(content=content["text"], meta=source.metadata)
+        doc = Document(content=content["text"], meta=source.meta)
         documents.append(doc)

     return {"documents": documents}


@@ -42,7 +42,7 @@ class AnswerBuilder:
         self,
         query: str,
         replies: List[str],
-        metadata: Optional[List[Dict[str, Any]]] = None,
+        meta: Optional[List[Dict[str, Any]]] = None,
         documents: Optional[List[Document]] = None,
         pattern: Optional[str] = None,
         reference_pattern: Optional[str] = None,
@@ -52,7 +52,7 @@ class AnswerBuilder:
         :param query: The query used in the prompts for the Generator as a string.
         :param replies: The output of the Generator. A list of strings.
-        :param metadata: The metadata returned by the Generator. An optional list of dictionaries. If not specified,
+        :param meta: The metadata returned by the Generator. An optional list of dictionaries. If not specified,
                          the generated answer will contain no metadata.
         :param documents: The documents used as input to the Generator. A list of `Document` objects. If
                           `documents` are specified, they are added to the `Answer` objects.
@@ -74,10 +74,10 @@ class AnswerBuilder:
                           If not specified, no parsing is done, and all documents are referenced.
                           Default: `None`.
         """
-        if not metadata:
-            metadata = [{}] * len(replies)
-        elif len(replies) != len(metadata):
-            raise ValueError(f"Number of replies ({len(replies)}), and metadata ({len(metadata)}) must match.")
+        if not meta:
+            meta = [{}] * len(replies)
+        elif len(replies) != len(meta):
+            raise ValueError(f"Number of replies ({len(replies)}), and metadata ({len(meta)}) must match.")

         if pattern:
             AnswerBuilder._check_num_groups_in_regex(pattern)
@@ -86,7 +86,7 @@ class AnswerBuilder:
         reference_pattern = reference_pattern or self.reference_pattern

         all_answers = []
-        for reply, meta in zip(replies, metadata):
+        for reply, metadata in zip(replies, meta):
             referenced_docs = []
             if documents:
                 reference_idxs = []
@@ -102,7 +102,7 @@ class AnswerBuilder:
                     logger.warning("Document index '%s' referenced in Generator output is out of range. ", idx + 1)

             answer_string = AnswerBuilder._extract_answer_string(reply, pattern)
-            answer = GeneratedAnswer(data=answer_string, query=query, documents=referenced_docs, meta=meta)
+            answer = GeneratedAnswer(data=answer_string, query=query, documents=referenced_docs, meta=metadata)
             all_answers.append(answer)

         return {"answers": all_answers}


@@ -53,7 +53,7 @@ class DynamicPromptBuilder:
    >> {'llm': {'replies': [ChatMessage(content="Berlin is the capital city of Germany and one of the most vibrant
    and diverse cities in Europe. Here are some key things to know...Enjoy your time exploring the vibrant and dynamic
-   capital of Germany!", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, metadata={'model': 'gpt-3.5-turbo-0613',
+   capital of Germany!", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-3.5-turbo-0613',
    'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 27, 'completion_tokens': 681, 'total_tokens': 708}})]}}
@@ -65,7 +65,7 @@ class DynamicPromptBuilder:
    print(res)

    >> {'llm': {'replies': [ChatMessage(content="Here is the weather forecast for Berlin in the next 5
    days:\\n\\nDay 1: Mostly cloudy with a high of 22°C (72°F) and...so it's always a good idea to check for updates
-   closer to your visit.", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, metadata={'model': 'gpt-3.5-turbo-0613',
+   closer to your visit.", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-3.5-turbo-0613',
    'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 37, 'completion_tokens': 201, 'total_tokens': 238}})]}}
    ```
@@ -126,7 +126,7 @@ class DynamicPromptBuilder:
    "template_variables":{"query": "who's making a greeting?"}}})

    >> {'llm': {'replies': [ChatMessage(content='Haystack', role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
-   >> metadata={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage':
+   >> meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage':
    >> {'prompt_tokens': 51, 'completion_tokens': 2, 'total_tokens': 53}})]}}
    ```
@@ -159,7 +159,7 @@ class DynamicPromptBuilder:
    "template_variables":{"query": "Where does the speaker live?"}}})

    >> {'llm': {'replies': ['The speaker lives in Berlin.'],
-   >> 'metadata': [{'model': 'gpt-3.5-turbo-0613',
+   >> 'meta': [{'model': 'gpt-3.5-turbo-0613',
    >>  'index': 0,
    >>  'finish_reason': 'stop',
    >>  'usage': {'prompt_tokens': 28,


@@ -104,11 +104,11 @@ class AzureOCRDocumentConverter:
             azure_output.append(result.to_dict())

             file_suffix = None
-            if "file_path" in bytestream.metadata:
-                file_suffix = Path(bytestream.metadata["file_path"]).suffix
+            if "file_path" in bytestream.meta:
+                file_suffix = Path(bytestream.meta["file_path"]).suffix

             document = AzureOCRDocumentConverter._convert_azure_result_to_document(result, file_suffix)
-            merged_metadata = {**bytestream.metadata, **metadata}
+            merged_metadata = {**bytestream.meta, **metadata}
             document.meta = merged_metadata
             documents.append(document)


@@ -83,7 +83,7 @@ class HTMLToDocument:
                 logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e)
                 continue

-            merged_metadata = {**bytestream.metadata, **metadata}
+            merged_metadata = {**bytestream.meta, **metadata}
             document = Document(content=text, meta=merged_metadata)
             documents.append(document)


@@ -83,7 +83,7 @@ class MarkdownToDocument:
                 logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e)
                 continue

-            merged_metadata = {**bytestream.metadata, **metadata}
+            merged_metadata = {**bytestream.meta, **metadata}
             document = Document(content=text, meta=merged_metadata)
             documents.append(document)


@@ -111,7 +111,7 @@ class PyPDFToDocument:
                 logger.warning("Could not read %s and convert it to Document, skipping. %s", source, e)
                 continue

-            merged_metadata = {**bytestream.metadata, **metadata}
+            merged_metadata = {**bytestream.meta, **metadata}
             document.meta = merged_metadata
             documents.append(document)


@@ -77,7 +77,7 @@ class TikaDocumentConverter:
                 logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e)
                 continue

-            merged_metadata = {**bytestream.metadata, **metadata}
+            merged_metadata = {**bytestream.meta, **metadata}
             document = Document(content=text, meta=merged_metadata)
             documents.append(document)

         return {"documents": documents}


@@ -63,13 +63,13 @@ class TextFileToDocument:
                 logger.warning("Could not read %s. Skipping it. Error: %s", source, e)
                 continue
             try:
-                encoding = bytestream.metadata.get("encoding", self.encoding)
+                encoding = bytestream.meta.get("encoding", self.encoding)
                 text = bytestream.data.decode(encoding)
             except Exception as e:
                 logger.warning("Could not convert file %s. Skipping it. Error message: %s", source, e)
                 continue

-            merged_metadata = {**bytestream.metadata, **metadata}
+            merged_metadata = {**bytestream.meta, **metadata}
             document = Document(content=text, meta=merged_metadata)
             documents.append(document)


@@ -15,6 +15,6 @@ def get_bytestream_from_source(source: Union[str, Path, ByteStream]) -> ByteStream:
         return source
     if isinstance(source, (str, Path)):
         bs = ByteStream.from_file_path(Path(source))
-        bs.metadata["file_path"] = str(source)
+        bs.meta["file_path"] = str(source)
         return bs
     raise ValueError(f"Unsupported source type {type(source)}")


@@ -118,7 +118,7 @@ class LinkContentFetcher:
         # don't use multithreading if there's only one URL
         if len(urls) == 1:
             stream_metadata, stream = self.fetch(urls[0])
-            stream.metadata.update(stream_metadata)
+            stream.meta.update(stream_metadata)
             streams.append(stream)
         else:
             with ThreadPoolExecutor() as executor:
@@ -126,7 +126,7 @@ class LinkContentFetcher:
             for stream_metadata, stream in results:  # type: ignore
                 if stream_metadata is not None and stream is not None:
-                    stream.metadata.update(stream_metadata)
+                    stream.meta.update(stream_metadata)
                     streams.append(stream)

         return {"streams": streams}


@@ -241,7 +241,7 @@ class HuggingFaceTGIChatGenerator:
             self.streaming_callback(stream_chunk)  # type: ignore # streaming_callback is not None (verified in the run method)

         message = ChatMessage.from_assistant(chunk.generated_text)
-        message.metadata.update(
+        message.meta.update(
             {
                 "finish_reason": chunk.details.finish_reason.value,
                 "index": 0,
@@ -264,7 +264,7 @@ class HuggingFaceTGIChatGenerator:
                 prepared_prompt, details=True, **generation_kwargs
             )
             message = ChatMessage.from_assistant(tgr.generated_text)
-            message.metadata.update(
+            message.meta.update(
                 {
                     "finish_reason": tgr.details.finish_reason.value,
                     "index": _i,


@@ -42,7 +42,7 @@ class GPTChatGenerator:
    >>{'replies': [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence
    >>that focuses on enabling computers to understand, interpret, and generate human language in a way that is
    >>meaningful and useful.', role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
-   >>metadata={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
+   >>meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
    >>'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]}
    ```
@@ -218,7 +218,7 @@ class GPTChatGenerator:
         :param chunks: The list of all chunks returned by the OpenAI API.
         """
         complete_response = ChatMessage.from_assistant("".join([chunk.content for chunk in chunks]))
-        complete_response.metadata.update(
+        complete_response.meta.update(
             {
                 "model": chunk.model,
                 "index": 0,
@@ -239,7 +239,7 @@ class GPTChatGenerator:
         # message.content is str but message.function_call is OpenAIObject but JSON in fact, convert to str
         content = str(message.function_call) if choice.finish_reason == "function_call" else message.content
         chat_message = ChatMessage.from_assistant(content)
-        chat_message.metadata.update(
+        chat_message.meta.update(
             {
                 "model": completion.model,
                 "index": choice.index,
@@ -264,9 +264,7 @@ class GPTChatGenerator:
         else:
             content = ""
         chunk_message = StreamingChunk(content)
-        chunk_message.metadata.update(
-            {"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason}
-        )
+        chunk_message.meta.update({"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason})
         return chunk_message

     def _check_finish_reason(self, message: ChatMessage) -> None:
@@ -275,13 +273,13 @@ class GPTChatGenerator:
         If the `finish_reason` is `length` or `content_filter`, log a warning.

         :param message: The message returned by the LLM.
         """
-        if message.metadata["finish_reason"] == "length":
+        if message.meta["finish_reason"] == "length":
             logger.warning(
                 "The completion for index %s has been truncated before reaching a natural stopping point. "
                 "Increase the max_tokens parameter to allow for longer completions.",
-                message.metadata["index"],
+                message.meta["index"],
             )
-        if message.metadata["finish_reason"] == "content_filter":
+        if message.meta["finish_reason"] == "content_filter":
             logger.warning(
-                "The completion for index %s has been truncated due to the content filter.", message.metadata["index"]
+                "The completion for index %s has been truncated due to the content filter.", message.meta["index"]
             )


@@ -157,7 +157,7 @@ class HuggingFaceTGIGenerator:
         # Don't send URL as it is sensitive information
         return {"model": self.model}

-    @component.output_types(replies=List[str], metadata=List[Dict[str, Any]])
+    @component.output_types(replies=List[str], meta=List[Dict[str, Any]])
     def run(self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None):
         """
         Invoke the text generation inference for the given prompt and generation parameters.
@@ -204,15 +204,15 @@ class HuggingFaceTGIGenerator:
             chunks.append(stream_chunk)
             self.streaming_callback(stream_chunk)  # type: ignore # streaming_callback is not None (verified in the run method)

         metadata = {
-            "finish_reason": chunks[-1].metadata.get("finish_reason", None),
+            "finish_reason": chunks[-1].meta.get("finish_reason", None),
             "model": self.client.model,
             "usage": {
-                "completion_tokens": chunks[-1].metadata.get("generated_tokens", 0),
+                "completion_tokens": chunks[-1].meta.get("generated_tokens", 0),
                 "prompt_tokens": prompt_token_count,
-                "total_tokens": prompt_token_count + chunks[-1].metadata.get("generated_tokens", 0),
+                "total_tokens": prompt_token_count + chunks[-1].meta.get("generated_tokens", 0),
             },
         }
-        return {"replies": ["".join([chunk.content for chunk in chunks])], "metadata": [metadata]}
+        return {"replies": ["".join([chunk.content for chunk in chunks])], "meta": [metadata]}

     def _run_non_streaming(
         self, prompt: str, prompt_token_count: int, num_responses: int, generation_kwargs: Dict[str, Any]
@@ -234,4 +234,4 @@ class HuggingFaceTGIGenerator:
                 }
             )
             responses.append(tgr.generated_text)
-        return {"replies": responses, "metadata": all_metadata}
+        return {"replies": responses, "meta": all_metadata}


@@ -37,7 +37,7 @@ class GPTGenerator:
    >> {'replies': ['Natural Language Processing (NLP) is a branch of artificial intelligence that focuses on
    >> the interaction between computers and human language. It involves enabling computers to understand, interpret,
-   >> and respond to natural human language in a way that is both meaningful and useful.'], 'metadata': [{'model':
+   >> and respond to natural human language in a way that is both meaningful and useful.'], 'meta': [{'model':
    >> 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 16,
    >> 'completion_tokens': 49, 'total_tokens': 65}}]}
    ```
@@ -146,7 +146,7 @@ class GPTGenerator:
         data["init_parameters"]["streaming_callback"] = deserialize_callback_handler(serialized_callback_handler)
         return default_from_dict(cls, data)

-    @component.output_types(replies=List[str], metadata=List[Dict[str, Any]])
+    @component.output_types(replies=List[str], meta=List[Dict[str, Any]])
     def run(self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None):
         """
         Invoke the text generation inference based on the provided messages and generation parameters.
@@ -200,7 +200,7 @@ class GPTGenerator:
         return {
             "replies": [message.content for message in completions],
-            "metadata": [message.metadata for message in completions],
+            "meta": [message.meta for message in completions],
         }

     def _convert_to_openai_format(self, messages: List[ChatMessage]) -> List[Dict[str, Any]]:
@@ -222,7 +222,7 @@ class GPTGenerator:
         Connects the streaming chunks into a single ChatMessage.
         """
         complete_response = ChatMessage.from_assistant("".join([chunk.content for chunk in chunks]))
-        complete_response.metadata.update(
+        complete_response.meta.update(
             {
                 "model": chunk.model,
                 "index": 0,
@@ -242,7 +242,7 @@ class GPTGenerator:
         message: OpenAIObject = choice.message
         content = dict(message.function_call) if choice.finish_reason == "function_call" else message.content
         chat_message = ChatMessage.from_assistant(content)
-        chat_message.metadata.update(
+        chat_message.meta.update(
             {
                 "model": completion.model,
                 "index": choice.index,
@@ -267,9 +267,7 @@ class GPTGenerator:
         else:
             content = ""
         chunk_message = StreamingChunk(content)
-        chunk_message.metadata.update(
-            {"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason}
-        )
+        chunk_message.meta.update({"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason})
         return chunk_message

     def _check_finish_reason(self, message: ChatMessage) -> None:
@@ -278,13 +276,13 @@ class GPTGenerator:
         If the `finish_reason` is `length`, log a warning to the user.

         :param message: The message returned by the LLM.
         """
-        if message.metadata["finish_reason"] == "length":
+        if message.meta["finish_reason"] == "length":
             logger.warning(
                 "The completion for index %s has been truncated before reaching a natural stopping point. "
                 "Increase the max_tokens parameter to allow for longer completions.",
-                message.metadata["index"],
+                message.meta["index"],
             )
-        if message.metadata["finish_reason"] == "content_filter":
+        if message.meta["finish_reason"] == "content_filter":
             logger.warning(
-                "The completion for index %s has been truncated due to the content filter.", message.metadata["index"]
+                "The completion for index %s has been truncated due to the content filter.", message.meta["index"]
             )


@@ -58,7 +58,7 @@ class FileTypeRouter:
             if isinstance(source, Path):
                 mime_type = self.get_mime_type(source)
             elif isinstance(source, ByteStream):
-                mime_type = source.metadata.get("content_type")
+                mime_type = source.meta.get("content_type")
             else:
                 raise ValueError(f"Unsupported data source type: {type(source)}")


@@ -10,7 +10,7 @@ class ByteStream:
     """

     data: bytes
-    metadata: Dict[str, Any] = field(default_factory=dict, hash=False)
+    meta: Dict[str, Any] = field(default_factory=dict, hash=False)
     mime_type: Optional[str] = field(default=None)

     def to_file(self, destination_path: Path):


@@ -20,13 +20,13 @@ class ChatMessage:
     :param content: The text content of the message.
     :param role: The role of the entity sending the message.
     :param name: The name of the function being called (only applicable for role FUNCTION).
-    :param metadata: Additional metadata associated with the message.
+    :param meta: Additional metadata associated with the message.
     """

     content: str
     role: ChatRole
     name: Optional[str]
-    metadata: Dict[str, Any] = field(default_factory=dict, hash=False)
+    meta: Dict[str, Any] = field(default_factory=dict, hash=False)

     def is_from(self, role: ChatRole) -> bool:
         """
@@ -38,15 +38,15 @@ class ChatMessage:
         return self.role == role

     @classmethod
-    def from_assistant(cls, content: str, metadata: Optional[Dict[str, Any]] = None) -> "ChatMessage":
+    def from_assistant(cls, content: str, meta: Optional[Dict[str, Any]] = None) -> "ChatMessage":
         """
         Create a message from the assistant.

         :param content: The text content of the message.
-        :param metadata: Additional metadata associated with the message.
+        :param meta: Additional metadata associated with the message.
         :return: A new ChatMessage instance.
         """
-        return cls(content, ChatRole.ASSISTANT, None, metadata or {})
+        return cls(content, ChatRole.ASSISTANT, None, meta or {})

     @classmethod
     def from_user(cls, content: str) -> "ChatMessage":


@@ -10,8 +10,8 @@ class StreamingChunk:
     streamed data in a systematic manner.

     :param content: The content of the message chunk as a string.
-    :param metadata: A dictionary containing metadata related to the message chunk.
+    :param meta: A dictionary containing metadata related to the message chunk.
     """

     content: str
-    metadata: Dict[str, Any] = field(default_factory=dict, hash=False)
+    meta: Dict[str, Any] = field(default_factory=dict, hash=False)


@@ -67,7 +67,7 @@ class _RAGPipeline:
         self.pipeline.connect("retriever", "prompt_builder.documents")
         self.pipeline.connect("prompt_builder.prompt", "llm.prompt")
         self.pipeline.connect("llm.replies", "answer_builder.replies")
-        self.pipeline.connect("llm.metadata", "answer_builder.metadata")
+        self.pipeline.connect("llm.meta", "answer_builder.meta")
         self.pipeline.connect("retriever", "answer_builder.documents")

     def run(self, query: str) -> Answer:


@@ -0,0 +1,4 @@
+---
+enhancements:
+  - |
+    Rename all metadata references to meta.
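
For downstream code the rename is breaking: every `metadata` field, keyword argument, and output socket shown in the hunks above becomes `meta`. A minimal migration sketch, assuming only the renames visible in this diff (the sample bytes, file name, and reply values are illustrative):

```python
from haystack.dataclasses import ByteStream, ChatMessage

# Before this commit:
#   stream.metadata["file_path"] = "notes.txt"
#   msg = ChatMessage.from_assistant("Hi", metadata={"model": "gpt-3.5-turbo-0613"})

# After this commit, the field and the keyword argument are both called `meta`:
stream = ByteStream(data=b"hello", meta={"file_path": "notes.txt"})
msg = ChatMessage.from_assistant("Hi", meta={"model": "gpt-3.5-turbo-0613"})

# Generator components now expose a `meta` output socket instead of `metadata`,
# so pipeline wiring changes accordingly:
#   pipeline.connect("llm.meta", "answer_builder.meta")
```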


@@ -125,7 +125,7 @@ class TestLocalWhisperTranscriber:
         }

         path = SAMPLES_PATH / "audio" / "this is the content of the document.wav"
         bs = ByteStream.from_file_path(path)
-        bs.metadata["file_path"] = path
+        bs.meta["file_path"] = path
         results = comp.transcribe(sources=[bs])
         expected = Document(
             content="test transcription", meta={"audio_file": path, "other_metadata": ["other", "meta", "data"]}


@@ -210,7 +210,7 @@ class TestRemoteWhisperTranscriber:
         transcriber = RemoteWhisperTranscriber(api_key="test_api_key", model_name=model, response_format="json")

         with open(file_path, "rb") as audio_stream:
             byte_stream = audio_stream.read()
-            audio_file = ByteStream(byte_stream, metadata={"file_path": str(file_path.absolute())})
+            audio_file = ByteStream(byte_stream, meta={"file_path": str(file_path.absolute())})

             result = transcriber.run(sources=[audio_file])


@@ -10,7 +10,7 @@ class TestAnswerBuilder:
     def test_run_unmatching_input_len(self):
         component = AnswerBuilder()
         with pytest.raises(ValueError):
-            component.run(query="query", replies=["reply1"], metadata=[{"test": "meta"}, {"test": "meta2"}])
+            component.run(query="query", replies=["reply1"], meta=[{"test": "meta"}, {"test": "meta2"}])

     def test_run_without_meta(self):
         component = AnswerBuilder()
@@ -24,7 +24,7 @@ class TestAnswerBuilder:
     def test_run_meta_is_an_empty_list(self):
         component = AnswerBuilder()
-        output = component.run(query="query", replies=["reply1"], metadata=[])
+        output = component.run(query="query", replies=["reply1"], meta=[])
         answers = output["answers"]
         assert answers[0].data == "reply1"
         assert answers[0].meta == {}
@@ -34,7 +34,7 @@ class TestAnswerBuilder:
     def test_run_without_pattern(self):
         component = AnswerBuilder()
-        output = component.run(query="test query", replies=["Answer: AnswerString"], metadata=[{}])
+        output = component.run(query="test query", replies=["Answer: AnswerString"], meta=[{}])
         answers = output["answers"]
         assert len(answers) == 1
         assert answers[0].data == "Answer: AnswerString"
@@ -45,7 +45,7 @@ class TestAnswerBuilder:
     def test_run_with_pattern_with_capturing_group(self):
         component = AnswerBuilder(pattern=r"Answer: (.*)")
-        output = component.run(query="test query", replies=["Answer: AnswerString"], metadata=[{}])
+        output = component.run(query="test query", replies=["Answer: AnswerString"], meta=[{}])
         answers = output["answers"]
         assert len(answers) == 1
         assert answers[0].data == "AnswerString"
@@ -56,7 +56,7 @@ class TestAnswerBuilder:
     def test_run_with_pattern_without_capturing_group(self):
         component = AnswerBuilder(pattern=r"'.*'")
-        output = component.run(query="test query", replies=["Answer: 'AnswerString'"], metadata=[{}])
+        output = component.run(query="test query", replies=["Answer: 'AnswerString'"], meta=[{}])
         answers = output["answers"]
         assert len(answers) == 1
         assert answers[0].data == "'AnswerString'"
@@ -71,9 +71,7 @@ class TestAnswerBuilder:
     def test_run_with_pattern_set_at_runtime(self):
         component = AnswerBuilder(pattern="unused pattern")
-        output = component.run(
-            query="test query", replies=["Answer: AnswerString"], metadata=[{}], pattern=r"Answer: (.*)"
-        )
+        output = component.run(query="test query", replies=["Answer: AnswerString"], meta=[{}], pattern=r"Answer: (.*)")
         answers = output["answers"]
         assert len(answers) == 1
         assert answers[0].data == "AnswerString"
@@ -87,7 +85,7 @@ class TestAnswerBuilder:
         output = component.run(
             query="test query",
             replies=["Answer: AnswerString"],
-            metadata=[{}],
+            meta=[{}],
             documents=[Document(content="test doc 1"), Document(content="test doc 2")],
         )
         answers = output["answers"]
@@ -104,7 +102,7 @@ class TestAnswerBuilder:
         output = component.run(
             query="test query",
             replies=["Answer: AnswerString[2]"],
-            metadata=[{}],
+            meta=[{}],
             documents=[Document(content="test doc 1"), Document(content="test doc 2")],
         )
         answers = output["answers"]
@@ -121,7 +119,7 @@ class TestAnswerBuilder:
         output = component.run(
             query="test query",
             replies=["Answer: AnswerString[3]"],
-            metadata=[{}],
+            meta=[{}],
             documents=[Document(content="test doc 1"), Document(content="test doc 2")],
         )
         answers = output["answers"]
@@ -137,7 +135,7 @@ class TestAnswerBuilder:
         output = component.run(
             query="test query",
             replies=["Answer: AnswerString[2][3]"],
-            metadata=[{}],
+            meta=[{}],
             documents=[Document(content="test doc 1"), Document(content="test doc 2"), Document(content="test doc 3")],
             reference_pattern="\\[(\\d+)\\]",
         )


@@ -45,7 +45,7 @@ class TestAzureOCRDocumentConverter:
         }

     def test_run_with_meta(self):
-        bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+        bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

         with patch("haystack.components.converters.azure.DocumentAnalysisClient"):
             component = AzureOCRDocumentConverter(endpoint="test_endpoint", api_key="test_credential_key")


@@ -63,7 +63,7 @@ class TestHTMLToDocument:
         converter = HTMLToDocument()

         with open(test_files_path / "html" / "what_is_haystack.html", "rb") as file:
             byte_stream = file.read()
-            stream = ByteStream(byte_stream, metadata={"content_type": "text/html", "url": "test_url"})
+            stream = ByteStream(byte_stream, meta={"content_type": "text/html", "url": "test_url"})

         results = converter.run(sources=[stream])
         docs = results["documents"]
@@ -81,7 +81,7 @@ class TestHTMLToDocument:
         converter = HTMLToDocument()

         with open(test_files_path / "html" / "what_is_haystack.html", "rb") as file:
             byte_stream = file.read()
-            stream = ByteStream(byte_stream, metadata={"content_type": "text/html", "url": "test_url"})
+            stream = ByteStream(byte_stream, meta={"content_type": "text/html", "url": "test_url"})

         metadata = [{"file_name": "what_is_haystack.html"}]
         results = converter.run(sources=[stream], meta=metadata)
@@ -103,7 +103,7 @@ class TestHTMLToDocument:
         with open(test_files_path / "html" / "what_is_haystack.html", "rb") as file:
             byte_stream = file.read()
             # ByteStream has "url" present in metadata
-            stream = ByteStream(byte_stream, metadata={"content_type": "text/html", "url": "test_url_correct"})
+            stream = ByteStream(byte_stream, meta={"content_type": "text/html", "url": "test_url_correct"})

         # "url" supplied by the user overwrites value present in metadata
         metadata = [{"file_name": "what_is_haystack.html", "url": "test_url_new"}]


@@ -32,7 +32,7 @@ class TestMarkdownToDocument:
             assert "# git clone https://github.com/deepset-ai/haystack.git" in doc.content

     def test_run_with_meta(self):
-        bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+        bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

         converter = MarkdownToDocument()


@@ -30,7 +30,7 @@ class TestPyPDFToDocument:
         assert "ReAct" in docs[0].content

     def test_run_with_meta(self):
-        bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+        bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

         converter = PyPDFToDocument()
         with patch("haystack.components.converters.pypdf.PdfReader"):


@@ -14,8 +14,8 @@ class TestTextfileToDocument:
         Test if the component runs correctly.
         """
         bytestream = ByteStream.from_file_path(test_files_path / "txt" / "doc_3.txt")
-        bytestream.metadata["file_path"] = str(test_files_path / "txt" / "doc_3.txt")
-        bytestream.metadata["key"] = "value"
+        bytestream.meta["file_path"] = str(test_files_path / "txt" / "doc_3.txt")
+        bytestream.meta["key"] = "value"
         files = [str(test_files_path / "txt" / "doc_1.txt"), test_files_path / "txt" / "doc_2.txt", bytestream]
         converter = TextFileToDocument()
         output = converter.run(sources=files)
@@ -26,7 +26,7 @@ class TestTextfileToDocument:
         assert "That's yet another file!" in docs[2].content
         assert docs[0].meta["file_path"] == str(files[0])
         assert docs[1].meta["file_path"] == str(files[1])
-        assert docs[2].meta == bytestream.metadata
+        assert docs[2].meta == bytestream.meta

     def test_run_error_handling(self, test_files_path, caplog):
         """
@@ -47,18 +47,18 @@ class TestTextfileToDocument:
         Test if the encoding metadata field is used properly
         """
         bytestream = ByteStream.from_file_path(test_files_path / "txt" / "doc_1.txt")
-        bytestream.metadata["key"] = "value"
+        bytestream.meta["key"] = "value"

         converter = TextFileToDocument(encoding="utf-16")
         output = converter.run(sources=[bytestream])
         assert "Some text for testing." not in output["documents"][0].content

-        bytestream.metadata["encoding"] = "utf-8"
+        bytestream.meta["encoding"] = "utf-8"
         output = converter.run(sources=[bytestream])
         assert "Some text for testing." in output["documents"][0].content

     def test_run_with_meta(self):
-        bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+        bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

         converter = TextFileToDocument()


@@ -19,7 +19,7 @@ class TestTikaDocumentConverter:
         assert documents[0].content == "Content of mock source"

     def test_run_with_meta(self):
-        bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+        bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

         converter = TikaDocumentConverter()
         with patch("haystack.components.converters.tika.tika_parser.from_buffer"):


@@ -67,7 +67,7 @@ class TestLinkContentFetcher:
         streams = fetcher.run(urls=["https://www.example.com"])["streams"]
         first_stream = streams[0]
         assert first_stream.data == correct_response
-        assert first_stream.metadata["content_type"] == "text/plain"
+        assert first_stream.meta["content_type"] == "text/plain"

     def test_run_html(self):
         correct_response = b"<h1>Example test response</h1>"
@@ -79,7 +79,7 @@ class TestLinkContentFetcher:
         streams = fetcher.run(urls=["https://www.example.com"])["streams"]
         first_stream = streams[0]
         assert first_stream.data == correct_response
-        assert first_stream.metadata["content_type"] == "text/html"
+        assert first_stream.meta["content_type"] == "text/html"

     def test_run_binary(self, test_files_path):
         file_bytes = open(test_files_path / "pdf" / "sample_pdf_1.pdf", "rb").read()
@@ -91,7 +91,7 @@ class TestLinkContentFetcher:
         streams = fetcher.run(urls=["https://www.example.com"])["streams"]
         first_stream = streams[0]
         assert first_stream.data == file_bytes
-        assert first_stream.metadata["content_type"] == "application/pdf"
+        assert first_stream.meta["content_type"] == "application/pdf"

     def test_run_bad_status_code(self):
         empty_byte_stream = b""
@@ -105,7 +105,7 @@ class TestLinkContentFetcher:
         assert len(streams) == 1
         first_stream = streams[0]
         assert first_stream.data == empty_byte_stream
-        assert first_stream.metadata["content_type"] == "text/html"
+        assert first_stream.meta["content_type"] == "text/html"

     @pytest.mark.integration
     def test_link_content_fetcher_html(self):
@@ -113,8 +113,8 @@ class TestLinkContentFetcher:
         streams = fetcher.run([HTML_URL])["streams"]
         first_stream = streams[0]
         assert "Haystack" in first_stream.data.decode("utf-8")
-        assert first_stream.metadata["content_type"] == "text/html"
-        assert "url" in first_stream.metadata and first_stream.metadata["url"] == HTML_URL
+        assert first_stream.meta["content_type"] == "text/html"
+        assert "url" in first_stream.meta and first_stream.meta["url"] == HTML_URL

     @pytest.mark.integration
     def test_link_content_fetcher_text(self):
@@ -122,8 +122,8 @@ class TestLinkContentFetcher:
         streams = fetcher.run([TEXT_URL])["streams"]
         first_stream = streams[0]
         assert "Haystack" in first_stream.data.decode("utf-8")
-        assert first_stream.metadata["content_type"] == "text/plain"
-        assert "url" in first_stream.metadata and first_stream.metadata["url"] == TEXT_URL
+        assert first_stream.meta["content_type"] == "text/plain"
+        assert "url" in first_stream.meta and first_stream.meta["url"] == TEXT_URL

     @pytest.mark.integration
     def test_link_content_fetcher_pdf(self):
@@ -131,8 +131,8 @@ class TestLinkContentFetcher:
         streams = fetcher.run([PDF_URL])["streams"]
         assert len(streams) == 1
         first_stream = streams[0]
-        assert first_stream.metadata["content_type"] in ("application/octet-stream", "application/pdf")
-        assert "url" in first_stream.metadata and first_stream.metadata["url"] == PDF_URL
+        assert first_stream.meta["content_type"] in ("application/octet-stream", "application/pdf")
+        assert "url" in first_stream.meta and first_stream.meta["url"] == PDF_URL

     @pytest.mark.integration
     def test_link_content_fetcher_multiple_different_content_types(self):
@@ -143,10 +143,10 @@ class TestLinkContentFetcher:
         streams = fetcher.run([PDF_URL, HTML_URL])["streams"]
         assert len(streams) == 2
         for stream in streams:
-            assert stream.metadata["content_type"] in ("text/html", "application/pdf", "application/octet-stream")
-            if stream.metadata["content_type"] == "text/html":
+            assert stream.meta["content_type"] in ("text/html", "application/pdf", "application/octet-stream")
+            if stream.meta["content_type"] == "text/html":
                 assert "Haystack" in stream.data.decode("utf-8")
-            elif stream.metadata["content_type"] == "application/pdf":
+            elif stream.meta["content_type"] == "application/pdf":
                 assert len(stream.data) > 0

     @pytest.mark.integration
@@ -160,10 +160,10 @@ class TestLinkContentFetcher:
         streams = fetcher.run([PDF_URL, HTML_URL, "https://google.com"])["streams"]
         assert len(streams) == 3
         for stream in streams:
-            assert stream.metadata["content_type"] in ("text/html", "application/pdf", "application/octet-stream")
-            if stream.metadata["content_type"] == "text/html":
+            assert stream.meta["content_type"] in ("text/html", "application/pdf", "application/octet-stream")
+            if stream.meta["content_type"] == "text/html":
                 assert "Haystack" in stream.data.decode("utf-8") or "Google" in stream.data.decode("utf-8")
-            elif stream.metadata["content_type"] == "application/pdf":
+            elif stream.meta["content_type"] == "application/pdf":
                 assert len(stream.data) > 0

     @pytest.mark.integration
@@ -177,7 +177,7 @@ class TestLinkContentFetcher:
         result = fetcher.run(["https://non_existent_website_dot.com/", "https://www.google.com/"])
         assert len(result["streams"]) == 1
         first_stream = result["streams"][0]
-        assert first_stream.metadata["content_type"] == "text/html"
+        assert first_stream.meta["content_type"] == "text/html"

     @pytest.mark.integration
     def test_bad_request_exception_raised(self):


@@ -241,7 +241,7 @@ class TestGPTChatGenerator:
         component = GPTChatGenerator(api_key="test-api-key")
         messages = [
             ChatMessage.from_assistant(
-                "", metadata={"finish_reason": "content_filter" if i % 2 == 0 else "length", "index": i}
+                "", meta={"finish_reason": "content_filter" if i % 2 == 0 else "length", "index": i}
             )
             for i, _ in enumerate(range(4))
         ]


@@ -124,11 +124,11 @@ class TestHuggingFaceTGIGenerator:
         assert isinstance(response, dict)
         assert "replies" in response
-        assert "metadata" in response
+        assert "meta" in response
         assert isinstance(response["replies"], list)
-        assert isinstance(response["metadata"], list)
+        assert isinstance(response["meta"], list)
         assert len(response["replies"]) == 1
-        assert len(response["metadata"]) == 1
+        assert len(response["meta"]) == 1
         assert [isinstance(reply, str) for reply in response["replies"]]

     def test_generate_multiple_text_responses_with_valid_prompt_and_generation_parameters(
@@ -157,14 +157,14 @@ class TestHuggingFaceTGIGenerator:
         assert isinstance(response, dict)
         assert "replies" in response
-        assert "metadata" in response
+        assert "meta" in response
         assert isinstance(response["replies"], list)
         assert [isinstance(reply, str) for reply in response["replies"]]
-        assert isinstance(response["metadata"], list)
+        assert isinstance(response["meta"], list)
         assert len(response["replies"]) == 3
-        assert len(response["metadata"]) == 3
-        assert [isinstance(reply, dict) for reply in response["metadata"]]
+        assert len(response["meta"]) == 3
+        assert [isinstance(reply, dict) for reply in response["meta"]]

     def test_initialize_with_invalid_model(self, mock_check_valid_model):
         model = "invalid_model"
@@ -200,9 +200,9 @@ class TestHuggingFaceTGIGenerator:
         assert [isinstance(reply, str) for reply in response["replies"]]

         # Assert that the response contains the metadata
-        assert "metadata" in response
-        assert isinstance(response["metadata"], list)
-        assert len(response["metadata"]) > 0
+        assert "meta" in response
+        assert isinstance(response["meta"], list)
+        assert len(response["meta"]) > 0
         assert [isinstance(reply, dict) for reply in response["replies"]]

     def test_generate_text_with_custom_generation_parameters(
@@ -226,9 +226,9 @@ class TestHuggingFaceTGIGenerator:
         assert response["replies"][0] == "I'm fine, thanks."

         # Assert that the response contains the metadata
-        assert "metadata" in response
-        assert isinstance(response["metadata"], list)
-        assert len(response["metadata"]) > 0
+        assert "meta" in response
+        assert isinstance(response["meta"], list)
+        assert len(response["meta"]) > 0
         assert [isinstance(reply, str) for reply in response["replies"]]

     def test_generate_text_with_streaming_callback(
@@ -278,7 +278,7 @@ class TestHuggingFaceTGIGenerator:
         assert [isinstance(reply, str) for reply in response["replies"]]

         # Assert that the response contains the metadata
-        assert "metadata" in response
-        assert isinstance(response["metadata"], list)
-        assert len(response["metadata"]) > 0
+        assert "meta" in response
+        assert isinstance(response["meta"], list)
+        assert len(response["meta"]) > 0
         assert [isinstance(reply, dict) for reply in response["replies"]]


@@ -242,7 +242,7 @@ class TestGPTGenerator:
         for i, _ in enumerate(range(4)):
             message = ChatMessage.from_assistant("Hello")
             metadata = {"finish_reason": "content_filter" if i % 2 == 0 else "length", "index": i}
-            message.metadata.update(metadata)
+            message.meta.update(metadata)
             messages.append(message)

         for m in messages:


@@ -46,13 +46,13 @@ class TestFileTypeRouter:
         for path, mime_type in zip(file_paths, mime_types):
             stream = ByteStream(path.read_bytes())
-            stream.metadata["content_type"] = mime_type
+            stream.meta["content_type"] = mime_type

             byte_streams.append(stream)

         # add unclassified ByteStream
         bs = ByteStream(b"unclassified content")
-        bs.metadata["content_type"] = "unknown_type"
+        bs.meta["content_type"] = "unknown_type"
         byte_streams.append(bs)

         router = FileTypeRouter(mime_types=["text/plain", "audio/x-wav", "image/jpeg"])
@@ -75,7 +75,7 @@ class TestFileTypeRouter:
         byte_stream_sources = []
         for path, mime_type in zip(file_paths, mime_types):
             stream = ByteStream(path.read_bytes())
-            stream.metadata["content_type"] = mime_type
+            stream.meta["content_type"] = mime_type
             byte_stream_sources.append(stream)

         mixed_sources = file_paths[:2] + byte_stream_sources[2:]


@@ -4,25 +4,25 @@ from haystack.dataclasses import StreamingChunk


 def test_create_chunk_with_content_and_metadata():
-    chunk = StreamingChunk(content="Test content", metadata={"key": "value"})
+    chunk = StreamingChunk(content="Test content", meta={"key": "value"})

     assert chunk.content == "Test content"
-    assert chunk.metadata == {"key": "value"}
+    assert chunk.meta == {"key": "value"}


 def test_create_chunk_with_only_content():
     chunk = StreamingChunk(content="Test content")

     assert chunk.content == "Test content"
-    assert chunk.metadata == {}
+    assert chunk.meta == {}


 def test_access_content():
-    chunk = StreamingChunk(content="Test content", metadata={"key": "value"})
+    chunk = StreamingChunk(content="Test content", meta={"key": "value"})
     assert chunk.content == "Test content"


 def test_create_chunk_with_empty_content():
     chunk = StreamingChunk(content="")
     assert chunk.content == ""
-    assert chunk.metadata == {}
+    assert chunk.meta == {}