Mirror of https://github.com/deepset-ai/haystack.git

commit 3d17e6ff76 (parent fc88ef7076)

changed metadata to meta (#6605)
@@ -134,16 +134,16 @@ class LocalWhisperTranscriber:
             if not isinstance(source, ByteStream):
                 path = Path(source)
                 source = ByteStream.from_file_path(path)
-                source.metadata["file_path"] = path
+                source.meta["file_path"] = path
             else:
                 # If we received a ByteStream instance that doesn't have the "file_path" metadata set,
                 # we dump the bytes into a temporary file.
-                path = source.metadata.get("file_path")
+                path = source.meta.get("file_path")
                 if path is None:
                     fp = tempfile.NamedTemporaryFile(delete=False)
                     path = Path(fp.name)
                     source.to_file(path)
-                    source.metadata["file_path"] = path
+                    source.meta["file_path"] = path

             transcription = self._model.transcribe(str(path), **kwargs)
             if not return_segments:
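Note: after this hunk, a caller attaches the file path under the renamed `meta` dict. A minimal sketch of driving the transcriber with a ByteStream (the `sources=[...]` shape matches the tests later in this diff; the import paths, init argument, and `warm_up()` call are assumptions):

```python
from pathlib import Path

from haystack.components.audio import LocalWhisperTranscriber  # assumed import path
from haystack.dataclasses import ByteStream  # assumed import path

path = Path("sample.wav")
source = ByteStream.from_file_path(path)
source.meta["file_path"] = path  # was: source.metadata["file_path"]

transcriber = LocalWhisperTranscriber(model_name_or_path="tiny")  # init argument assumed
transcriber.warm_up()  # assumed: loads the local Whisper model before transcription
result = transcriber.run(sources=[source])
print(result["documents"][0].content)
```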
@@ -129,13 +129,13 @@ class RemoteWhisperTranscriber:
             if not isinstance(source, ByteStream):
                 path = source
                 source = ByteStream.from_file_path(Path(source))
-                source.metadata["file_path"] = path
+                source.meta["file_path"] = path

             file = io.BytesIO(source.data)
-            file.name = str(source.metadata["file_path"]) if "file_path" in source.metadata else "__fallback__.wav"
+            file.name = str(source.meta["file_path"]) if "file_path" in source.meta else "__fallback__.wav"

             content = openai.Audio.transcribe(file=file, model=self.model_name, **self.whisper_params)
-            doc = Document(content=content["text"], meta=source.metadata)
+            doc = Document(content=content["text"], meta=source.meta)
             documents.append(doc)

         return {"documents": documents}
@@ -42,7 +42,7 @@ class AnswerBuilder:
         self,
         query: str,
         replies: List[str],
-        metadata: Optional[List[Dict[str, Any]]] = None,
+        meta: Optional[List[Dict[str, Any]]] = None,
         documents: Optional[List[Document]] = None,
         pattern: Optional[str] = None,
         reference_pattern: Optional[str] = None,
@@ -52,7 +52,7 @@ class AnswerBuilder:

         :param query: The query used in the prompts for the Generator as a string.
         :param replies: The output of the Generator. A list of strings.
-        :param metadata: The metadata returned by the Generator. An optional list of dictionaries. If not specified,
+        :param meta: The metadata returned by the Generator. An optional list of dictionaries. If not specified,
                      the generated answer will contain no metadata.
         :param documents: The documents used as input to the Generator. A list of `Document` objects. If
                           `documents` are specified, they are added to the `Answer` objects.
@@ -74,10 +74,10 @@ class AnswerBuilder:
                                   If not specified, no parsing is done, and all documents are referenced.
                                   Default: `None`.
         """
-        if not metadata:
-            metadata = [{}] * len(replies)
-        elif len(replies) != len(metadata):
-            raise ValueError(f"Number of replies ({len(replies)}), and metadata ({len(metadata)}) must match.")
+        if not meta:
+            meta = [{}] * len(replies)
+        elif len(replies) != len(meta):
+            raise ValueError(f"Number of replies ({len(replies)}), and metadata ({len(meta)}) must match.")

         if pattern:
             AnswerBuilder._check_num_groups_in_regex(pattern)
@@ -86,7 +86,7 @@ class AnswerBuilder:
         reference_pattern = reference_pattern or self.reference_pattern

         all_answers = []
-        for reply, meta in zip(replies, metadata):
+        for reply, metadata in zip(replies, meta):
             referenced_docs = []
             if documents:
                 reference_idxs = []
@@ -102,7 +102,7 @@ class AnswerBuilder:
                     logger.warning("Document index '%s' referenced in Generator output is out of range. ", idx + 1)

             answer_string = AnswerBuilder._extract_answer_string(reply, pattern)
-            answer = GeneratedAnswer(data=answer_string, query=query, documents=referenced_docs, meta=meta)
+            answer = GeneratedAnswer(data=answer_string, query=query, documents=referenced_docs, meta=metadata)
             all_answers.append(answer)

         return {"answers": all_answers}
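Note: callers of `AnswerBuilder.run` now pass `meta=` instead of `metadata=`; the output stays `{"answers": [...]}`. A minimal sketch under that contract (the import path is an assumption; the call shape matches the tests later in this diff):

```python
from haystack.components.builders.answer_builder import AnswerBuilder  # assumed import path

builder = AnswerBuilder(pattern=r"Answer: (.*)")
output = builder.run(
    query="What is the capital of Germany?",
    replies=["Answer: Berlin"],
    meta=[{"model": "gpt-3.5-turbo-0613"}],  # renamed keyword; was metadata=[...]
)
answer = output["answers"][0]
assert answer.data == "Berlin"  # the pattern's capturing group extracts the answer
assert answer.meta["model"] == "gpt-3.5-turbo-0613"  # per-reply dict lands on the answer
```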
@@ -53,7 +53,7 @@ class DynamicPromptBuilder:

    >> {'llm': {'replies': [ChatMessage(content="Berlin is the capital city of Germany and one of the most vibrant
    and diverse cities in Europe. Here are some key things to know...Enjoy your time exploring the vibrant and dynamic
-    capital of Germany!", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, metadata={'model': 'gpt-3.5-turbo-0613',
+    capital of Germany!", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-3.5-turbo-0613',
    'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 27, 'completion_tokens': 681, 'total_tokens': 708}})]}}


@@ -65,7 +65,7 @@ class DynamicPromptBuilder:
    print(res)
    >> {'llm': {'replies': [ChatMessage(content="Here is the weather forecast for Berlin in the next 5
    days:\\n\\nDay 1: Mostly cloudy with a high of 22°C (72°F) and...so it's always a good idea to check for updates
-    closer to your visit.", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, metadata={'model': 'gpt-3.5-turbo-0613',
+    closer to your visit.", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-3.5-turbo-0613',
    'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 37, 'completion_tokens': 201, 'total_tokens': 238}})]}}

    ```
@@ -126,7 +126,7 @@ class DynamicPromptBuilder:
        "template_variables":{"query": "who's making a greeting?"}}})

    >> {'llm': {'replies': [ChatMessage(content='Haystack', role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
-    >> metadata={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage':
+    >> meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage':
    >> {'prompt_tokens': 51, 'completion_tokens': 2, 'total_tokens': 53}})]}}
    ```

@@ -159,7 +159,7 @@ class DynamicPromptBuilder:
        "template_variables":{"query": "Where does the speaker live?"}}})

    >> {'llm': {'replies': ['The speaker lives in Berlin.'],
-    >> 'metadata': [{'model': 'gpt-3.5-turbo-0613',
+    >> 'meta': [{'model': 'gpt-3.5-turbo-0613',
    >> 'index': 0,
    >> 'finish_reason': 'stop',
    >> 'usage': {'prompt_tokens': 28,
@@ -104,11 +104,11 @@ class AzureOCRDocumentConverter:
             azure_output.append(result.to_dict())

             file_suffix = None
-            if "file_path" in bytestream.metadata:
-                file_suffix = Path(bytestream.metadata["file_path"]).suffix
+            if "file_path" in bytestream.meta:
+                file_suffix = Path(bytestream.meta["file_path"]).suffix

             document = AzureOCRDocumentConverter._convert_azure_result_to_document(result, file_suffix)
-            merged_metadata = {**bytestream.metadata, **metadata}
+            merged_metadata = {**bytestream.meta, **metadata}
             document.meta = merged_metadata
             documents.append(document)

@@ -83,7 +83,7 @@ class HTMLToDocument:
                 logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e)
                 continue

-            merged_metadata = {**bytestream.metadata, **metadata}
+            merged_metadata = {**bytestream.meta, **metadata}
             document = Document(content=text, meta=merged_metadata)
             documents.append(document)

@@ -83,7 +83,7 @@ class MarkdownToDocument:
                 logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e)
                 continue

-            merged_metadata = {**bytestream.metadata, **metadata}
+            merged_metadata = {**bytestream.meta, **metadata}
             document = Document(content=text, meta=merged_metadata)
             documents.append(document)

@@ -111,7 +111,7 @@ class PyPDFToDocument:
                 logger.warning("Could not read %s and convert it to Document, skipping. %s", source, e)
                 continue

-            merged_metadata = {**bytestream.metadata, **metadata}
+            merged_metadata = {**bytestream.meta, **metadata}
             document.meta = merged_metadata
             documents.append(document)

@@ -77,7 +77,7 @@ class TikaDocumentConverter:
                 logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e)
                 continue

-            merged_metadata = {**bytestream.metadata, **metadata}
+            merged_metadata = {**bytestream.meta, **metadata}
             document = Document(content=text, meta=merged_metadata)
             documents.append(document)
         return {"documents": documents}

@@ -63,13 +63,13 @@ class TextFileToDocument:
                 logger.warning("Could not read %s. Skipping it. Error: %s", source, e)
                 continue
             try:
-                encoding = bytestream.metadata.get("encoding", self.encoding)
+                encoding = bytestream.meta.get("encoding", self.encoding)
                 text = bytestream.data.decode(encoding)
             except Exception as e:
                 logger.warning("Could not convert file %s. Skipping it. Error message: %s", source, e)
                 continue

-            merged_metadata = {**bytestream.metadata, **metadata}
+            merged_metadata = {**bytestream.meta, **metadata}
             document = Document(content=text, meta=merged_metadata)
             documents.append(document)
@@ -15,6 +15,6 @@ def get_bytestream_from_source(source: Union[str, Path, ByteStream]) -> ByteStre
         return source
     if isinstance(source, (str, Path)):
         bs = ByteStream.from_file_path(Path(source))
-        bs.metadata["file_path"] = str(source)
+        bs.meta["file_path"] = str(source)
         return bs
     raise ValueError(f"Unsupported source type {type(source)}")
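Note: the helper's behavior is unchanged apart from the renamed field it writes. A minimal sketch (the module path is an assumption):

```python
from haystack.components.converters.utils import get_bytestream_from_source  # assumed path

bs = get_bytestream_from_source("docs/report.txt")
print(bs.meta["file_path"])  # origin path now lives under meta, not metadata
```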
@@ -118,7 +118,7 @@ class LinkContentFetcher:
         # don't use multithreading if there's only one URL
         if len(urls) == 1:
             stream_metadata, stream = self.fetch(urls[0])
-            stream.metadata.update(stream_metadata)
+            stream.meta.update(stream_metadata)
             streams.append(stream)
         else:
             with ThreadPoolExecutor() as executor:
@@ -126,7 +126,7 @@ class LinkContentFetcher:

                 for stream_metadata, stream in results:  # type: ignore
                     if stream_metadata is not None and stream is not None:
-                        stream.metadata.update(stream_metadata)
+                        stream.meta.update(stream_metadata)
                         streams.append(stream)

         return {"streams": streams}
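Note: per-URL response details (content type, final URL, and so on) now land on `stream.meta`. A minimal sketch (the import path is an assumption; running it needs network access):

```python
from haystack.components.fetchers import LinkContentFetcher  # assumed import path

fetcher = LinkContentFetcher()
streams = fetcher.run(urls=["https://haystack.deepset.ai"])["streams"]
for stream in streams:
    # fetch details were merged into stream.meta (was stream.metadata)
    print(stream.meta["content_type"], stream.meta.get("url"))
```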
@@ -241,7 +241,7 @@ class HuggingFaceTGIChatGenerator:
             self.streaming_callback(stream_chunk)  # type: ignore # streaming_callback is not None (verified in the run method)

         message = ChatMessage.from_assistant(chunk.generated_text)
-        message.metadata.update(
+        message.meta.update(
             {
                 "finish_reason": chunk.details.finish_reason.value,
                 "index": 0,
@@ -264,7 +264,7 @@ class HuggingFaceTGIChatGenerator:
                 prepared_prompt, details=True, **generation_kwargs
             )
             message = ChatMessage.from_assistant(tgr.generated_text)
-            message.metadata.update(
+            message.meta.update(
                 {
                     "finish_reason": tgr.details.finish_reason.value,
                     "index": _i,

@@ -42,7 +42,7 @@ class GPTChatGenerator:
    >>{'replies': [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence
    >>that focuses on enabling computers to understand, interpret, and generate human language in a way that is
    >>meaningful and useful.', role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
-    >>metadata={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
+    >>meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
    >>'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]}

    ```
@@ -218,7 +218,7 @@ class GPTChatGenerator:
         :param chunks: The list of all chunks returned by the OpenAI API.
         """
         complete_response = ChatMessage.from_assistant("".join([chunk.content for chunk in chunks]))
-        complete_response.metadata.update(
+        complete_response.meta.update(
             {
                 "model": chunk.model,
                 "index": 0,
@@ -239,7 +239,7 @@ class GPTChatGenerator:
         # message.content is str but message.function_call is OpenAIObject but JSON in fact, convert to str
         content = str(message.function_call) if choice.finish_reason == "function_call" else message.content
         chat_message = ChatMessage.from_assistant(content)
-        chat_message.metadata.update(
+        chat_message.meta.update(
             {
                 "model": completion.model,
                 "index": choice.index,
@@ -264,9 +264,7 @@ class GPTChatGenerator:
         else:
             content = ""
         chunk_message = StreamingChunk(content)
-        chunk_message.metadata.update(
-            {"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason}
-        )
+        chunk_message.meta.update({"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason})
         return chunk_message

     def _check_finish_reason(self, message: ChatMessage) -> None:
@@ -275,13 +273,13 @@ class GPTChatGenerator:
         If the `finish_reason` is `length` or `content_filter`, log a warning.
         :param message: The message returned by the LLM.
         """
-        if message.metadata["finish_reason"] == "length":
+        if message.meta["finish_reason"] == "length":
             logger.warning(
                 "The completion for index %s has been truncated before reaching a natural stopping point. "
                 "Increase the max_tokens parameter to allow for longer completions.",
-                message.metadata["index"],
+                message.meta["index"],
             )
-        if message.metadata["finish_reason"] == "content_filter":
+        if message.meta["finish_reason"] == "content_filter":
             logger.warning(
-                "The completion for index %s has been truncated due to the content filter.", message.metadata["index"]
+                "The completion for index %s has been truncated due to the content filter.", message.meta["index"]
             )
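Note: the truncation check now reads `message.meta`. A caller-side sketch of the same inspection (the ChatMessage import path is an assumption):

```python
from haystack.dataclasses import ChatMessage  # assumed import path

reply = ChatMessage.from_assistant("a truncated reply", meta={"finish_reason": "length", "index": 0})
if reply.meta["finish_reason"] == "length":  # was reply.metadata[...]
    print("Completion was cut off; consider raising max_tokens.")
```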
@@ -157,7 +157,7 @@ class HuggingFaceTGIGenerator:
         # Don't send URL as it is sensitive information
         return {"model": self.model}

-    @component.output_types(replies=List[str], metadata=List[Dict[str, Any]])
+    @component.output_types(replies=List[str], meta=List[Dict[str, Any]])
     def run(self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None):
         """
         Invoke the text generation inference for the given prompt and generation parameters.
@@ -204,15 +204,15 @@ class HuggingFaceTGIGenerator:
             chunks.append(stream_chunk)
             self.streaming_callback(stream_chunk)  # type: ignore # streaming_callback is not None (verified in the run method)
         metadata = {
-            "finish_reason": chunks[-1].metadata.get("finish_reason", None),
+            "finish_reason": chunks[-1].meta.get("finish_reason", None),
             "model": self.client.model,
             "usage": {
-                "completion_tokens": chunks[-1].metadata.get("generated_tokens", 0),
+                "completion_tokens": chunks[-1].meta.get("generated_tokens", 0),
                 "prompt_tokens": prompt_token_count,
-                "total_tokens": prompt_token_count + chunks[-1].metadata.get("generated_tokens", 0),
+                "total_tokens": prompt_token_count + chunks[-1].meta.get("generated_tokens", 0),
             },
         }
-        return {"replies": ["".join([chunk.content for chunk in chunks])], "metadata": [metadata]}
+        return {"replies": ["".join([chunk.content for chunk in chunks])], "meta": [metadata]}

     def _run_non_streaming(
         self, prompt: str, prompt_token_count: int, num_responses: int, generation_kwargs: Dict[str, Any]
@@ -234,4 +234,4 @@ class HuggingFaceTGIGenerator:
                 }
             )
             responses.append(tgr.generated_text)
-        return {"replies": responses, "metadata": all_metadata}
+        return {"replies": responses, "meta": all_metadata}
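Note: the generator's output socket renames from `"metadata"` to `"meta"`, so downstream code indexes the new key. A minimal sketch (the import path, init argument, and `warm_up()` call are assumptions; the output keys match the tests later in this diff):

```python
from haystack.components.generators.hugging_face_tgi import HuggingFaceTGIGenerator  # assumed path

generator = HuggingFaceTGIGenerator(model="mistralai/Mistral-7B-v0.1")  # model name illustrative
generator.warm_up()  # assumed: sets up the TGI inference client
response = generator.run(prompt="What is Natural Language Processing?")
print(response["replies"][0])
print(response["meta"][0]["usage"])  # token accounting now travels under "meta"
```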
@@ -37,7 +37,7 @@ class GPTGenerator:

    >> {'replies': ['Natural Language Processing (NLP) is a branch of artificial intelligence that focuses on
    >> the interaction between computers and human language. It involves enabling computers to understand, interpret,
-    >> and respond to natural human language in a way that is both meaningful and useful.'], 'metadata': [{'model':
+    >> and respond to natural human language in a way that is both meaningful and useful.'], 'meta': [{'model':
    >> 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 16,
    >> 'completion_tokens': 49, 'total_tokens': 65}}]}
    ```
@@ -146,7 +146,7 @@ class GPTGenerator:
         data["init_parameters"]["streaming_callback"] = deserialize_callback_handler(serialized_callback_handler)
         return default_from_dict(cls, data)

-    @component.output_types(replies=List[str], metadata=List[Dict[str, Any]])
+    @component.output_types(replies=List[str], meta=List[Dict[str, Any]])
     def run(self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None):
         """
         Invoke the text generation inference based on the provided messages and generation parameters.
@@ -200,7 +200,7 @@ class GPTGenerator:

         return {
             "replies": [message.content for message in completions],
-            "metadata": [message.metadata for message in completions],
+            "meta": [message.meta for message in completions],
         }

     def _convert_to_openai_format(self, messages: List[ChatMessage]) -> List[Dict[str, Any]]:
@@ -222,7 +222,7 @@ class GPTGenerator:
         Connects the streaming chunks into a single ChatMessage.
         """
         complete_response = ChatMessage.from_assistant("".join([chunk.content for chunk in chunks]))
-        complete_response.metadata.update(
+        complete_response.meta.update(
             {
                 "model": chunk.model,
                 "index": 0,
@@ -242,7 +242,7 @@ class GPTGenerator:
         message: OpenAIObject = choice.message
         content = dict(message.function_call) if choice.finish_reason == "function_call" else message.content
         chat_message = ChatMessage.from_assistant(content)
-        chat_message.metadata.update(
+        chat_message.meta.update(
             {
                 "model": completion.model,
                 "index": choice.index,
@@ -267,9 +267,7 @@ class GPTGenerator:
         else:
             content = ""
         chunk_message = StreamingChunk(content)
-        chunk_message.metadata.update(
-            {"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason}
-        )
+        chunk_message.meta.update({"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason})
         return chunk_message

     def _check_finish_reason(self, message: ChatMessage) -> None:
@@ -278,13 +276,13 @@ class GPTGenerator:
         If the `finish_reason` is `length`, log a warning to the user.
         :param message: The message returned by the LLM.
         """
-        if message.metadata["finish_reason"] == "length":
+        if message.meta["finish_reason"] == "length":
             logger.warning(
                 "The completion for index %s has been truncated before reaching a natural stopping point. "
                 "Increase the max_tokens parameter to allow for longer completions.",
-                message.metadata["index"],
+                message.meta["index"],
             )
-        if message.metadata["finish_reason"] == "content_filter":
+        if message.meta["finish_reason"] == "content_filter":
             logger.warning(
-                "The completion for index %s has been truncated due to the content filter.", message.metadata["index"]
+                "The completion for index %s has been truncated due to the content filter.", message.meta["index"]
             )
@@ -58,7 +58,7 @@ class FileTypeRouter:
             if isinstance(source, Path):
                 mime_type = self.get_mime_type(source)
             elif isinstance(source, ByteStream):
-                mime_type = source.metadata.get("content_type")
+                mime_type = source.meta.get("content_type")
             else:
                 raise ValueError(f"Unsupported data source type: {type(source)}")
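Note: a ByteStream is now routed by `meta["content_type"]`. A minimal sketch (the router import path and the per-MIME-type output sockets are assumptions inferred from the tests below):

```python
from haystack.components.routers.file_type_router import FileTypeRouter  # assumed path
from haystack.dataclasses import ByteStream  # assumed path

stream = ByteStream(b"plain text payload")
stream.meta["content_type"] = "text/plain"  # was stream.metadata[...]

router = FileTypeRouter(mime_types=["text/plain", "application/pdf"])
routed = router.run(sources=[stream])
print(routed.get("text/plain"))  # output keyed by MIME type (assumed socket naming)
```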
@@ -10,7 +10,7 @@ class ByteStream:
     """

     data: bytes
-    metadata: Dict[str, Any] = field(default_factory=dict, hash=False)
+    meta: Dict[str, Any] = field(default_factory=dict, hash=False)
     mime_type: Optional[str] = field(default=None)

     def to_file(self, destination_path: Path):
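Note: this dataclass change is the root of the commit; every `ByteStream(..., metadata=...)` call site becomes `meta=...`. A minimal sketch (the import path is an assumption):

```python
from haystack.dataclasses import ByteStream  # assumed import path

bs = ByteStream(data=b"hello", meta={"file_path": "hello.txt"})  # was metadata={...}
bs.meta["encoding"] = "utf-8"
print(bs.meta)  # {'file_path': 'hello.txt', 'encoding': 'utf-8'}
```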
@@ -20,13 +20,13 @@ class ChatMessage:
     :param content: The text content of the message.
     :param role: The role of the entity sending the message.
     :param name: The name of the function being called (only applicable for role FUNCTION).
-    :param metadata: Additional metadata associated with the message.
+    :param meta: Additional metadata associated with the message.
     """

     content: str
     role: ChatRole
     name: Optional[str]
-    metadata: Dict[str, Any] = field(default_factory=dict, hash=False)
+    meta: Dict[str, Any] = field(default_factory=dict, hash=False)

     def is_from(self, role: ChatRole) -> bool:
         """
@@ -38,15 +38,15 @@ class ChatMessage:
         return self.role == role

     @classmethod
-    def from_assistant(cls, content: str, metadata: Optional[Dict[str, Any]] = None) -> "ChatMessage":
+    def from_assistant(cls, content: str, meta: Optional[Dict[str, Any]] = None) -> "ChatMessage":
         """
         Create a message from the assistant.

         :param content: The text content of the message.
-        :param metadata: Additional metadata associated with the message.
+        :param meta: Additional metadata associated with the message.
         :return: A new ChatMessage instance.
         """
-        return cls(content, ChatRole.ASSISTANT, None, metadata or {})
+        return cls(content, ChatRole.ASSISTANT, None, meta or {})

     @classmethod
     def from_user(cls, content: str) -> "ChatMessage":
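Note: the field and the `from_assistant` keyword are renamed together, so positional construction is unaffected. A minimal sketch (the import path is an assumption):

```python
from haystack.dataclasses import ChatMessage  # assumed import path

msg = ChatMessage.from_assistant("Berlin", meta={"model": "gpt-3.5-turbo-0613"})
assert msg.meta["model"] == "gpt-3.5-turbo-0613"  # attribute renamed from .metadata
```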
@@ -10,8 +10,8 @@ class StreamingChunk:
     streamed data in a systematic manner.

     :param content: The content of the message chunk as a string.
-    :param metadata: A dictionary containing metadata related to the message chunk.
+    :param meta: A dictionary containing metadata related to the message chunk.
     """

     content: str
-    metadata: Dict[str, Any] = field(default_factory=dict, hash=False)
+    meta: Dict[str, Any] = field(default_factory=dict, hash=False)
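Note: the same rename on the streaming dataclass; this import path is the one used by the tests at the end of this diff:

```python
from haystack.dataclasses import StreamingChunk

chunk = StreamingChunk(content="partial text", meta={"model": "gpt-3.5-turbo-0613"})
print(chunk.content, chunk.meta)  # meta was previously named metadata
```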
@@ -67,7 +67,7 @@ class _RAGPipeline:
         self.pipeline.connect("retriever", "prompt_builder.documents")
         self.pipeline.connect("prompt_builder.prompt", "llm.prompt")
         self.pipeline.connect("llm.replies", "answer_builder.replies")
-        self.pipeline.connect("llm.metadata", "answer_builder.metadata")
+        self.pipeline.connect("llm.meta", "answer_builder.meta")
         self.pipeline.connect("retriever", "answer_builder.documents")

     def run(self, query: str) -> Answer:
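Note: the generator's output socket and the answer builder's input socket are renamed in the same commit, so pipeline wiring changes on both ends at once. A sketch of the equivalent user-side wiring (component import paths and init arguments are assumptions):

```python
from haystack import Pipeline  # assumed top-level import
from haystack.components.builders.answer_builder import AnswerBuilder  # assumed path
from haystack.components.generators.openai import GPTGenerator  # assumed path

pipeline = Pipeline()
pipeline.add_component("llm", GPTGenerator(api_key="sk-..."))  # placeholder key
pipeline.add_component("answer_builder", AnswerBuilder())
pipeline.connect("llm.replies", "answer_builder.replies")
pipeline.connect("llm.meta", "answer_builder.meta")  # was "llm.metadata" -> "answer_builder.metadata"
```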
@@ -0,0 +1,4 @@
+---
+enhancements:
+  - |
+    Rename all metadata references to meta.
@@ -125,7 +125,7 @@ class TestLocalWhisperTranscriber:
         }
         path = SAMPLES_PATH / "audio" / "this is the content of the document.wav"
         bs = ByteStream.from_file_path(path)
-        bs.metadata["file_path"] = path
+        bs.meta["file_path"] = path
         results = comp.transcribe(sources=[bs])
         expected = Document(
             content="test transcription", meta={"audio_file": path, "other_metadata": ["other", "meta", "data"]}

@@ -210,7 +210,7 @@ class TestRemoteWhisperTranscriber:
         transcriber = RemoteWhisperTranscriber(api_key="test_api_key", model_name=model, response_format="json")
         with open(file_path, "rb") as audio_stream:
             byte_stream = audio_stream.read()
-            audio_file = ByteStream(byte_stream, metadata={"file_path": str(file_path.absolute())})
+            audio_file = ByteStream(byte_stream, meta={"file_path": str(file_path.absolute())})

             result = transcriber.run(sources=[audio_file])

@@ -10,7 +10,7 @@ class TestAnswerBuilder:
     def test_run_unmatching_input_len(self):
         component = AnswerBuilder()
         with pytest.raises(ValueError):
-            component.run(query="query", replies=["reply1"], metadata=[{"test": "meta"}, {"test": "meta2"}])
+            component.run(query="query", replies=["reply1"], meta=[{"test": "meta"}, {"test": "meta2"}])

     def test_run_without_meta(self):
         component = AnswerBuilder()
@@ -24,7 +24,7 @@ class TestAnswerBuilder:

     def test_run_meta_is_an_empty_list(self):
         component = AnswerBuilder()
-        output = component.run(query="query", replies=["reply1"], metadata=[])
+        output = component.run(query="query", replies=["reply1"], meta=[])
         answers = output["answers"]
         assert answers[0].data == "reply1"
         assert answers[0].meta == {}
@@ -34,7 +34,7 @@ class TestAnswerBuilder:

     def test_run_without_pattern(self):
         component = AnswerBuilder()
-        output = component.run(query="test query", replies=["Answer: AnswerString"], metadata=[{}])
+        output = component.run(query="test query", replies=["Answer: AnswerString"], meta=[{}])
         answers = output["answers"]
         assert len(answers) == 1
         assert answers[0].data == "Answer: AnswerString"
@@ -45,7 +45,7 @@ class TestAnswerBuilder:

     def test_run_with_pattern_with_capturing_group(self):
         component = AnswerBuilder(pattern=r"Answer: (.*)")
-        output = component.run(query="test query", replies=["Answer: AnswerString"], metadata=[{}])
+        output = component.run(query="test query", replies=["Answer: AnswerString"], meta=[{}])
         answers = output["answers"]
         assert len(answers) == 1
         assert answers[0].data == "AnswerString"
@@ -56,7 +56,7 @@ class TestAnswerBuilder:

     def test_run_with_pattern_without_capturing_group(self):
         component = AnswerBuilder(pattern=r"'.*'")
-        output = component.run(query="test query", replies=["Answer: 'AnswerString'"], metadata=[{}])
+        output = component.run(query="test query", replies=["Answer: 'AnswerString'"], meta=[{}])
         answers = output["answers"]
         assert len(answers) == 1
         assert answers[0].data == "'AnswerString'"
@@ -71,9 +71,7 @@ class TestAnswerBuilder:

     def test_run_with_pattern_set_at_runtime(self):
         component = AnswerBuilder(pattern="unused pattern")
-        output = component.run(
-            query="test query", replies=["Answer: AnswerString"], metadata=[{}], pattern=r"Answer: (.*)"
-        )
+        output = component.run(query="test query", replies=["Answer: AnswerString"], meta=[{}], pattern=r"Answer: (.*)")
         answers = output["answers"]
         assert len(answers) == 1
         assert answers[0].data == "AnswerString"
@@ -87,7 +85,7 @@ class TestAnswerBuilder:
         output = component.run(
             query="test query",
             replies=["Answer: AnswerString"],
-            metadata=[{}],
+            meta=[{}],
             documents=[Document(content="test doc 1"), Document(content="test doc 2")],
         )
         answers = output["answers"]
@@ -104,7 +102,7 @@ class TestAnswerBuilder:
         output = component.run(
             query="test query",
             replies=["Answer: AnswerString[2]"],
-            metadata=[{}],
+            meta=[{}],
             documents=[Document(content="test doc 1"), Document(content="test doc 2")],
         )
         answers = output["answers"]
@@ -121,7 +119,7 @@ class TestAnswerBuilder:
         output = component.run(
             query="test query",
             replies=["Answer: AnswerString[3]"],
-            metadata=[{}],
+            meta=[{}],
             documents=[Document(content="test doc 1"), Document(content="test doc 2")],
         )
         answers = output["answers"]
@@ -137,7 +135,7 @@ class TestAnswerBuilder:
         output = component.run(
             query="test query",
             replies=["Answer: AnswerString[2][3]"],
-            metadata=[{}],
+            meta=[{}],
             documents=[Document(content="test doc 1"), Document(content="test doc 2"), Document(content="test doc 3")],
             reference_pattern="\\[(\\d+)\\]",
         )
@@ -45,7 +45,7 @@ class TestAzureOCRDocumentConverter:
         }

     def test_run_with_meta(self):
-        bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+        bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

         with patch("haystack.components.converters.azure.DocumentAnalysisClient"):
             component = AzureOCRDocumentConverter(endpoint="test_endpoint", api_key="test_credential_key")

@@ -63,7 +63,7 @@ class TestHTMLToDocument:
         converter = HTMLToDocument()
         with open(test_files_path / "html" / "what_is_haystack.html", "rb") as file:
             byte_stream = file.read()
-        stream = ByteStream(byte_stream, metadata={"content_type": "text/html", "url": "test_url"})
+        stream = ByteStream(byte_stream, meta={"content_type": "text/html", "url": "test_url"})

         results = converter.run(sources=[stream])
         docs = results["documents"]
@@ -81,7 +81,7 @@ class TestHTMLToDocument:
         converter = HTMLToDocument()
         with open(test_files_path / "html" / "what_is_haystack.html", "rb") as file:
             byte_stream = file.read()
-        stream = ByteStream(byte_stream, metadata={"content_type": "text/html", "url": "test_url"})
+        stream = ByteStream(byte_stream, meta={"content_type": "text/html", "url": "test_url"})

         metadata = [{"file_name": "what_is_haystack.html"}]
         results = converter.run(sources=[stream], meta=metadata)
@@ -103,7 +103,7 @@ class TestHTMLToDocument:
         with open(test_files_path / "html" / "what_is_haystack.html", "rb") as file:
             byte_stream = file.read()
         # ByteStream has "url" present in metadata
-        stream = ByteStream(byte_stream, metadata={"content_type": "text/html", "url": "test_url_correct"})
+        stream = ByteStream(byte_stream, meta={"content_type": "text/html", "url": "test_url_correct"})

         # "url" supplied by the user overwrites value present in metadata
         metadata = [{"file_name": "what_is_haystack.html", "url": "test_url_new"}]

@@ -32,7 +32,7 @@ class TestMarkdownToDocument:
         assert "# git clone https://github.com/deepset-ai/haystack.git" in doc.content

     def test_run_with_meta(self):
-        bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+        bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

         converter = MarkdownToDocument()

@@ -30,7 +30,7 @@ class TestPyPDFToDocument:
         assert "ReAct" in docs[0].content

     def test_run_with_meta(self):
-        bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+        bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

         converter = PyPDFToDocument()
         with patch("haystack.components.converters.pypdf.PdfReader"):

@@ -14,8 +14,8 @@ class TestTextfileToDocument:
         Test if the component runs correctly.
         """
         bytestream = ByteStream.from_file_path(test_files_path / "txt" / "doc_3.txt")
-        bytestream.metadata["file_path"] = str(test_files_path / "txt" / "doc_3.txt")
-        bytestream.metadata["key"] = "value"
+        bytestream.meta["file_path"] = str(test_files_path / "txt" / "doc_3.txt")
+        bytestream.meta["key"] = "value"
         files = [str(test_files_path / "txt" / "doc_1.txt"), test_files_path / "txt" / "doc_2.txt", bytestream]
         converter = TextFileToDocument()
         output = converter.run(sources=files)
@@ -26,7 +26,7 @@ class TestTextfileToDocument:
         assert "That's yet another file!" in docs[2].content
         assert docs[0].meta["file_path"] == str(files[0])
         assert docs[1].meta["file_path"] == str(files[1])
-        assert docs[2].meta == bytestream.metadata
+        assert docs[2].meta == bytestream.meta

     def test_run_error_handling(self, test_files_path, caplog):
         """
@@ -47,18 +47,18 @@ class TestTextfileToDocument:
         Test if the encoding metadata field is used properly
         """
         bytestream = ByteStream.from_file_path(test_files_path / "txt" / "doc_1.txt")
-        bytestream.metadata["key"] = "value"
+        bytestream.meta["key"] = "value"

         converter = TextFileToDocument(encoding="utf-16")
         output = converter.run(sources=[bytestream])
         assert "Some text for testing." not in output["documents"][0].content

-        bytestream.metadata["encoding"] = "utf-8"
+        bytestream.meta["encoding"] = "utf-8"
         output = converter.run(sources=[bytestream])
         assert "Some text for testing." in output["documents"][0].content

     def test_run_with_meta(self):
-        bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+        bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

         converter = TextFileToDocument()

@@ -19,7 +19,7 @@ class TestTikaDocumentConverter:
         assert documents[0].content == "Content of mock source"

     def test_run_with_meta(self):
-        bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+        bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

         converter = TikaDocumentConverter()
         with patch("haystack.components.converters.tika.tika_parser.from_buffer"):
@@ -67,7 +67,7 @@ class TestLinkContentFetcher:
             streams = fetcher.run(urls=["https://www.example.com"])["streams"]
             first_stream = streams[0]
             assert first_stream.data == correct_response
-            assert first_stream.metadata["content_type"] == "text/plain"
+            assert first_stream.meta["content_type"] == "text/plain"

     def test_run_html(self):
         correct_response = b"<h1>Example test response</h1>"
@@ -79,7 +79,7 @@ class TestLinkContentFetcher:
             streams = fetcher.run(urls=["https://www.example.com"])["streams"]
             first_stream = streams[0]
             assert first_stream.data == correct_response
-            assert first_stream.metadata["content_type"] == "text/html"
+            assert first_stream.meta["content_type"] == "text/html"

     def test_run_binary(self, test_files_path):
         file_bytes = open(test_files_path / "pdf" / "sample_pdf_1.pdf", "rb").read()
@@ -91,7 +91,7 @@ class TestLinkContentFetcher:
             streams = fetcher.run(urls=["https://www.example.com"])["streams"]
             first_stream = streams[0]
             assert first_stream.data == file_bytes
-            assert first_stream.metadata["content_type"] == "application/pdf"
+            assert first_stream.meta["content_type"] == "application/pdf"

     def test_run_bad_status_code(self):
         empty_byte_stream = b""
@@ -105,7 +105,7 @@ class TestLinkContentFetcher:
             assert len(streams) == 1
             first_stream = streams[0]
             assert first_stream.data == empty_byte_stream
-            assert first_stream.metadata["content_type"] == "text/html"
+            assert first_stream.meta["content_type"] == "text/html"

     @pytest.mark.integration
     def test_link_content_fetcher_html(self):
@@ -113,8 +113,8 @@ class TestLinkContentFetcher:
         streams = fetcher.run([HTML_URL])["streams"]
         first_stream = streams[0]
         assert "Haystack" in first_stream.data.decode("utf-8")
-        assert first_stream.metadata["content_type"] == "text/html"
-        assert "url" in first_stream.metadata and first_stream.metadata["url"] == HTML_URL
+        assert first_stream.meta["content_type"] == "text/html"
+        assert "url" in first_stream.meta and first_stream.meta["url"] == HTML_URL

     @pytest.mark.integration
     def test_link_content_fetcher_text(self):
@@ -122,8 +122,8 @@ class TestLinkContentFetcher:
         streams = fetcher.run([TEXT_URL])["streams"]
         first_stream = streams[0]
         assert "Haystack" in first_stream.data.decode("utf-8")
-        assert first_stream.metadata["content_type"] == "text/plain"
-        assert "url" in first_stream.metadata and first_stream.metadata["url"] == TEXT_URL
+        assert first_stream.meta["content_type"] == "text/plain"
+        assert "url" in first_stream.meta and first_stream.meta["url"] == TEXT_URL

     @pytest.mark.integration
     def test_link_content_fetcher_pdf(self):
@@ -131,8 +131,8 @@ class TestLinkContentFetcher:
         streams = fetcher.run([PDF_URL])["streams"]
         assert len(streams) == 1
         first_stream = streams[0]
-        assert first_stream.metadata["content_type"] in ("application/octet-stream", "application/pdf")
-        assert "url" in first_stream.metadata and first_stream.metadata["url"] == PDF_URL
+        assert first_stream.meta["content_type"] in ("application/octet-stream", "application/pdf")
+        assert "url" in first_stream.meta and first_stream.meta["url"] == PDF_URL

     @pytest.mark.integration
     def test_link_content_fetcher_multiple_different_content_types(self):
@@ -143,10 +143,10 @@ class TestLinkContentFetcher:
         streams = fetcher.run([PDF_URL, HTML_URL])["streams"]
         assert len(streams) == 2
         for stream in streams:
-            assert stream.metadata["content_type"] in ("text/html", "application/pdf", "application/octet-stream")
-            if stream.metadata["content_type"] == "text/html":
+            assert stream.meta["content_type"] in ("text/html", "application/pdf", "application/octet-stream")
+            if stream.meta["content_type"] == "text/html":
                 assert "Haystack" in stream.data.decode("utf-8")
-            elif stream.metadata["content_type"] == "application/pdf":
+            elif stream.meta["content_type"] == "application/pdf":
                 assert len(stream.data) > 0

     @pytest.mark.integration
@@ -160,10 +160,10 @@ class TestLinkContentFetcher:
         streams = fetcher.run([PDF_URL, HTML_URL, "https://google.com"])["streams"]
         assert len(streams) == 3
         for stream in streams:
-            assert stream.metadata["content_type"] in ("text/html", "application/pdf", "application/octet-stream")
-            if stream.metadata["content_type"] == "text/html":
+            assert stream.meta["content_type"] in ("text/html", "application/pdf", "application/octet-stream")
+            if stream.meta["content_type"] == "text/html":
                 assert "Haystack" in stream.data.decode("utf-8") or "Google" in stream.data.decode("utf-8")
-            elif stream.metadata["content_type"] == "application/pdf":
+            elif stream.meta["content_type"] == "application/pdf":
                 assert len(stream.data) > 0

     @pytest.mark.integration
@@ -177,7 +177,7 @@ class TestLinkContentFetcher:
         result = fetcher.run(["https://non_existent_website_dot.com/", "https://www.google.com/"])
         assert len(result["streams"]) == 1
         first_stream = result["streams"][0]
-        assert first_stream.metadata["content_type"] == "text/html"
+        assert first_stream.meta["content_type"] == "text/html"

     @pytest.mark.integration
     def test_bad_request_exception_raised(self):
@@ -241,7 +241,7 @@ class TestGPTChatGenerator:
         component = GPTChatGenerator(api_key="test-api-key")
         messages = [
             ChatMessage.from_assistant(
-                "", metadata={"finish_reason": "content_filter" if i % 2 == 0 else "length", "index": i}
+                "", meta={"finish_reason": "content_filter" if i % 2 == 0 else "length", "index": i}
             )
             for i, _ in enumerate(range(4))
         ]
@@ -124,11 +124,11 @@ class TestHuggingFaceTGIGenerator:

         assert isinstance(response, dict)
         assert "replies" in response
-        assert "metadata" in response
+        assert "meta" in response
         assert isinstance(response["replies"], list)
-        assert isinstance(response["metadata"], list)
+        assert isinstance(response["meta"], list)
         assert len(response["replies"]) == 1
-        assert len(response["metadata"]) == 1
+        assert len(response["meta"]) == 1
         assert [isinstance(reply, str) for reply in response["replies"]]

     def test_generate_multiple_text_responses_with_valid_prompt_and_generation_parameters(
@@ -157,14 +157,14 @@ class TestHuggingFaceTGIGenerator:

         assert isinstance(response, dict)
         assert "replies" in response
-        assert "metadata" in response
+        assert "meta" in response
         assert isinstance(response["replies"], list)
         assert [isinstance(reply, str) for reply in response["replies"]]

-        assert isinstance(response["metadata"], list)
+        assert isinstance(response["meta"], list)
         assert len(response["replies"]) == 3
-        assert len(response["metadata"]) == 3
-        assert [isinstance(reply, dict) for reply in response["metadata"]]
+        assert len(response["meta"]) == 3
+        assert [isinstance(reply, dict) for reply in response["meta"]]

     def test_initialize_with_invalid_model(self, mock_check_valid_model):
         model = "invalid_model"
@@ -200,9 +200,9 @@ class TestHuggingFaceTGIGenerator:
         assert [isinstance(reply, str) for reply in response["replies"]]

         # Assert that the response contains the metadata
-        assert "metadata" in response
-        assert isinstance(response["metadata"], list)
-        assert len(response["metadata"]) > 0
+        assert "meta" in response
+        assert isinstance(response["meta"], list)
+        assert len(response["meta"]) > 0
         assert [isinstance(reply, dict) for reply in response["replies"]]

     def test_generate_text_with_custom_generation_parameters(
@@ -226,9 +226,9 @@ class TestHuggingFaceTGIGenerator:
         assert response["replies"][0] == "I'm fine, thanks."

         # Assert that the response contains the metadata
-        assert "metadata" in response
-        assert isinstance(response["metadata"], list)
-        assert len(response["metadata"]) > 0
+        assert "meta" in response
+        assert isinstance(response["meta"], list)
+        assert len(response["meta"]) > 0
         assert [isinstance(reply, str) for reply in response["replies"]]

     def test_generate_text_with_streaming_callback(
@@ -278,7 +278,7 @@ class TestHuggingFaceTGIGenerator:
         assert [isinstance(reply, str) for reply in response["replies"]]

         # Assert that the response contains the metadata
-        assert "metadata" in response
-        assert isinstance(response["metadata"], list)
-        assert len(response["metadata"]) > 0
+        assert "meta" in response
+        assert isinstance(response["meta"], list)
+        assert len(response["meta"]) > 0
         assert [isinstance(reply, dict) for reply in response["replies"]]
@@ -242,7 +242,7 @@ class TestGPTGenerator:
         for i, _ in enumerate(range(4)):
             message = ChatMessage.from_assistant("Hello")
             metadata = {"finish_reason": "content_filter" if i % 2 == 0 else "length", "index": i}
-            message.metadata.update(metadata)
+            message.meta.update(metadata)
             messages.append(message)

         for m in messages:
@@ -46,13 +46,13 @@ class TestFileTypeRouter:
         for path, mime_type in zip(file_paths, mime_types):
             stream = ByteStream(path.read_bytes())

-            stream.metadata["content_type"] = mime_type
+            stream.meta["content_type"] = mime_type

             byte_streams.append(stream)

         # add unclassified ByteStream
         bs = ByteStream(b"unclassified content")
-        bs.metadata["content_type"] = "unknown_type"
+        bs.meta["content_type"] = "unknown_type"
         byte_streams.append(bs)

         router = FileTypeRouter(mime_types=["text/plain", "audio/x-wav", "image/jpeg"])
@@ -75,7 +75,7 @@ class TestFileTypeRouter:
         byte_stream_sources = []
         for path, mime_type in zip(file_paths, mime_types):
             stream = ByteStream(path.read_bytes())
-            stream.metadata["content_type"] = mime_type
+            stream.meta["content_type"] = mime_type
             byte_stream_sources.append(stream)

         mixed_sources = file_paths[:2] + byte_stream_sources[2:]
@@ -4,25 +4,25 @@ from haystack.dataclasses import StreamingChunk


 def test_create_chunk_with_content_and_metadata():
-    chunk = StreamingChunk(content="Test content", metadata={"key": "value"})
+    chunk = StreamingChunk(content="Test content", meta={"key": "value"})

     assert chunk.content == "Test content"
-    assert chunk.metadata == {"key": "value"}
+    assert chunk.meta == {"key": "value"}


 def test_create_chunk_with_only_content():
     chunk = StreamingChunk(content="Test content")

     assert chunk.content == "Test content"
-    assert chunk.metadata == {}
+    assert chunk.meta == {}


 def test_access_content():
-    chunk = StreamingChunk(content="Test content", metadata={"key": "value"})
+    chunk = StreamingChunk(content="Test content", meta={"key": "value"})
     assert chunk.content == "Test content"


 def test_create_chunk_with_empty_content():
     chunk = StreamingChunk(content="")
     assert chunk.content == ""
-    assert chunk.metadata == {}
+    assert chunk.meta == {}