Mirror of https://github.com/deepset-ai/haystack.git, synced 2025-12-18 10:38:36 +00:00

commit 3d17e6ff76
parent fc88ef7076

    changed metadata to meta (#6605)
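The change itself is mechanical: every `metadata` attribute, parameter, docstring reference, and output socket becomes `meta`, with no alias left behind. For downstream code the migration looks roughly like this (a hedged sketch; the import path is an assumption, not part of this diff):

    from haystack.dataclasses import ByteStream  # import path assumed

    stream = ByteStream(data=b"hello")
    # Before this commit: stream.metadata["file_path"] = "notes.txt"
    # After this commit:
    stream.meta["file_path"] = "notes.txt"

The hunks below apply exactly this rename, file by file.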
@@ -134,16 +134,16 @@ class LocalWhisperTranscriber:
     if not isinstance(source, ByteStream):
         path = Path(source)
         source = ByteStream.from_file_path(path)
-        source.metadata["file_path"] = path
+        source.meta["file_path"] = path
     else:
         # If we received a ByteStream instance that doesn't have the "file_path" metadata set,
         # we dump the bytes into a temporary file.
-        path = source.metadata.get("file_path")
+        path = source.meta.get("file_path")
         if path is None:
             fp = tempfile.NamedTemporaryFile(delete=False)
             path = Path(fp.name)
             source.to_file(path)
-            source.metadata["file_path"] = path
+            source.meta["file_path"] = path

     transcription = self._model.transcribe(str(path), **kwargs)
     if not return_segments:
@@ -129,13 +129,13 @@ class RemoteWhisperTranscriber:
     if not isinstance(source, ByteStream):
         path = source
         source = ByteStream.from_file_path(Path(source))
-        source.metadata["file_path"] = path
+        source.meta["file_path"] = path

     file = io.BytesIO(source.data)
-    file.name = str(source.metadata["file_path"]) if "file_path" in source.metadata else "__fallback__.wav"
+    file.name = str(source.meta["file_path"]) if "file_path" in source.meta else "__fallback__.wav"

     content = openai.Audio.transcribe(file=file, model=self.model_name, **self.whisper_params)
-    doc = Document(content=content["text"], meta=source.metadata)
+    doc = Document(content=content["text"], meta=source.meta)
     documents.append(doc)

 return {"documents": documents}
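Both transcribers now read the filename from `source.meta`. A minimal usage sketch against the renamed API (import paths and the placeholder key are assumptions; the `run(sources=...)` interface matches the tests later in this diff):

    from pathlib import Path

    from haystack.components.audio import RemoteWhisperTranscriber  # path assumed
    from haystack.dataclasses import ByteStream                     # path assumed

    transcriber = RemoteWhisperTranscriber(api_key="sk-...")  # placeholder key
    source = ByteStream.from_file_path(Path("meeting.wav"))
    source.meta["file_path"] = "meeting.wav"  # used as upload filename, else "__fallback__.wav"
    result = transcriber.run(sources=[source])
    print(result["documents"][0].content)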
@@ -42,7 +42,7 @@ class AnswerBuilder:
     self,
     query: str,
     replies: List[str],
-    metadata: Optional[List[Dict[str, Any]]] = None,
+    meta: Optional[List[Dict[str, Any]]] = None,
     documents: Optional[List[Document]] = None,
     pattern: Optional[str] = None,
     reference_pattern: Optional[str] = None,

@@ -52,7 +52,7 @@ class AnswerBuilder:

     :param query: The query used in the prompts for the Generator as a string.
     :param replies: The output of the Generator. A list of strings.
-    :param metadata: The metadata returned by the Generator. An optional list of dictionaries. If not specified,
+    :param meta: The metadata returned by the Generator. An optional list of dictionaries. If not specified,
         the generated answer will contain no metadata.
     :param documents: The documents used as input to the Generator. A list of `Document` objects. If
         `documents` are specified, they are added to the `Answer` objects.

@@ -74,10 +74,10 @@ class AnswerBuilder:
         If not specified, no parsing is done, and all documents are referenced.
         Default: `None`.
     """
-    if not metadata:
-        metadata = [{}] * len(replies)
-    elif len(replies) != len(metadata):
-        raise ValueError(f"Number of replies ({len(replies)}), and metadata ({len(metadata)}) must match.")
+    if not meta:
+        meta = [{}] * len(replies)
+    elif len(replies) != len(meta):
+        raise ValueError(f"Number of replies ({len(replies)}), and metadata ({len(meta)}) must match.")

     if pattern:
         AnswerBuilder._check_num_groups_in_regex(pattern)

@@ -86,7 +86,7 @@ class AnswerBuilder:
     reference_pattern = reference_pattern or self.reference_pattern

     all_answers = []
-    for reply, meta in zip(replies, metadata):
+    for reply, metadata in zip(replies, meta):
         referenced_docs = []
         if documents:
             reference_idxs = []

@@ -102,7 +102,7 @@ class AnswerBuilder:
             logger.warning("Document index '%s' referenced in Generator output is out of range. ", idx + 1)

     answer_string = AnswerBuilder._extract_answer_string(reply, pattern)
-    answer = GeneratedAnswer(data=answer_string, query=query, documents=referenced_docs, meta=meta)
+    answer = GeneratedAnswer(data=answer_string, query=query, documents=referenced_docs, meta=metadata)
     all_answers.append(answer)

 return {"answers": all_answers}
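Callers of `AnswerBuilder.run` rename the keyword accordingly; the semantics are unchanged (one dict per reply, lengths must match). A sketch grounded in the tests further down in this diff (import path assumed):

    from haystack.components.builders.answer_builder import AnswerBuilder  # path assumed

    builder = AnswerBuilder(pattern=r"Answer: (.*)")
    output = builder.run(
        query="What framework is this?",
        replies=["Answer: Haystack"],
        meta=[{"model": "gpt-3.5-turbo-0613"}],  # was metadata=[...]
    )
    answer = output["answers"][0]
    assert answer.data == "Haystack"
    assert answer.meta == {"model": "gpt-3.5-turbo-0613"}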
@@ -53,7 +53,7 @@ class DynamicPromptBuilder:

 >> {'llm': {'replies': [ChatMessage(content="Berlin is the capital city of Germany and one of the most vibrant
 and diverse cities in Europe. Here are some key things to know...Enjoy your time exploring the vibrant and dynamic
-capital of Germany!", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, metadata={'model': 'gpt-3.5-turbo-0613',
+capital of Germany!", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-3.5-turbo-0613',
 'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 27, 'completion_tokens': 681, 'total_tokens': 708}})]}}

@@ -65,7 +65,7 @@ class DynamicPromptBuilder:
 print(res)
 >> {'llm': {'replies': [ChatMessage(content="Here is the weather forecast for Berlin in the next 5
 days:\\n\\nDay 1: Mostly cloudy with a high of 22°C (72°F) and...so it's always a good idea to check for updates
-closer to your visit.", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, metadata={'model': 'gpt-3.5-turbo-0613',
+closer to your visit.", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-3.5-turbo-0613',
 'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 37, 'completion_tokens': 201, 'total_tokens': 238}})]}}

 ```

@@ -126,7 +126,7 @@ class DynamicPromptBuilder:
 "template_variables":{"query": "who's making a greeting?"}}})

 >> {'llm': {'replies': [ChatMessage(content='Haystack', role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
->> metadata={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage':
+>> meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage':
 >> {'prompt_tokens': 51, 'completion_tokens': 2, 'total_tokens': 53}})]}}
 ```

@@ -159,7 +159,7 @@ class DynamicPromptBuilder:
 "template_variables":{"query": "Where does the speaker live?"}}})

 >> {'llm': {'replies': ['The speaker lives in Berlin.'],
->> 'metadata': [{'model': 'gpt-3.5-turbo-0613',
+>> 'meta': [{'model': 'gpt-3.5-turbo-0613',
 >> 'index': 0,
 >> 'finish_reason': 'stop',
 >> 'usage': {'prompt_tokens': 28,
@@ -104,11 +104,11 @@ class AzureOCRDocumentConverter:
     azure_output.append(result.to_dict())

     file_suffix = None
-    if "file_path" in bytestream.metadata:
-        file_suffix = Path(bytestream.metadata["file_path"]).suffix
+    if "file_path" in bytestream.meta:
+        file_suffix = Path(bytestream.meta["file_path"]).suffix

     document = AzureOCRDocumentConverter._convert_azure_result_to_document(result, file_suffix)
-    merged_metadata = {**bytestream.metadata, **metadata}
+    merged_metadata = {**bytestream.meta, **metadata}
     document.meta = merged_metadata
     documents.append(document)

@@ -83,7 +83,7 @@ class HTMLToDocument:
         logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e)
         continue

-    merged_metadata = {**bytestream.metadata, **metadata}
+    merged_metadata = {**bytestream.meta, **metadata}
     document = Document(content=text, meta=merged_metadata)
     documents.append(document)

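All converters share this merge step: per-stream `meta` is combined with the user-supplied `meta` argument, and the user's values win on key collisions. A sketch (import paths assumed, and it assumes extraction succeeds on the toy input; the override behavior is asserted by the HTML tests further down):

    from haystack.components.converters import HTMLToDocument  # path assumed
    from haystack.dataclasses import ByteStream                # path assumed

    stream = ByteStream(data=b"<html><body>What is Haystack?</body></html>", meta={"url": "from_stream"})
    converter = HTMLToDocument()
    docs = converter.run(sources=[stream], meta=[{"url": "from_user"}])["documents"]
    # merged as {**stream.meta, **user_meta}; the user-supplied "url" wins:
    print(docs[0].meta["url"])  # -> "from_user"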
@@ -83,7 +83,7 @@ class MarkdownToDocument:
         logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e)
         continue

-    merged_metadata = {**bytestream.metadata, **metadata}
+    merged_metadata = {**bytestream.meta, **metadata}
     document = Document(content=text, meta=merged_metadata)
     documents.append(document)

@@ -111,7 +111,7 @@ class PyPDFToDocument:
         logger.warning("Could not read %s and convert it to Document, skipping. %s", source, e)
         continue

-    merged_metadata = {**bytestream.metadata, **metadata}
+    merged_metadata = {**bytestream.meta, **metadata}
     document.meta = merged_metadata
     documents.append(document)

@@ -77,7 +77,7 @@ class TikaDocumentConverter:
         logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e)
         continue

-    merged_metadata = {**bytestream.metadata, **metadata}
+    merged_metadata = {**bytestream.meta, **metadata}
     document = Document(content=text, meta=merged_metadata)
     documents.append(document)
 return {"documents": documents}
@@ -63,13 +63,13 @@ class TextFileToDocument:
         logger.warning("Could not read %s. Skipping it. Error: %s", source, e)
         continue
     try:
-        encoding = bytestream.metadata.get("encoding", self.encoding)
+        encoding = bytestream.meta.get("encoding", self.encoding)
         text = bytestream.data.decode(encoding)
     except Exception as e:
         logger.warning("Could not convert file %s. Skipping it. Error message: %s", source, e)
         continue

-    merged_metadata = {**bytestream.metadata, **metadata}
+    merged_metadata = {**bytestream.meta, **metadata}
     document = Document(content=text, meta=merged_metadata)
     documents.append(document)

@@ -15,6 +15,6 @@ def get_bytestream_from_source(source: Union[str, Path, ByteStream]) -> ByteStream:
     return source
 if isinstance(source, (str, Path)):
     bs = ByteStream.from_file_path(Path(source))
-    bs.metadata["file_path"] = str(source)
+    bs.meta["file_path"] = str(source)
     return bs
 raise ValueError(f"Unsupported source type {type(source)}")
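A quick sketch of the helper after the rename (module path assumed):

    from pathlib import Path

    from haystack.components.converters.utils import get_bytestream_from_source  # path assumed

    Path("notes.txt").write_text("hello")
    bs = get_bytestream_from_source("notes.txt")
    assert bs.meta["file_path"] == "notes.txt"  # stored under .meta, no longer .metadata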
@@ -118,7 +118,7 @@ class LinkContentFetcher:
 # don't use multithreading if there's only one URL
 if len(urls) == 1:
     stream_metadata, stream = self.fetch(urls[0])
-    stream.metadata.update(stream_metadata)
+    stream.meta.update(stream_metadata)
     streams.append(stream)
 else:
     with ThreadPoolExecutor() as executor:

@@ -126,7 +126,7 @@ class LinkContentFetcher:

     for stream_metadata, stream in results:  # type: ignore
         if stream_metadata is not None and stream is not None:
-            stream.metadata.update(stream_metadata)
+            stream.meta.update(stream_metadata)
             streams.append(stream)

 return {"streams": streams}
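Consumers of the fetcher now read the merged metadata from `stream.meta`; for example (import path assumed, mirroring the integration tests near the end of this diff):

    from haystack.components.fetchers import LinkContentFetcher  # path assumed

    fetcher = LinkContentFetcher()
    streams = fetcher.run(urls=["https://haystack.deepset.ai"])["streams"]
    for stream in streams:
        # "content_type" and "url" now live in .meta (previously .metadata)
        print(stream.meta["content_type"], stream.meta["url"])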
@@ -241,7 +241,7 @@ class HuggingFaceTGIChatGenerator:
     self.streaming_callback(stream_chunk)  # type: ignore # streaming_callback is not None (verified in the run method)

 message = ChatMessage.from_assistant(chunk.generated_text)
-message.metadata.update(
+message.meta.update(
     {
         "finish_reason": chunk.details.finish_reason.value,
         "index": 0,

@@ -264,7 +264,7 @@ class HuggingFaceTGIChatGenerator:
     prepared_prompt, details=True, **generation_kwargs
 )
 message = ChatMessage.from_assistant(tgr.generated_text)
-message.metadata.update(
+message.meta.update(
     {
         "finish_reason": tgr.details.finish_reason.value,
         "index": _i,
@@ -42,7 +42,7 @@ class GPTChatGenerator:
 >>{'replies': [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence
 >>that focuses on enabling computers to understand, interpret, and generate human language in a way that is
 >>meaningful and useful.', role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
->>metadata={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
+>>meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
 >>'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]}

 ```

@@ -218,7 +218,7 @@ class GPTChatGenerator:
     :param chunks: The list of all chunks returned by the OpenAI API.
     """
     complete_response = ChatMessage.from_assistant("".join([chunk.content for chunk in chunks]))
-    complete_response.metadata.update(
+    complete_response.meta.update(
         {
             "model": chunk.model,
             "index": 0,

@@ -239,7 +239,7 @@ class GPTChatGenerator:
     # message.content is str but message.function_call is OpenAIObject but JSON in fact, convert to str
     content = str(message.function_call) if choice.finish_reason == "function_call" else message.content
     chat_message = ChatMessage.from_assistant(content)
-    chat_message.metadata.update(
+    chat_message.meta.update(
         {
             "model": completion.model,
             "index": choice.index,

@@ -264,9 +264,7 @@ class GPTChatGenerator:
     else:
         content = ""
     chunk_message = StreamingChunk(content)
-    chunk_message.metadata.update(
-        {"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason}
-    )
+    chunk_message.meta.update({"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason})
     return chunk_message

 def _check_finish_reason(self, message: ChatMessage) -> None:

@@ -275,13 +273,13 @@ class GPTChatGenerator:
     If the `finish_reason` is `length` or `content_filter`, log a warning.
     :param message: The message returned by the LLM.
     """
-    if message.metadata["finish_reason"] == "length":
+    if message.meta["finish_reason"] == "length":
         logger.warning(
             "The completion for index %s has been truncated before reaching a natural stopping point. "
             "Increase the max_tokens parameter to allow for longer completions.",
-            message.metadata["index"],
+            message.meta["index"],
         )
-    if message.metadata["finish_reason"] == "content_filter":
+    if message.meta["finish_reason"] == "content_filter":
         logger.warning(
-            "The completion for index %s has been truncated due to the content filter.", message.metadata["index"]
+            "The completion for index %s has been truncated due to the content filter.", message.meta["index"]
         )
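The truncation warning keeps its behavior; only the attribute is renamed. The check can be reproduced in isolation like this (a sketch; `from_assistant(..., meta=...)` matches the `ChatMessage` change later in this diff, and the import path is assumed):

    from haystack.dataclasses import ChatMessage  # path assumed

    reply = ChatMessage.from_assistant(
        "A truncated answer ...",
        meta={"finish_reason": "length", "index": 0},  # was metadata=
    )
    if reply.meta["finish_reason"] == "length":
        print("completion truncated; consider raising max_tokens")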
@@ -157,7 +157,7 @@ class HuggingFaceTGIGenerator:
     # Don't send URL as it is sensitive information
     return {"model": self.model}

-@component.output_types(replies=List[str], metadata=List[Dict[str, Any]])
+@component.output_types(replies=List[str], meta=List[Dict[str, Any]])
 def run(self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None):
     """
     Invoke the text generation inference for the given prompt and generation parameters.

@@ -204,15 +204,15 @@ class HuggingFaceTGIGenerator:
     chunks.append(stream_chunk)
     self.streaming_callback(stream_chunk)  # type: ignore # streaming_callback is not None (verified in the run method)
 metadata = {
-    "finish_reason": chunks[-1].metadata.get("finish_reason", None),
+    "finish_reason": chunks[-1].meta.get("finish_reason", None),
     "model": self.client.model,
     "usage": {
-        "completion_tokens": chunks[-1].metadata.get("generated_tokens", 0),
+        "completion_tokens": chunks[-1].meta.get("generated_tokens", 0),
         "prompt_tokens": prompt_token_count,
-        "total_tokens": prompt_token_count + chunks[-1].metadata.get("generated_tokens", 0),
+        "total_tokens": prompt_token_count + chunks[-1].meta.get("generated_tokens", 0),
     },
 }
-return {"replies": ["".join([chunk.content for chunk in chunks])], "metadata": [metadata]}
+return {"replies": ["".join([chunk.content for chunk in chunks])], "meta": [metadata]}

 def _run_non_streaming(
     self, prompt: str, prompt_token_count: int, num_responses: int, generation_kwargs: Dict[str, Any]

@@ -234,4 +234,4 @@ class HuggingFaceTGIGenerator:
         }
     )
     responses.append(tgr.generated_text)
-return {"replies": responses, "metadata": all_metadata}
+return {"replies": responses, "meta": all_metadata}
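For the generators the rename is part of the public interface: the `@component.output_types` socket changes from `metadata` to `meta`, so result dicts are indexed differently. Roughly (import path assumed; the output keys come straight from the hunks above):

    from haystack.components.generators import GPTGenerator  # path assumed

    generator = GPTGenerator(api_key="sk-...")  # placeholder key
    result = generator.run(prompt="What is NLP?")
    replies, meta = result["replies"], result["meta"]  # was result["metadata"]
    print(replies[0], meta[0]["finish_reason"])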
@@ -37,7 +37,7 @@ class GPTGenerator:

 >> {'replies': ['Natural Language Processing (NLP) is a branch of artificial intelligence that focuses on
 >> the interaction between computers and human language. It involves enabling computers to understand, interpret,
->> and respond to natural human language in a way that is both meaningful and useful.'], 'metadata': [{'model':
+>> and respond to natural human language in a way that is both meaningful and useful.'], 'meta': [{'model':
 >> 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 16,
 >> 'completion_tokens': 49, 'total_tokens': 65}}]}
 ```

@@ -146,7 +146,7 @@ class GPTGenerator:
     data["init_parameters"]["streaming_callback"] = deserialize_callback_handler(serialized_callback_handler)
     return default_from_dict(cls, data)

-@component.output_types(replies=List[str], metadata=List[Dict[str, Any]])
+@component.output_types(replies=List[str], meta=List[Dict[str, Any]])
 def run(self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None):
     """
     Invoke the text generation inference based on the provided messages and generation parameters.

@@ -200,7 +200,7 @@ class GPTGenerator:

 return {
     "replies": [message.content for message in completions],
-    "metadata": [message.metadata for message in completions],
+    "meta": [message.meta for message in completions],
 }

 def _convert_to_openai_format(self, messages: List[ChatMessage]) -> List[Dict[str, Any]]:

@@ -222,7 +222,7 @@ class GPTGenerator:
     Connects the streaming chunks into a single ChatMessage.
     """
     complete_response = ChatMessage.from_assistant("".join([chunk.content for chunk in chunks]))
-    complete_response.metadata.update(
+    complete_response.meta.update(
         {
             "model": chunk.model,
             "index": 0,

@@ -242,7 +242,7 @@ class GPTGenerator:
     message: OpenAIObject = choice.message
     content = dict(message.function_call) if choice.finish_reason == "function_call" else message.content
     chat_message = ChatMessage.from_assistant(content)
-    chat_message.metadata.update(
+    chat_message.meta.update(
         {
             "model": completion.model,
             "index": choice.index,

@@ -267,9 +267,7 @@ class GPTGenerator:
     else:
         content = ""
     chunk_message = StreamingChunk(content)
-    chunk_message.metadata.update(
-        {"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason}
-    )
+    chunk_message.meta.update({"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason})
     return chunk_message

 def _check_finish_reason(self, message: ChatMessage) -> None:

@@ -278,13 +276,13 @@ class GPTGenerator:
     If the `finish_reason` is `length`, log a warning to the user.
     :param message: The message returned by the LLM.
     """
-    if message.metadata["finish_reason"] == "length":
+    if message.meta["finish_reason"] == "length":
         logger.warning(
             "The completion for index %s has been truncated before reaching a natural stopping point. "
             "Increase the max_tokens parameter to allow for longer completions.",
-            message.metadata["index"],
+            message.meta["index"],
         )
-    if message.metadata["finish_reason"] == "content_filter":
+    if message.meta["finish_reason"] == "content_filter":
         logger.warning(
-            "The completion for index %s has been truncated due to the content filter.", message.metadata["index"]
+            "The completion for index %s has been truncated due to the content filter.", message.meta["index"]
         )
@@ -58,7 +58,7 @@ class FileTypeRouter:
 if isinstance(source, Path):
     mime_type = self.get_mime_type(source)
 elif isinstance(source, ByteStream):
-    mime_type = source.metadata.get("content_type")
+    mime_type = source.meta.get("content_type")
 else:
     raise ValueError(f"Unsupported data source type: {type(source)}")

@@ -10,7 +10,7 @@ class ByteStream:
     """

     data: bytes
-    metadata: Dict[str, Any] = field(default_factory=dict, hash=False)
+    meta: Dict[str, Any] = field(default_factory=dict, hash=False)
     mime_type: Optional[str] = field(default=None)

     def to_file(self, destination_path: Path):
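Because `ByteStream` is a dataclass, the field rename also renames the constructor keyword. A minimal sketch (import path assumed):

    from haystack.dataclasses import ByteStream  # path assumed

    bs = ByteStream(data=b"<h1>hi</h1>", meta={"content_type": "text/html"})  # was metadata=
    assert bs.meta["content_type"] == "text/html"
    assert bs.mime_type is None  # unrelated field, unchanged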
@@ -20,13 +20,13 @@ class ChatMessage:
     :param content: The text content of the message.
     :param role: The role of the entity sending the message.
     :param name: The name of the function being called (only applicable for role FUNCTION).
-    :param metadata: Additional metadata associated with the message.
+    :param meta: Additional metadata associated with the message.
     """

     content: str
     role: ChatRole
     name: Optional[str]
-    metadata: Dict[str, Any] = field(default_factory=dict, hash=False)
+    meta: Dict[str, Any] = field(default_factory=dict, hash=False)

     def is_from(self, role: ChatRole) -> bool:
         """

@@ -38,15 +38,15 @@ class ChatMessage:
     return self.role == role

 @classmethod
-def from_assistant(cls, content: str, metadata: Optional[Dict[str, Any]] = None) -> "ChatMessage":
+def from_assistant(cls, content: str, meta: Optional[Dict[str, Any]] = None) -> "ChatMessage":
     """
     Create a message from the assistant.

     :param content: The text content of the message.
-    :param metadata: Additional metadata associated with the message.
+    :param meta: Additional metadata associated with the message.
     :return: A new ChatMessage instance.
     """
-    return cls(content, ChatRole.ASSISTANT, None, metadata or {})
+    return cls(content, ChatRole.ASSISTANT, None, meta or {})

 @classmethod
 def from_user(cls, content: str) -> "ChatMessage":
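Note that `from_assistant` still fills the field positionally (`cls(content, ChatRole.ASSISTANT, None, meta or {})`), so only keyword callers and attribute accesses need updating. A sketch (import path assumed):

    from haystack.dataclasses import ChatMessage, ChatRole  # path assumed

    msg = ChatMessage.from_assistant("Berlin.", meta={"model": "gpt-3.5-turbo-0613"})
    assert msg.is_from(ChatRole.ASSISTANT)
    assert msg.meta["model"] == "gpt-3.5-turbo-0613"  # msg.metadata would now raise AttributeError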
@@ -10,8 +10,8 @@ class StreamingChunk:
     streamed data in a systematic manner.

     :param content: The content of the message chunk as a string.
-    :param metadata: A dictionary containing metadata related to the message chunk.
+    :param meta: A dictionary containing metadata related to the message chunk.
     """

     content: str
-    metadata: Dict[str, Any] = field(default_factory=dict, hash=False)
+    meta: Dict[str, Any] = field(default_factory=dict, hash=False)
@@ -67,7 +67,7 @@ class _RAGPipeline:
 self.pipeline.connect("retriever", "prompt_builder.documents")
 self.pipeline.connect("prompt_builder.prompt", "llm.prompt")
 self.pipeline.connect("llm.replies", "answer_builder.replies")
-self.pipeline.connect("llm.metadata", "answer_builder.metadata")
+self.pipeline.connect("llm.meta", "answer_builder.meta")
 self.pipeline.connect("retriever", "answer_builder.documents")

 def run(self, query: str) -> Answer:
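Since `Pipeline.connect` addresses sockets by plain strings, both ends of a connection have to be renamed in the same commit; a stale `"llm.metadata"` would simply fail to resolve. A hedged wiring sketch (component classes and import paths assumed):

    from haystack import Pipeline  # path assumed
    from haystack.components.builders.answer_builder import AnswerBuilder  # path assumed
    from haystack.components.generators import GPTGenerator                # path assumed

    pipeline = Pipeline()
    pipeline.add_component("llm", GPTGenerator(api_key="sk-..."))  # placeholder key
    pipeline.add_component("answer_builder", AnswerBuilder())
    pipeline.connect("llm.replies", "answer_builder.replies")
    pipeline.connect("llm.meta", "answer_builder.meta")  # both socket names renamed together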
@@ -0,0 +1,4 @@
+---
+enhancements:
+  - |
+    Rename all metadata references to meta.
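The diff adds no backward-compatibility alias, so leftover usages fail fast. A throwaway check along these lines (purely illustrative) can confirm a codebase is on the new attribute:

    from haystack.dataclasses import ByteStream  # path assumed

    bs = ByteStream(data=b"")
    assert hasattr(bs, "meta") and not hasattr(bs, "metadata")  # old attribute is gone, not aliased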
@@ -125,7 +125,7 @@ class TestLocalWhisperTranscriber:
 }
 path = SAMPLES_PATH / "audio" / "this is the content of the document.wav"
 bs = ByteStream.from_file_path(path)
-bs.metadata["file_path"] = path
+bs.meta["file_path"] = path
 results = comp.transcribe(sources=[bs])
 expected = Document(
     content="test transcription", meta={"audio_file": path, "other_metadata": ["other", "meta", "data"]}
@@ -210,7 +210,7 @@ class TestRemoteWhisperTranscriber:
 transcriber = RemoteWhisperTranscriber(api_key="test_api_key", model_name=model, response_format="json")
 with open(file_path, "rb") as audio_stream:
     byte_stream = audio_stream.read()
     audio_file = ByteStream(byte_stream, metadata={"file_path": str(file_path.absolute())})

     result = transcriber.run(sources=[audio_file])

@@ -10,7 +10,7 @@ class TestAnswerBuilder:
 def test_run_unmatching_input_len(self):
     component = AnswerBuilder()
     with pytest.raises(ValueError):
-        component.run(query="query", replies=["reply1"], metadata=[{"test": "meta"}, {"test": "meta2"}])
+        component.run(query="query", replies=["reply1"], meta=[{"test": "meta"}, {"test": "meta2"}])

 def test_run_without_meta(self):
     component = AnswerBuilder()

@@ -24,7 +24,7 @@ class TestAnswerBuilder:

 def test_run_meta_is_an_empty_list(self):
     component = AnswerBuilder()
-    output = component.run(query="query", replies=["reply1"], metadata=[])
+    output = component.run(query="query", replies=["reply1"], meta=[])
     answers = output["answers"]
     assert answers[0].data == "reply1"
     assert answers[0].meta == {}

@@ -34,7 +34,7 @@ class TestAnswerBuilder:

 def test_run_without_pattern(self):
     component = AnswerBuilder()
-    output = component.run(query="test query", replies=["Answer: AnswerString"], metadata=[{}])
+    output = component.run(query="test query", replies=["Answer: AnswerString"], meta=[{}])
     answers = output["answers"]
     assert len(answers) == 1
     assert answers[0].data == "Answer: AnswerString"

@@ -45,7 +45,7 @@ class TestAnswerBuilder:

 def test_run_with_pattern_with_capturing_group(self):
     component = AnswerBuilder(pattern=r"Answer: (.*)")
-    output = component.run(query="test query", replies=["Answer: AnswerString"], metadata=[{}])
+    output = component.run(query="test query", replies=["Answer: AnswerString"], meta=[{}])
     answers = output["answers"]
     assert len(answers) == 1
     assert answers[0].data == "AnswerString"

@@ -56,7 +56,7 @@ class TestAnswerBuilder:

 def test_run_with_pattern_without_capturing_group(self):
     component = AnswerBuilder(pattern=r"'.*'")
-    output = component.run(query="test query", replies=["Answer: 'AnswerString'"], metadata=[{}])
+    output = component.run(query="test query", replies=["Answer: 'AnswerString'"], meta=[{}])
     answers = output["answers"]
     assert len(answers) == 1
     assert answers[0].data == "'AnswerString'"

@@ -71,9 +71,7 @@ class TestAnswerBuilder:

 def test_run_with_pattern_set_at_runtime(self):
     component = AnswerBuilder(pattern="unused pattern")
-    output = component.run(
-        query="test query", replies=["Answer: AnswerString"], metadata=[{}], pattern=r"Answer: (.*)"
-    )
+    output = component.run(query="test query", replies=["Answer: AnswerString"], meta=[{}], pattern=r"Answer: (.*)")
     answers = output["answers"]
     assert len(answers) == 1
     assert answers[0].data == "AnswerString"

@@ -87,7 +85,7 @@ class TestAnswerBuilder:
 output = component.run(
     query="test query",
     replies=["Answer: AnswerString"],
-    metadata=[{}],
+    meta=[{}],
     documents=[Document(content="test doc 1"), Document(content="test doc 2")],
 )
 answers = output["answers"]

@@ -104,7 +102,7 @@ class TestAnswerBuilder:
 output = component.run(
     query="test query",
     replies=["Answer: AnswerString[2]"],
-    metadata=[{}],
+    meta=[{}],
     documents=[Document(content="test doc 1"), Document(content="test doc 2")],
 )
 answers = output["answers"]

@@ -121,7 +119,7 @@ class TestAnswerBuilder:
 output = component.run(
     query="test query",
     replies=["Answer: AnswerString[3]"],
-    metadata=[{}],
+    meta=[{}],
     documents=[Document(content="test doc 1"), Document(content="test doc 2")],
 )
 answers = output["answers"]

@@ -137,7 +135,7 @@ class TestAnswerBuilder:
 output = component.run(
     query="test query",
     replies=["Answer: AnswerString[2][3]"],
-    metadata=[{}],
+    meta=[{}],
     documents=[Document(content="test doc 1"), Document(content="test doc 2"), Document(content="test doc 3")],
     reference_pattern="\\[(\\d+)\\]",
 )
@@ -45,7 +45,7 @@ class TestAzureOCRDocumentConverter:
 }

 def test_run_with_meta(self):
-    bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+    bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

     with patch("haystack.components.converters.azure.DocumentAnalysisClient"):
         component = AzureOCRDocumentConverter(endpoint="test_endpoint", api_key="test_credential_key")
@@ -63,7 +63,7 @@ class TestHTMLToDocument:
 converter = HTMLToDocument()
 with open(test_files_path / "html" / "what_is_haystack.html", "rb") as file:
     byte_stream = file.read()
-    stream = ByteStream(byte_stream, metadata={"content_type": "text/html", "url": "test_url"})
+    stream = ByteStream(byte_stream, meta={"content_type": "text/html", "url": "test_url"})

 results = converter.run(sources=[stream])
 docs = results["documents"]

@@ -81,7 +81,7 @@ class TestHTMLToDocument:
 converter = HTMLToDocument()
 with open(test_files_path / "html" / "what_is_haystack.html", "rb") as file:
     byte_stream = file.read()
-    stream = ByteStream(byte_stream, metadata={"content_type": "text/html", "url": "test_url"})
+    stream = ByteStream(byte_stream, meta={"content_type": "text/html", "url": "test_url"})

 metadata = [{"file_name": "what_is_haystack.html"}]
 results = converter.run(sources=[stream], meta=metadata)

@@ -103,7 +103,7 @@ class TestHTMLToDocument:
 with open(test_files_path / "html" / "what_is_haystack.html", "rb") as file:
     byte_stream = file.read()
     # ByteStream has "url" present in metadata
-    stream = ByteStream(byte_stream, metadata={"content_type": "text/html", "url": "test_url_correct"})
+    stream = ByteStream(byte_stream, meta={"content_type": "text/html", "url": "test_url_correct"})

 # "url" supplied by the user overwrites value present in metadata
 metadata = [{"file_name": "what_is_haystack.html", "url": "test_url_new"}]
@@ -32,7 +32,7 @@ class TestMarkdownToDocument:
 assert "# git clone https://github.com/deepset-ai/haystack.git" in doc.content

 def test_run_with_meta(self):
-    bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+    bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

     converter = MarkdownToDocument()

@@ -30,7 +30,7 @@ class TestPyPDFToDocument:
 assert "ReAct" in docs[0].content

 def test_run_with_meta(self):
-    bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+    bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

     converter = PyPDFToDocument()
     with patch("haystack.components.converters.pypdf.PdfReader"):
@@ -14,8 +14,8 @@ class TestTextfileToDocument:
     Test if the component runs correctly.
     """
     bytestream = ByteStream.from_file_path(test_files_path / "txt" / "doc_3.txt")
-    bytestream.metadata["file_path"] = str(test_files_path / "txt" / "doc_3.txt")
-    bytestream.metadata["key"] = "value"
+    bytestream.meta["file_path"] = str(test_files_path / "txt" / "doc_3.txt")
+    bytestream.meta["key"] = "value"
     files = [str(test_files_path / "txt" / "doc_1.txt"), test_files_path / "txt" / "doc_2.txt", bytestream]
     converter = TextFileToDocument()
     output = converter.run(sources=files)

@@ -26,7 +26,7 @@ class TestTextfileToDocument:
 assert "That's yet another file!" in docs[2].content
 assert docs[0].meta["file_path"] == str(files[0])
 assert docs[1].meta["file_path"] == str(files[1])
-assert docs[2].meta == bytestream.metadata
+assert docs[2].meta == bytestream.meta

 def test_run_error_handling(self, test_files_path, caplog):
     """

@@ -47,18 +47,18 @@ class TestTextfileToDocument:
     Test if the encoding metadata field is used properly
     """
     bytestream = ByteStream.from_file_path(test_files_path / "txt" / "doc_1.txt")
-    bytestream.metadata["key"] = "value"
+    bytestream.meta["key"] = "value"

     converter = TextFileToDocument(encoding="utf-16")
     output = converter.run(sources=[bytestream])
     assert "Some text for testing." not in output["documents"][0].content

-    bytestream.metadata["encoding"] = "utf-8"
+    bytestream.meta["encoding"] = "utf-8"
     output = converter.run(sources=[bytestream])
     assert "Some text for testing." in output["documents"][0].content

 def test_run_with_meta(self):
-    bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+    bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

     converter = TextFileToDocument()

@@ -19,7 +19,7 @@ class TestTikaDocumentConverter:
 assert documents[0].content == "Content of mock source"

 def test_run_with_meta(self):
-    bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"})
+    bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"})

     converter = TikaDocumentConverter()
     with patch("haystack.components.converters.tika.tika_parser.from_buffer"):
@ -67,7 +67,7 @@ class TestLinkContentFetcher:
|
|||||||
streams = fetcher.run(urls=["https://www.example.com"])["streams"]
|
streams = fetcher.run(urls=["https://www.example.com"])["streams"]
|
||||||
first_stream = streams[0]
|
first_stream = streams[0]
|
||||||
assert first_stream.data == correct_response
|
assert first_stream.data == correct_response
|
||||||
assert first_stream.metadata["content_type"] == "text/plain"
|
assert first_stream.meta["content_type"] == "text/plain"
|
||||||
|
|
||||||
def test_run_html(self):
|
def test_run_html(self):
|
||||||
correct_response = b"<h1>Example test response</h1>"
|
correct_response = b"<h1>Example test response</h1>"
|
||||||
@ -79,7 +79,7 @@ class TestLinkContentFetcher:
|
|||||||
streams = fetcher.run(urls=["https://www.example.com"])["streams"]
|
streams = fetcher.run(urls=["https://www.example.com"])["streams"]
|
||||||
first_stream = streams[0]
|
first_stream = streams[0]
|
||||||
assert first_stream.data == correct_response
|
assert first_stream.data == correct_response
|
||||||
assert first_stream.metadata["content_type"] == "text/html"
|
assert first_stream.meta["content_type"] == "text/html"
|
||||||
|
|
||||||
def test_run_binary(self, test_files_path):
|
def test_run_binary(self, test_files_path):
|
||||||
file_bytes = open(test_files_path / "pdf" / "sample_pdf_1.pdf", "rb").read()
|
file_bytes = open(test_files_path / "pdf" / "sample_pdf_1.pdf", "rb").read()
|
||||||
@ -91,7 +91,7 @@ class TestLinkContentFetcher:
|
|||||||
streams = fetcher.run(urls=["https://www.example.com"])["streams"]
|
streams = fetcher.run(urls=["https://www.example.com"])["streams"]
|
||||||
first_stream = streams[0]
|
first_stream = streams[0]
|
||||||
assert first_stream.data == file_bytes
|
assert first_stream.data == file_bytes
|
||||||
assert first_stream.metadata["content_type"] == "application/pdf"
|
assert first_stream.meta["content_type"] == "application/pdf"
|
||||||
|
|
||||||
def test_run_bad_status_code(self):
|
def test_run_bad_status_code(self):
|
||||||
empty_byte_stream = b""
|
empty_byte_stream = b""
|
||||||
@ -105,7 +105,7 @@ class TestLinkContentFetcher:
|
|||||||
assert len(streams) == 1
|
assert len(streams) == 1
|
||||||
first_stream = streams[0]
|
first_stream = streams[0]
|
||||||
assert first_stream.data == empty_byte_stream
|
assert first_stream.data == empty_byte_stream
|
||||||
assert first_stream.metadata["content_type"] == "text/html"
|
assert first_stream.meta["content_type"] == "text/html"
|
||||||
|
|
||||||
@pytest.mark.integration
|
@pytest.mark.integration
|
||||||
def test_link_content_fetcher_html(self):
|
def test_link_content_fetcher_html(self):
|
||||||
@ -113,8 +113,8 @@ class TestLinkContentFetcher:
|
|||||||
streams = fetcher.run([HTML_URL])["streams"]
|
streams = fetcher.run([HTML_URL])["streams"]
|
||||||
first_stream = streams[0]
|
first_stream = streams[0]
|
||||||
assert "Haystack" in first_stream.data.decode("utf-8")
|
assert "Haystack" in first_stream.data.decode("utf-8")
|
||||||
assert first_stream.metadata["content_type"] == "text/html"
|
assert first_stream.meta["content_type"] == "text/html"
|
||||||
assert "url" in first_stream.metadata and first_stream.metadata["url"] == HTML_URL
|
assert "url" in first_stream.meta and first_stream.meta["url"] == HTML_URL
|
||||||
|
|
||||||
@pytest.mark.integration
|
@pytest.mark.integration
|
||||||
def test_link_content_fetcher_text(self):
|
def test_link_content_fetcher_text(self):
|
||||||
@ -122,8 +122,8 @@ class TestLinkContentFetcher:
|
|||||||
streams = fetcher.run([TEXT_URL])["streams"]
|
streams = fetcher.run([TEXT_URL])["streams"]
|
||||||
first_stream = streams[0]
|
first_stream = streams[0]
|
||||||
assert "Haystack" in first_stream.data.decode("utf-8")
|
assert "Haystack" in first_stream.data.decode("utf-8")
|
||||||
assert first_stream.metadata["content_type"] == "text/plain"
|
assert first_stream.meta["content_type"] == "text/plain"
|
||||||
assert "url" in first_stream.metadata and first_stream.metadata["url"] == TEXT_URL
|
assert "url" in first_stream.meta and first_stream.meta["url"] == TEXT_URL
|
||||||
|
|
||||||
@pytest.mark.integration
|
@pytest.mark.integration
|
||||||
def test_link_content_fetcher_pdf(self):
|
def test_link_content_fetcher_pdf(self):
|
||||||
@ -131,8 +131,8 @@ class TestLinkContentFetcher:
|
|||||||
streams = fetcher.run([PDF_URL])["streams"]
|
streams = fetcher.run([PDF_URL])["streams"]
|
||||||
assert len(streams) == 1
|
assert len(streams) == 1
|
||||||
first_stream = streams[0]
|
first_stream = streams[0]
|
||||||
assert first_stream.metadata["content_type"] in ("application/octet-stream", "application/pdf")
|
assert first_stream.meta["content_type"] in ("application/octet-stream", "application/pdf")
|
||||||
assert "url" in first_stream.metadata and first_stream.metadata["url"] == PDF_URL
|
assert "url" in first_stream.meta and first_stream.meta["url"] == PDF_URL
|
||||||
|
|
||||||
@pytest.mark.integration
|
@pytest.mark.integration
|
||||||
def test_link_content_fetcher_multiple_different_content_types(self):
|
def test_link_content_fetcher_multiple_different_content_types(self):
|
||||||
@@ -143,10 +143,10 @@ class TestLinkContentFetcher:
        streams = fetcher.run([PDF_URL, HTML_URL])["streams"]
        assert len(streams) == 2
        for stream in streams:
-            assert stream.metadata["content_type"] in ("text/html", "application/pdf", "application/octet-stream")
-            if stream.metadata["content_type"] == "text/html":
+            assert stream.meta["content_type"] in ("text/html", "application/pdf", "application/octet-stream")
+            if stream.meta["content_type"] == "text/html":
                assert "Haystack" in stream.data.decode("utf-8")
-            elif stream.metadata["content_type"] == "application/pdf":
+            elif stream.meta["content_type"] == "application/pdf":
                assert len(stream.data) > 0

    @pytest.mark.integration
@@ -160,10 +160,10 @@ class TestLinkContentFetcher:
        streams = fetcher.run([PDF_URL, HTML_URL, "https://google.com"])["streams"]
        assert len(streams) == 3
        for stream in streams:
-            assert stream.metadata["content_type"] in ("text/html", "application/pdf", "application/octet-stream")
-            if stream.metadata["content_type"] == "text/html":
+            assert stream.meta["content_type"] in ("text/html", "application/pdf", "application/octet-stream")
+            if stream.meta["content_type"] == "text/html":
                assert "Haystack" in stream.data.decode("utf-8") or "Google" in stream.data.decode("utf-8")
-            elif stream.metadata["content_type"] == "application/pdf":
+            elif stream.meta["content_type"] == "application/pdf":
                assert len(stream.data) > 0

    @pytest.mark.integration
@@ -177,7 +177,7 @@ class TestLinkContentFetcher:
        result = fetcher.run(["https://non_existent_website_dot.com/", "https://www.google.com/"])
        assert len(result["streams"]) == 1
        first_stream = result["streams"][0]
-        assert first_stream.metadata["content_type"] == "text/html"
+        assert first_stream.meta["content_type"] == "text/html"

    @pytest.mark.integration
    def test_bad_request_exception_raised(self):
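Taken together, these hunks pin down the renamed fetcher contract: the fetched ByteStream objects now expose their metadata through a `meta` dict instead of `metadata`. A minimal usage sketch follows; the import path is an assumption, since it is not shown in this diff.

# Hypothetical usage sketch; the import path below is assumed, not confirmed by this diff.
from haystack.components.fetchers import LinkContentFetcher

fetcher = LinkContentFetcher()
streams = fetcher.run(["https://haystack.deepset.ai"])["streams"]
for stream in streams:
    # After this commit, ByteStream carries its metadata in `meta` (was `metadata`).
    print(stream.meta["content_type"], stream.meta["url"])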
@@ -241,7 +241,7 @@ class TestGPTChatGenerator:
        component = GPTChatGenerator(api_key="test-api-key")
        messages = [
            ChatMessage.from_assistant(
-                "", metadata={"finish_reason": "content_filter" if i % 2 == 0 else "length", "index": i}
+                "", meta={"finish_reason": "content_filter" if i % 2 == 0 else "length", "index": i}
            )
            for i, _ in enumerate(range(4))
        ]
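The same rename applies to ChatMessage: extra fields now travel in `meta`, either passed at construction (as in the hunk above) or mutated afterwards. A minimal sketch, assuming ChatMessage is importable from `haystack.dataclasses` alongside StreamingChunk; the "model" key is illustrative only.

# Sketch under assumed import path; the "model" key is illustrative, not taken from this diff.
from haystack.dataclasses import ChatMessage

msg = ChatMessage.from_assistant("Hello", meta={"finish_reason": "stop", "index": 0})
msg.meta.update({"model": "gpt-3.5-turbo"})
assert msg.meta["finish_reason"] == "stop"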
@@ -124,11 +124,11 @@ class TestHuggingFaceTGIGenerator:

        assert isinstance(response, dict)
        assert "replies" in response
-        assert "metadata" in response
+        assert "meta" in response
        assert isinstance(response["replies"], list)
-        assert isinstance(response["metadata"], list)
+        assert isinstance(response["meta"], list)
        assert len(response["replies"]) == 1
-        assert len(response["metadata"]) == 1
+        assert len(response["meta"]) == 1
        assert [isinstance(reply, str) for reply in response["replies"]]

    def test_generate_multiple_text_responses_with_valid_prompt_and_generation_parameters(
@@ -157,14 +157,14 @@ class TestHuggingFaceTGIGenerator:

        assert isinstance(response, dict)
        assert "replies" in response
-        assert "metadata" in response
+        assert "meta" in response
        assert isinstance(response["replies"], list)
        assert [isinstance(reply, str) for reply in response["replies"]]

-        assert isinstance(response["metadata"], list)
+        assert isinstance(response["meta"], list)
        assert len(response["replies"]) == 3
-        assert len(response["metadata"]) == 3
-        assert [isinstance(reply, dict) for reply in response["metadata"]]
+        assert len(response["meta"]) == 3
+        assert [isinstance(reply, dict) for reply in response["meta"]]

    def test_initialize_with_invalid_model(self, mock_check_valid_model):
        model = "invalid_model"
@@ -200,9 +200,9 @@ class TestHuggingFaceTGIGenerator:
        assert [isinstance(reply, str) for reply in response["replies"]]

        # Assert that the response contains the metadata
-        assert "metadata" in response
-        assert isinstance(response["metadata"], list)
-        assert len(response["metadata"]) > 0
+        assert "meta" in response
+        assert isinstance(response["meta"], list)
+        assert len(response["meta"]) > 0
        assert [isinstance(reply, dict) for reply in response["replies"]]

    def test_generate_text_with_custom_generation_parameters(
@@ -226,9 +226,9 @@ class TestHuggingFaceTGIGenerator:
        assert response["replies"][0] == "I'm fine, thanks."

        # Assert that the response contains the metadata
-        assert "metadata" in response
-        assert isinstance(response["metadata"], list)
-        assert len(response["metadata"]) > 0
+        assert "meta" in response
+        assert isinstance(response["meta"], list)
+        assert len(response["meta"]) > 0
        assert [isinstance(reply, str) for reply in response["replies"]]

    def test_generate_text_with_streaming_callback(
@@ -278,7 +278,7 @@ class TestHuggingFaceTGIGenerator:
        assert [isinstance(reply, str) for reply in response["replies"]]

        # Assert that the response contains the metadata
-        assert "metadata" in response
-        assert isinstance(response["metadata"], list)
-        assert len(response["metadata"]) > 0
+        assert "meta" in response
+        assert isinstance(response["meta"], list)
+        assert len(response["meta"]) > 0
        assert [isinstance(reply, dict) for reply in response["replies"]]
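Across all of these generator tests the asserted output shape is the same: run() now returns parallel "replies" and "meta" lists, one metadata dict per reply, with the key renamed from "metadata". A pure-data sketch of that contract; the dict contents shown are illustrative, not taken from this diff.

# Shape of a generator's run() output after the rename; dict contents are illustrative.
response = {
    "replies": ["I'm fine, thanks."],
    "meta": [{"finish_reason": "stop", "index": 0}],
}
assert "meta" in response
assert len(response["meta"]) == len(response["replies"])
assert all(isinstance(m, dict) for m in response["meta"])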
@@ -242,7 +242,7 @@ class TestGPTGenerator:
        for i, _ in enumerate(range(4)):
            message = ChatMessage.from_assistant("Hello")
            metadata = {"finish_reason": "content_filter" if i % 2 == 0 else "length", "index": i}
-            message.metadata.update(metadata)
+            message.meta.update(metadata)
            messages.append(message)

        for m in messages:
@@ -46,13 +46,13 @@ class TestFileTypeRouter:
        for path, mime_type in zip(file_paths, mime_types):
            stream = ByteStream(path.read_bytes())

-            stream.metadata["content_type"] = mime_type
+            stream.meta["content_type"] = mime_type

            byte_streams.append(stream)

        # add unclassified ByteStream
        bs = ByteStream(b"unclassified content")
-        bs.metadata["content_type"] = "unknown_type"
+        bs.meta["content_type"] = "unknown_type"
        byte_streams.append(bs)

        router = FileTypeRouter(mime_types=["text/plain", "audio/x-wav", "image/jpeg"])
@@ -75,7 +75,7 @@ class TestFileTypeRouter:
        byte_stream_sources = []
        for path, mime_type in zip(file_paths, mime_types):
            stream = ByteStream(path.read_bytes())
-            stream.metadata["content_type"] = mime_type
+            stream.meta["content_type"] = mime_type
            byte_stream_sources.append(stream)

        mixed_sources = file_paths[:2] + byte_stream_sources[2:]
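Routing by MIME type follows the same pattern: tag each ByteStream through its renamed `meta` dict before handing it to the router. A minimal sketch, assuming `ByteStream` is importable from `haystack.dataclasses`, the router from `haystack.components.routers`, and a run(sources=...) signature; all three are assumptions inferred from context, since this diff shows none of them directly.

# Sketch under assumed import paths; run(sources=...) is inferred, not shown in this diff.
from haystack.components.routers import FileTypeRouter
from haystack.dataclasses import ByteStream

stream = ByteStream(b"plain text content")
stream.meta["content_type"] = "text/plain"  # was stream.metadata[...]
router = FileTypeRouter(mime_types=["text/plain", "audio/x-wav", "image/jpeg"])
result = router.run(sources=[stream])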
@@ -4,25 +4,25 @@ from haystack.dataclasses import StreamingChunk


def test_create_chunk_with_content_and_metadata():
-    chunk = StreamingChunk(content="Test content", metadata={"key": "value"})
+    chunk = StreamingChunk(content="Test content", meta={"key": "value"})

    assert chunk.content == "Test content"
-    assert chunk.metadata == {"key": "value"}
+    assert chunk.meta == {"key": "value"}


def test_create_chunk_with_only_content():
    chunk = StreamingChunk(content="Test content")

    assert chunk.content == "Test content"
-    assert chunk.metadata == {}
+    assert chunk.meta == {}


def test_access_content():
-    chunk = StreamingChunk(content="Test content", metadata={"key": "value"})
+    chunk = StreamingChunk(content="Test content", meta={"key": "value"})
    assert chunk.content == "Test content"


def test_create_chunk_with_empty_content():
    chunk = StreamingChunk(content="")
    assert chunk.content == ""
-    assert chunk.metadata == {}
+    assert chunk.meta == {}
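The StreamingChunk hunk above doubles as a migration example; condensed, with the import path confirmed by the hunk header:

from haystack.dataclasses import StreamingChunk

chunk = StreamingChunk(content="Test content", meta={"key": "value"})  # keyword was `metadata`
assert chunk.meta == {"key": "value"}
assert chunk.content == "Test content"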