docs: fixing all D205 docstring issues (#7577)

* fixing all D205 issues

* Update haystack/components/embedders/hugging_face_api_document_embedder.py

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>

* Update haystack/components/embedders/hugging_face_api_text_embedder.py

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>

* Update haystack/components/generators/chat/hugging_face_api.py

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>

* Update haystack/components/generators/chat/hugging_face_local.py

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>

* Update haystack/components/generators/hugging_face_api.py

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>

* fixing D205 issues and addressing PR comments

* fixing D205 issues and addressing PR comments

* Update haystack/components/converters/azure.py

Co-authored-by: Daria Fokina <daria.fokina@deepset.ai>

* Update haystack/components/converters/azure.py

Co-authored-by: Daria Fokina <daria.fokina@deepset.ai>

* Update haystack/components/extractors/named_entity_extractor.py

Co-authored-by: Daria Fokina <daria.fokina@deepset.ai>

* Update haystack/components/extractors/named_entity_extractor.py

Co-authored-by: Daria Fokina <daria.fokina@deepset.ai>

* Update haystack/core/component/component.py

Co-authored-by: Daria Fokina <daria.fokina@deepset.ai>

* Update haystack/components/evaluators/answer_exact_match.py

Co-authored-by: Daria Fokina <daria.fokina@deepset.ai>

* Update haystack/core/pipeline/template.py

Co-authored-by: Daria Fokina <daria.fokina@deepset.ai>

* Update haystack/core/serialization.py

Co-authored-by: Daria Fokina <daria.fokina@deepset.ai>

* Update haystack/core/serialization.py

Co-authored-by: Daria Fokina <daria.fokina@deepset.ai>

* Update haystack/core/pipeline/draw.py

Co-authored-by: Daria Fokina <daria.fokina@deepset.ai>

* Update haystack/components/generators/azure.py

Co-authored-by: Daria Fokina <daria.fokina@deepset.ai>

* Apply suggestions from code review

Co-authored-by: Daria Fokina <daria.fokina@deepset.ai>

---------

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>
Co-authored-by: Daria Fokina <daria.fokina@deepset.ai>
David S. Batista 2024-04-23 15:42:46 +02:00 committed by GitHub
parent 081757c6b9
commit 201db5b288
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
53 changed files with 247 additions and 83 deletions

View File

@ -10,6 +10,7 @@ logger = logging.getLogger(__name__)
class AnswerBuilder:
"""
Takes a query and the replies a Generator returns as input and parses them into GeneratedAnswer objects.
Optionally, it also takes Documents and metadata from the Generator as inputs to enrich the GeneratedAnswer objects.
Usage example:
@ -126,9 +127,10 @@ class AnswerBuilder:
def _extract_answer_string(reply: str, pattern: Optional[str] = None) -> str:
"""
Extract the answer string from the generator output using the specified pattern.
If no pattern is specified, the whole string is used as the answer.
:param replies:
:param reply:
The output of the Generator. A string.
:param pattern:
The regular expression pattern to use to extract the answer text from the generator output.
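For illustration, a pattern-based extraction along these lines can be sketched as follows. This is a hedged, standalone sketch, not the component's actual implementation; the function name and the empty-string fallback are assumptions:

```python
import re
from typing import Optional

def extract_answer_string(reply: str, pattern: Optional[str] = None) -> str:
    # With no pattern, the whole reply is treated as the answer
    if pattern is None:
        return reply
    match = re.search(pattern, reply)
    if match is None:
        # Assumption: no match yields an empty answer
        return ""
    # Prefer the first capturing group when the pattern defines one
    return match.group(1) if match.groups() else match.group(0)

# Example: pull the text after "Answer:" out of a reply
print(extract_answer_string("Reasoning... Answer: Paris", r"Answer:\s*(.*)"))  # Paris
```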

View File

@ -11,10 +11,12 @@ logger = logging.getLogger(__name__)
@component
class DynamicChatPromptBuilder:
"""
DynamicChatPromptBuilder is designed to construct dynamic prompts from a list of `ChatMessage` instances. It
integrates with Jinja2 templating for dynamic prompt generation. It considers any user or system message in the list
potentially containing a template and renders it with variables provided to the constructor. Additional template
variables can be fed into the component/pipeline `run` method and will be merged before rendering the template.
DynamicChatPromptBuilder is designed to construct dynamic prompts from a list of `ChatMessage` instances.
It integrates with Jinja2 templating for dynamic prompt generation. It considers any user or system message in the
list potentially containing a template and renders it with variables provided to the constructor. Additional
template variables can be fed into the component/pipeline `run` method and will be merged before rendering the
template.
Usage example:
```python
@ -92,6 +94,7 @@ class DynamicChatPromptBuilder:
def run(self, prompt_source: List[ChatMessage], template_variables: Optional[Dict[str, Any]] = None, **kwargs):
"""
Executes the dynamic prompt building process by processing a list of `ChatMessage` instances.
Any user message or system message is inspected for templates and rendered with the variables provided to the
constructor. You can provide additional template variables directly to this method, which are then merged with
the variables provided to the constructor.
@ -151,6 +154,7 @@ class DynamicChatPromptBuilder:
def _validate_template(self, template_text: str, provided_variables: Set[str]):
"""
Checks if all the required template variables are provided to the pipeline `run` method.
If all the required template variables are provided, returns a Jinja2 template object.
Otherwise, raises a ValueError.
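A validation like this can be sketched with Jinja2's own introspection utilities (assuming `jinja2` is installed; the component's actual environment setup may differ):

```python
from jinja2 import Environment, meta

def validate_template(template_text: str, provided_variables: set):
    env = Environment()
    # Parse the template and collect every variable it references
    ast = env.parse(template_text)
    required = meta.find_undeclared_variables(ast)
    missing = required - provided_variables
    if missing:
        raise ValueError(f"Missing required template variables: {missing}")
    return env.from_string(template_text)

template = validate_template("Hello, {{ name }}!", {"name"})
print(template.render(name="world"))  # Hello, world!
```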

View File

@ -10,8 +10,10 @@ logger = logging.getLogger(__name__)
@component
class DynamicPromptBuilder:
"""
DynamicPromptBuilder is designed to construct dynamic prompts for the pipeline. Users can change the prompt
template at runtime by providing a new template for each pipeline run invocation if needed.
DynamicPromptBuilder is designed to construct dynamic prompts for the pipeline.
Users can change the prompt template at runtime by providing a new template for each pipeline run invocation
if needed.
Usage example:
```python
@ -92,12 +94,15 @@ class DynamicPromptBuilder:
def run(self, prompt_source: str, template_variables: Optional[Dict[str, Any]] = None, **kwargs):
"""
Executes the dynamic prompt building process. Depending on the provided type of `prompt_source`, this method
either processes a list of `ChatMessage` instances or a string template. In the case of `ChatMessage` instances,
the last user message is treated as a template and rendered with the resolved pipeline variables and any
additional template variables provided. For a string template, it directly applies the template variables to
render the final prompt. You can provide additional template variables directly to this method, which are then
merged with the variables resolved from the pipeline runtime.
Executes the dynamic prompt building process.
Depending on the provided type of `prompt_source`, this method either processes a list of `ChatMessage`
instances or a string template. In the case of `ChatMessage` instances, the last user message is treated as a
template and rendered with the resolved pipeline variables and any additional template variables provided.
For a string template, it directly applies the template variables to render the final prompt. You can provide
additional template variables directly to this method, which are then merged with the variables resolved from
the pipeline runtime.
:param prompt_source:
A string template.
@ -127,6 +132,7 @@ class DynamicPromptBuilder:
def _validate_template(self, template_text: str, provided_variables: Set[str]):
"""
Checks if all the required template variables are provided to the pipeline `run` method.
If all the required template variables are provided, returns a Jinja2 template object.
Otherwise, raises a ValueError.

View File

@ -40,6 +40,8 @@ class PromptBuilder:
@component.output_types(prompt=str)
def run(self, **kwargs):
"""
Renders the prompt template with the provided variables.
:param kwargs:
The variables that will be used to render the prompt template.

View File

@ -10,8 +10,7 @@ logger = logging.getLogger(__name__)
@component
class CacheChecker:
"""
Checks for the presence of documents in a Document Store based on a specified
field in each document's metadata.
Checks for the presence of documents in a Document Store based on a specified field in each document's metadata.
If matching documents are found, they are returned as hits. If not, the items
are returned as misses, indicating they are not in the cache.
@ -92,8 +91,7 @@ class CacheChecker:
@component.output_types(hits=List[Document], misses=List)
def run(self, items: List[Any]):
"""
Checks if any document associated with the specified cache field
is already present in the store.
Checks if any document associated with the specified cache field is already present in the store.
:param items:
Values to be checked against the cache field.
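Conceptually, the hit/miss split reduces to a metadata lookup per item. A hedged sketch (the filter shape follows Haystack 2.x conventions but is an assumption here):

```python
from typing import Any, List

def split_hits_and_misses(document_store, items: List[Any], cache_field: str):
    hits, misses = [], []
    for item in items:
        # Look up documents whose metadata field matches the item
        found = document_store.filter_documents(
            filters={"field": f"meta.{cache_field}", "operator": "==", "value": item}
        )
        if found:
            hits.extend(found)   # already present in the store
        else:
            misses.append(item)  # not cached yet
    return {"hits": hits, "misses": misses}
```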

View File

@ -50,6 +50,8 @@ class DocumentLanguageClassifier:
def __init__(self, languages: Optional[List[str]] = None):
"""
Initialize the DocumentLanguageClassifier.
:param languages: A list of languages in ISO code, each corresponding to a different output connection.
For supported languages, see the [`langdetect` documentation](https://github.com/Mimino666/langdetect#languages).
If not specified, the default is ["en"].
@ -63,6 +65,7 @@ class DocumentLanguageClassifier:
def run(self, documents: List[Document]):
"""
This method classifies the documents' language and adds it to their metadata.
If a Document's text does not match any of the languages specified at initialization,
the metadata value "unmatched" will be stored.
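A minimal sketch of the per-document decision, assuming `langdetect` is installed (the helper name is illustrative):

```python
from langdetect import detect

def classify_language(text: str, languages=("en",)) -> str:
    try:
        code = detect(text)  # ISO code, e.g. "en"
    except Exception:
        # langdetect raises on empty or undetectable input
        return "unmatched"
    return code if code in languages else "unmatched"

print(classify_language("Hello, how are you today?"))  # en
```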

View File

@ -16,6 +16,8 @@ with LazyImport("Run 'pip install openapi3'") as openapi_imports:
@component
class OpenAPIServiceConnector:
"""
A component which connects the Haystack framework to OpenAPI services.
The `OpenAPIServiceConnector` component connects the Haystack framework to OpenAPI services, enabling it to call
operations as defined in the OpenAPI specification of the service.
@ -77,8 +79,10 @@ class OpenAPIServiceConnector:
service_credentials: Optional[Union[dict, str]] = None,
) -> Dict[str, List[ChatMessage]]:
"""
Processes a list of chat messages to invoke a method on an OpenAPI service. It parses the last message in the
list, expecting it to contain an OpenAI function calling descriptor (name & parameters) in JSON format.
Processes a list of chat messages to invoke a method on an OpenAPI service.
It parses the last message in the list, expecting it to contain an OpenAI function calling descriptor
(name & parameters) in JSON format.
:param messages: A list of `ChatMessage` objects containing the messages to be processed. The last message
should contain the function invocation payload in OpenAI function calling format. See the example in the class
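Parsing that last message might look roughly like this sketch (the payload shape follows OpenAI function calling, where `arguments` is typically a JSON-encoded string; the helper is illustrative):

```python
import json

def parse_function_invocation(last_message_content: str) -> dict:
    payload = json.loads(last_message_content)
    name = payload["name"]
    arguments = payload.get("arguments", "{}")
    # OpenAI usually serializes arguments as a JSON string
    if isinstance(arguments, str):
        arguments = json.loads(arguments)
    return {"name": name, "arguments": arguments}

descriptor = parse_function_invocation('{"name": "getWeather", "arguments": "{\\"city\\": \\"Berlin\\"}"}')
print(descriptor)  # {'name': 'getWeather', 'arguments': {'city': 'Berlin'}}
```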
@ -148,6 +152,8 @@ class OpenAPIServiceConnector:
def _authenticate_service(self, openapi_service: OpenAPI, credentials: Optional[Union[dict, str]] = None):
"""
Authentication with an OpenAPI service.
Authenticates with the OpenAPI service if required, supporting both single (str) and multiple
authentication methods (dict).
@ -201,8 +207,9 @@ class OpenAPIServiceConnector:
def _invoke_method(self, openapi_service: OpenAPI, method_invocation_descriptor: Dict[str, Any]) -> Any:
"""
Invokes the specified method on the OpenAPI service. The method name and arguments are passed in the
method_invocation_descriptor.
Invokes the specified method on the OpenAPI service.
The method name and arguments are passed in the method_invocation_descriptor.
:param openapi_service: The OpenAPI service instance.
:param method_invocation_descriptor: The method name and arguments to be passed to the method. The payload

View File

@ -23,7 +23,8 @@ with LazyImport(message="Run 'pip install \"azure-ai-formrecognizer>=3.2.0b2\"'"
@component
class AzureOCRDocumentConverter:
"""
A component for converting files to Documents using Azure's Document Intelligence service.
Convert files to documents using Azure's Document Intelligence service.
Supported file formats are: PDF, JPEG, PNG, BMP, TIFF, DOCX, XLSX, PPTX, and HTML.
In order to be able to use this component, you need an active Azure account
@ -170,6 +171,8 @@ class AzureOCRDocumentConverter:
# pylint: disable=line-too-long
def _convert_tables_and_text(self, result: "AnalyzeResult", meta: Optional[Dict[str, Any]]) -> List[Document]:
"""
Converts the tables and text extracted by Azure's Document Intelligence service into Haystack Documents.
:param result: The AnalyzeResult object returned by the `begin_analyze_document` method. Docs on Analyze result
can be found [here](https://azuresdkdocs.blob.core.windows.net/$web/python/azure-ai-formrecognizer/3.3.0/azure.ai.formrecognizer.html?highlight=read#azure.ai.formrecognizer.AnalyzeResult).
:param meta: Optional dictionary with metadata that shall be attached to all resulting documents.
@ -188,6 +191,7 @@ class AzureOCRDocumentConverter:
def _convert_tables(self, result: "AnalyzeResult", meta: Optional[Dict[str, Any]]) -> List[Document]:
"""
Converts the tables extracted by Azure's Document Intelligence service into Haystack Documents.
:param result: The AnalyzeResult Azure object
:param meta: Optional dictionary with metadata that shall be attached to all resulting documents.
@ -296,8 +300,10 @@ class AzureOCRDocumentConverter:
def _convert_to_natural_text(self, result: "AnalyzeResult", meta: Optional[Dict[str, Any]]) -> Document:
"""
This converts the `AnalyzeResult` object into a single Document. We add "\f" separators to
differentiate between the text on separate pages. This is the expected format for the PreProcessor.
This converts the `AnalyzeResult` object into a single document.
We add "\f" separators between to differentiate between the text on separate pages. This is the expected format
for the PreProcessor.
:param result: The AnalyzeResult object returned by the `begin_analyze_document` method. Docs on Analyze result
can be found [here](https://azuresdkdocs.blob.core.windows.net/$web/python/azure-ai-formrecognizer/3.3.0/azure.ai.formrecognizer.html?highlight=read#azure.ai.formrecognizer.AnalyzeResult).
@ -340,8 +346,10 @@ class AzureOCRDocumentConverter:
self, result: "AnalyzeResult", meta: Optional[Dict[str, str]], threshold_y: float = 0.05
) -> Document:
"""
This converts the `AnalyzeResult` object into a single Haystack Document. We add "\f" separators to
differentiate between the text on separate pages. This is the expected format for the PreProcessor.
This converts the `AnalyzeResult` object into a single Haystack Document.
We add "\f" separators between to differentiate between the text on separate pages. This is the expected format
for the PreProcessor.
:param result: The AnalyzeResult object returned by the `begin_analyze_document` method. Docs on Analyze result
can be found [here](https://azuresdkdocs.blob.core.windows.net/$web/python/azure-ai-formrecognizer/3.3.0/azure.ai.formrecognizer.html?highlight=read#azure.ai.formrecognizer.AnalyzeResult).
@ -427,6 +435,7 @@ class AzureOCRDocumentConverter:
def _collect_table_spans(self, result: "AnalyzeResult") -> Dict:
"""
Collect the spans of all tables by page number.
:param result: The AnalyzeResult object returned by the `begin_analyze_document` method.
:returns: A dictionary with the page number as key and a list of table spans as value.
"""
@ -443,6 +452,7 @@ class AzureOCRDocumentConverter:
) -> bool:
"""
Check if a line or paragraph is part of a table.
:param tables_on_page: A dictionary with the page number as key and a list of table spans as value.
:param line_or_paragraph: The line or paragraph to check.
:returns: True if the line or paragraph is part of a table, False otherwise.
@ -457,7 +467,9 @@ class AzureOCRDocumentConverter:
def _hash_dataframe(self, df: pd.DataFrame, desired_samples=5, hash_length=4) -> str:
"""
Returns a hash of the DataFrame content. The hash is based on the content of the DataFrame.
Returns a hash of the DataFrame content.
The hash is based on the content of the DataFrame.
:param df: The DataFrame to hash.
:param desired_samples: The desired number of samples to hash.
:param hash_length: The length of the hash for each sample.
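One plausible way to fingerprint a DataFrame by sampling rows is sketched below; this is an assumption about the scheme, not the component's exact code:

```python
import hashlib
import pandas as pd

def hash_dataframe(df: pd.DataFrame, desired_samples: int = 5, hash_length: int = 4) -> str:
    # Hash a few evenly spaced rows so the fingerprint is cheap yet content-sensitive
    step = max(len(df) // desired_samples, 1)
    fragments = []
    for i in range(0, len(df), step):
        row_bytes = df.iloc[i].to_json().encode("utf-8")
        fragments.append(hashlib.md5(row_bytes).hexdigest()[:hash_length])
    return "".join(fragments)

df = pd.DataFrame({"a": range(10), "b": range(10, 20)})
print(hash_dataframe(df))
```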

View File

@ -112,6 +112,8 @@ class OpenAPIServiceToFunctions:
def _openapi_to_functions(self, service_openapi_spec: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
OpenAPI to OpenAI function conversion.
Extracts functions from the OpenAPI specification of the service and converts them into a format
suitable for OpenAI function calling.
@ -188,6 +190,8 @@ class OpenAPIServiceToFunctions:
self, property_schema: Dict[str, Any], include_attributes: Optional[List[str]] = None
) -> Dict[str, Any]:
"""
Parses the attributes of a property schema.
Recursively parses the attributes of a property schema, including nested objects and arrays,
and includes specified attributes like description, pattern, etc.

View File

@ -7,6 +7,7 @@ from haystack.dataclasses import ByteStream
def get_bytestream_from_source(source: Union[str, Path, ByteStream]) -> ByteStream:
"""
Creates a ByteStream object from a source.
:param source: A source to convert to a ByteStream. Can be a string (path to a file), a Path object, or a ByteStream.
:return: A ByteStream object.
"""
@ -24,6 +25,8 @@ def normalize_metadata(
meta: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]], sources_count: int
) -> List[Dict[str, Any]]:
"""
Normalize the metadata input for a converter.
Given all the possible values of the meta input for a converter (None, a dictionary, or a list of dicts),
makes sure to return a list of dictionaries of the correct length for the converter to use.

View File

@ -19,6 +19,8 @@ logger = logging.getLogger(__name__)
@component
class HuggingFaceAPIDocumentEmbedder:
"""
A component that embeds documents using Hugging Face APIs.
This component can be used to compute Document embeddings using different Hugging Face APIs:
- [Free Serverless Inference API](https://huggingface.co/inference-api)
- [Paid Inference Endpoints](https://huggingface.co/inference-endpoints)

View File

@ -16,6 +16,8 @@ logger = logging.getLogger(__name__)
@component
class HuggingFaceAPITextEmbedder:
"""
A component that embeds text using Hugging Face APIs.
This component can be used to embed strings using different Hugging Face APIs:
- [Free Serverless Inference API](https://huggingface.co/inference-api)
- [Paid Inference Endpoints](https://huggingface.co/inference-endpoints)

View File

@ -6,11 +6,13 @@ from haystack.core.component import component
@component
class AnswerExactMatchEvaluator:
"""
Evaluator that checks if predicted answers exactly match ground truth answers.
An answer exact match evaluator class.
The evaluator checks if the predicted answers match any of the ground truth answers exactly.
The result is a number from 0.0 to 1.0; it represents the proportion of predicted answers
that matched one of the ground truth answers.
There can be multiple ground truth answers and multiple predicted answers as input.
Each predicted answer is compared to one ground truth answer.
The final score is a number ranging from 0.0 to 1.0.
It represents the proportion of predicted answers that match their corresponding ground truth answer.
Usage example:
```python
@ -33,7 +35,8 @@ class AnswerExactMatchEvaluator:
def run(self, ground_truth_answers: List[str], predicted_answers: List[str]) -> Dict[str, Any]:
"""
Run the AnswerExactMatchEvaluator on the given inputs.
`ground_truth_answers` and `predicted_answers` must have the same length.
The `ground_truth_answers` and `predicted_answers` must have the same length.
:param ground_truth_answers:
A list of expected answers.
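Under the pairwise semantics described above, the score reduces to a short computation; a standalone sketch, not the component itself:

```python
from typing import List

def exact_match_score(ground_truth_answers: List[str], predicted_answers: List[str]) -> float:
    if len(ground_truth_answers) != len(predicted_answers):
        raise ValueError("Both lists must have the same length.")
    # 1 for each prediction equal to its corresponding ground truth, else 0
    matches = [int(gt == pred) for gt, pred in zip(ground_truth_answers, predicted_answers)]
    return sum(matches) / len(matches)

print(exact_match_score(["Berlin", "Paris"], ["Berlin", "London"]))  # 0.5
```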

View File

@ -6,6 +6,8 @@ from haystack import Document, component
@component
class DocumentMAPEvaluator:
"""
A Mean Average Precision (MAP) evaluator for documents.
Evaluator that calculates the mean average precision of the retrieved documents, a metric
that measures how high retrieved documents are ranked.
Each question can have multiple ground truth documents and multiple retrieved documents.
@ -43,6 +45,7 @@ class DocumentMAPEvaluator:
) -> Dict[str, Any]:
"""
Run the DocumentMAPEvaluator on the given inputs.
All lists must have the same length.
:param ground_truth_documents:
@ -52,7 +55,7 @@ class DocumentMAPEvaluator:
:returns:
A dictionary with the following outputs:
- `score` - The average of calculated scores.
- `invididual_scores` - A list of numbers from 0.0 to 1.0 that represents how high retrieved documents are ranked.
- `individual_scores` - A list of numbers from 0.0 to 1.0 that represents how high retrieved documents are ranked.
"""
if len(ground_truth_documents) != len(retrieved_documents):
msg = "The length of ground_truth_documents and retrieved_documents must be the same."

View File

@ -32,6 +32,7 @@ class RecallMode(Enum):
class DocumentRecallEvaluator:
"""
Evaluator that calculates the Recall score for a list of documents.
Returns both a list of scores for each question and the average.
There can be multiple ground truth documents and multiple predicted documents as input.
@ -91,6 +92,7 @@ class DocumentRecallEvaluator:
) -> Dict[str, Any]:
"""
Run the DocumentRecallEvaluator on the given inputs.
`ground_truth_documents` and `retrieved_documents` must have the same length.
:param ground_truth_documents:

View File

@ -178,6 +178,8 @@ class LLMEvaluator:
def prepare_template(self) -> str:
"""
Prepare the prompt template.
Combine instructions, inputs, outputs, and examples into one prompt template with the following format:
Instructions:
<instructions>

View File

@ -16,6 +16,7 @@ with LazyImport(message="Run 'pip install scikit-learn \"sentence-transformers>=
class SASEvaluator:
"""
SASEvaluator computes the Semantic Answer Similarity (SAS) between a list of predictions and a list of ground truths.
It's usually used in Retrieval Augmented Generation (RAG) pipelines to evaluate the quality of the generated answers.
The SAS is computed using a pre-trained model from the Hugging Face model hub. The model can be either a
@ -132,6 +133,8 @@ class SASEvaluator:
@component.output_types(score=float, individual_scores=List[float])
def run(self, ground_truth_answers: List[str], predicted_answers: List[str]) -> Dict[str, Any]:
"""
SASEvaluator component run method.
Run the SASEvaluator to compute the Semantic Answer Similarity (SAS) between a list of predicted answers
and a list of ground truth answers. Both must be lists of strings of the same length.
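In essence, SAS scores can come from embedding both lists and comparing them pairwise. A sketch with a bi-encoder (the model choice and the cosine-similarity metric are assumptions, not the component's fixed behavior):

```python
from sentence_transformers import SentenceTransformer, util

def sas_scores(ground_truth_answers, predicted_answers,
               model_name="sentence-transformers/all-MiniLM-L6-v2"):
    model = SentenceTransformer(model_name)
    gt_embeddings = model.encode(ground_truth_answers, convert_to_tensor=True)
    pred_embeddings = model.encode(predicted_answers, convert_to_tensor=True)
    # Pairwise cosine similarity between each prediction and its ground truth
    individual = [float(util.cos_sim(gt, pred)) for gt, pred in zip(gt_embeddings, pred_embeddings)]
    return {"score": sum(individual) / len(individual), "individual_scores": individual}
```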

View File

@ -159,8 +159,7 @@ class NamedEntityExtractor:
@component.output_types(documents=List[Document])
def run(self, documents: List[Document], batch_size: int = 1) -> Dict[str, Any]:
"""
Annotate named entities in each document and store
the annotations in the document's metadata.
Annotate named entities in each document and store the annotations in the document's metadata.
:param documents:
Documents to process.
@ -227,8 +226,7 @@ class NamedEntityExtractor:
@classmethod
def get_stored_annotations(cls, document: Document) -> Optional[List[NamedEntityAnnotation]]:
"""
Returns the document's named entity annotations stored
in its metadata, if any.
Returns the document's named entity annotations stored in its metadata, if any.
:param document:
Document whose annotations are to be fetched.
@ -259,16 +257,14 @@ class _NerBackend(ABC):
@abstractmethod
def initialize(self):
"""
Initializes the backend. This would usually
entail loading models, pipelines, etc.
Initializes the backend. This would usually entail loading models, pipelines, and so on.
"""
@property
@abstractmethod
def initialized(self) -> bool:
"""
Returns if the backend has been initialized, i.e,
ready to annotate text.
Returns whether the backend has been initialized, that is, ready to annotate text.
"""
@abstractmethod
@ -295,6 +291,8 @@ class _NerBackend(ABC):
@property
def device(self) -> ComponentDevice:
"""
The device on which the backend's model is loaded.
:returns:
The device on which the backend's model is loaded.
"""
@ -457,8 +455,7 @@ class _SpacyBackend(_NerBackend):
@contextmanager
def _select_device(self):
"""
Context manager used to run spaCy models on a specific
GPU in a scoped manner.
Context manager used to run spaCy models on a specific GPU in a scoped manner.
"""
# TODO: This won't restore the active device.

View File

@ -26,6 +26,8 @@ REQUEST_HEADERS = {
def _text_content_handler(response: Response) -> ByteStream:
"""
Handles text content.
:param response: Response object from the request.
:return: The extracted text.
"""
@ -34,6 +36,8 @@ def _text_content_handler(response: Response) -> ByteStream:
def _binary_content_handler(response: Response) -> ByteStream:
"""
Handles binary content.
:param response: Response object from the request.
:return: The extracted binary file-like object.
"""
@ -211,6 +215,7 @@ class LinkContentFetcher:
def _switch_user_agent(self, retry_state: RetryCallState) -> None:
"""
Switches the User-Agent for this LinkContentFetcher to the next one in the list of user agents.
Used by tenacity to retry the requests with a different user agent.
:param retry_state: The retry state (unused, required by tenacity).

View File

@ -14,8 +14,9 @@ logger = logging.getLogger(__name__)
class AzureOpenAIGenerator(OpenAIGenerator):
"""
Enables text generation using OpenAI's large language models (LLMs) on Azure. It supports gpt-4 and gpt-3.5-turbo
family of models.
A Generator component that uses OpenAI's large language models (LLMs) on Azure to generate text.
It supports gpt-4 and gpt-3.5-turbo family of models.
Users can pass any text generation parameters valid for the `openai.ChatCompletion.create` method
directly to this component via the `**generation_kwargs` parameter in __init__ or the `**generation_kwargs`
@ -59,6 +60,8 @@ class AzureOpenAIGenerator(OpenAIGenerator):
generation_kwargs: Optional[Dict[str, Any]] = None,
):
"""
Initialize the Azure OpenAI Generator.
:param azure_endpoint: The endpoint of the deployed model, e.g. `https://example-resource.azure.openai.com/`
:param api_version: The version of the API to use. Defaults to 2023-05-15
:param azure_deployment: The deployment of the model, usually the model name.

View File

@ -14,6 +14,8 @@ logger = logging.getLogger(__name__)
class AzureOpenAIChatGenerator(OpenAIChatGenerator):
"""
A Chat Generator component that uses the Azure OpenAI API to generate text.
Enables text generation using OpenAI's large language models (LLMs) on Azure. It supports `gpt-4` and `gpt-3.5-turbo`
family of models accessed through the chat completions API endpoint.
@ -76,6 +78,8 @@ class AzureOpenAIChatGenerator(OpenAIChatGenerator):
generation_kwargs: Optional[Dict[str, Any]] = None,
):
"""
Initialize the Azure OpenAI Chat Generator component.
:param azure_endpoint: The endpoint of the deployed model, e.g. `"https://example-resource.azure.openai.com/"`
:param api_version: The version of the API to use. Defaults to 2023-05-15
:param azure_deployment: The deployment of the model, usually the model name.

View File

@ -17,6 +17,8 @@ logger = logging.getLogger(__name__)
@component
class HuggingFaceAPIChatGenerator:
"""
A Chat Generator component that uses Hugging Face APIs to generate text.
This component can be used to generate text using different Hugging Face APIs with the ChatMessage format:
- [Free Serverless Inference API](https://huggingface.co/inference-api)
- [Paid Inference Endpoints](https://huggingface.co/inference-endpoints)

View File

@ -32,6 +32,8 @@ PIPELINE_SUPPORTED_TASKS = ["text-generation", "text2text-generation"]
@component
class HuggingFaceLocalChatGenerator:
"""
A Chat Generator component that uses models available on Hugging Face Hub to generate chat responses locally.
The `HuggingFaceLocalChatGenerator` class is a component designed for generating chat responses using models from
Hugging Face's model hub. It is tailored for local runtime text generation tasks and provides a convenient interface
for working with chat-based models, such as `HuggingFaceH4/zephyr-7b-beta` or `meta-llama/Llama-2-7b-chat-hf`
@ -78,6 +80,8 @@ class HuggingFaceLocalChatGenerator:
streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
):
"""
Initializes the HuggingFaceLocalChatGenerator component.
:param model: The name or path of a Hugging Face model for text generation,
for example, `mistralai/Mistral-7B-Instruct-v0.2`, `TheBloke/OpenHermes-2.5-Mistral-7B-16k-AWQ`, etc.
The important aspect of the model is that it should be a chat model and that it supports ChatML messaging

View File

@ -24,6 +24,8 @@ logger = logging.getLogger(__name__)
@component
class HuggingFaceTGIChatGenerator:
"""
A Chat-based text generation component using Hugging Face's Text Generation Inference (TGI) framework.
Enables text generation using HuggingFace Hub hosted chat-based LLMs. This component is designed to seamlessly
run inference on chat-based models deployed on the Text Generation Inference (TGI) backend.
@ -147,6 +149,8 @@ class HuggingFaceTGIChatGenerator:
def warm_up(self) -> None:
"""
Warm up the tokenizer by loading it from the model.
If the url is not provided, check if the model is deployed on the free tier of the HF inference API.
Load the tokenizer.
"""

View File

@ -17,6 +17,8 @@ logger = logging.getLogger(__name__)
@component
class OpenAIChatGenerator:
"""
A Chat Generator component that uses the OpenAI API to generate text.
Enables text generation using OpenAI's large language models (LLMs). It supports `gpt-4` and `gpt-3.5-turbo`
family of models accessed through the chat completions API endpoint.
@ -71,6 +73,8 @@ class OpenAIChatGenerator:
generation_kwargs: Optional[Dict[str, Any]] = None,
):
"""
Initializes the OpenAIChatGenerator component.
Creates an instance of OpenAIChatGenerator. Unless specified otherwise in the `model` parameter, this is for OpenAI's
GPT-3.5 model.
@ -206,6 +210,7 @@ class OpenAIChatGenerator:
def _connect_chunks(self, chunk: Any, chunks: List[StreamingChunk]) -> ChatMessage:
"""
Connects the streaming chunks into a single ChatMessage.
:param chunk: The last chunk returned by the OpenAI API.
:param chunks: The list of all chunks returned by the OpenAI API.
"""
@ -256,6 +261,7 @@ class OpenAIChatGenerator:
def _build_message(self, completion: ChatCompletion, choice: Choice) -> ChatMessage:
"""
Converts the non-streaming response from the OpenAI API to a ChatMessage.
:param completion: The completion returned by the OpenAI API.
:param choice: The choice returned by the OpenAI API.
:return: The ChatMessage.
@ -287,6 +293,7 @@ class OpenAIChatGenerator:
def _build_chunk(self, chunk: ChatCompletionChunk) -> StreamingChunk:
"""
Converts the streaming response chunk from the OpenAI API to a StreamingChunk.
:param chunk: The chunk returned by the OpenAI API.
:param choice: The choice returned by the OpenAI API.
:return: The StreamingChunk.
@ -311,6 +318,7 @@ class OpenAIChatGenerator:
def _check_finish_reason(self, message: ChatMessage) -> None:
"""
Check the `finish_reason` returned with the OpenAI completions.
If the `finish_reason` is `length` or `content_filter`, log a warning.
:param message: The message returned by the LLM.
"""

View File

@ -23,6 +23,8 @@ logger = logging.getLogger(__name__)
@component
class HuggingFaceAPIGenerator:
"""
A Generator component that uses Hugging Face APIs to generate text.
This component can be used to generate text using different Hugging Face APIs:
- [Free Serverless Inference API](https://huggingface.co/inference-api)
- [Paid Inference Endpoints](https://huggingface.co/inference-endpoints)

View File

@ -13,6 +13,8 @@ logger = logging.getLogger(__name__)
@component
class OpenAIGenerator:
"""
Text generation component using OpenAI's large language models (LLMs).
Enables text generation using OpenAI's large language models (LLMs). It supports gpt-4 and gpt-3.5-turbo
family of models.
@ -258,6 +260,7 @@ class OpenAIGenerator:
def _check_finish_reason(self, message: ChatMessage) -> None:
"""
Check the `finish_reason` returned with the OpenAI completions.
If the `finish_reason` is `length`, log a warning to the user.
:param message:

View File

@ -7,6 +7,7 @@ from haystack.utils import deserialize_callable, serialize_callable
def print_streaming_chunk(chunk: StreamingChunk) -> None:
"""
Default callback function for streaming responses.
Prints the tokens of the first completion to stdout as soon as they are received.
"""
print(chunk.content, flush=True, end="")
@ -15,6 +16,7 @@ def print_streaming_chunk(chunk: StreamingChunk) -> None:
def serialize_callback_handler(streaming_callback: Callable[[StreamingChunk], None]) -> str:
"""
Serializes the streaming callback handler.
:param streaming_callback:
The streaming callback handler function
:returns:
@ -26,6 +28,7 @@ def serialize_callback_handler(streaming_callback: Callable[[StreamingChunk], No
def deserialize_callback_handler(callback_name: str) -> Optional[Callable[[StreamingChunk], None]]:
"""
Deserializes the streaming callback handler.
:param callback_name:
The full path of the streaming callback handler function
:returns:

View File

@ -137,6 +137,7 @@ class DocumentJoiner:
def _reciprocal_rank_fusion(self, document_lists):
"""
Merge multiple lists of Documents and assign scores based on reciprocal rank fusion.
The constant k is set to 61 (60 was suggested by the original paper,
plus 1 as python lists are 0-based and the paper used 1-based ranking).
"""

View File

@ -17,6 +17,8 @@ logger = logging.getLogger(__name__)
@component(is_greedy=True)
class Multiplexer:
"""
A component which receives data connections from multiple components and distributes them to multiple components.
`Multiplexer` offers the ability to both receive data connections from multiple other
components and to distribute them to various other components, enhancing the functionality of complex data
processing pipelines.
@ -125,6 +127,8 @@ class Multiplexer:
def run(self, **kwargs):
"""
The run method of the `Multiplexer` component.
Multiplexes the input data from the upstream connected components and distributes it to the downstream connected
components.

View File

@ -12,6 +12,8 @@ logger = logging.getLogger(__name__)
@component
class DocumentCleaner:
"""
Cleans the text in the documents.
Cleans up text documents by removing extra whitespaces, empty lines, specified substrings, regexes,
page headers and footers (in this order).
@ -38,6 +40,8 @@ class DocumentCleaner:
remove_regex: Optional[str] = None,
):
"""
Initialize the DocumentCleaner.
:param remove_empty_lines: Whether to remove empty lines.
:param remove_extra_whitespaces: Whether to remove extra whitespaces.
:param remove_repeated_substrings: Whether to remove repeated substrings (headers/footers) from pages.
@ -97,6 +101,7 @@ class DocumentCleaner:
def _remove_empty_lines(self, text: str) -> str:
"""
Remove empty lines and lines that contain nothing but whitespaces from text.
:param text: Text to clean.
:returns: The text without empty lines.
"""
@ -107,6 +112,7 @@ class DocumentCleaner:
def _remove_extra_whitespaces(self, text: str) -> str:
"""
Remove extra whitespaces from text.
:param text: Text to clean.
:returns: The text without extra whitespaces.
"""
@ -115,6 +121,7 @@ class DocumentCleaner:
def _remove_regex(self, text: str, regex: str) -> str:
"""
Remove substrings that match the specified regex from the text.
:param text: Text to clean.
:param regex: Regex to match and replace substrings by "".
:returns: The text without the substrings that match the regex.
@ -124,6 +131,7 @@ class DocumentCleaner:
def _remove_substrings(self, text: str, substrings: List[str]) -> str:
"""
Remove all specified substrings from the text.
:param text: Text to clean.
:param substrings: Substrings to remove.
:returns: The text without the specified substrings.
@ -135,6 +143,7 @@ class DocumentCleaner:
def _remove_repeated_substrings(self, text: str) -> str:
"""
Remove any substrings from the text that occur repeatedly on every page, for example, headers or footers.
Pages in the text need to be separated by form feed character "\f".
:param text: Text to clean.
:returns: The text without the repeated substrings.
@ -148,6 +157,7 @@ class DocumentCleaner:
) -> str:
"""
Heuristic to find footers and headers across different pages by searching for the longest common string.
Pages in the text need to be separated by form feed character "\f".
For headers, we only search in the first n_chars characters (for footer: last n_chars).
Note: This heuristic uses exact matches and therefore works well for footers like "Copyright 2019 by XXX",
@ -182,6 +192,7 @@ class DocumentCleaner:
def _ngram(self, seq: str, n: int) -> Generator[str, None, None]:
"""
Return all ngrams of length n from a text sequence. Each ngram consists of n words split by whitespace.
:param seq: The sequence to generate ngrams from.
:param n: The length of the ngrams to generate.
:returns: A Generator generating all ngrams of length n from the given sequence.
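The generator itself is small; a sketch of the whitespace-split variant described here:

```python
from typing import Generator

def ngrams(seq: str, n: int) -> Generator[str, None, None]:
    # Every run of n consecutive whitespace-separated words forms one ngram
    words = seq.split()
    for i in range(len(words) - n + 1):
        yield " ".join(words[i : i + n])

print(list(ngrams("Copyright 2019 by XXX Inc", 3)))
# ['Copyright 2019 by', '2019 by XXX', 'by XXX Inc']
```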
@ -202,6 +213,7 @@ class DocumentCleaner:
def _allngram(self, seq: str, min_ngram: int, max_ngram: int) -> Set[str]:
"""
Generates all possible ngrams from a given sequence of text.
It considers all ngram lengths between the minimum and maximum length.
:param seq: The sequence to generate ngrams from.
@ -217,6 +229,7 @@ class DocumentCleaner:
def _find_longest_common_ngram(self, sequences: List[str], min_ngram: int = 3, max_ngram: int = 30) -> str:
"""
Find the longest common ngram across a list of text sequences (e.g. start of pages).
It considers all ngram lengths between the minimum and maximum length. This is helpful for finding footers, headers, etc.
Empty sequences are ignored.

View File

@ -23,6 +23,8 @@ class DocumentSplitter:
split_overlap: int = 0,
):
"""
Initialize the DocumentSplitter.
:param split_by: The unit by which the document should be split. Choose from "word" for splitting by " ",
"sentence" for splitting by ".", "page" for splitting by "\\f" or "passage" for splitting by "\\n\\n".
:param split_length: The maximum number of units in each split.
@ -42,6 +44,8 @@ class DocumentSplitter:
@component.output_types(documents=List[Document])
def run(self, documents: List[Document]):
"""
Split documents into smaller parts.
Splits documents by the unit expressed in `split_by`, with a length of `split_length`
and an overlap of `split_overlap`.

View File

@ -8,10 +8,12 @@ from haystack import component
@component
class TextCleaner:
"""
A preprocessor component to clean text data. It can remove substrings matching a list of regular expressions,
convert text to lowercase, remove punctuation, and remove numbers.
A PreProcessor component to clean text data.
This is useful to cleanup text data before evaluation.
It can remove substrings matching a list of regular expressions, convert text to lowercase, remove punctuation,
and remove numbers.
This is useful to clean up text data before evaluation.
"""
def __init__(
@ -22,6 +24,8 @@ class TextCleaner:
remove_numbers: bool = False,
):
"""
Initialize the TextCleaner component.
:param remove_regexps: A list of regular expressions. If provided, it removes substrings
matching these regular expressions from the text.
:param convert_to_lowercase: If True, converts all characters to lowercase.

View File

@ -6,6 +6,8 @@ from haystack import Document, component
@component
class LostInTheMiddleRanker:
"""
A LostInTheMiddle Ranker.
Ranks documents based on the 'lost in the middle' order so that the most relevant documents are either at the
beginning or end, while the least relevant are in the middle.
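The reordering can be sketched with a deque, assuming the input is already sorted most-relevant-first (illustrative, not the component's exact code):

```python
from collections import deque

def lost_in_the_middle_order(docs_sorted_by_relevance):
    ordered = deque()
    # Walk from least to most relevant, alternating ends,
    # so the most relevant items land at the edges
    for i, doc in enumerate(reversed(docs_sorted_by_relevance)):
        if i % 2 == 0:
            ordered.append(doc)
        else:
            ordered.appendleft(doc)
    return list(ordered)

print(lost_in_the_middle_order([1, 2, 3, 4, 5]))  # [2, 4, 5, 3, 1]
```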
@ -33,6 +35,8 @@ class LostInTheMiddleRanker:
def __init__(self, word_count_threshold: Optional[int] = None, top_k: Optional[int] = None):
"""
Initialize the LostInTheMiddleRanker.
If 'word_count_threshold' is specified, this ranker includes all documents up until the point where adding
another document would exceed the 'word_count_threshold'. The last document that causes the threshold to
be breached will be included in the resulting list of documents, but all subsequent documents will be

View File

@ -141,6 +141,7 @@ class MetaFieldRanker:
):
"""
Ranks a list of Documents based on the selected meta field by:
1. Sorting the Documents by the meta field in descending or ascending order.
2. Merging the rankings from the previous component with the rankings based on the meta field, according to the
ranking mode and weight.
@ -337,8 +338,10 @@ class MetaFieldRanker:
@staticmethod
def _calculate_rrf(rank: int, k: int = 61) -> float:
"""
Calculates the reciprocal rank fusion. The constant K is set to 61 (60 was suggested by the original paper,
plus 1 as python lists are 0-based and the [paper](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) used 1-based ranking).
Calculates the reciprocal rank fusion.
The constant K is set to 61 (60 was suggested by the original paper, plus 1 as python lists are 0-based and
the [paper](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) used 1-based ranking).
"""
return 1 / (k + rank)
@ -346,6 +349,7 @@ class MetaFieldRanker:
def _calc_linear_score(rank: int, amount: int) -> float:
"""
Calculate the meta field score as a linear score between the greatest and the lowest score in the list.
This linear scaling is useful for:
- Reducing the effect of outliers
- Creating scores that are meaningfully distributed in the range [0,1],

View File

@ -15,6 +15,8 @@ with LazyImport(message="Run 'pip install \"sentence-transformers>=2.2.0\"'") as
@component
class SentenceTransformersDiversityRanker:
"""
A Diversity Ranker based on Sentence Transformers.
Implements a document ranking algorithm that orders documents in such a way as to maximize the overall diversity
of the documents.

View File

@ -455,6 +455,8 @@ class ExtractiveReader:
self, answers: List[ExtractedAnswer], overlap_threshold: Optional[float]
) -> List[ExtractedAnswer]:
"""
De-duplicates overlapping Extractive Answers.
De-duplicates overlapping Extractive Answers from the same document based on how much the spans of the
answers overlap.

View File

@ -163,6 +163,8 @@ class ConditionalRouter:
def run(self, **kwargs):
"""
Executes the routing logic.
Executes the routing logic by evaluating the specified boolean condition expressions for each route in the order they are listed.
The method directs the flow of data to the output specified in the first route whose `condition` is True.

View File

@ -13,6 +13,8 @@ logger = logging.getLogger(__name__)
@component
class FileTypeRouter:
"""
Groups a list of data sources by their MIME types.
FileTypeRouter groups a list of data sources (file paths or byte streams) by their MIME types, allowing
for flexible routing of files to different components based on their content type. It supports both exact MIME type
matching and pattern matching using regular expressions.
@ -50,6 +52,8 @@ class FileTypeRouter:
def __init__(self, mime_types: List[str]):
"""
Initialize the FileTypeRouter component.
:param mime_types: A list of file mime types to consider when routing files
(e.g. `["text/plain", "audio/x-wav", "image/jpeg"]`).
"""

View File

@ -72,6 +72,8 @@ class MetadataRouter:
def run(self, documents: List[Document]):
"""
Route the documents.
Route the documents to different edges based on the content of their fields and the rules specified during initialization.
If a document does not match any of the rules, it is routed to a connection named "unmatched".

View File

@ -44,6 +44,8 @@ class TextLanguageRouter:
def __init__(self, languages: Optional[List[str]] = None):
"""
Initialize the TextLanguageRouter component.
:param languages: A list of languages in ISO code, each corresponding to a different output connection.
For supported languages, see the [`langdetect` documentation](https://github.com/Mimino666/langdetect#languages).
If not specified, the default is `["en"]`.
@ -57,6 +59,7 @@ class TextLanguageRouter:
def run(self, text: str) -> Dict[str, str]:
"""
Route the text to one of different output connections based on its language.
If the text does not match any of the languages specified at initialization, it is routed to
a connection named "unmatched".

View File

@ -21,6 +21,7 @@ with LazyImport(message="Run 'pip install transformers[torch,sentencepiece]'") a
class TransformersZeroShotTextRouter:
"""
Routes a text input onto different output connections depending on which label it has been categorized into.
This is useful for routing queries to different models in a pipeline depending on their categorization.
The set of labels to be used for categorization can be specified.
@ -102,6 +103,8 @@ class TransformersZeroShotTextRouter:
huggingface_pipeline_kwargs: Optional[Dict[str, Any]] = None,
):
"""
Initializes the TransformersZeroShotTextRouter.
:param labels: The set of possible class labels to classify each sequence into. Can be a single label,
a string of comma-separated labels, or a list of labels.
:param multi_label: Whether or not multiple candidate labels can be true.
@ -187,8 +190,9 @@ class TransformersZeroShotTextRouter:
@component.output_types(documents=Dict[str, str])
def run(self, text: str):
"""
Run the TransformersZeroShotTextRouter. This method routes the text to one of the different edges based on which label
it has been categorized into.
Run the TransformersZeroShotTextRouter.
This method routes the text to one of the different edges based on which label it has been categorized into.
:param text: A str to route to one of the different edges.
:returns:

View File

@ -56,6 +56,7 @@ class TopPSampler:
def run(self, documents: List[Document], top_p: Optional[float] = None):
"""
Filters documents using top-p sampling based on their scores.
If the specified top_p results in no documents being selected (especially in cases of a low top_p value), the
method returns the document with the highest similarity score.
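Top-p filtering over document scores can be sketched as follows; treating the scores with a softmax to obtain probabilities is an assumption:

```python
import math

def top_p_filter(documents, top_p: float = 1.0):
    docs = sorted(documents, key=lambda d: d.score, reverse=True)
    # Turn scores into probabilities via softmax
    exp_scores = [math.exp(d.score) for d in docs]
    total = sum(exp_scores)
    selected, cumulative = [], 0.0
    for doc, exp_score in zip(docs, exp_scores):
        if cumulative >= top_p:
            break
        cumulative += exp_score / total
        selected.append(doc)
    # Guard: a very low top_p still returns the single best document
    return selected or docs[:1]
```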
@ -113,6 +114,7 @@ class TopPSampler:
def _collect_scores(self, documents: List[Document]) -> List[float]:
"""
Collect the scores from the documents' metadata.
:param documents: List of Documents.
:return: List of scores.
"""

View File

@ -77,6 +77,8 @@ class JsonSchemaValidator:
def __init__(self, json_schema: Optional[Dict[str, Any]] = None, error_template: Optional[str] = None):
"""
Initialize the JsonSchemaValidator component.
:param json_schema: A dictionary representing the [JSON schema](https://json-schema.org/) against which
the messages' content is validated.
:param error_template: A custom template string for formatting the error message in case of validation failure.
@ -186,8 +188,9 @@ class JsonSchemaValidator:
def _recursive_json_to_object(self, data: Any) -> Any:
"""
Recursively traverses a data structure (dictionary or list), converting any string values
that are valid JSON objects into dictionary objects, and returns a new data structure.
Convert any string values that are valid JSON objects into dictionary objects.
Returns a new data structure.
:param data: The data structure to be traversed.
:return: A new data structure with JSON strings converted to dictionary objects.
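A sketch of that recursion (pass-through behavior for non-JSON strings is assumed):

```python
import json

def recursive_json_to_object(data):
    if isinstance(data, dict):
        return {key: recursive_json_to_object(value) for key, value in data.items()}
    if isinstance(data, list):
        return [recursive_json_to_object(item) for item in data]
    if isinstance(data, str):
        try:
            parsed = json.loads(data)
        except json.JSONDecodeError:
            return data  # not JSON: leave the string untouched
        # Only recurse into containers; JSON scalars stay as the original string
        return recursive_json_to_object(parsed) if isinstance(parsed, (dict, list)) else data
    return data

print(recursive_json_to_object({"payload": '{"city": "Berlin"}'}))
# {'payload': {'city': 'Berlin'}}
```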

View File

@ -41,6 +41,8 @@ class SearchApiWebSearch:
search_params: Optional[Dict[str, Any]] = None,
):
"""
Initialize the SearchApiWebSearch component.
:param api_key: API key for the SearchApi API
:param top_k: Number of documents to return.
:param allowed_domains: List of domains to limit the search to.

View File

@ -44,6 +44,8 @@ class SerperDevWebSearch:
search_params: Optional[Dict[str, Any]] = None,
):
"""
Initialize the SerperDevWebSearch component.
:param api_key: API key for the Serper API.
:param top_k: Number of documents to return.
:param allowed_domains: List of domains to limit the search to.

View File

@ -50,6 +50,7 @@ class DocumentWriter:
def to_dict(self) -> Dict[str, Any]:
"""
Serializes the component to a dictionary.
:returns:
Dictionary with serialized data.
"""

View File

@ -93,10 +93,10 @@ _COMPONENT_PRE_INIT_CALLBACK: ContextVar[Optional[Callable]] = ContextVar("compo
@contextmanager
def _hook_component_init(callback: Callable):
"""
Context manager to set a callback that will be invoked
before a component's constructor is called. The callback
receives the component class and the init parameters (as keyword
arguments) and can modify the init parameters in place.
Context manager to set a callback that will be invoked before a component's constructor is called.
The callback receives the component class and the init parameters (as keyword arguments) and can modify the init
parameters in place.
:param callback:
Callback function to invoke.
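Given the `_COMPONENT_PRE_INIT_CALLBACK` ContextVar visible in the hunk header, the context manager plausibly reduces to set/reset semantics; a sketch:

```python
from contextlib import contextmanager
from contextvars import ContextVar
from typing import Callable, Optional

_COMPONENT_PRE_INIT_CALLBACK: ContextVar[Optional[Callable]] = ContextVar(
    "component_pre_init_callback", default=None
)

@contextmanager
def hook_component_init(callback: Callable):
    # Install the callback for the duration of the block, then restore the prior state
    token = _COMPONENT_PRE_INIT_CALLBACK.set(callback)
    try:
        yield
    finally:
        _COMPONENT_PRE_INIT_CALLBACK.reset(token)
```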
@ -165,8 +165,7 @@ class ComponentMeta(type):
def __call__(cls, *args, **kwargs):
"""
This method is called when clients instantiate a Component and
runs before __new__ and __init__.
This method is called when clients instantiate a Component and runs before __new__ and __init__.
"""
# This will call __new__ then __init__, giving us back the Component instance
pre_init_hook = _COMPONENT_PRE_INIT_CALLBACK.get()
@ -234,6 +233,7 @@ class ComponentMeta(type):
def _component_repr(component: Component) -> str:
"""
All Components override their __repr__ method with this one.
It prints the component name and the input/output sockets.
"""
result = object.__repr__(component)
@ -325,8 +325,7 @@ class _Component:
def set_output_types(self, instance, **types):
"""
Method that specifies the output types when the 'run' method is not decorated
with 'component.output_types'.
Method that specifies the output types when the 'run' method is not decorated with 'component.output_types'.
Use as:
@ -364,6 +363,8 @@ class _Component:
def output_types_decorator(run_method):
"""
Decorator that sets the output types of the decorated method.
This happens at class creation time, and since we don't have the decorated
class available here, we temporarily store the output types as an attribute of
the decorated method. The ComponentMeta metaclass will use this data to create
@ -390,9 +391,9 @@ class _Component:
def copy_class_namespace(namespace):
"""
This is the callback that `typing.new_class` will use
to populate the newly created class. We just copy
the whole namespace from the decorated class.
This is the callback that `typing.new_class` will use to populate the newly created class.
Simply copy the whole namespace from the decorated class.
"""
for key, val in dict(cls.__dict__).items():
# __dict__ and __weakref__ are class-bound, we should let Python recreate them.

View File

@ -102,8 +102,8 @@ def _to_mermaid_text(graph: networkx.MultiDiGraph) -> str:
"""
Converts a Networkx graph into Mermaid syntax.
The output of this function can be used in the documentation with `mermaid` codeblocks, and it will
be automatically rendered.
The output of this function can be used in the documentation with `mermaid` codeblocks and will be
automatically rendered.
"""
# Copy the graph to avoid modifying the original
graph = _prepare_for_drawing(graph.copy())

View File

@ -241,10 +241,11 @@ class Pipeline:
callbacks: Optional[DeserializationCallbacks] = None,
) -> "Pipeline":
"""
Creates a `Pipeline` object from the string representation read from the file-like object passed in the `fp` argument.
Creates a `Pipeline` object from a string representation.
The string representation is read from the file-like object passed in the `fp` argument.
:param data:
The string representation of the pipeline, can be `str`, `bytes` or `bytearray`.
:param fp:
A file-like object ready to be read from.
:param marshaller:
@ -312,7 +313,7 @@ class Pipeline:
Connects two components together.
All components to connect must exist in the pipeline.
If connecting to an component that has several output connections, specify the inputs and output names as
If connecting to a component that has several output connections, specify the inputs and output names as
'component_name.connections_name'.
:param sender:
@ -598,6 +599,8 @@ class Pipeline:
def _validate_input(self, data: Dict[str, Any]):
"""
Validates pipeline input data.
Validates that data:
* Each Component name actually exists in the Pipeline
* Each Component is not missing any input
@ -1047,6 +1050,8 @@ class Pipeline:
def _prepare_component_input_data(self, data: Dict[str, Any]) -> Tuple[Dict[str, Dict[str, Any]], Dict[str, Any]]:
"""
Prepares input data for pipeline components.
Organizes input data for pipeline components and identifies any inputs that are not matched to any
component's input slots.

View File

@ -22,9 +22,10 @@ class PredefinedPipeline(Enum):
class PipelineTemplate:
"""
The PipelineTemplate class enables the straightforward creation of flexible and configurable pipelines using Jinja2 templated YAML files.
The PipelineTemplate enables the creation of flexible and configurable pipelines.
Specifically designed to simplify the setup of complex data processing pipelines for
The PipelineTemplate class enables the straightforward creation of flexible and configurable pipelines using
Jinja2 templated YAML files. Specifically designed to simplify the setup of complex data processing pipelines for
a range of NLP tasks - including question answering, retrieval augmented generation (RAG), document indexing, among
others - PipelineTemplate empowers users to dynamically generate pipeline configurations from templates and
customize components as necessary. Its design philosophy centers on providing an accessible, yet powerful, tool
@ -63,9 +64,9 @@ class PipelineTemplate:
"""
Initialize a PipelineTemplate.
Besides calling the constructor directly, a set of utility methods is provided
for conveniently create an instance of `PipelineTemplate` from different sources. See `from_string`,
`from_file`, `from_predefined` and `from_url`.
Besides calling the constructor directly, a set of utility methods is provided to conveniently create an
instance of `PipelineTemplate` from different sources. See `from_string`, `from_file`, `from_predefined`
and `from_url`.
:param template_content: The raw template source to use in the template.
"""
@ -106,7 +107,9 @@ class PipelineTemplate:
@classmethod
def from_predefined(cls, predefined_pipeline: PredefinedPipeline) -> "PipelineTemplate":
"""
Create a PipelineTemplate from a predefined template. See `PredefinedPipeline` for available options.
Create a PipelineTemplate from a predefined template.
See `PredefinedPipeline` for available options.
:param predefined_pipeline: The predefined pipeline to use.
:returns: An instance of `PipelineTemplate`.

View File

@ -123,8 +123,7 @@ def default_to_dict(obj: Any, **init_parameters) -> Dict[str, Any]:
"""
Utility function to serialize an object to a dictionary.
This is mostly necessary for Components, but it can be used by any object.
This is mostly necessary for components but can be used by any object.
`init_parameters` are parameters passed to the object class `__init__`.
They must be defined explicitly as they'll be used when creating a new
instance of `obj` with `from_dict`. Omitting them might cause deserialisation
@ -165,7 +164,7 @@ def default_from_dict(cls: Type[object], data: Dict[str, Any]) -> Any:
"""
Utility function to deserialize a dictionary to an object.
This is mostly necessary for Components but, it can be used by any object.
This is mostly necessary for components but can be used by any object.
The function will raise a `DeserializationError` if the `type` field in `data` is
missing or it doesn't match the type of `cls`.
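The format implied here pairs a fully qualified `type` string with the `init_parameters`; a hedged sketch of both directions (error types simplified):

```python
from typing import Any, Dict, Type

def default_to_dict(obj: Any, **init_parameters) -> Dict[str, Any]:
    # Record the fully qualified class name plus the explicit constructor arguments
    return {
        "type": f"{type(obj).__module__}.{type(obj).__name__}",
        "init_parameters": init_parameters,
    }

def default_from_dict(cls: Type[object], data: Dict[str, Any]) -> Any:
    expected = f"{cls.__module__}.{cls.__name__}"
    if data.get("type") != expected:
        raise ValueError(f"Expected type {expected}, got {data.get('type')}")
    return cls(**data.get("init_parameters", {}))
```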

View File

@ -8,7 +8,7 @@ class SparseEmbedding:
def __init__(self, indices: List[int], values: List[float]):
"""
Initialize a sparse embedding.
Initialize a SparseEmbedding object.
:param indices: List of indices of non-zero elements in the embedding.
:param values: List of values of non-zero elements in the embedding.
@ -22,7 +22,7 @@ class SparseEmbedding:
def to_dict(self):
"""
Convert the sparse embedding to a dictionary.
Convert the SparseEmbedding object to a dictionary.
:returns:
Serialized sparse embedding.
@ -32,7 +32,7 @@ class SparseEmbedding:
@classmethod
def from_dict(cls, sparse_embedding_dict):
"""
Deserializes the sparse embedding from a dictionary.
:param sparse_embedding_dict:
Dictionary to deserialize from.