mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-11-25 22:46:21 +00:00
docs: adding missing docstrings for run and run_batch methods (#5609)
* docstrings for run methods * updates from pr review * wrong article * fix style --------- Co-authored-by: anakin87 <stefanofiorucci@gmail.com> Co-authored-by: Stefano Fiorucci <44616784+anakin87@users.noreply.github.com>
This commit is contained in:
parent
b507f1a124
commit
41fd0c5458
@ -24,11 +24,11 @@ class BaseGenerator(BaseComponent):
|
||||
"""
|
||||
Abstract method to generate answers.
|
||||
|
||||
:param query: Query
|
||||
:param query: Query string.
|
||||
:param documents: Related documents (for example, coming from a retriever) the answer should be based on.
|
||||
:param top_k: Number of returned answers.
|
||||
:param max_tokens: THe maximum number of tokens the generated answer can have.
|
||||
:return: Generated answers plus additional infos in a dict
|
||||
:param max_tokens: The maximum number of tokens the generated answer can have.
|
||||
:return: Generated answers plus additional infos in a dict.
|
||||
"""
|
||||
pass
|
||||
|
||||
@ -41,6 +41,14 @@ class BaseGenerator(BaseComponent):
|
||||
add_isolated_node_eval: bool = False,
|
||||
max_tokens: Optional[int] = None,
|
||||
): # type: ignore
|
||||
"""
|
||||
:param query: Query string.
|
||||
:param documents: List of Documents the answer should be based on.
|
||||
:param top_k: The maximum number of answers to return.
|
||||
:param labels: Labels to be used for evaluation.
|
||||
:param add_isolated_node_eval: If True, the answer generator will be evaluated in isolation.
|
||||
:param max_tokens: The maximum number of tokens the generated answer can have.
|
||||
"""
|
||||
if documents:
|
||||
results = self.predict(query=query, documents=documents, top_k=top_k, max_tokens=max_tokens)
|
||||
else:
|
||||
@ -66,6 +74,14 @@ class BaseGenerator(BaseComponent):
|
||||
add_isolated_node_eval: bool = False,
|
||||
max_tokens: Optional[int] = None,
|
||||
):
|
||||
"""
|
||||
:param queries: List of query strings.
|
||||
:param documents: List of list of Documents the answer should be based on.
|
||||
:param top_k: The maximum number of answers to return.
|
||||
:param labels: Labels to be used for evaluation.
|
||||
:param add_isolated_node_eval: If True, the answer generator will be evaluated in isolation.
|
||||
:param max_tokens: The maximum number of tokens the generated answer can have.
|
||||
"""
|
||||
results = self.predict_batch(
|
||||
queries=queries, documents=documents, top_k=top_k, batch_size=batch_size, max_tokens=max_tokens
|
||||
)
|
||||
|
||||
@ -28,6 +28,10 @@ class BaseDocumentClassifier(BaseComponent):
|
||||
pass
|
||||
|
||||
def run(self, documents: Union[List[dict], List[Document]], root_node: str): # type: ignore
|
||||
"""
|
||||
:param documents: A list of Document objects.
|
||||
:param root_node: The root node of the pipeline's graph.
|
||||
"""
|
||||
self.query_count += 1
|
||||
if documents:
|
||||
predict = self.timing(self.predict, "query_time")
|
||||
@ -48,6 +52,10 @@ class BaseDocumentClassifier(BaseComponent):
|
||||
return output, "output_1"
|
||||
|
||||
def run_batch(self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None): # type: ignore
|
||||
"""
|
||||
:param documents: List of list of Documents.
|
||||
:param batch_size: Number of Documents to process at a time.
|
||||
"""
|
||||
predict_batch = self.timing(self.predict_batch, "query_time")
|
||||
results = predict_batch(documents=documents, batch_size=batch_size)
|
||||
output = {"documents": results}
|
||||
|
||||
@ -29,6 +29,10 @@ class BaseImageToText(BaseComponent):
|
||||
pass
|
||||
|
||||
def run(self, file_paths: Optional[List[str]] = None, documents: Optional[List[Document]] = None): # type: ignore
|
||||
"""
|
||||
:param file_paths: Paths to the images for which you want to generate captions.
|
||||
:param documents: List of image Documents to process into text.
|
||||
"""
|
||||
if file_paths is None and documents is None:
|
||||
raise ValueError("You must either specify documents or image file_paths to process.")
|
||||
|
||||
@ -48,4 +52,8 @@ class BaseImageToText(BaseComponent):
|
||||
def run_batch( # type: ignore
|
||||
self, file_paths: Optional[List[str]] = None, documents: Optional[List[Document]] = None
|
||||
):
|
||||
"""
|
||||
:param file_paths: Paths to the images for which you want to generate captions.
|
||||
:param documents: List of image Documents to process into text.
|
||||
"""
|
||||
return self.run(file_paths=file_paths, documents=documents)
|
||||
|
||||
@ -65,6 +65,11 @@ class BaseRanker(BaseComponent):
|
||||
return docs_with_meta
|
||||
|
||||
def run(self, query: str, documents: List[Document], top_k: Optional[int] = None): # type: ignore
|
||||
"""
|
||||
:param query: Query string.
|
||||
:param documents: List of Documents to process.
|
||||
:param top_k: The maximum number of Documents to return.
|
||||
"""
|
||||
self.query_count += 1
|
||||
if documents:
|
||||
predict = self.timing(self.predict, "query_time")
|
||||
@ -85,6 +90,12 @@ class BaseRanker(BaseComponent):
|
||||
top_k: Optional[int] = None,
|
||||
batch_size: Optional[int] = None,
|
||||
):
|
||||
"""
|
||||
:param queries: List of query strings.
|
||||
:param documents: List of list of Documents to process.
|
||||
:param top_k: The maximum number of answers to return.
|
||||
:param batch_size: Number of Documents to process at a time.
|
||||
"""
|
||||
self.query_count = +len(queries)
|
||||
predict_batch = self.timing(self.predict_batch, "query_time")
|
||||
results = predict_batch(queries=queries, documents=documents, top_k=top_k, batch_size=batch_size)
|
||||
|
||||
@ -91,6 +91,13 @@ class BaseReader(BaseComponent):
|
||||
return answer
|
||||
|
||||
def run(self, query: str, documents: List[Document], top_k: Optional[int] = None, labels: Optional[MultiLabel] = None, add_isolated_node_eval: bool = False): # type: ignore
|
||||
"""
|
||||
:param query: Query string.
|
||||
:param documents: List of Documents in which Reader looks for answers.
|
||||
:param top_k: The maximum number of answers to return.
|
||||
:param labels: Labels to be used for evaluation.
|
||||
:param add_isolated_node_eval: If True, the reader will be evaluated in isolation (i.e. without a retriever).
|
||||
"""
|
||||
self.query_count += 1
|
||||
predict = self.timing(self.predict, "query_time")
|
||||
# Remove empty text documents before making predictions
|
||||
@ -150,6 +157,13 @@ class BaseReader(BaseComponent):
|
||||
labels: Optional[List[MultiLabel]] = None,
|
||||
add_isolated_node_eval: bool = False,
|
||||
):
|
||||
"""
|
||||
:param queries: List of query strings.
|
||||
:param documents: List of lists of Document in which Reader looks for answers.
|
||||
:param top_k: The maximum number of answers to return.
|
||||
:param labels: Labels to be used for evaluation.
|
||||
:param add_isolated_node_eval: If True, the reader will be evaluated in isolation (i.e. without a retriever).
|
||||
"""
|
||||
self.query_count += len(queries)
|
||||
|
||||
# Remove empty documents before making predictions
|
||||
|
||||
@ -248,6 +248,18 @@ class BaseRetriever(BaseComponent):
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
scale_score: Optional[bool] = None,
|
||||
):
|
||||
"""
|
||||
:param root_node: The root node of the pipeline's graph.
|
||||
:param query: Query string.
|
||||
:param filters: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field.
|
||||
:param top_k: How many documents to return per query.
|
||||
:param documents: List of Documents to retrieve.
|
||||
:param index: The name of the index in the DocumentStore from which to retrieve documents.
|
||||
:param headers: Custom HTTP headers to pass to document store client if supported (e.g. {'Authorization': 'Basic YWRtaW46cm9vdA=='} for basic authentication).
|
||||
:param scale_score: Whether to scale the similarity score to the unit interval (range of [0,1]).
|
||||
If true (default), similarity scores (e.g. cosine or dot_product) which naturally have a different value range will be scaled to a range of [0,1], where 1 means extremely relevant.
|
||||
Otherwise, raw similarity scores (e.g. cosine or dot_product) will be used.
|
||||
"""
|
||||
if root_node == "Query":
|
||||
if query is None:
|
||||
raise HaystackError(
|
||||
@ -281,6 +293,15 @@ class BaseRetriever(BaseComponent):
|
||||
index: Optional[str] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
"""
|
||||
:param root_node: The root node of the pipeline's graph.
|
||||
:param queries: The list of query strings.
|
||||
:param filters: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field.
|
||||
:param top_k: How many documents to return per query.
|
||||
:param documents: List of Documents to retrieve.
|
||||
:param index: The name of the index in the DocumentStore from which to retrieve documents.
|
||||
:param headers: Custom HTTP headers to pass to document store client if supported (e.g. {'Authorization': 'Basic YWRtaW46cm9vdA=='} for basic authentication).
|
||||
"""
|
||||
if root_node == "Query":
|
||||
if queries is None:
|
||||
raise HaystackError(
|
||||
|
||||
@ -30,6 +30,9 @@ class BaseSummarizer(BaseComponent):
|
||||
pass
|
||||
|
||||
def run(self, documents: List[Document]): # type: ignore
|
||||
"""
|
||||
:param documents: Related documents (e.g. coming from a retriever) that the answer shall be conditioned on.
|
||||
"""
|
||||
results: Dict = {"documents": []}
|
||||
|
||||
if documents:
|
||||
@ -40,6 +43,10 @@ class BaseSummarizer(BaseComponent):
|
||||
def run_batch( # type: ignore
|
||||
self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None
|
||||
):
|
||||
"""
|
||||
:param documents: List of related documents.
|
||||
:param batch_size: Number of Documents to process at a time.
|
||||
"""
|
||||
results = self.predict_batch(documents=documents, batch_size=batch_size)
|
||||
|
||||
return {"documents": results}, "output_1"
|
||||
|
||||
@ -43,7 +43,14 @@ class BaseTranslator(BaseComponent):
|
||||
answers: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
|
||||
dict_key: Optional[str] = None,
|
||||
):
|
||||
"""Method that gets executed when this class is used as a Node in a Haystack Pipeline"""
|
||||
"""
|
||||
Method that gets executed when this class is used as a Node in a Haystack Pipeline.
|
||||
:param results: Generated QA pairs to translate.
|
||||
:param query: The query string to translate.
|
||||
:param documents: The documents to translate.
|
||||
:param answers: Passes the answers to the TranslationWrapperPipeline. See [Haystack documentation](https://docs.haystack.deepset.ai/docs/ready_made_pipelines#translationwrapperpipeline) for more details.
|
||||
:param dict_key: If you pass a dictionary in `documents`, you can specify here the field which shall be translated.
|
||||
"""
|
||||
translation_results = {}
|
||||
|
||||
if results is not None:
|
||||
@ -80,6 +87,12 @@ class BaseTranslator(BaseComponent):
|
||||
answers: Optional[Union[List[Answer], List[List[Answer]]]] = None,
|
||||
batch_size: Optional[int] = None,
|
||||
):
|
||||
"""
|
||||
:param queries: List of query strings to translate.
|
||||
:param documents: The documents to translate.
|
||||
:param answers: List of answers (or list of lists of answers) to translate.
|
||||
:param batch_size: Number of records to process at a time.
|
||||
"""
|
||||
translation_results = {}
|
||||
if queries:
|
||||
translation_results["queries"] = self.translate_batch(queries=queries)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user