mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-11-25 22:46:21 +00:00
docs: adding missing docstrings for run and run_batch methods (#5609)
* docstrings for run methods * updates from pr review * wrong article * fix style --------- Co-authored-by: anakin87 <stefanofiorucci@gmail.com> Co-authored-by: Stefano Fiorucci <44616784+anakin87@users.noreply.github.com>
This commit is contained in:
parent
b507f1a124
commit
41fd0c5458
@ -24,11 +24,11 @@ class BaseGenerator(BaseComponent):
|
||||
"""
|
||||
Abstract method to generate answers.
|
||||
|
||||
:param query: Query
|
||||
:param query: Query string.
|
||||
:param documents: Related documents (for example, coming from a retriever) the answer should be based on.
|
||||
:param top_k: Number of returned answers.
|
||||
:param max_tokens: THe maximum number of tokens the generated answer can have.
|
||||
:return: Generated answers plus additional infos in a dict
|
||||
:param max_tokens: The maximum number of tokens the generated answer can have.
|
||||
:return: Generated answers plus additional infos in a dict.
|
||||
"""
|
||||
pass
|
||||
|
||||
@ -41,6 +41,14 @@ class BaseGenerator(BaseComponent):
|
||||
add_isolated_node_eval: bool = False,
|
||||
max_tokens: Optional[int] = None,
|
||||
): # type: ignore
|
||||
"""
|
||||
:param query: Query string.
|
||||
:param documents: List of Documents the answer should be based on.
|
||||
:param top_k: The maximum number of answers to return.
|
||||
:param labels: Labels to be used for evaluation.
|
||||
:param add_isolated_node_eval: If True, the answer generator will be evaluated in isolation.
|
||||
:param max_tokens: The maximum number of tokens the generated answer can have.
|
||||
"""
|
||||
if documents:
|
||||
results = self.predict(query=query, documents=documents, top_k=top_k, max_tokens=max_tokens)
|
||||
else:
|
||||
@ -66,6 +74,14 @@ class BaseGenerator(BaseComponent):
|
||||
add_isolated_node_eval: bool = False,
|
||||
max_tokens: Optional[int] = None,
|
||||
):
|
||||
"""
|
||||
:param queries: List of query strings.
|
||||
:param documents: List of list of Documents the answer should be based on.
|
||||
:param top_k: The maximum number of answers to return.
|
||||
:param labels: Labels to be used for evaluation.
|
||||
:param add_isolated_node_eval: If True, the answer generator will be evaluated in isolation.
|
||||
:param max_tokens: The maximum number of tokens the generated answer can have.
|
||||
"""
|
||||
results = self.predict_batch(
|
||||
queries=queries, documents=documents, top_k=top_k, batch_size=batch_size, max_tokens=max_tokens
|
||||
)
|
||||
|
||||
@ -28,6 +28,10 @@ class BaseDocumentClassifier(BaseComponent):
|
||||
pass
|
||||
|
||||
def run(self, documents: Union[List[dict], List[Document]], root_node: str): # type: ignore
|
||||
"""
|
||||
:param documents: A list of Document objects.
|
||||
:param root_node: The root node of the pipeline's graph.
|
||||
"""
|
||||
self.query_count += 1
|
||||
if documents:
|
||||
predict = self.timing(self.predict, "query_time")
|
||||
@ -48,6 +52,10 @@ class BaseDocumentClassifier(BaseComponent):
|
||||
return output, "output_1"
|
||||
|
||||
def run_batch(self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None): # type: ignore
|
||||
"""
|
||||
:param documents: List of list of Documents.
|
||||
:param batch_size: Number of Documents to process at a time.
|
||||
"""
|
||||
predict_batch = self.timing(self.predict_batch, "query_time")
|
||||
results = predict_batch(documents=documents, batch_size=batch_size)
|
||||
output = {"documents": results}
|
||||
|
||||
@ -29,6 +29,10 @@ class BaseImageToText(BaseComponent):
|
||||
pass
|
||||
|
||||
def run(self, file_paths: Optional[List[str]] = None, documents: Optional[List[Document]] = None): # type: ignore
|
||||
"""
|
||||
:param file_paths: Paths to the images for which you want to generate captions.
|
||||
:param documents: List of image Documents to process into text.
|
||||
"""
|
||||
if file_paths is None and documents is None:
|
||||
raise ValueError("You must either specify documents or image file_paths to process.")
|
||||
|
||||
@ -48,4 +52,8 @@ class BaseImageToText(BaseComponent):
|
||||
def run_batch( # type: ignore
|
||||
self, file_paths: Optional[List[str]] = None, documents: Optional[List[Document]] = None
|
||||
):
|
||||
"""
|
||||
:param file_paths: Paths to the images for which you want to generate captions.
|
||||
:param documents: List of image Documents to process into text.
|
||||
"""
|
||||
return self.run(file_paths=file_paths, documents=documents)
|
||||
|
||||
@ -65,6 +65,11 @@ class BaseRanker(BaseComponent):
|
||||
return docs_with_meta
|
||||
|
||||
def run(self, query: str, documents: List[Document], top_k: Optional[int] = None): # type: ignore
|
||||
"""
|
||||
:param query: Query string.
|
||||
:param documents: List of Documents to process.
|
||||
:param top_k: The maximum number of Documents to return.
|
||||
"""
|
||||
self.query_count += 1
|
||||
if documents:
|
||||
predict = self.timing(self.predict, "query_time")
|
||||
@ -85,6 +90,12 @@ class BaseRanker(BaseComponent):
|
||||
top_k: Optional[int] = None,
|
||||
batch_size: Optional[int] = None,
|
||||
):
|
||||
"""
|
||||
:param queries: List of query strings.
|
||||
:param documents: List of list of Documents to process.
|
||||
:param top_k: The maximum number of answers to return.
|
||||
:param batch_size: Number of Documents to process at a time.
|
||||
"""
|
||||
self.query_count = +len(queries)
|
||||
predict_batch = self.timing(self.predict_batch, "query_time")
|
||||
results = predict_batch(queries=queries, documents=documents, top_k=top_k, batch_size=batch_size)
|
||||
|
||||
@ -91,6 +91,13 @@ class BaseReader(BaseComponent):
|
||||
return answer
|
||||
|
||||
def run(self, query: str, documents: List[Document], top_k: Optional[int] = None, labels: Optional[MultiLabel] = None, add_isolated_node_eval: bool = False): # type: ignore
|
||||
"""
|
||||
:param query: Query string.
|
||||
:param documents: List of Documents in which Reader looks for answers.
|
||||
:param top_k: The maximum number of answers to return.
|
||||
:param labels: Labels to be used for evaluation.
|
||||
:param add_isolated_node_eval: If True, the reader will be evaluated in isolation (i.e. without a retriever).
|
||||
"""
|
||||
self.query_count += 1
|
||||
predict = self.timing(self.predict, "query_time")
|
||||
# Remove empty text documents before making predictions
|
||||
@ -150,6 +157,13 @@ class BaseReader(BaseComponent):
|
||||
labels: Optional[List[MultiLabel]] = None,
|
||||
add_isolated_node_eval: bool = False,
|
||||
):
|
||||
"""
|
||||
:param queries: List of query strings.
|
||||
:param documents: List of lists of Document in which Reader looks for answers.
|
||||
:param top_k: The maximum number of answers to return.
|
||||
:param labels: Labels to be used for evaluation.
|
||||
:param add_isolated_node_eval: If True, the reader will be evaluated in isolation (i.e. without a retriever).
|
||||
"""
|
||||
self.query_count += len(queries)
|
||||
|
||||
# Remove empty documents before making predictions
|
||||
|
||||
@ -248,6 +248,18 @@ class BaseRetriever(BaseComponent):
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
scale_score: Optional[bool] = None,
|
||||
):
|
||||
"""
|
||||
:param root_node: The root node of the pipeline's graph.
|
||||
:param query: Query string.
|
||||
:param filters: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field.
|
||||
:param top_k: How many documents to return per query.
|
||||
:param documents: List of Documents to retrieve.
|
||||
:param index: The name of the index in the DocumentStore from which to retrieve documents.
|
||||
:param headers: Custom HTTP headers to pass to document store client if supported (e.g. {'Authorization': 'Basic YWRtaW46cm9vdA=='} for basic authentication).
|
||||
:param scale_score: Whether to scale the similarity score to the unit interval (range of [0,1]).
|
||||
If true (default), similarity scores (e.g. cosine or dot_product) which naturally have a different value range will be scaled to a range of [0,1], where 1 means extremely relevant.
|
||||
Otherwise, raw similarity scores (e.g. cosine or dot_product) will be used.
|
||||
"""
|
||||
if root_node == "Query":
|
||||
if query is None:
|
||||
raise HaystackError(
|
||||
@ -281,6 +293,15 @@ class BaseRetriever(BaseComponent):
|
||||
index: Optional[str] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
"""
|
||||
:param root_node: The root node of the pipeline's graph.
|
||||
:param queries: The list of query strings.
|
||||
:param filters: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field.
|
||||
:param top_k: How many documents to return per query.
|
||||
:param documents: List of Documents to retrieve.
|
||||
:param index: The name of the index in the DocumentStore from which to retrieve documents.
|
||||
:param headers: Custom HTTP headers to pass to document store client if supported (e.g. {'Authorization': 'Basic YWRtaW46cm9vdA=='} for basic authentication).
|
||||
"""
|
||||
if root_node == "Query":
|
||||
if queries is None:
|
||||
raise HaystackError(
|
||||
|
||||
@ -30,6 +30,9 @@ class BaseSummarizer(BaseComponent):
|
||||
pass
|
||||
|
||||
def run(self, documents: List[Document]): # type: ignore
|
||||
"""
|
||||
:param documents: Related documents (e.g. coming from a retriever) that the answer shall be conditioned on.
|
||||
"""
|
||||
results: Dict = {"documents": []}
|
||||
|
||||
if documents:
|
||||
@ -40,6 +43,10 @@ class BaseSummarizer(BaseComponent):
|
||||
def run_batch( # type: ignore
|
||||
self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None
|
||||
):
|
||||
"""
|
||||
:param documents: List of related documents.
|
||||
:param batch_size: Number of Documents to process at a time.
|
||||
"""
|
||||
results = self.predict_batch(documents=documents, batch_size=batch_size)
|
||||
|
||||
return {"documents": results}, "output_1"
|
||||
|
||||
@ -43,7 +43,14 @@ class BaseTranslator(BaseComponent):
|
||||
answers: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
|
||||
dict_key: Optional[str] = None,
|
||||
):
|
||||
"""Method that gets executed when this class is used as a Node in a Haystack Pipeline"""
|
||||
"""
|
||||
Method that gets executed when this class is used as a Node in a Haystack Pipeline.
|
||||
:param results: Generated QA pairs to translate.
|
||||
:param query: The query string to translate.
|
||||
:param documents: The documents to translate.
|
||||
:param answers: Passes the answers to the TranslationWrapperPipeline. See [Haystack documentation](https://docs.haystack.deepset.ai/docs/ready_made_pipelines#translationwrapperpipeline) for more details.
|
||||
:param dict_key: If you pass a dictionary in `documents`, you can specify here the field which shall be translated.
|
||||
"""
|
||||
translation_results = {}
|
||||
|
||||
if results is not None:
|
||||
@ -80,6 +87,12 @@ class BaseTranslator(BaseComponent):
|
||||
answers: Optional[Union[List[Answer], List[List[Answer]]]] = None,
|
||||
batch_size: Optional[int] = None,
|
||||
):
|
||||
"""
|
||||
:param queries: List of query strings to translate.
|
||||
:param documents: The documents to translate.
|
||||
:param answers: List of answers (or list of lists of answers) to translate.
|
||||
:param batch_size: Number of records to process at a time.
|
||||
"""
|
||||
translation_results = {}
|
||||
if queries:
|
||||
translation_results["queries"] = self.translate_batch(queries=queries)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user