refactor: remove deprecated parameters from Summarizer (#3740)

* remove deprecated parameters

* remove deprecation/removal test
This commit is contained in:
Stefano Fiorucci 2022-12-29 11:07:47 +01:00 committed by GitHub
parent b8fff837b4
commit 136928714c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 10 additions and 63 deletions

View File

@ -14,43 +14,34 @@ class BaseSummarizer(BaseComponent):
outgoing_edges = 1
@abstractmethod
def predict(self, documents: List[Document], generate_single_summary: Optional[bool] = None) -> List[Document]:
def predict(self, documents: List[Document]) -> List[Document]:
"""
Abstract method for creating a summary.
:param documents: Related documents (e.g. coming from a retriever) that the answer shall be conditioned on.
:param generate_single_summary: This parameter is deprecated and will be removed in Haystack 1.12
:return: List of Documents, where Document.meta["summary"] contains the summarization
"""
pass
@abstractmethod
def predict_batch(
self,
documents: Union[List[Document], List[List[Document]]],
generate_single_summary: Optional[bool] = None,
batch_size: Optional[int] = None,
self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None
) -> Union[List[Document], List[List[Document]]]:
pass
def run(self, documents: List[Document], generate_single_summary: Optional[bool] = None): # type: ignore
def run(self, documents: List[Document]): # type: ignore
results: Dict = {"documents": []}
if documents:
results["documents"] = self.predict(documents=documents, generate_single_summary=generate_single_summary)
results["documents"] = self.predict(documents=documents)
return results, "output_1"
def run_batch( # type: ignore
self,
documents: Union[List[Document], List[List[Document]]],
generate_single_summary: Optional[bool] = None,
batch_size: Optional[int] = None,
self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None
):
results = self.predict_batch(
documents=documents, batch_size=batch_size, generate_single_summary=generate_single_summary
)
results = self.predict_batch(documents=documents, batch_size=batch_size)
return {"documents": results}, "output_1"

View File

@ -60,8 +60,6 @@ class TransformersSummarizer(BaseSummarizer):
min_length: int = 5,
use_gpu: bool = True,
clean_up_tokenization_spaces: bool = True,
separator_for_single_summary: str = " ",
generate_single_summary: bool = False,
batch_size: int = 16,
progress_bar: bool = True,
use_auth_token: Optional[Union[str, bool]] = None,
@ -81,9 +79,6 @@ class TransformersSummarizer(BaseSummarizer):
:param min_length: Minimum length of summarized text
:param use_gpu: Whether to use GPU (if available).
:param clean_up_tokenization_spaces: Whether or not to clean up the potential extra spaces in the text output
:param separator_for_single_summary: This parameter is deprecated and will be removed in Haystack 1.12
:param generate_single_summary: This parameter is deprecated and will be removed in Haystack 1.12.
To obtain single summaries from multiple documents, consider using the [DocumentMerger](https://docs.haystack.deepset.ai/reference/other-api#module-document_merger).
:param batch_size: Number of documents to process at a time.
:param progress_bar: Whether to show a progress bar.
:param use_auth_token: The API token used to download private models from Huggingface.
@ -98,11 +93,6 @@ class TransformersSummarizer(BaseSummarizer):
"""
super().__init__()
if generate_single_summary is True:
raise ValueError(
"'generate_single_summary' has been removed. Instead, you can use the Document Merger to merge documents before applying the Summarizer."
)
self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False)
if len(self.devices) > 1:
logger.warning(
@ -128,21 +118,14 @@ class TransformersSummarizer(BaseSummarizer):
self.batch_size = batch_size
self.progress_bar = progress_bar
def predict(self, documents: List[Document], generate_single_summary: Optional[bool] = None) -> List[Document]:
def predict(self, documents: List[Document]) -> List[Document]:
"""
Produce the summarization from the supplied documents.
These document can for example be retrieved via the Retriever.
:param documents: Related documents (e.g. coming from a retriever) that the answer shall be conditioned on.
:param generate_single_summary: This parameter is deprecated and will be removed in Haystack 1.12.
To obtain single summaries from multiple documents, consider using the [DocumentMerger](https://docs.haystack.deepset.ai/docs/document_merger).
:return: List of Documents, where Document.meta["summary"] contains the summarization
"""
if generate_single_summary is True:
raise ValueError(
"'generate_single_summary' has been removed. Instead, you can use the Document Merger to merge documents before applying the Summarizer."
)
if self.min_length > self.max_length:
raise AttributeError("min_length cannot be greater than max_length")
@ -183,10 +166,7 @@ class TransformersSummarizer(BaseSummarizer):
return result
def predict_batch(
self,
documents: Union[List[Document], List[List[Document]]],
generate_single_summary: Optional[bool] = None,
batch_size: Optional[int] = None,
self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None
) -> Union[List[Document], List[List[Document]]]:
"""
Produce the summarization from the supplied documents.
@ -194,15 +174,8 @@ class TransformersSummarizer(BaseSummarizer):
:param documents: Single list of related documents or list of lists of related documents
(e.g. coming from a retriever) that the answer shall be conditioned on.
:param generate_single_summary: This parameter is deprecated and will be removed in Haystack 1.12.
To obtain single summaries from multiple documents, consider using the [DocumentMerger](https://docs.haystack.deepset.ai/docs/document_merger).
:param batch_size: Number of Documents to process at a time.
"""
if generate_single_summary is True:
raise ValueError(
"'generate_single_summary' has been removed. Instead, you can use the Document Merger to merge documents before applying the Summarizer."
)
if self.min_length > self.max_length:
raise AttributeError("min_length cannot be greater than max_length")

View File

@ -263,14 +263,11 @@ class MockSeq2SegGenerator(BaseGenerator):
class MockSummarizer(BaseSummarizer):
def predict_batch(
self,
documents: Union[List[Document], List[List[Document]]],
generate_single_summary: Optional[bool] = None,
batch_size: Optional[int] = None,
self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None
) -> Union[List[Document], List[List[Document]]]:
pass
def predict(self, documents: List[Document], generate_single_summary: Optional[bool] = None) -> List[Document]:
def predict(self, documents: List[Document]) -> List[Document]:
pass

View File

@ -86,20 +86,6 @@ def test_summarization_pipeline(document_store, retriever, summarizer):
assert " The Eiffel Tower in Paris has officially opened its doors to the public." == answers[0]["answer"]
haystack_version = tuple(int(num) for num in haystack.__version__.split(".")[:2])
fail_in_v1_12 = pytest.mark.xfail(
haystack_version >= (1, 12),
reason="'generate_single_summary' should be removed in v1.12, as it was deprecated in v1.10",
)
@fail_in_v1_12
def test_generate_single_summary_deprecated():
summarizer = TransformersSummarizer(model_name_or_path="hf-internal-testing/tiny-random-bart", use_gpu=False)
with pytest.raises(ValueError):
summarizer.predict([Document(content="irrelevant")], generate_single_summary=True)
#
# Document Merger + Summarizer tests
#