mirror of
https://github.com/deepset-ai/haystack.git
synced 2026-01-06 03:57:19 +00:00
refactor: remove deprecated parameters from Summarizer (#3740)
* remove deprecated parameters * remove deprecation/removal test
This commit is contained in:
parent
b8fff837b4
commit
136928714c
@ -14,43 +14,34 @@ class BaseSummarizer(BaseComponent):
|
||||
outgoing_edges = 1
|
||||
|
||||
@abstractmethod
|
||||
def predict(self, documents: List[Document], generate_single_summary: Optional[bool] = None) -> List[Document]:
|
||||
def predict(self, documents: List[Document]) -> List[Document]:
|
||||
"""
|
||||
Abstract method for creating a summary.
|
||||
|
||||
:param documents: Related documents (e.g. coming from a retriever) that the answer shall be conditioned on.
|
||||
:param generate_single_summary: This parameter is deprecated and will be removed in Haystack 1.12
|
||||
:return: List of Documents, where Document.meta["summary"] contains the summarization
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def predict_batch(
|
||||
self,
|
||||
documents: Union[List[Document], List[List[Document]]],
|
||||
generate_single_summary: Optional[bool] = None,
|
||||
batch_size: Optional[int] = None,
|
||||
self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None
|
||||
) -> Union[List[Document], List[List[Document]]]:
|
||||
pass
|
||||
|
||||
def run(self, documents: List[Document], generate_single_summary: Optional[bool] = None): # type: ignore
|
||||
def run(self, documents: List[Document]): # type: ignore
|
||||
|
||||
results: Dict = {"documents": []}
|
||||
|
||||
if documents:
|
||||
results["documents"] = self.predict(documents=documents, generate_single_summary=generate_single_summary)
|
||||
results["documents"] = self.predict(documents=documents)
|
||||
|
||||
return results, "output_1"
|
||||
|
||||
def run_batch( # type: ignore
|
||||
self,
|
||||
documents: Union[List[Document], List[List[Document]]],
|
||||
generate_single_summary: Optional[bool] = None,
|
||||
batch_size: Optional[int] = None,
|
||||
self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None
|
||||
):
|
||||
|
||||
results = self.predict_batch(
|
||||
documents=documents, batch_size=batch_size, generate_single_summary=generate_single_summary
|
||||
)
|
||||
results = self.predict_batch(documents=documents, batch_size=batch_size)
|
||||
|
||||
return {"documents": results}, "output_1"
|
||||
|
||||
@ -60,8 +60,6 @@ class TransformersSummarizer(BaseSummarizer):
|
||||
min_length: int = 5,
|
||||
use_gpu: bool = True,
|
||||
clean_up_tokenization_spaces: bool = True,
|
||||
separator_for_single_summary: str = " ",
|
||||
generate_single_summary: bool = False,
|
||||
batch_size: int = 16,
|
||||
progress_bar: bool = True,
|
||||
use_auth_token: Optional[Union[str, bool]] = None,
|
||||
@ -81,9 +79,6 @@ class TransformersSummarizer(BaseSummarizer):
|
||||
:param min_length: Minimum length of summarized text
|
||||
:param use_gpu: Whether to use GPU (if available).
|
||||
:param clean_up_tokenization_spaces: Whether or not to clean up the potential extra spaces in the text output
|
||||
:param separator_for_single_summary: This parameter is deprecated and will be removed in Haystack 1.12
|
||||
:param generate_single_summary: This parameter is deprecated and will be removed in Haystack 1.12.
|
||||
To obtain single summaries from multiple documents, consider using the [DocumentMerger](https://docs.haystack.deepset.ai/reference/other-api#module-document_merger).
|
||||
:param batch_size: Number of documents to process at a time.
|
||||
:param progress_bar: Whether to show a progress bar.
|
||||
:param use_auth_token: The API token used to download private models from Huggingface.
|
||||
@ -98,11 +93,6 @@ class TransformersSummarizer(BaseSummarizer):
|
||||
"""
|
||||
super().__init__()
|
||||
|
||||
if generate_single_summary is True:
|
||||
raise ValueError(
|
||||
"'generate_single_summary' has been removed. Instead, you can use the Document Merger to merge documents before applying the Summarizer."
|
||||
)
|
||||
|
||||
self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False)
|
||||
if len(self.devices) > 1:
|
||||
logger.warning(
|
||||
@ -128,21 +118,14 @@ class TransformersSummarizer(BaseSummarizer):
|
||||
self.batch_size = batch_size
|
||||
self.progress_bar = progress_bar
|
||||
|
||||
def predict(self, documents: List[Document], generate_single_summary: Optional[bool] = None) -> List[Document]:
|
||||
def predict(self, documents: List[Document]) -> List[Document]:
|
||||
"""
|
||||
Produce the summarization from the supplied documents.
|
||||
These documents can, for example, be retrieved via the Retriever.
|
||||
|
||||
:param documents: Related documents (e.g. coming from a retriever) that the answer shall be conditioned on.
|
||||
:param generate_single_summary: This parameter is deprecated and will be removed in Haystack 1.12.
|
||||
To obtain single summaries from multiple documents, consider using the [DocumentMerger](https://docs.haystack.deepset.ai/docs/document_merger).
|
||||
:return: List of Documents, where Document.meta["summary"] contains the summarization
|
||||
"""
|
||||
if generate_single_summary is True:
|
||||
raise ValueError(
|
||||
"'generate_single_summary' has been removed. Instead, you can use the Document Merger to merge documents before applying the Summarizer."
|
||||
)
|
||||
|
||||
if self.min_length > self.max_length:
|
||||
raise AttributeError("min_length cannot be greater than max_length")
|
||||
|
||||
@ -183,10 +166,7 @@ class TransformersSummarizer(BaseSummarizer):
|
||||
return result
|
||||
|
||||
def predict_batch(
|
||||
self,
|
||||
documents: Union[List[Document], List[List[Document]]],
|
||||
generate_single_summary: Optional[bool] = None,
|
||||
batch_size: Optional[int] = None,
|
||||
self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None
|
||||
) -> Union[List[Document], List[List[Document]]]:
|
||||
"""
|
||||
Produce the summarization from the supplied documents.
|
||||
@ -194,15 +174,8 @@ class TransformersSummarizer(BaseSummarizer):
|
||||
|
||||
:param documents: Single list of related documents or list of lists of related documents
|
||||
(e.g. coming from a retriever) that the answer shall be conditioned on.
|
||||
:param generate_single_summary: This parameter is deprecated and will be removed in Haystack 1.12.
|
||||
To obtain single summaries from multiple documents, consider using the [DocumentMerger](https://docs.haystack.deepset.ai/docs/document_merger).
|
||||
:param batch_size: Number of Documents to process at a time.
|
||||
"""
|
||||
if generate_single_summary is True:
|
||||
raise ValueError(
|
||||
"'generate_single_summary' has been removed. Instead, you can use the Document Merger to merge documents before applying the Summarizer."
|
||||
)
|
||||
|
||||
if self.min_length > self.max_length:
|
||||
raise AttributeError("min_length cannot be greater than max_length")
|
||||
|
||||
|
||||
@ -263,14 +263,11 @@ class MockSeq2SegGenerator(BaseGenerator):
|
||||
|
||||
class MockSummarizer(BaseSummarizer):
|
||||
def predict_batch(
|
||||
self,
|
||||
documents: Union[List[Document], List[List[Document]]],
|
||||
generate_single_summary: Optional[bool] = None,
|
||||
batch_size: Optional[int] = None,
|
||||
self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None
|
||||
) -> Union[List[Document], List[List[Document]]]:
|
||||
pass
|
||||
|
||||
def predict(self, documents: List[Document], generate_single_summary: Optional[bool] = None) -> List[Document]:
|
||||
def predict(self, documents: List[Document]) -> List[Document]:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
@ -86,20 +86,6 @@ def test_summarization_pipeline(document_store, retriever, summarizer):
|
||||
assert " The Eiffel Tower in Paris has officially opened its doors to the public." == answers[0]["answer"]
|
||||
|
||||
|
||||
haystack_version = tuple(int(num) for num in haystack.__version__.split(".")[:2])
|
||||
fail_in_v1_12 = pytest.mark.xfail(
|
||||
haystack_version >= (1, 12),
|
||||
reason="'generate_single_summary' should be removed in v1.12, as it was deprecated in v1.10",
|
||||
)
|
||||
|
||||
|
||||
@fail_in_v1_12
|
||||
def test_generate_single_summary_deprecated():
|
||||
summarizer = TransformersSummarizer(model_name_or_path="hf-internal-testing/tiny-random-bart", use_gpu=False)
|
||||
with pytest.raises(ValueError):
|
||||
summarizer.predict([Document(content="irrelevant")], generate_single_summary=True)
|
||||
|
||||
|
||||
#
|
||||
# Document Merger + Summarizer tests
|
||||
#
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user