From 24b23a2cc718c13645056bfb83d9270e92bf2000 Mon Sep 17 00:00:00 2001 From: Madeesh Kannan Date: Thu, 8 Feb 2024 13:39:45 +0100 Subject: [PATCH] docs: Add missing docstrings to `NamedEntityExtractor` (#6958) * docs: Add missing docstrings to `NamedEntityExtractor` * Update named_entity_extractor.py --------- Co-authored-by: Stefano Fiorucci --- .../extractors/named_entity_extractor.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/haystack/components/extractors/named_entity_extractor.py b/haystack/components/extractors/named_entity_extractor.py index 6ca988fa5..f8d8d717c 100644 --- a/haystack/components/extractors/named_entity_extractor.py +++ b/haystack/components/extractors/named_entity_extractor.py @@ -126,6 +126,9 @@ class NamedEntityExtractor: raise ComponentError(f"Unknown NER backend '{type(backend).__name__}' for extractor") def warm_up(self): + """ + Initialize the named entity extractor backend. + """ try: self._backend.initialize() except Exception as e: @@ -135,6 +138,16 @@ class NamedEntityExtractor: @component.output_types(documents=List[Document]) def run(self, documents: List[Document], batch_size: int = 1) -> Dict[str, Any]: + """ + Run the named-entity extractor. + + :param documents: + Documents to process. + :param batch_size: + Batch size used for processing the documents. + :returns: + The processed documents. + """ texts = [doc.content if doc.content is not None else "" for doc in documents] annotations = self._backend.annotate(texts, batch_size=batch_size) @@ -150,6 +163,9 @@ class NamedEntityExtractor: return {"documents": documents} def to_dict(self) -> Dict[str, Any]: + """ + Serialize this component to a dictionary. + """ return default_to_dict( self, backend=self._backend.type, @@ -160,6 +176,12 @@ class NamedEntityExtractor: @classmethod def from_dict(cls, data: Dict[str, Any]) -> "NamedEntityExtractor": + """ + Deserialize the component from a dictionary. + + :param data: + The dictionary to deserialize from. + """ try: init_params = data["init_parameters"] init_params["device"] = ComponentDevice.from_dict(init_params["device"])