From 741dd07227e866e4de5fb35a627b465f02bcf940 Mon Sep 17 00:00:00 2001 From: Daria Fokina Date: Tue, 13 Aug 2024 14:02:58 +0200 Subject: [PATCH] clean up docstrings: TextCleaner (#8202) * update textcleaner strings * Update haystack/components/preprocessors/text_cleaner.py Co-authored-by: Agnieszka Marzec <97166305+agnieszka-m@users.noreply.github.com> --------- Co-authored-by: Agnieszka Marzec <97166305+agnieszka-m@users.noreply.github.com> --- .../components/preprocessors/text_cleaner.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/haystack/components/preprocessors/text_cleaner.py b/haystack/components/preprocessors/text_cleaner.py index bce5be63a..ea31a59fc 100644 --- a/haystack/components/preprocessors/text_cleaner.py +++ b/haystack/components/preprocessors/text_cleaner.py @@ -12,14 +12,14 @@ from haystack import component @component class TextCleaner: """ - A PreProcessor component to clean text data. + Cleans text strings. - It can remove substrings matching a list of regular expressions, convert text to lowercase, remove punctuation, - and remove numbers. + It can remove substrings matching a list of regular expressions, convert text to lowercase, + remove punctuation, and remove numbers. + Use it to clean up text data before evaluation. - This is useful to clean up text data before evaluation. + ### Usage example - Usage example: ```python from haystack.components.preprocessors import TextCleaner @@ -38,13 +38,12 @@ class TextCleaner: remove_numbers: bool = False, ): """ - Initialize the TextCleaner component. + Initializes the TextCleaner component. - :param remove_regexps: A list of regular expressions. If provided, it removes substrings - matching these regular expressions from the text. - :param convert_to_lowercase: If True, converts all characters to lowercase. - :param remove_punctuation: If True, removes punctuation from the text. - :param remove_numbers: If True, removes numerical digits from the text. + :param remove_regexps: A list of regex patterns to remove matching substrings from the text. + :param convert_to_lowercase: If `True`, converts all characters to lowercase. + :param remove_punctuation: If `True`, removes punctuation from the text. + :param remove_numbers: If `True`, removes numerical digits from the text. """ self._remove_regexps = remove_regexps self._convert_to_lowercase = convert_to_lowercase