diff --git a/haystack/components/routers/text_language_router.py b/haystack/components/routers/text_language_router.py index 664f342fd..eb8692041 100644 --- a/haystack/components/routers/text_language_router.py +++ b/haystack/components/routers/text_language_router.py @@ -67,7 +67,9 @@ class TextLanguageRouter: def detect_language(self, text: str) -> Optional[str]: try: language = langdetect.detect(text) - except langdetect.LangDetectException: - logger.warning("Langdetect cannot detect the language of text: %s", text) + except langdetect.LangDetectException as exception: + logger.warning("Langdetect cannot detect the language of text. Error: %s", exception) + # Only log the text in debug mode, as it might contain sensitive information + logger.debug("Langdetect cannot detect the language of text: %s", text) language = None return language diff --git a/releasenotes/notes/language-router-logging-6afed7b6b8a7ae78.yaml b/releasenotes/notes/language-router-logging-6afed7b6b8a7ae78.yaml new file mode 100644 index 000000000..bf1815d65 --- /dev/null +++ b/releasenotes/notes/language-router-logging-6afed7b6b8a7ae78.yaml @@ -0,0 +1,12 @@ +--- +security: + - | + Remove the text value from a warning log in the `TextLanguageRouter` to avoid logging sensitive information. + The text can still be shown by switching to the `debug` log level. 
+ + ```python + import logging + + logging.basicConfig(format="%(levelname)s - %(name)s - %(message)s", level=logging.WARNING) + logging.getLogger("haystack").setLevel(logging.DEBUG) + ``` diff --git a/test/components/routers/test_text_language_router.py b/test/components/routers/test_text_language_router.py index e23bcb3e9..4082c02d8 100644 --- a/test/components/routers/test_text_language_router.py +++ b/test/components/routers/test_text_language_router.py @@ -1,5 +1,6 @@ import logging import pytest +from _pytest.logging import LogCaptureFixture from haystack import Document from haystack.components.routers import TextLanguageRouter @@ -38,8 +39,15 @@ class TestTextLanguageRouter: result = classifier.run(text=german_sentence) assert result == {"unmatched": german_sentence} - def test_warning_if_no_language_detected(self, caplog): + def test_warning_if_no_language_detected(self, caplog: LogCaptureFixture): with caplog.at_level(logging.WARNING): classifier = TextLanguageRouter() classifier.run(text=".") + assert "Langdetect cannot detect the language of text. Error: No features in text." in caplog.text + + def test_warning_if_no_language_detected_if_debug(self, caplog: LogCaptureFixture): + with caplog.at_level(logging.DEBUG): + classifier = TextLanguageRouter() + classifier.run(text=".") + assert "Langdetect cannot detect the language of text. Error: No features in text." in caplog.text assert "Langdetect cannot detect the language of text: ." in caplog.text