haystack/test/components/routers/test_text_language_router.py

58 lines
2.3 KiB
Python
Raw Normal View History

# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
import logging
import pytest
from _pytest.logging import LogCaptureFixture
2023-11-24 14:48:43 +01:00
from haystack import Document
from haystack.components.routers import TextLanguageRouter
class TestTextLanguageRouter:
    """Tests for ``TextLanguageRouter``: input validation, routing and logging."""

    def test_non_string_input(self):
        """Passing a ``Document`` as ``text`` raises ``TypeError``."""
        classifier = TextLanguageRouter()
        # Keep only the call under test inside ``pytest.raises`` so that an error
        # raised while building the router cannot satisfy the expectation.
        with pytest.raises(TypeError, match="TextLanguageRouter expects a string as input."):
            classifier.run(text=Document(content="This is an english sentence."))

    def test_list_of_string(self):
        """Passing a list of strings as ``text`` raises ``TypeError``."""
        classifier = TextLanguageRouter()
        with pytest.raises(TypeError, match="TextLanguageRouter expects a string as input."):
            classifier.run(text=["This is an english sentence."])

    def test_empty_string(self):
        """An empty string is returned under the ``unmatched`` key."""
        classifier = TextLanguageRouter()
        result = classifier.run(text="")
        assert result == {"unmatched": ""}

    def test_detect_language(self):
        """``_detect_language`` reports ``"en"`` for an English sentence."""
        classifier = TextLanguageRouter()
        detected_language = classifier._detect_language("This is an english sentence.")
        assert detected_language == "en"

    def test_route_to_en(self):
        """A router built with no arguments returns English text under ``en``."""
        classifier = TextLanguageRouter()
        english_sentence = "This is an english sentence."
        result = classifier.run(text=english_sentence)
        assert result == {"en": english_sentence}

    def test_route_to_unmatched(self):
        """A router built with no arguments returns German text under ``unmatched``."""
        classifier = TextLanguageRouter()
        german_sentence = "Ein deutscher Satz ohne Verb."
        result = classifier.run(text=german_sentence)
        assert result == {"unmatched": german_sentence}

    def test_warning_if_no_language_detected(self, caplog: LogCaptureFixture):
        """Running on ``"."`` with WARNING-level capture logs the detection failure."""
        classifier = TextLanguageRouter()
        with caplog.at_level(logging.WARNING):
            classifier.run(text=".")
            assert "Langdetect cannot detect the language of text. Error: No features in text." in caplog.text

    def test_warning_if_no_language_detected_if_debug(self, caplog: LogCaptureFixture):
        """With DEBUG-level capture, a second message ending in the input text is also logged."""
        classifier = TextLanguageRouter()
        with caplog.at_level(logging.DEBUG):
            classifier.run(text=".")
            assert "Langdetect cannot detect the language of text. Error: No features in text." in caplog.text
            assert "Langdetect cannot detect the language of text: ." in caplog.text