2024-05-09 15:40:36 +02:00
|
|
|
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
|
|
|
|
#
|
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
2025-05-26 17:22:51 +01:00
|
|
|
|
2023-10-13 10:30:49 +02:00
|
|
|
import logging
|
|
|
|
import pytest
|
2024-02-28 09:45:50 +01:00
|
|
|
from _pytest.logging import LogCaptureFixture
|
2023-10-13 10:30:49 +02:00
|
|
|
|
2023-11-24 14:48:43 +01:00
|
|
|
from haystack import Document
|
|
|
|
from haystack.components.routers import TextLanguageRouter
|
2023-10-13 10:30:49 +02:00
|
|
|
|
|
|
|
|
2023-11-15 13:10:07 +01:00
|
|
|
class TestTextLanguageRouter:
    """Tests for ``TextLanguageRouter``: input validation, routing and logging.

    Every test builds the router with its default arguments. The routing tests
    below expect that configuration to send English text to the ``"en"`` output
    and everything else to ``"unmatched"``.
    """

    def test_non_string_input(self):
        """Passing a ``Document`` instead of a string raises ``TypeError``."""
        router = TextLanguageRouter()
        # Only the call under test is guarded, so an error raised while
        # constructing the router cannot accidentally satisfy the check.
        with pytest.raises(TypeError, match="TextLanguageRouter expects a string as input."):
            router.run(text=Document(content="This is an english sentence."))

    def test_list_of_string(self):
        """Passing a list of strings instead of a single string raises ``TypeError``."""
        router = TextLanguageRouter()
        # Same narrowing as above: keep construction outside ``pytest.raises``.
        with pytest.raises(TypeError, match="TextLanguageRouter expects a string as input."):
            router.run(text=["This is an english sentence."])

    def test_empty_string(self):
        """An empty string is returned on the ``"unmatched"`` output."""
        router = TextLanguageRouter()
        result = router.run(text="")
        assert result == {"unmatched": ""}

    def test_detect_language(self):
        """``_detect_language`` reports ``"en"`` for an English sentence."""
        router = TextLanguageRouter()
        detected_language = router._detect_language("This is an english sentence.")
        assert detected_language == "en"

    def test_route_to_en(self):
        """English text is returned unchanged on the ``"en"`` output."""
        router = TextLanguageRouter()
        english_sentence = "This is an english sentence."
        result = router.run(text=english_sentence)
        assert result == {"en": english_sentence}

    def test_route_to_unmatched(self):
        """German text is returned unchanged on the ``"unmatched"`` output."""
        router = TextLanguageRouter()
        german_sentence = "Ein deutscher Satz ohne Verb."
        result = router.run(text=german_sentence)
        assert result == {"unmatched": german_sentence}

    def test_warning_if_no_language_detected(self, caplog: LogCaptureFixture):
        """At WARNING level, text without detectable features logs the langdetect error."""
        with caplog.at_level(logging.WARNING):
            router = TextLanguageRouter()
            router.run(text=".")
            assert "Langdetect cannot detect the language of text. Error: No features in text." in caplog.text

    def test_warning_if_no_language_detected_if_debug(self, caplog: LogCaptureFixture):
        """At DEBUG level, the offending text is logged in addition to the warning."""
        with caplog.at_level(logging.DEBUG):
            router = TextLanguageRouter()
            router.run(text=".")
            assert "Langdetect cannot detect the language of text. Error: No features in text." in caplog.text
            assert "Langdetect cannot detect the language of text: ." in caplog.text
|