haystack/test/preview/components/preprocessors/test_text_language_classifier.py
Julian Risch 9f3b6512be
refactor: Remove reimplementations of default from_dict/to_dict and corresponding tests in 2.0 (#6108)
* whisper transcriber

* remove from/to_dict from builders

* remove from/to_dict from embedders

* remove from/to_dict from fetcher, file_converters

* remove from/to_dict from generators, preprocessors

* remove from/to_dict from ranker, reader

* remove from/to_dict from router, sampler, websearch

* pylint

* reno

* refactor import

* remove unused import
2023-10-19 11:17:02 +02:00

53 lines
1.9 KiB
Python

import logging
import pytest
from haystack.preview import Document
from haystack.preview.components.preprocessors import TextLanguageClassifier
class TestTextLanguageClassifier:
    """Unit tests for ``TextLanguageClassifier``.

    Covers rejection of non-string input, direct language detection, routing
    of the input text to an output keyed by language (or ``"unmatched"``),
    and the warning logged when no language can be detected.
    """

    @pytest.mark.unit
    def test_non_string_input(self):
        """Passing a ``Document`` instead of a ``str`` to ``run`` raises ``TypeError``."""
        classifier = TextLanguageClassifier()
        document = Document(text="This is an english sentence.")
        # Only the call under test lives inside ``pytest.raises`` so that an
        # error raised during setup cannot satisfy the expectation.
        # ``match`` is applied as a regular expression, hence the escaped dot.
        with pytest.raises(TypeError, match=r"TextLanguageClassifier expects a str as input\."):
            classifier.run(text=document)

    @pytest.mark.unit
    def test_list_of_string(self):
        """Passing a list of strings instead of a single ``str`` raises ``TypeError``."""
        classifier = TextLanguageClassifier()
        # Same narrowing as above: setup stays outside the ``raises`` block.
        with pytest.raises(TypeError, match=r"TextLanguageClassifier expects a str as input\."):
            classifier.run(text=["This is an english sentence."])

    @pytest.mark.unit
    def test_empty_string(self):
        """An empty string is returned under the ``"unmatched"`` key."""
        classifier = TextLanguageClassifier()
        result = classifier.run(text="")
        assert result == {"unmatched": ""}

    @pytest.mark.unit
    def test_detect_language(self):
        """``detect_language`` returns ``"en"`` for an English sentence."""
        classifier = TextLanguageClassifier()
        detected_language = classifier.detect_language("This is an english sentence.")
        assert detected_language == "en"

    @pytest.mark.unit
    def test_route_to_en(self):
        """An English sentence is returned unchanged under the ``"en"`` key."""
        classifier = TextLanguageClassifier()
        english_sentence = "This is an english sentence."
        result = classifier.run(text=english_sentence)
        assert result == {"en": english_sentence}

    @pytest.mark.unit
    def test_route_to_unmatched(self):
        """A German sentence is returned unchanged under the ``"unmatched"`` key.

        NOTE(review): presumably the default-constructed classifier does not
        list German among its languages -- confirm against the component.
        """
        classifier = TextLanguageClassifier()
        german_sentence = "Ein deutscher Satz ohne Verb."
        result = classifier.run(text=german_sentence)
        assert result == {"unmatched": german_sentence}

    @pytest.mark.unit
    def test_warning_if_no_language_detected(self, caplog):
        """Running on ``"."`` logs a warning that the language cannot be detected."""
        with caplog.at_level(logging.WARNING):
            classifier = TextLanguageClassifier()
            classifier.run(text=".")
            assert "Langdetect cannot detect the language of text: ." in caplog.text