mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-07-23 08:52:16 +00:00

* Rework Document serialisation Make Document backward compatible Fix InMemoryDocumentStore filters Fix InMemoryDocumentStore.bm25_retrieval Add release notes Fix pylint failures Enhance Document kwargs handling and docstrings Rename Document's text field to content Fix e2e tests Fix SimilarityRanker tests Fix typo in release notes Rename Document's metadata field to meta (#6183) * fix bugs * make linters happy * fix * more fix * match regex --------- Co-authored-by: Massimiliano Pippi <mpippi@gmail.com>
53 lines
2.0 KiB
Python
53 lines
2.0 KiB
Python
import logging
|
|
import pytest
|
|
|
|
from haystack.preview import Document
|
|
from haystack.preview.components.preprocessors import TextLanguageClassifier
|
|
|
|
|
|
class TestTextLanguageClassifier:
|
|
@pytest.mark.unit
|
|
def test_non_string_input(self):
|
|
with pytest.raises(TypeError, match="TextLanguageClassifier expects a str as input."):
|
|
classifier = TextLanguageClassifier()
|
|
classifier.run(text=Document(content="This is an english sentence."))
|
|
|
|
@pytest.mark.unit
|
|
def test_list_of_string(self):
|
|
with pytest.raises(TypeError, match="TextLanguageClassifier expects a str as input."):
|
|
classifier = TextLanguageClassifier()
|
|
classifier.run(text=["This is an english sentence."])
|
|
|
|
@pytest.mark.unit
|
|
def test_empty_string(self):
|
|
classifier = TextLanguageClassifier()
|
|
result = classifier.run(text="")
|
|
assert result == {"unmatched": ""}
|
|
|
|
@pytest.mark.unit
|
|
def test_detect_language(self):
|
|
classifier = TextLanguageClassifier()
|
|
detected_language = classifier.detect_language("This is an english sentence.")
|
|
assert detected_language == "en"
|
|
|
|
@pytest.mark.unit
|
|
def test_route_to_en(self):
|
|
classifier = TextLanguageClassifier()
|
|
english_sentence = "This is an english sentence."
|
|
result = classifier.run(text=english_sentence)
|
|
assert result == {"en": english_sentence}
|
|
|
|
@pytest.mark.unit
|
|
def test_route_to_unmatched(self):
|
|
classifier = TextLanguageClassifier()
|
|
german_sentence = "Ein deutscher Satz ohne Verb."
|
|
result = classifier.run(text=german_sentence)
|
|
assert result == {"unmatched": german_sentence}
|
|
|
|
@pytest.mark.unit
|
|
def test_warning_if_no_language_detected(self, caplog):
|
|
with caplog.at_level(logging.WARNING):
|
|
classifier = TextLanguageClassifier()
|
|
classifier.run(text=".")
|
|
assert "Langdetect cannot detect the language of text: ." in caplog.text
|