haystack/test/preview/components/preprocessors/test_text_language_classifier.py
Silvano Cerza 7287657f0e
refactor: Rename Document's text field to content (#6181)
* Rework Document serialisation

Make Document backward compatible

Fix InMemoryDocumentStore filters

Fix InMemoryDocumentStore.bm25_retrieval

Add release notes

Fix pylint failures

Enhance Document kwargs handling and docstrings

Rename Document's text field to content

Fix e2e tests

Fix SimilarityRanker tests

Fix typo in release notes

Rename Document's metadata field to meta (#6183)

* fix bugs

* make linters happy

* fix

* more fix

* match regex

---------

Co-authored-by: Massimiliano Pippi <mpippi@gmail.com>
2023-10-31 12:44:04 +01:00

53 lines
2.0 KiB
Python

import logging
import pytest
from haystack.preview import Document
from haystack.preview.components.preprocessors import TextLanguageClassifier
class TestTextLanguageClassifier:
    """Unit tests for ``TextLanguageClassifier`` built with its default arguments.

    The cases cover three areas: rejection of non-``str`` input with a
    ``TypeError``, routing of the input text to an output keyed either by the
    detected language code or by ``"unmatched"``, and the warning that is
    logged when no language can be detected.
    """

    @pytest.mark.unit
    def test_non_string_input(self):
        """Passing a ``Document`` as ``text`` raises ``TypeError``."""
        classifier = TextLanguageClassifier()
        document = Document(content="This is an english sentence.")
        # Only the call under test lives inside ``raises`` so that a TypeError
        # from the setup above can never satisfy the expectation by accident.
        # ``match`` is a regular expression, hence the escaped trailing dot.
        with pytest.raises(TypeError, match=r"TextLanguageClassifier expects a str as input\."):
            classifier.run(text=document)

    @pytest.mark.unit
    def test_list_of_string(self):
        """Passing a list of strings as ``text`` raises ``TypeError``."""
        classifier = TextLanguageClassifier()
        # Same narrowing and escaping rationale as in test_non_string_input.
        with pytest.raises(TypeError, match=r"TextLanguageClassifier expects a str as input\."):
            classifier.run(text=["This is an english sentence."])

    @pytest.mark.unit
    def test_empty_string(self):
        """An empty string is returned under the ``"unmatched"`` key."""
        classifier = TextLanguageClassifier()
        result = classifier.run(text="")
        assert result == {"unmatched": ""}

    @pytest.mark.unit
    def test_detect_language(self):
        """``detect_language`` returns ``"en"`` for an English sentence."""
        classifier = TextLanguageClassifier()
        detected_language = classifier.detect_language("This is an english sentence.")
        assert detected_language == "en"

    @pytest.mark.unit
    def test_route_to_en(self):
        """An English sentence is returned unchanged under the ``"en"`` key."""
        classifier = TextLanguageClassifier()
        english_sentence = "This is an english sentence."
        result = classifier.run(text=english_sentence)
        assert result == {"en": english_sentence}

    @pytest.mark.unit
    def test_route_to_unmatched(self):
        """A German sentence is returned under ``"unmatched"`` by a default classifier."""
        classifier = TextLanguageClassifier()
        german_sentence = "Ein deutscher Satz ohne Verb."
        result = classifier.run(text=german_sentence)
        assert result == {"unmatched": german_sentence}

    @pytest.mark.unit
    def test_warning_if_no_language_detected(self, caplog):
        """Running on ``"."`` logs a warning that the language cannot be detected."""
        with caplog.at_level(logging.WARNING):
            classifier = TextLanguageClassifier()
            classifier.run(text=".")
            # Plain substring check (not a regex), so the dot is literal here.
            assert "Langdetect cannot detect the language of text: ." in caplog.text