haystack/test/nodes/test_image_to_text.py
Christian Clauss 9405eb90ee
ci: Fix invalid escape sequences in Python code (#5802)
* ci: Use ruff in pre-commit to further limit complexity

* Fix invalid escape sequences in Python code

* Delete releasenotes/notes/ruff-4d2504d362035166.yaml
2023-09-14 16:42:48 +02:00

94 lines
3.5 KiB
Python

import pytest
from haystack import Document
from haystack.nodes.image_to_text.transformers import TransformersImageToText
from haystack.nodes.image_to_text.base import BaseImageToText
from haystack.errors import ImageToTextError
@pytest.fixture
def image_file_paths(samples_path):
    """Paths (as strings) to the sample images used throughout these tests."""
    file_names = ("apple.jpg", "car.jpg", "cat.jpg", "galaxy.jpg", "paris.jpg")
    images_dir = samples_path / "images"
    paths = []
    for file_name in file_names:
        paths.append(str(images_dir / file_name))
    return paths
@pytest.fixture
def image_docs(image_file_paths):
    """One image-type Document wrapping each sample image path."""
    docs = []
    for path in image_file_paths:
        docs.append(Document(content=path, content_type="image"))
    return docs
# Captions the vit-gpt2 model is expected to generate for the sample images,
# in the same order as the file names listed in the `image_file_paths` fixture.
# NOTE(review): these are model-version-specific golden outputs — a model update
# upstream may legitimately change them.
EXPECTED_CAPTIONS = [
    "a red apple is sitting on a pile of hay",
    "a white car parked in a parking lot",
    "a cat laying in the grass",
    "a blurry photo of a blurry shot of a black object",
    "a city with a large building and a clock tower",
]
@pytest.fixture
def image_to_text():
    """A CPU-only TransformersImageToText node backed by the vit-gpt2 captioning model."""
    generation_kwargs = {"max_new_tokens": 50}
    node = TransformersImageToText(
        model_name_or_path="nlpconnect/vit-gpt2-image-captioning",
        devices=["cpu"],
        generation_kwargs=generation_kwargs,
    )
    return node
@pytest.mark.integration
def test_image_to_text_from_files(image_to_text, image_file_paths):
    """Running on raw file paths yields the expected captions and records each source path."""
    assert isinstance(image_to_text, BaseImageToText)
    output = image_to_text.run(file_paths=image_file_paths)[0]
    docs = output["documents"]
    assert [doc.meta["image_path"] for doc in docs] == image_file_paths
    assert [doc.content for doc in docs] == EXPECTED_CAPTIONS
@pytest.mark.integration
def test_image_to_text_from_documents(image_to_text, image_file_paths, image_docs):
    """Running on image Documents yields the expected captions and records each source path."""
    output = image_to_text.run(documents=image_docs)[0]
    docs = output["documents"]
    assert [doc.meta["image_path"] for doc in docs] == image_file_paths
    assert [doc.content for doc in docs] == EXPECTED_CAPTIONS
@pytest.mark.integration
def test_image_to_text_from_files_and_documents(image_to_text, image_file_paths, image_docs):
    """Mixing file paths and Documents in one run still produces all captions, in order."""
    output = image_to_text.run(file_paths=image_file_paths[:3], documents=image_docs[3:])[0]
    docs = output["documents"]
    assert [doc.meta["image_path"] for doc in docs] == image_file_paths
    assert [doc.content for doc in docs] == EXPECTED_CAPTIONS
@pytest.mark.integration
def test_image_to_text_invalid_image(image_to_text, samples_path):
    """A file that is not a decodable image must raise ImageToTextError."""
    non_image = samples_path / "markdown" / "sample.md"
    with pytest.raises(ImageToTextError, match="cannot identify image file"):
        image_to_text.run(file_paths=[str(non_image)])
@pytest.mark.integration
def test_image_to_text_incorrect_path(image_to_text):
    """A path that does not exist on disk must raise ImageToTextError."""
    missing_path = "wrong_path.jpg"
    with pytest.raises(ImageToTextError, match="Incorrect path"):
        image_to_text.run(file_paths=[missing_path])
@pytest.mark.integration
def test_image_to_text_not_image_document(image_to_text):
    """Passing a text-type Document must be rejected with a ValueError."""
    text_doc = Document(content="this document is textual", content_type="text")
    with pytest.raises(ValueError, match="The ImageToText node only supports image documents."):
        image_to_text.run(documents=[text_doc])
@pytest.mark.integration
def test_image_to_text_unsupported_model_after_loading():
    """Loading a non-captioning model (a QA BERT) must fail with a descriptive ValueError."""
    expected_message = (
        r"The model 'deepset/minilm-uncased-squad2' \(class 'BertForQuestionAnswering'\)"
        r" is not supported for ImageToText"
    )
    with pytest.raises(ValueError, match=expected_message):
        TransformersImageToText(model_name_or_path="deepset/minilm-uncased-squad2")