haystack/test/dataclasses/test_image_content.py
2025-07-22 09:46:47 +02:00

195 lines
7.9 KiB
Python

# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
import base64
import logging
from unittest.mock import Mock, patch
import httpx
import pytest
from PIL import Image
from haystack.dataclasses.image_content import ImageContent
def test_image_content_init(base64_image_string):
image_content = ImageContent(
base64_image=base64_image_string, mime_type="image/png", detail="auto", meta={"key": "value"}
)
assert image_content.base64_image == base64_image_string
assert image_content.mime_type == "image/png"
assert image_content.detail == "auto"
assert image_content.meta == {"key": "value"}
assert image_content.validation
def test_image_content_init_with_invalid_base64_string():
with pytest.raises(ValueError):
ImageContent(base64_image="invalid_base64_string")
def test_image_content_init_with_invalid_base64_string_and_validation_false():
image_content = ImageContent(base64_image="invalid_base64_string", validation=False)
assert image_content.base64_image == "invalid_base64_string"
assert image_content.mime_type is None
assert image_content.detail is None
assert image_content.meta == {}
assert not image_content.validation
def test_image_content_init_with_invalid_mime_type(test_files_path, base64_image_string):
with pytest.raises(ValueError):
ImageContent(base64_image=base64_image_string, mime_type="text/xml")
with open(test_files_path / "docx" / "sample_docx.docx", "rb") as docx_file:
docx_base64 = base64.b64encode(docx_file.read()).decode("utf-8")
with pytest.raises(ValueError):
ImageContent(base64_image=docx_base64)
def test_image_content_init_with_invalid_mime_type_and_validation_false(test_files_path, base64_image_string):
image_content = ImageContent(base64_image=base64_image_string, mime_type="text/xml", validation=False)
assert image_content.base64_image == base64_image_string
assert image_content.mime_type == "text/xml"
assert image_content.detail is None
assert image_content.meta == {}
assert not image_content.validation
with open(test_files_path / "docx" / "sample_docx.docx", "rb") as docx_file:
docx_base64 = base64.b64encode(docx_file.read()).decode("utf-8")
image_content = ImageContent(base64_image=docx_base64, validation=False)
assert image_content.base64_image == docx_base64
assert image_content.mime_type is None
assert image_content.detail is None
assert image_content.meta == {}
assert not image_content.validation
def test_image_content_mime_type_guessing(test_files_path):
image_path = test_files_path / "images" / "apple.jpg"
with open(image_path, "rb") as image_file:
base64_image = base64.b64encode(image_file.read()).decode("utf-8")
image_content = ImageContent(base64_image=base64_image)
assert image_content.mime_type == "image/jpeg"
# do not guess mime type if mime type is provided
image_content = ImageContent(base64_image=base64_image, mime_type="image/png")
assert image_content.mime_type == "image/png"
def test_image_content_show_in_jupyter(test_files_path):
image_path = test_files_path / "images" / "apple.jpg"
with open(image_path, "rb") as image_file:
base64_image = base64.b64encode(image_file.read()).decode("utf-8")
image_content = ImageContent(base64_image=base64_image)
with (
patch("haystack.dataclasses.image_content.is_in_jupyter", return_value=True),
patch("IPython.display.display") as mock_display,
):
image_content.show()
mock_display.assert_called_once()
displayed_image = mock_display.call_args[0][0]
assert isinstance(displayed_image, Image.Image)
def test_image_content_show_outside_jupyter(test_files_path):
image_path = test_files_path / "images" / "apple.jpg"
with open(image_path, "rb") as image_file:
base64_image = base64.b64encode(image_file.read()).decode("utf-8")
image_content = ImageContent(base64_image=base64_image)
# mocking is_in_jupyter is not needed because we don't test in a Jupyter notebook
with patch.object(Image.Image, "show") as mock_show:
image_content.show()
mock_show.assert_called_once()
def test_image_content_from_file_path(test_files_path):
image_content = ImageContent.from_file_path(
file_path=test_files_path / "images" / "apple.jpg", size=(100, 100), detail="high", meta={"test": "test"}
)
assert isinstance(image_content.base64_image, str)
assert image_content.mime_type == "image/jpeg"
assert image_content.detail == "high"
assert image_content.meta == {"test": "test", "file_path": str(test_files_path / "images" / "apple.jpg")}
def test_image_content_from_file_path_pdf_unsupported(test_files_path, caplog):
with pytest.raises(IndexError):
ImageContent.from_file_path(
file_path=test_files_path / "pdf" / "sample_pdf_1.pdf",
size=(100, 100),
detail="high",
meta={"test": "test"},
)
assert "Could not convert file" in caplog.text
assert "PDF" in caplog.text
def test_image_content_from_file_path_non_existing(test_files_path, caplog):
caplog.set_level(logging.WARNING)
with pytest.raises(IndexError):
ImageContent.from_file_path(file_path=test_files_path / "images" / "non_existing.jpg")
assert "No such file" in caplog.text
def test_image_content_from_url(test_files_path):
with patch("haystack.components.fetchers.link_content.httpx.Client.get") as mock_get:
with open(test_files_path / "images" / "apple.jpg", "rb") as image_file:
image_bytes = image_file.read()
mock_response = Mock(status_code=200, content=image_bytes, headers={"Content-Type": "image/jpeg"})
mock_get.return_value = mock_response
image_content = ImageContent.from_url(
url="https://example.com/apple.jpg", size=(100, 100), detail="high", meta={"test": "test"}
)
assert isinstance(image_content.base64_image, str)
assert image_content.mime_type == "image/jpeg"
assert image_content.detail == "high"
assert image_content.meta == {"test": "test", "url": "https://example.com/apple.jpg", "content_type": "image/jpeg"}
def test_image_content_from_url_bad_request():
with patch("haystack.components.fetchers.link_content.httpx.Client.get") as mock_get:
mock_get.side_effect = httpx.HTTPStatusError("403 Client Error", request=Mock(), response=Mock())
with pytest.raises(httpx.HTTPStatusError):
ImageContent.from_url(url="https://non_existent_website_dot.com/image.jpg", retry_attempts=0, timeout=1)
def test_image_content_from_url_wrong_mime_type_text():
with patch("haystack.components.fetchers.link_content.httpx.Client.get") as mock_get:
mock_response = Mock(status_code=200, text="a text", headers={"Content-Type": "text/plain"})
mock_get.return_value = mock_response
with pytest.raises(ValueError):
ImageContent.from_url(
url="https://example.com/text.txt", size=(100, 100), detail="high", meta={"test": "test"}
)
def test_image_content_from_url_wrong_mime_type_pdf(test_files_path):
with patch("haystack.components.fetchers.link_content.httpx.Client.get") as mock_get:
with open(test_files_path / "pdf" / "sample_pdf_1.pdf", "rb") as pdf_file:
pdf_bytes = pdf_file.read()
mock_response = Mock(status_code=200, content=pdf_bytes, headers={"Content-Type": "application/pdf"})
mock_get.return_value = mock_response
with pytest.raises(ValueError):
ImageContent.from_url(
url="https://example.com/sample_pdf_1.pdf", size=(100, 100), detail="high", meta={"test": "test"}
)
@pytest.mark.integration
def test_image_content_from_url_wrong_mime_type():
with pytest.raises(ValueError):
ImageContent.from_url(url="https://example.com", size=(100, 100), detail="high", meta={"test": "test"})