mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-30 00:30:09 +00:00
195 lines
7.9 KiB
Python
195 lines
7.9 KiB
Python
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
import base64
|
|
import logging
|
|
from unittest.mock import Mock, patch
|
|
|
|
import httpx
|
|
import pytest
|
|
from PIL import Image
|
|
|
|
from haystack.dataclasses.image_content import ImageContent
|
|
|
|
|
|
def test_image_content_init(base64_image_string):
    """Check that every constructor argument is stored unchanged on the instance.

    ``validation`` is not passed here and is asserted to be truthy.
    """
    init_kwargs = {
        "base64_image": base64_image_string,
        "mime_type": "image/png",
        "detail": "auto",
        "meta": {"key": "value"},
    }

    content = ImageContent(**init_kwargs)

    assert content.base64_image == base64_image_string
    assert content.mime_type == "image/png"
    assert content.detail == "auto"
    assert content.meta == {"key": "value"}
    assert content.validation
|
|
|
|
|
|
def test_image_content_init_with_invalid_base64_string():
    """Expect ``ValueError`` when the payload is not valid base64."""
    bad_payload = "invalid_base64_string"

    with pytest.raises(ValueError):
        ImageContent(base64_image=bad_payload)
|
|
|
|
|
|
def test_image_content_init_with_invalid_base64_string_and_validation_false():
    """With ``validation=False`` an invalid payload is stored as given.

    The remaining fields are expected to keep their empty values.
    """
    bad_payload = "invalid_base64_string"

    content = ImageContent(base64_image=bad_payload, validation=False)

    assert content.base64_image == bad_payload
    assert content.mime_type is None
    assert content.detail is None
    assert content.meta == {}
    assert not content.validation
|
|
|
|
|
|
def test_image_content_init_with_invalid_mime_type(test_files_path, base64_image_string):
    """Expect ``ValueError`` for a non-image MIME type, explicit or not.

    Two cases are covered: an explicit ``text/xml`` MIME type, and a
    base64-encoded DOCX payload passed without any MIME type.
    """
    # Case 1: the caller explicitly passes a non-image MIME type.
    with pytest.raises(ValueError):
        ImageContent(base64_image=base64_image_string, mime_type="text/xml")

    # Case 2: no MIME type is given and the payload itself is a DOCX file.
    docx_bytes = (test_files_path / "docx" / "sample_docx.docx").read_bytes()
    encoded_docx = base64.b64encode(docx_bytes).decode("utf-8")
    with pytest.raises(ValueError):
        ImageContent(base64_image=encoded_docx)
|
|
|
|
|
|
def test_image_content_init_with_invalid_mime_type_and_validation_false(test_files_path, base64_image_string):
    """With ``validation=False`` a non-image MIME type or payload is accepted as given."""
    # Case 1: an explicit non-image MIME type is stored verbatim.
    content = ImageContent(base64_image=base64_image_string, mime_type="text/xml", validation=False)
    assert content.base64_image == base64_image_string
    assert content.mime_type == "text/xml"
    assert content.detail is None
    assert content.meta == {}
    assert not content.validation

    # Case 2: a base64-encoded DOCX payload without a MIME type; mime_type is expected to stay None.
    docx_bytes = (test_files_path / "docx" / "sample_docx.docx").read_bytes()
    encoded_docx = base64.b64encode(docx_bytes).decode("utf-8")
    content = ImageContent(base64_image=encoded_docx, validation=False)
    assert content.base64_image == encoded_docx
    assert content.mime_type is None
    assert content.detail is None
    assert content.meta == {}
    assert not content.validation
|
|
|
|
|
|
def test_image_content_mime_type_guessing(test_files_path):
    """Check MIME type handling for a JPEG payload, with and without an explicit type."""
    jpeg_bytes = (test_files_path / "images" / "apple.jpg").read_bytes()
    encoded_jpeg = base64.b64encode(jpeg_bytes).decode("utf-8")

    # No MIME type passed: the JPEG payload is expected to yield "image/jpeg".
    guessed = ImageContent(base64_image=encoded_jpeg)
    assert guessed.mime_type == "image/jpeg"

    # do not guess mime type if mime type is provided
    explicit = ImageContent(base64_image=encoded_jpeg, mime_type="image/png")
    assert explicit.mime_type == "image/png"
|
|
|
|
|
|
def test_image_content_show_in_jupyter(test_files_path):
    """Check that ``show()`` calls IPython's ``display`` once with a PIL image.

    ``is_in_jupyter`` is patched to return ``True`` for the duration of the call.
    """
    jpeg_bytes = (test_files_path / "images" / "apple.jpg").read_bytes()
    content = ImageContent(base64_image=base64.b64encode(jpeg_bytes).decode("utf-8"))

    with patch("haystack.dataclasses.image_content.is_in_jupyter", return_value=True):
        with patch("IPython.display.display") as mock_display:
            content.show()

    mock_display.assert_called_once()
    # The first positional argument handed to display() must be a PIL image.
    shown = mock_display.call_args.args[0]
    assert isinstance(shown, Image.Image)
|
|
|
|
|
|
def test_image_content_show_outside_jupyter(test_files_path):
    """Check that ``show()`` calls ``PIL.Image.Image.show`` when not in Jupyter.

    ``is_in_jupyter`` is pinned to ``False`` so the outcome does not depend on
    how the test suite is launched (for example from a notebook kernel).
    """
    image_path = test_files_path / "images" / "apple.jpg"
    with open(image_path, "rb") as image_file:
        base64_image = base64.b64encode(image_file.read()).decode("utf-8")
    image_content = ImageContent(base64_image=base64_image)

    with (
        # Force the non-Jupyter branch instead of relying on the ambient environment.
        patch("haystack.dataclasses.image_content.is_in_jupyter", return_value=False),
        # Replace the real viewer so no external image window is opened.
        patch.object(Image.Image, "show") as mock_show,
    ):
        image_content.show()

    mock_show.assert_called_once()
|
|
|
|
|
|
def test_image_content_from_file_path(test_files_path):
    """Check ``from_file_path`` on a JPEG file.

    Expects a string payload, the JPEG MIME type, the passed ``detail``, and
    the caller's meta extended with the stringified ``file_path``.
    """
    source_path = test_files_path / "images" / "apple.jpg"

    content = ImageContent.from_file_path(
        file_path=source_path, size=(100, 100), detail="high", meta={"test": "test"}
    )

    assert isinstance(content.base64_image, str)
    assert content.mime_type == "image/jpeg"
    assert content.detail == "high"
    assert content.meta == {"test": "test", "file_path": str(source_path)}
|
|
|
|
|
|
def test_image_content_from_file_path_pdf_unsupported(test_files_path, caplog):
    """Expect ``IndexError`` for a PDF input, plus log output about the failed conversion."""
    call_kwargs = {
        "file_path": test_files_path / "pdf" / "sample_pdf_1.pdf",
        "size": (100, 100),
        "detail": "high",
        "meta": {"test": "test"},
    }

    with pytest.raises(IndexError):
        ImageContent.from_file_path(**call_kwargs)

    # Checked after the raises-block: statements following the raising call inside it would never run.
    captured = caplog.text
    assert "Could not convert file" in captured
    assert "PDF" in captured
|
|
|
|
|
|
def test_image_content_from_file_path_non_existing(test_files_path, caplog):
    """Expect ``IndexError`` for a missing file, plus a "No such file" log entry."""
    caplog.set_level(logging.WARNING)
    missing_path = test_files_path / "images" / "non_existing.jpg"

    with pytest.raises(IndexError):
        ImageContent.from_file_path(file_path=missing_path)

    assert "No such file" in caplog.text
|
|
|
|
|
|
def test_image_content_from_url(test_files_path):
    """Check ``from_url`` against a mocked HTTP response carrying JPEG bytes.

    ``httpx.Client.get`` is patched, so no network access takes place.
    """
    image_url = "https://example.com/apple.jpg"
    jpeg_bytes = (test_files_path / "images" / "apple.jpg").read_bytes()
    fake_response = Mock(status_code=200, content=jpeg_bytes, headers={"Content-Type": "image/jpeg"})

    with patch("haystack.components.fetchers.link_content.httpx.Client.get", return_value=fake_response):
        content = ImageContent.from_url(url=image_url, size=(100, 100), detail="high", meta={"test": "test"})

    assert isinstance(content.base64_image, str)
    assert content.mime_type == "image/jpeg"
    assert content.detail == "high"
    assert content.meta == {"test": "test", "url": image_url, "content_type": "image/jpeg"}
|
|
|
|
|
|
def test_image_content_from_url_bad_request():
    """Expect an ``httpx.HTTPStatusError`` raised by the mocked GET to reach the caller."""
    http_error = httpx.HTTPStatusError("403 Client Error", request=Mock(), response=Mock())

    with patch("haystack.components.fetchers.link_content.httpx.Client.get", side_effect=http_error):
        with pytest.raises(httpx.HTTPStatusError):
            ImageContent.from_url(url="https://non_existent_website_dot.com/image.jpg", retry_attempts=0, timeout=1)
|
|
|
|
|
|
def test_image_content_from_url_wrong_mime_type_text():
    """Expect ``ValueError`` when the mocked response is served as ``text/plain``."""
    fake_response = Mock(status_code=200, text="a text", headers={"Content-Type": "text/plain"})

    with patch("haystack.components.fetchers.link_content.httpx.Client.get", return_value=fake_response):
        with pytest.raises(ValueError):
            ImageContent.from_url(
                url="https://example.com/text.txt", size=(100, 100), detail="high", meta={"test": "test"}
            )
|
|
|
|
|
|
def test_image_content_from_url_wrong_mime_type_pdf(test_files_path):
    """Expect ``ValueError`` when the mocked response carries PDF bytes as ``application/pdf``."""
    pdf_payload = (test_files_path / "pdf" / "sample_pdf_1.pdf").read_bytes()
    fake_response = Mock(status_code=200, content=pdf_payload, headers={"Content-Type": "application/pdf"})

    with patch("haystack.components.fetchers.link_content.httpx.Client.get", return_value=fake_response):
        with pytest.raises(ValueError):
            ImageContent.from_url(
                url="https://example.com/sample_pdf_1.pdf", size=(100, 100), detail="high", meta={"test": "test"}
            )
|
|
|
|
|
|
@pytest.mark.integration
def test_image_content_from_url_wrong_mime_type():
    """Integration check: expect ``ValueError`` for ``https://example.com``.

    Nothing is mocked here, so the call goes through the real fetching path.
    """
    request_kwargs = {"url": "https://example.com", "size": (100, 100), "detail": "high", "meta": {"test": "test"}}

    with pytest.raises(ValueError):
        ImageContent.from_url(**request_kwargs)
|