unstructured/test_unstructured/embed/test_mixedbreadai.py
Roman Isecke 9049e4e2be
feat/remove ingest code, use new dep for tests (#3595)
### Description
Alternative to https://github.com/Unstructured-IO/unstructured/pull/3572
but maintaining all ingest tests, running them by pulling in the latest
version of unstructured-ingest.

---------

Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com>
Co-authored-by: rbiseck3 <rbiseck3@users.noreply.github.com>
Co-authored-by: Christine Straub <christinemstraub@gmail.com>
Co-authored-by: christinestraub <christinestraub@users.noreply.github.com>
2024-10-15 10:01:34 -05:00

42 lines
1.3 KiB
Python

from unstructured.documents.elements import Text
from unstructured.embed.mixedbreadai import (
MixedbreadAIEmbeddingConfig,
MixedbreadAIEmbeddingEncoder,
)
def test_embed_documents_does_not_break_element_to_dict(mocker):
    """Embedded elements must still serialize via ``to_dict`` and carry embeddings.

    The encoder's client is replaced with a mock whose ``embeddings`` call
    returns one stub entry per input item.
    """

    def fake_embeddings(
        model,
        normalized,
        encoding_format,
        truncation_strategy,
        request_options,
        input,
    ):
        # One stub item per input; item i carries the embedding [i, i + 1].
        items = []
        for i in range(len(input)):
            items.append(mocker.MagicMock(embedding=[i, i + 1]))
        response = mocker.MagicMock()
        response.data = items
        return response

    stub_client = mocker.MagicMock()
    stub_client.embeddings.side_effect = fake_embeddings

    # Any get_client() call on the config now hands back the stub client.
    mocker.patch.object(
        MixedbreadAIEmbeddingConfig,
        "get_client",
        return_value=stub_client,
    )

    embedding_config = MixedbreadAIEmbeddingConfig(
        api_key="api_key", model_name="mixedbread-ai/mxbai-embed-large-v1"
    )
    encoder = MixedbreadAIEmbeddingEncoder(config=embedding_config)

    sentences = ("This is sentence 1", "This is sentence 2")
    embedded = encoder.embed_documents(
        elements=[Text(sentence) for sentence in sentences],
    )

    assert len(embedded) == 2
    # Serialization must still expose the original text of each element.
    for element, sentence in zip(embedded, sentences):
        assert element.to_dict()["text"] == sentence
    # Every returned element must have had embeddings attached.
    for element in embedded:
        assert element.embeddings is not None