mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-03 23:20:35 +00:00

### Description

Alternative to https://github.com/Unstructured-IO/unstructured/pull/3572 that keeps all ingest tests and runs them by pulling in the latest version of unstructured-ingest.

---------

Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com>
Co-authored-by: rbiseck3 <rbiseck3@users.noreply.github.com>
Co-authored-by: Christine Straub <christinemstraub@gmail.com>
Co-authored-by: christinestraub <christinestraub@users.noreply.github.com>
42 lines
1.3 KiB
Python
42 lines
1.3 KiB
Python
from unstructured.documents.elements import Text
|
|
from unstructured.embed.mixedbreadai import (
|
|
MixedbreadAIEmbeddingConfig,
|
|
MixedbreadAIEmbeddingEncoder,
|
|
)
|
|
|
|
|
|
def test_embed_documents_does_not_break_element_to_dict(mocker):
    """Check that embedded elements still serialize through ``to_dict``.

    ``MixedbreadAIEmbeddingConfig.get_client`` is patched to hand back a mock
    client whose ``embeddings`` call returns one fake embedding per input item.
    Two ``Text`` elements are embedded, after which the test verifies that both
    come back, that their ``to_dict()["text"]`` values are unchanged, and that
    each element has a non-``None`` ``embeddings`` attribute.
    """

    def fake_embeddings(
        model,
        normalized,
        encoding_format,
        truncation_strategy,
        request_options,
        input,
    ):
        # NOTE(review): the parameter names presumably mirror the keyword
        # arguments the encoder passes to ``client.embeddings`` -- confirm
        # against the encoder before renaming any of them.
        fake_items = []
        for position in range(len(input)):
            # Each item gets a distinct two-number embedding derived from its index.
            fake_items.append(mocker.MagicMock(embedding=[position, position + 1]))

        response = mocker.MagicMock()
        response.data = fake_items
        return response

    fake_client = mocker.MagicMock()
    fake_client.embeddings.side_effect = fake_embeddings

    # Mock get_client to return our fake client
    mocker.patch.object(MixedbreadAIEmbeddingConfig, "get_client", return_value=fake_client)

    config = MixedbreadAIEmbeddingConfig(
        api_key="api_key", model_name="mixedbread-ai/mxbai-embed-large-v1"
    )
    encoder = MixedbreadAIEmbeddingEncoder(config=config)

    expected_texts = ("This is sentence 1", "This is sentence 2")
    embedded = encoder.embed_documents(
        elements=[Text(expected_texts[0]), Text(expected_texts[1])],
    )

    assert len(embedded) == 2
    # The text must survive the embedding step unchanged when serialized.
    for index, expected in enumerate(expected_texts):
        assert embedded[index].to_dict()["text"] == expected
    # Every element must have been given an embedding.
    for index in (0, 1):
        assert embedded[index].embeddings is not None
|