mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-09 01:55:55 +00:00

Original PR was #3069. Merged in to a feature branch to fix dependency and linting issues. Application code changes from the original PR were already reviewed and approved. ------------ Original PR description: Adding VoyageAI embeddings Voyage AI’s embedding models and rerankers are state-of-the-art in retrieval accuracy. --------- Co-authored-by: fzowl <160063452+fzowl@users.noreply.github.com> Co-authored-by: Liuhong99 <39693953+Liuhong99@users.noreply.github.com>
22 lines
921 B
Python
22 lines
921 B
Python
from unstructured.documents.elements import Text
|
|
from unstructured.embed.voyageai import VoyageAIEmbeddingConfig, VoyageAIEmbeddingEncoder
|
|
|
|
|
|
def test_embed_documents_does_not_break_element_to_dict(mocker):
|
|
# Mocked client with the desired behavior for embed_documents
|
|
mock_client = mocker.MagicMock()
|
|
mock_client.embed_documents.return_value = [1, 2]
|
|
|
|
# Mock create_client to return our mock_client
|
|
mocker.patch.object(VoyageAIEmbeddingEncoder, "create_client", return_value=mock_client)
|
|
|
|
encoder = VoyageAIEmbeddingEncoder(
|
|
config=VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-law-2")
|
|
)
|
|
elements = encoder.embed_documents(
|
|
elements=[Text("This is sentence 1"), Text("This is sentence 2")],
|
|
)
|
|
assert len(elements) == 2
|
|
assert elements[0].to_dict()["text"] == "This is sentence 1"
|
|
assert elements[1].to_dict()["text"] == "This is sentence 2"
|