unstructured/test_unstructured/embed/test_embed_huggingface.py
Mallori Harrell 00635744ed
feat: Adds local embedding model (#1619)
This PR adds a local embedding model option as an alternative to using
our OpenAI embedding brick. This brick uses LangChain's
HuggingFaceEmbeddings.
2023-10-19 11:51:36 -05:00

24 lines
862 B
Python

from unstructured.documents.elements import Text
from unstructured.embed.huggingface import HuggingFaceEmbeddingEncoder
def test_embed_documents_does_not_break_element_to_dict(mocker):
    """Check that embedded elements still expose their text via ``to_dict()``."""
    sentences = ["This is sentence 1", "This is sentence 2"]

    # Stand-in client: its embed_documents call yields a fixed two-item list,
    # one entry for each element passed to the encoder below.
    fake_client = mocker.MagicMock()
    fake_client.embed_documents.return_value = [1, 2]

    # Patch get_huggingface_client on the encoder class so it hands back the
    # stand-in client defined above.
    mocker.patch.object(
        HuggingFaceEmbeddingEncoder,
        "get_huggingface_client",
        return_value=fake_client,
    )

    embedded = HuggingFaceEmbeddingEncoder().embed_documents(
        elements=[Text(sentence) for sentence in sentences],
    )

    assert len(embedded) == 2
    # Each returned element must serialize with its original text, in order.
    for element, sentence in zip(embedded, sentences):
        assert element.to_dict()["text"] == sentence