mirror of
				https://github.com/Unstructured-IO/unstructured.git
				synced 2025-10-31 10:03:07 +00:00 
			
		
		
		
	 9049e4e2be
			
		
	
	
		9049e4e2be
		
			
		
	
	
	
	
		
			
			### Description Alternative to https://github.com/Unstructured-IO/unstructured/pull/3572 but maintaining all ingest tests, running them by pulling in the latest version of unstructured-ingest. --------- Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com> Co-authored-by: rbiseck3 <rbiseck3@users.noreply.github.com> Co-authored-by: Christine Straub <christinemstraub@gmail.com> Co-authored-by: christinestraub <christinestraub@users.noreply.github.com>
		
			
				
	
	
		
			42 lines
		
	
	
		
			1.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			42 lines
		
	
	
		
			1.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from unstructured.documents.elements import Text
 | |
| from unstructured.embed.mixedbreadai import (
 | |
|     MixedbreadAIEmbeddingConfig,
 | |
|     MixedbreadAIEmbeddingEncoder,
 | |
| )
 | |
| 
 | |
| 
 | |
def test_embed_documents_does_not_break_element_to_dict(mocker):
    """Embedded elements must still serialize with ``to_dict`` and carry embeddings.

    The Mixedbread AI client is replaced with a mock, so no network call is made.
    """
    fake_client = mocker.MagicMock()

    def fake_embeddings(
        model,
        normalized,
        encoding_format,
        truncation_strategy,
        request_options,
        input,
    ):
        # One fake embedding ``[i, i + 1]`` per input item, exposed on ``.data``.
        item_count = len(input)
        fake_items = [mocker.MagicMock(embedding=[i, i + 1]) for i in range(item_count)]
        response = mocker.MagicMock()
        response.data = fake_items
        return response

    fake_client.embeddings.side_effect = fake_embeddings

    # Have the config hand out the fake client instead of a real one.
    mocker.patch.object(MixedbreadAIEmbeddingConfig, "get_client", return_value=fake_client)

    config = MixedbreadAIEmbeddingConfig(
        api_key="api_key", model_name="mixedbread-ai/mxbai-embed-large-v1"
    )
    encoder = MixedbreadAIEmbeddingEncoder(config=config)

    sentences = [Text("This is sentence 1"), Text("This is sentence 2")]
    elements = encoder.embed_documents(elements=sentences)

    assert len(elements) == 2
    # Serialization still works and preserves the original text, in order.
    assert [element.to_dict()["text"] for element in elements] == [
        "This is sentence 1",
        "This is sentence 2",
    ]
    # Every element received an embedding.
    assert all(element.embeddings is not None for element in elements)
 |