| 
									
										
										
										
											2025-01-25 00:11:00 +01:00
										 |  |  | """
 | 
					
						
							|  |  |  | Jina Embedding Interface Module | 
					
						
							|  |  |  | ========================== | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This module provides interfaces for interacting with the Jina system, | 
					
						
							|  |  |  | including embedding capabilities. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Author: Lightrag team | 
					
						
							|  |  |  | Created: 2024-01-24 | 
					
						
							|  |  |  | License: MIT License | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Copyright (c) 2024 Lightrag | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Permission is hereby granted, free of charge, to any person obtaining a copy | 
					
						
							|  |  |  | of this software and associated documentation files (the "Software"), to deal | 
					
						
							|  |  |  | in the Software without restriction, including without limitation the rights | 
					
						
							|  |  |  | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 
					
						
							|  |  |  | copies of the Software, and to permit persons to whom the Software is | 
					
						
							|  |  |  | furnished to do so, subject to the following conditions: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Version: 1.0.0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Change Log: | 
					
						
							|  |  |  | - 1.0.0 (2024-01-24): Initial release | 
					
						
							|  |  |  |     * Added embedding generation | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Dependencies: | 
					
						
							|  |  |  |     - tenacity | 
					
						
							|  |  |  |     - numpy | 
					
						
							|  |  |  |     - pipmaster | 
					
						
							|  |  |  |     - Python >= 3.10 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Usage: | 
					
						
							|  |  |  |     from llm_interfaces.jina import jina_embed | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | __version__ = "1.0.0" | 
					
						
							|  |  |  | __author__ = "lightrag Team" | 
					
						
							|  |  |  | __status__ = "Production" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import os | 
					
						
							| 
									
										
										
										
											2025-01-25 00:55:07 +01:00
										 |  |  | import pipmaster as pm  # Pipmaster for dynamic library install | 
					
						
							| 
									
										
										
										
											2025-01-25 00:11:00 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | # install specific modules | 
					
						
							|  |  |  | if not pm.is_installed("lmdeploy"): | 
					
						
							|  |  |  |     pm.install("lmdeploy") | 
					
						
							|  |  |  | if not pm.is_installed("tenacity"): | 
					
						
							|  |  |  |     pm.install("tenacity") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import numpy as np | 
					
						
							|  |  |  | import aiohttp | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | async def fetch_data(url, headers, data): | 
					
						
							|  |  |  |     async with aiohttp.ClientSession() as session: | 
					
						
							|  |  |  |         async with session.post(url, headers=headers, json=data) as response: | 
					
						
							|  |  |  |             response_json = await response.json() | 
					
						
							|  |  |  |             data_list = response_json.get("data", []) | 
					
						
							|  |  |  |             return data_list | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | async def jina_embed( | 
					
						
							|  |  |  |     texts: list[str], | 
					
						
							|  |  |  |     dimensions: int = 1024, | 
					
						
							|  |  |  |     late_chunking: bool = False, | 
					
						
							|  |  |  |     base_url: str = None, | 
					
						
							|  |  |  |     api_key: str = None, | 
					
						
							|  |  |  | ) -> np.ndarray: | 
					
						
							|  |  |  |     if api_key: | 
					
						
							|  |  |  |         os.environ["JINA_API_KEY"] = api_key | 
					
						
							|  |  |  |     url = "https://api.jina.ai/v1/embeddings" if not base_url else base_url | 
					
						
							|  |  |  |     headers = { | 
					
						
							|  |  |  |         "Content-Type": "application/json", | 
					
						
							|  |  |  |         "Authorization": f"Bearer {os.environ['JINA_API_KEY']}", | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     data = { | 
					
						
							|  |  |  |         "model": "jina-embeddings-v3", | 
					
						
							|  |  |  |         "normalized": True, | 
					
						
							|  |  |  |         "embedding_type": "float", | 
					
						
							|  |  |  |         "dimensions": f"{dimensions}", | 
					
						
							|  |  |  |         "late_chunking": late_chunking, | 
					
						
							|  |  |  |         "input": texts, | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     data_list = await fetch_data(url, headers, data) | 
					
						
							|  |  |  |     return np.array([dp["embedding"] for dp in data_list]) |