""" Jina Embedding Interface Module ========================== This module provides interfaces for interacting with jina system, including embedding capabilities. Author: Lightrag team Created: 2024-01-24 License: MIT License Copyright (c) 2024 Lightrag Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: Version: 1.0.0 Change Log: - 1.0.0 (2024-01-24): Initial release * Added embedding generation Dependencies: - tenacity - numpy - pipmaster - Python >= 3.10 Usage: from llm_interfaces.jina import jina_embed """ __version__ = "1.0.0" __author__ = "lightrag Team" __status__ = "Production" import os import pipmaster as pm # Pipmaster for dynamic library install # install specific modules if not pm.is_installed("lmdeploy"): pm.install("lmdeploy") if not pm.is_installed("tenacity"): pm.install("tenacity") import numpy as np import aiohttp async def fetch_data(url, headers, data): async with aiohttp.ClientSession() as session: async with session.post(url, headers=headers, json=data) as response: response_json = await response.json() data_list = response_json.get("data", []) return data_list async def jina_embed( texts: list[str], dimensions: int = 1024, late_chunking: bool = False, base_url: str = None, api_key: str = None, ) -> np.ndarray: if api_key: os.environ["JINA_API_KEY"] = api_key url = "https://api.jina.ai/v1/embeddings" if not base_url else base_url headers = { "Content-Type": "application/json", "Authorization": f"Bearer {os.environ['JINA_API_KEY']}", } data = { "model": "jina-embeddings-v3", "normalized": True, "embedding_type": "float", "dimensions": f"{dimensions}", "late_chunking": late_chunking, "input": texts, } data_list = await fetch_data(url, headers, data) return np.array([dp["embedding"] for dp in data_list])