| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  | import logging | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  | from collections.abc import Callable, Generator, Iterable, Sequence | 
					
						
							| 
									
										
										
										
											2025-02-17 17:05:13 +08:00
										 |  |  | from typing import IO, Any, Literal, Optional, Union, cast, overload | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-11-08 09:34:11 +08:00
										 |  |  | from configs import dify_config | 
					
						
							| 
									
										
										
										
											2024-10-17 19:12:42 +08:00
										 |  |  | from core.entities.embedding_type import EmbeddingInputType | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  | from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle | 
					
						
							|  |  |  | from core.entities.provider_entities import ModelLoadBalancingConfiguration | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | from core.errors.error import ProviderTokenNotInitError | 
					
						
							|  |  |  | from core.model_runtime.callbacks.base_callback import Callback | 
					
						
							|  |  |  | from core.model_runtime.entities.llm_entities import LLMResult | 
					
						
							| 
									
										
										
										
											2024-01-12 12:34:01 +08:00
										 |  |  | from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | from core.model_runtime.entities.model_entities import ModelType | 
					
						
							|  |  |  | from core.model_runtime.entities.rerank_entities import RerankResult | 
					
						
							|  |  |  | from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  | from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeConnectionError, InvokeRateLimitError | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel | 
					
						
							|  |  |  | from core.model_runtime.model_providers.__base.moderation_model import ModerationModel | 
					
						
							|  |  |  | from core.model_runtime.model_providers.__base.rerank_model import RerankModel | 
					
						
							|  |  |  | from core.model_runtime.model_providers.__base.speech2text_model import Speech2TextModel | 
					
						
							|  |  |  | from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel | 
					
						
							| 
									
										
										
										
											2024-02-01 18:11:57 +08:00
										 |  |  | from core.model_runtime.model_providers.__base.tts_model import TTSModel | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | from core.provider_manager import ProviderManager | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  | from extensions.ext_redis import redis_client | 
					
						
							|  |  |  | from models.provider import ProviderType | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | logger = logging.getLogger(__name__) | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class ModelInstance: | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Model instance class | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self, provider_model_bundle: ProviderModelBundle, model: str) -> None: | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  |         self.provider_model_bundle = provider_model_bundle | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         self.model = model | 
					
						
							|  |  |  |         self.provider = provider_model_bundle.configuration.provider.provider | 
					
						
							|  |  |  |         self.credentials = self._fetch_credentials_from_bundle(provider_model_bundle, model) | 
					
						
							| 
									
										
										
										
											2024-04-08 18:51:46 +08:00
										 |  |  |         self.model_type_instance = self.provider_model_bundle.model_type_instance | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |         self.load_balancing_manager = self._get_load_balancing_manager( | 
					
						
							|  |  |  |             configuration=provider_model_bundle.configuration, | 
					
						
							|  |  |  |             model_type=provider_model_bundle.model_type_instance.model_type, | 
					
						
							|  |  |  |             model=model, | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |             credentials=self.credentials, | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-26 20:45:03 +09:00
										 |  |  |     @staticmethod | 
					
						
							|  |  |  |     def _fetch_credentials_from_bundle(provider_model_bundle: ProviderModelBundle, model: str) -> dict: | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         Fetch credentials from provider model bundle | 
					
						
							|  |  |  |         :param provider_model_bundle: provider model bundle | 
					
						
							|  |  |  |         :param model: model name | 
					
						
							|  |  |  |         :return: | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |         configuration = provider_model_bundle.configuration | 
					
						
							|  |  |  |         model_type = provider_model_bundle.model_type_instance.model_type | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |         credentials = configuration.get_current_credentials(model_type=model_type, model=model) | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if credentials is None: | 
					
						
							|  |  |  |             raise ProviderTokenNotInitError(f"Model {model} credentials is not initialized.") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return credentials | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-26 20:45:03 +09:00
										 |  |  |     @staticmethod | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |     def _get_load_balancing_manager( | 
					
						
							|  |  |  |         configuration: ProviderConfiguration, model_type: ModelType, model: str, credentials: dict | 
					
						
							|  |  |  |     ) -> Optional["LBModelManager"]: | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         Get load balancing model credentials | 
					
						
							|  |  |  |         :param configuration: provider configuration | 
					
						
							|  |  |  |         :param model_type: model type | 
					
						
							|  |  |  |         :param model: model name | 
					
						
							|  |  |  |         :param credentials: model credentials | 
					
						
							|  |  |  |         :return: | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         if configuration.model_settings and configuration.using_provider_type == ProviderType.CUSTOM: | 
					
						
							|  |  |  |             current_model_setting = None | 
					
						
							|  |  |  |             # check if model is disabled by admin | 
					
						
							|  |  |  |             for model_setting in configuration.model_settings: | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |                 if model_setting.model_type == model_type and model_setting.model == model: | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |                     current_model_setting = model_setting | 
					
						
							|  |  |  |                     break | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # check if load balancing is enabled | 
					
						
							|  |  |  |             if current_model_setting and current_model_setting.load_balancing_configs: | 
					
						
							|  |  |  |                 # use load balancing proxy to choose credentials | 
					
						
							|  |  |  |                 lb_model_manager = LBModelManager( | 
					
						
							|  |  |  |                     tenant_id=configuration.tenant_id, | 
					
						
							|  |  |  |                     provider=configuration.provider.provider, | 
					
						
							|  |  |  |                     model_type=model_type, | 
					
						
							|  |  |  |                     model=model, | 
					
						
							|  |  |  |                     load_balancing_configs=current_model_setting.load_balancing_configs, | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |                     managed_credentials=credentials if configuration.custom_configuration.provider else None, | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |                 ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 return lb_model_manager | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return None | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-02-17 17:05:13 +08:00
										 |  |  |     @overload | 
					
						
							|  |  |  |     def invoke_llm( | 
					
						
							|  |  |  |         self, | 
					
						
							|  |  |  |         prompt_messages: list[PromptMessage], | 
					
						
							|  |  |  |         model_parameters: Optional[dict] = None, | 
					
						
							|  |  |  |         tools: Sequence[PromptMessageTool] | None = None, | 
					
						
							|  |  |  |         stop: Optional[list[str]] = None, | 
					
						
							|  |  |  |         stream: Literal[True] = True, | 
					
						
							|  |  |  |         user: Optional[str] = None, | 
					
						
							|  |  |  |         callbacks: Optional[list[Callback]] = None, | 
					
						
							|  |  |  |     ) -> Generator: ... | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @overload | 
					
						
							|  |  |  |     def invoke_llm( | 
					
						
							|  |  |  |         self, | 
					
						
							|  |  |  |         prompt_messages: list[PromptMessage], | 
					
						
							|  |  |  |         model_parameters: Optional[dict] = None, | 
					
						
							|  |  |  |         tools: Sequence[PromptMessageTool] | None = None, | 
					
						
							|  |  |  |         stop: Optional[list[str]] = None, | 
					
						
							|  |  |  |         stream: Literal[False] = False, | 
					
						
							|  |  |  |         user: Optional[str] = None, | 
					
						
							|  |  |  |         callbacks: Optional[list[Callback]] = None, | 
					
						
							|  |  |  |     ) -> LLMResult: ... | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @overload | 
					
						
							|  |  |  |     def invoke_llm( | 
					
						
							|  |  |  |         self, | 
					
						
							|  |  |  |         prompt_messages: list[PromptMessage], | 
					
						
							|  |  |  |         model_parameters: Optional[dict] = None, | 
					
						
							|  |  |  |         tools: Sequence[PromptMessageTool] | None = None, | 
					
						
							|  |  |  |         stop: Optional[list[str]] = None, | 
					
						
							|  |  |  |         stream: bool = True, | 
					
						
							|  |  |  |         user: Optional[str] = None, | 
					
						
							|  |  |  |         callbacks: Optional[list[Callback]] = None, | 
					
						
							|  |  |  |     ) -> Union[LLMResult, Generator]: ... | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |     def invoke_llm( | 
					
						
							|  |  |  |         self, | 
					
						
							| 
									
										
										
										
											2024-11-22 16:30:22 +08:00
										 |  |  |         prompt_messages: Sequence[PromptMessage], | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |         model_parameters: Optional[dict] = None, | 
					
						
							|  |  |  |         tools: Sequence[PromptMessageTool] | None = None, | 
					
						
							| 
									
										
										
										
											2024-11-22 16:30:22 +08:00
										 |  |  |         stop: Optional[Sequence[str]] = None, | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |         stream: bool = True, | 
					
						
							|  |  |  |         user: Optional[str] = None, | 
					
						
							|  |  |  |         callbacks: Optional[list[Callback]] = None, | 
					
						
							|  |  |  |     ) -> Union[LLMResult, Generator]: | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         Invoke large language model | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         :param prompt_messages: prompt messages | 
					
						
							|  |  |  |         :param model_parameters: model parameters | 
					
						
							|  |  |  |         :param tools: tools for tool calling | 
					
						
							|  |  |  |         :param stop: stop words | 
					
						
							|  |  |  |         :param stream: is stream response | 
					
						
							|  |  |  |         :param user: unique user id | 
					
						
							|  |  |  |         :param callbacks: callbacks | 
					
						
							|  |  |  |         :return: full response or stream response chunk generator result | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         if not isinstance(self.model_type_instance, LargeLanguageModel): | 
					
						
							| 
									
										
										
										
											2024-02-08 14:11:10 +08:00
										 |  |  |             raise Exception("Model type instance is not LargeLanguageModel") | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self.model_type_instance = cast(LargeLanguageModel, self.model_type_instance) | 
					
						
							| 
									
										
										
										
											2024-12-24 18:38:51 +08:00
										 |  |  |         return cast( | 
					
						
							|  |  |  |             Union[LLMResult, Generator], | 
					
						
							|  |  |  |             self._round_robin_invoke( | 
					
						
							|  |  |  |                 function=self.model_type_instance.invoke, | 
					
						
							|  |  |  |                 model=self.model, | 
					
						
							|  |  |  |                 credentials=self.credentials, | 
					
						
							|  |  |  |                 prompt_messages=prompt_messages, | 
					
						
							|  |  |  |                 model_parameters=model_parameters, | 
					
						
							|  |  |  |                 tools=tools, | 
					
						
							|  |  |  |                 stop=stop, | 
					
						
							|  |  |  |                 stream=stream, | 
					
						
							|  |  |  |                 user=user, | 
					
						
							|  |  |  |                 callbacks=callbacks, | 
					
						
							|  |  |  |             ), | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |     def get_llm_num_tokens( | 
					
						
							| 
									
										
										
										
											2025-04-11 18:04:49 +09:00
										 |  |  |         self, prompt_messages: Sequence[PromptMessage], tools: Optional[Sequence[PromptMessageTool]] = None | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |     ) -> int: | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         Get number of tokens for llm | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         :param prompt_messages: prompt messages | 
					
						
							|  |  |  |         :param tools: tools for tool calling | 
					
						
							|  |  |  |         :return: | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         if not isinstance(self.model_type_instance, LargeLanguageModel): | 
					
						
							|  |  |  |             raise Exception("Model type instance is not LargeLanguageModel") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.model_type_instance = cast(LargeLanguageModel, self.model_type_instance) | 
					
						
							| 
									
										
										
										
											2024-12-24 18:38:51 +08:00
										 |  |  |         return cast( | 
					
						
							|  |  |  |             int, | 
					
						
							|  |  |  |             self._round_robin_invoke( | 
					
						
							|  |  |  |                 function=self.model_type_instance.get_num_tokens, | 
					
						
							|  |  |  |                 model=self.model, | 
					
						
							|  |  |  |                 credentials=self.credentials, | 
					
						
							|  |  |  |                 prompt_messages=prompt_messages, | 
					
						
							|  |  |  |                 tools=tools, | 
					
						
							|  |  |  |             ), | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-24 21:53:50 +08:00
										 |  |  |     def invoke_text_embedding( | 
					
						
							|  |  |  |         self, texts: list[str], user: Optional[str] = None, input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT | 
					
						
							|  |  |  |     ) -> TextEmbeddingResult: | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         Invoke large language model | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         :param texts: texts to embed | 
					
						
							|  |  |  |         :param user: unique user id | 
					
						
							| 
									
										
										
										
											2024-09-24 21:53:50 +08:00
										 |  |  |         :param input_type: input type | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         :return: embeddings result | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         if not isinstance(self.model_type_instance, TextEmbeddingModel): | 
					
						
							| 
									
										
										
										
											2024-02-08 14:11:10 +08:00
										 |  |  |             raise Exception("Model type instance is not TextEmbeddingModel") | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self.model_type_instance = cast(TextEmbeddingModel, self.model_type_instance) | 
					
						
							| 
									
										
										
										
											2024-12-24 18:38:51 +08:00
										 |  |  |         return cast( | 
					
						
							|  |  |  |             TextEmbeddingResult, | 
					
						
							|  |  |  |             self._round_robin_invoke( | 
					
						
							|  |  |  |                 function=self.model_type_instance.invoke, | 
					
						
							|  |  |  |                 model=self.model, | 
					
						
							|  |  |  |                 credentials=self.credentials, | 
					
						
							|  |  |  |                 texts=texts, | 
					
						
							|  |  |  |                 user=user, | 
					
						
							|  |  |  |                 input_type=input_type, | 
					
						
							|  |  |  |             ), | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-02-17 17:05:13 +08:00
										 |  |  |     def get_text_embedding_num_tokens(self, texts: list[str]) -> list[int]: | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         Get number of tokens for text embedding | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         :param texts: texts to embed | 
					
						
							|  |  |  |         :return: | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         if not isinstance(self.model_type_instance, TextEmbeddingModel): | 
					
						
							|  |  |  |             raise Exception("Model type instance is not TextEmbeddingModel") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.model_type_instance = cast(TextEmbeddingModel, self.model_type_instance) | 
					
						
							| 
									
										
										
										
											2024-12-24 18:38:51 +08:00
										 |  |  |         return cast( | 
					
						
							| 
									
										
										
										
											2025-02-17 17:05:13 +08:00
										 |  |  |             list[int], | 
					
						
							| 
									
										
										
										
											2024-12-24 18:38:51 +08:00
										 |  |  |             self._round_robin_invoke( | 
					
						
							|  |  |  |                 function=self.model_type_instance.get_num_tokens, | 
					
						
							|  |  |  |                 model=self.model, | 
					
						
							|  |  |  |                 credentials=self.credentials, | 
					
						
							|  |  |  |                 texts=texts, | 
					
						
							|  |  |  |             ), | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |     def invoke_rerank( | 
					
						
							|  |  |  |         self, | 
					
						
							|  |  |  |         query: str, | 
					
						
							|  |  |  |         docs: list[str], | 
					
						
							|  |  |  |         score_threshold: Optional[float] = None, | 
					
						
							|  |  |  |         top_n: Optional[int] = None, | 
					
						
							|  |  |  |         user: Optional[str] = None, | 
					
						
							|  |  |  |     ) -> RerankResult: | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         Invoke rerank model | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         :param query: search query | 
					
						
							|  |  |  |         :param docs: docs for reranking | 
					
						
							|  |  |  |         :param score_threshold: score threshold | 
					
						
							|  |  |  |         :param top_n: top n | 
					
						
							|  |  |  |         :param user: unique user id | 
					
						
							|  |  |  |         :return: rerank result | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         if not isinstance(self.model_type_instance, RerankModel): | 
					
						
							| 
									
										
										
										
											2024-02-08 14:11:10 +08:00
										 |  |  |             raise Exception("Model type instance is not RerankModel") | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self.model_type_instance = cast(RerankModel, self.model_type_instance) | 
					
						
							| 
									
										
										
										
											2024-12-24 18:38:51 +08:00
										 |  |  |         return cast( | 
					
						
							|  |  |  |             RerankResult, | 
					
						
							|  |  |  |             self._round_robin_invoke( | 
					
						
							|  |  |  |                 function=self.model_type_instance.invoke, | 
					
						
							|  |  |  |                 model=self.model, | 
					
						
							|  |  |  |                 credentials=self.credentials, | 
					
						
							|  |  |  |                 query=query, | 
					
						
							|  |  |  |                 docs=docs, | 
					
						
							|  |  |  |                 score_threshold=score_threshold, | 
					
						
							|  |  |  |                 top_n=top_n, | 
					
						
							|  |  |  |                 user=user, | 
					
						
							|  |  |  |             ), | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |     def invoke_moderation(self, text: str, user: Optional[str] = None) -> bool: | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         Invoke moderation model | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         :param text: text to moderate | 
					
						
							|  |  |  |         :param user: unique user id | 
					
						
							|  |  |  |         :return: false if text is safe, true otherwise | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         if not isinstance(self.model_type_instance, ModerationModel): | 
					
						
							| 
									
										
										
										
											2024-02-08 14:11:10 +08:00
										 |  |  |             raise Exception("Model type instance is not ModerationModel") | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self.model_type_instance = cast(ModerationModel, self.model_type_instance) | 
					
						
							| 
									
										
										
										
											2024-12-24 18:38:51 +08:00
										 |  |  |         return cast( | 
					
						
							|  |  |  |             bool, | 
					
						
							|  |  |  |             self._round_robin_invoke( | 
					
						
							|  |  |  |                 function=self.model_type_instance.invoke, | 
					
						
							|  |  |  |                 model=self.model, | 
					
						
							|  |  |  |                 credentials=self.credentials, | 
					
						
							|  |  |  |                 text=text, | 
					
						
							|  |  |  |                 user=user, | 
					
						
							|  |  |  |             ), | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |     def invoke_speech2text(self, file: IO[bytes], user: Optional[str] = None) -> str: | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         Invoke large language model | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         :param file: audio file | 
					
						
							|  |  |  |         :param user: unique user id | 
					
						
							|  |  |  |         :return: text for given audio file | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         if not isinstance(self.model_type_instance, Speech2TextModel): | 
					
						
							| 
									
										
										
										
											2024-02-08 14:11:10 +08:00
										 |  |  |             raise Exception("Model type instance is not Speech2TextModel") | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self.model_type_instance = cast(Speech2TextModel, self.model_type_instance) | 
					
						
							| 
									
										
										
										
											2024-12-24 18:38:51 +08:00
										 |  |  |         return cast( | 
					
						
							|  |  |  |             str, | 
					
						
							|  |  |  |             self._round_robin_invoke( | 
					
						
							|  |  |  |                 function=self.model_type_instance.invoke, | 
					
						
							|  |  |  |                 model=self.model, | 
					
						
							|  |  |  |                 credentials=self.credentials, | 
					
						
							|  |  |  |                 file=file, | 
					
						
							|  |  |  |                 user=user, | 
					
						
							|  |  |  |             ), | 
					
						
							| 
									
										
										
										
											2024-01-24 01:05:37 +08:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 10:43:49 +08:00
										 |  |  |     def invoke_tts(self, content_text: str, tenant_id: str, voice: str, user: Optional[str] = None) -> Iterable[bytes]: | 
					
						
							| 
									
										
										
										
											2024-01-24 01:05:37 +08:00
										 |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2024-02-15 22:41:18 +08:00
										 |  |  |         Invoke large language tts model | 
					
						
							| 
									
										
										
										
											2024-01-24 01:05:37 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         :param content_text: text content to be translated | 
					
						
							| 
									
										
										
										
											2024-02-15 22:41:18 +08:00
										 |  |  |         :param tenant_id: user tenant id | 
					
						
							|  |  |  |         :param voice: model timbre | 
					
						
							| 
									
										
										
										
											2024-08-16 13:19:01 +07:00
										 |  |  |         :param user: unique user id | 
					
						
							| 
									
										
										
										
											2024-01-24 01:05:37 +08:00
										 |  |  |         :return: text for given audio file | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         if not isinstance(self.model_type_instance, TTSModel): | 
					
						
							| 
									
										
										
										
											2024-02-08 14:11:10 +08:00
										 |  |  |             raise Exception("Model type instance is not TTSModel") | 
					
						
							| 
									
										
										
										
											2024-01-24 01:05:37 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self.model_type_instance = cast(TTSModel, self.model_type_instance) | 
					
						
							| 
									
										
										
										
											2024-12-24 18:38:51 +08:00
										 |  |  |         return cast( | 
					
						
							|  |  |  |             Iterable[bytes], | 
					
						
							|  |  |  |             self._round_robin_invoke( | 
					
						
							|  |  |  |                 function=self.model_type_instance.invoke, | 
					
						
							|  |  |  |                 model=self.model, | 
					
						
							|  |  |  |                 credentials=self.credentials, | 
					
						
							|  |  |  |                 content_text=content_text, | 
					
						
							|  |  |  |                 user=user, | 
					
						
							|  |  |  |                 tenant_id=tenant_id, | 
					
						
							|  |  |  |                 voice=voice, | 
					
						
							|  |  |  |             ), | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-24 18:38:51 +08:00
										 |  |  |     def _round_robin_invoke(self, function: Callable[..., Any], *args, **kwargs) -> Any: | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         Round-robin invoke | 
					
						
							|  |  |  |         :param function: function to invoke | 
					
						
							|  |  |  |         :param args: function args | 
					
						
							|  |  |  |         :param kwargs: function kwargs | 
					
						
							|  |  |  |         :return: | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         if not self.load_balancing_manager: | 
					
						
							|  |  |  |             return function(*args, **kwargs) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-24 18:38:51 +08:00
										 |  |  |         last_exception: Union[InvokeRateLimitError, InvokeAuthorizationError, InvokeConnectionError, None] = None | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |         while True: | 
					
						
							|  |  |  |             lb_config = self.load_balancing_manager.fetch_next() | 
					
						
							|  |  |  |             if not lb_config: | 
					
						
							|  |  |  |                 if not last_exception: | 
					
						
							|  |  |  |                     raise ProviderTokenNotInitError("Model credentials is not initialized.") | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     raise last_exception | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             try: | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |                 if "credentials" in kwargs: | 
					
						
							|  |  |  |                     del kwargs["credentials"] | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |                 return function(*args, **kwargs, credentials=lb_config.credentials) | 
					
						
							|  |  |  |             except InvokeRateLimitError as e: | 
					
						
							|  |  |  |                 # expire in 60 seconds | 
					
						
							|  |  |  |                 self.load_balancing_manager.cooldown(lb_config, expire=60) | 
					
						
							|  |  |  |                 last_exception = e | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             except (InvokeAuthorizationError, InvokeConnectionError) as e: | 
					
						
							|  |  |  |                 # expire in 10 seconds | 
					
						
							|  |  |  |                 self.load_balancing_manager.cooldown(lb_config, expire=10) | 
					
						
							|  |  |  |                 last_exception = e | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             except Exception as e: | 
					
						
							|  |  |  |                 raise e | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-09 20:28:24 +08:00
										 |  |  |     def get_tts_voices(self, language: Optional[str] = None) -> list: | 
					
						
							| 
									
										
										
										
											2024-02-15 22:41:18 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         Invoke large language tts model voices | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         :param language: tts language | 
					
						
							|  |  |  |         :return: tts model voices | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         if not isinstance(self.model_type_instance, TTSModel): | 
					
						
							|  |  |  |             raise Exception("Model type instance is not TTSModel") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.model_type_instance = cast(TTSModel, self.model_type_instance) | 
					
						
							|  |  |  |         return self.model_type_instance.get_tts_model_voices( | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |             model=self.model, credentials=self.credentials, language=language | 
					
						
							| 
									
										
										
										
											2024-02-15 22:41:18 +08:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | class ModelManager: | 
					
						
							|  |  |  |     def __init__(self) -> None: | 
					
						
							|  |  |  |         self._provider_manager = ProviderManager() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_model_instance(self, tenant_id: str, provider: str, model_type: ModelType, model: str) -> ModelInstance: | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         Get model instance | 
					
						
							|  |  |  |         :param tenant_id: tenant id | 
					
						
							|  |  |  |         :param provider: provider name | 
					
						
							|  |  |  |         :param model_type: model type | 
					
						
							|  |  |  |         :param model: model name | 
					
						
							|  |  |  |         :return: | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2024-01-10 20:48:16 +08:00
										 |  |  |         if not provider: | 
					
						
							|  |  |  |             return self.get_default_model_instance(tenant_id, model_type) | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         provider_model_bundle = self._provider_manager.get_provider_model_bundle( | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |             tenant_id=tenant_id, provider=provider, model_type=model_type | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return ModelInstance(provider_model_bundle, model) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-02-17 17:05:13 +08:00
										 |  |  |     def get_default_provider_model_name(self, tenant_id: str, model_type: ModelType) -> tuple[str | None, str | None]: | 
					
						
							| 
									
										
										
										
											2024-08-20 23:16:43 -04:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         Return first provider and the first model in the provider | 
					
						
							|  |  |  |         :param tenant_id: tenant id | 
					
						
							|  |  |  |         :param model_type: model type | 
					
						
							|  |  |  |         :return: provider name, model name | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         return self._provider_manager.get_first_provider_first_model(tenant_id, model_type) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |     def get_default_model_instance(self, tenant_id: str, model_type: ModelType) -> ModelInstance: | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         Get default model instance | 
					
						
							|  |  |  |         :param tenant_id: tenant id | 
					
						
							|  |  |  |         :param model_type: model type | 
					
						
							|  |  |  |         :return: | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |         default_model_entity = self._provider_manager.get_default_model(tenant_id=tenant_id, model_type=model_type) | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if not default_model_entity: | 
					
						
							|  |  |  |             raise ProviderTokenNotInitError(f"Default model not found for {model_type}") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return self.get_model_instance( | 
					
						
							|  |  |  |             tenant_id=tenant_id, | 
					
						
							|  |  |  |             provider=default_model_entity.provider.provider, | 
					
						
							|  |  |  |             model_type=model_type, | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |             model=default_model_entity.model, | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class LBModelManager: | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |     def __init__( | 
					
						
							|  |  |  |         self, | 
					
						
							|  |  |  |         tenant_id: str, | 
					
						
							|  |  |  |         provider: str, | 
					
						
							|  |  |  |         model_type: ModelType, | 
					
						
							|  |  |  |         model: str, | 
					
						
							|  |  |  |         load_balancing_configs: list[ModelLoadBalancingConfiguration], | 
					
						
							|  |  |  |         managed_credentials: Optional[dict] = None, | 
					
						
							|  |  |  |     ) -> None: | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         Load balancing model manager | 
					
						
							| 
									
										
										
										
											2024-08-16 13:19:01 +07:00
										 |  |  |         :param tenant_id: tenant_id | 
					
						
							|  |  |  |         :param provider: provider | 
					
						
							|  |  |  |         :param model_type: model_type | 
					
						
							|  |  |  |         :param model: model name | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |         :param load_balancing_configs: all load balancing configurations | 
					
						
							|  |  |  |         :param managed_credentials: credentials if load balancing configuration name is __inherit__ | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         self._tenant_id = tenant_id | 
					
						
							|  |  |  |         self._provider = provider | 
					
						
							|  |  |  |         self._model_type = model_type | 
					
						
							|  |  |  |         self._model = model | 
					
						
							|  |  |  |         self._load_balancing_configs = load_balancing_configs | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-22 22:58:22 +08:00
										 |  |  |         for load_balancing_config in self._load_balancing_configs[:]:  # Iterate over a shallow copy of the list | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |             if load_balancing_config.name == "__inherit__": | 
					
						
							|  |  |  |                 if not managed_credentials: | 
					
						
							|  |  |  |                     # remove __inherit__ if managed credentials is not provided | 
					
						
							|  |  |  |                     self._load_balancing_configs.remove(load_balancing_config) | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     load_balancing_config.credentials = managed_credentials | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def fetch_next(self) -> Optional[ModelLoadBalancingConfiguration]: | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         Get next model load balancing config | 
					
						
							|  |  |  |         Strategy: Round Robin | 
					
						
							|  |  |  |         :return: | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         cache_key = "model_lb_index:{}:{}:{}:{}".format( | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |             self._tenant_id, self._provider, self._model_type.value, self._model | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         cooldown_load_balancing_configs = [] | 
					
						
							|  |  |  |         max_index = len(self._load_balancing_configs) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         while True: | 
					
						
							|  |  |  |             current_index = redis_client.incr(cache_key) | 
					
						
							| 
									
										
										
										
											2024-06-20 15:16:21 +08:00
										 |  |  |             current_index = cast(int, current_index) | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |             if current_index >= 10000000: | 
					
						
							|  |  |  |                 current_index = 1 | 
					
						
							|  |  |  |                 redis_client.set(cache_key, current_index) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             redis_client.expire(cache_key, 3600) | 
					
						
							|  |  |  |             if current_index > max_index: | 
					
						
							|  |  |  |                 current_index = current_index % max_index | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             real_index = current_index - 1 | 
					
						
							|  |  |  |             if real_index > max_index: | 
					
						
							|  |  |  |                 real_index = 0 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-24 18:38:51 +08:00
										 |  |  |             config: ModelLoadBalancingConfiguration = self._load_balancing_configs[real_index] | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |             if self.in_cooldown(config): | 
					
						
							|  |  |  |                 cooldown_load_balancing_configs.append(config) | 
					
						
							|  |  |  |                 if len(cooldown_load_balancing_configs) >= len(self._load_balancing_configs): | 
					
						
							|  |  |  |                     # all configs are in cooldown | 
					
						
							|  |  |  |                     return None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-11-08 09:34:11 +08:00
										 |  |  |             if dify_config.DEBUG: | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |                 logger.info( | 
					
						
							|  |  |  |                     f"Model LB\nid: {config.id}\nname:{config.name}\n" | 
					
						
							|  |  |  |                     f"tenant_id: {self._tenant_id}\nprovider: {self._provider}\n" | 
					
						
							|  |  |  |                     f"model_type: {self._model_type.value}\nmodel: {self._model}" | 
					
						
							|  |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |             return config | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def cooldown(self, config: ModelLoadBalancingConfiguration, expire: int = 60) -> None: | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         Cooldown model load balancing config | 
					
						
							|  |  |  |         :param config: model load balancing config | 
					
						
							|  |  |  |         :param expire: cooldown time | 
					
						
							|  |  |  |         :return: | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         cooldown_cache_key = "model_lb_index:cooldown:{}:{}:{}:{}:{}".format( | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |             self._tenant_id, self._provider, self._model_type.value, self._model, config.id | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |         redis_client.setex(cooldown_cache_key, expire, "true") | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def in_cooldown(self, config: ModelLoadBalancingConfiguration) -> bool: | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         Check if model load balancing config is in cooldown | 
					
						
							|  |  |  |         :param config: model load balancing config | 
					
						
							|  |  |  |         :return: | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         cooldown_cache_key = "model_lb_index:cooldown:{}:{}:{}:{}:{}".format( | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |             self._tenant_id, self._provider, self._model_type.value, self._model, config.id | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-24 18:38:51 +08:00
										 |  |  |         res: bool = redis_client.exists(cooldown_cache_key) | 
					
						
							| 
									
										
										
										
											2024-06-20 15:16:21 +08:00
										 |  |  |         return res | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-26 20:45:03 +09:00
										 |  |  |     @staticmethod | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |     def get_config_in_cooldown_and_ttl( | 
					
						
							|  |  |  |         tenant_id: str, provider: str, model_type: ModelType, model: str, config_id: str | 
					
						
							|  |  |  |     ) -> tuple[bool, int]: | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         Get model load balancing config is in cooldown and ttl | 
					
						
							|  |  |  |         :param tenant_id: workspace id | 
					
						
							|  |  |  |         :param provider: provider name | 
					
						
							|  |  |  |         :param model_type: model type | 
					
						
							|  |  |  |         :param model: model name | 
					
						
							|  |  |  |         :param config_id: model load balancing config id | 
					
						
							|  |  |  |         :return: | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         cooldown_cache_key = "model_lb_index:cooldown:{}:{}:{}:{}:{}".format( | 
					
						
							| 
									
										
										
										
											2024-09-10 15:00:25 +08:00
										 |  |  |             tenant_id, provider, model_type.value, model, config_id | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         ttl = redis_client.ttl(cooldown_cache_key) | 
					
						
							|  |  |  |         if ttl == -2: | 
					
						
							|  |  |  |             return False, 0 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-20 15:16:21 +08:00
										 |  |  |         ttl = cast(int, ttl) | 
					
						
							| 
									
										
										
										
											2024-06-05 00:13:04 +08:00
										 |  |  |         return True, ttl |