import logging
from collections.abc import Callable, Generator, Iterable, Sequence
from typing import IO, Any, Literal, Optional, Union, cast, overload

from configs import dify_config
from core.entities.embedding_type import EmbeddingInputType
from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
from core.entities.provider_entities import ModelLoadBalancingConfiguration
from core.errors.error import ProviderTokenNotInitError
from core.model_runtime.callbacks.base_callback import Callback
from core.model_runtime.entities.llm_entities import LLMResult
from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.entities.rerank_entities import RerankResult
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeConnectionError, InvokeRateLimitError
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.model_runtime.model_providers.__base.moderation_model import ModerationModel
from core.model_runtime.model_providers.__base.rerank_model import RerankModel
from core.model_runtime.model_providers.__base.speech2text_model import Speech2TextModel
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
from core.model_runtime.model_providers.__base.tts_model import TTSModel
from core.provider_manager import ProviderManager
from extensions.ext_redis import redis_client
from models.provider import ProviderType

logger = logging.getLogger(__name__)

class ModelInstance:
    """
    Model instance class
    """

    def __init__(self, provider_model_bundle: ProviderModelBundle, model: str) -> None:
        self.provider_model_bundle = provider_model_bundle
        self.model = model
        self.provider = provider_model_bundle.configuration.provider.provider
        self.credentials = self._fetch_credentials_from_bundle(provider_model_bundle, model)
        self.model_type_instance = self.provider_model_bundle.model_type_instance
        self.load_balancing_manager = self._get_load_balancing_manager(
            configuration=provider_model_bundle.configuration,
            model_type=provider_model_bundle.model_type_instance.model_type,
            model=model,
            credentials=self.credentials,
        )

    @staticmethod
    def _fetch_credentials_from_bundle(provider_model_bundle: ProviderModelBundle, model: str) -> dict:
        """
        Fetch credentials from provider model bundle
        :param provider_model_bundle: provider model bundle
        :param model: model name
        :return:
        """
        configuration = provider_model_bundle.configuration
        model_type = provider_model_bundle.model_type_instance.model_type
        credentials = configuration.get_current_credentials(model_type=model_type, model=model)

        if credentials is None:
            raise ProviderTokenNotInitError(f"Model {model} credentials are not initialized.")

        return credentials

    @staticmethod
    def _get_load_balancing_manager(
        configuration: ProviderConfiguration, model_type: ModelType, model: str, credentials: dict
    ) -> Optional["LBModelManager"]:
        """
        Get load balancing model credentials
        :param configuration: provider configuration
        :param model_type: model type
        :param model: model name
        :param credentials: model credentials
        :return:
        """
        if configuration.model_settings and configuration.using_provider_type == ProviderType.CUSTOM:
            current_model_setting = None
            # check if model is disabled by admin
            for model_setting in configuration.model_settings:
                if model_setting.model_type == model_type and model_setting.model == model:
                    current_model_setting = model_setting
                    break

            # check if load balancing is enabled
            if current_model_setting and current_model_setting.load_balancing_configs:
                # use load balancing proxy to choose credentials
                lb_model_manager = LBModelManager(
                    tenant_id=configuration.tenant_id,
                    provider=configuration.provider.provider,
                    model_type=model_type,
                    model=model,
                    load_balancing_configs=current_model_setting.load_balancing_configs,
                    managed_credentials=credentials if configuration.custom_configuration.provider else None,
                )

                return lb_model_manager

        return None

    @overload
    def invoke_llm(
        self,
        prompt_messages: list[PromptMessage],
        model_parameters: Optional[dict] = None,
        tools: Sequence[PromptMessageTool] | None = None,
        stop: Optional[list[str]] = None,
        stream: Literal[True] = True,
        user: Optional[str] = None,
        callbacks: Optional[list[Callback]] = None,
    ) -> Generator: ...

    @overload
    def invoke_llm(
        self,
        prompt_messages: list[PromptMessage],
        model_parameters: Optional[dict] = None,
        tools: Sequence[PromptMessageTool] | None = None,
        stop: Optional[list[str]] = None,
        stream: Literal[False] = False,
        user: Optional[str] = None,
        callbacks: Optional[list[Callback]] = None,
    ) -> LLMResult: ...

    @overload
    def invoke_llm(
        self,
        prompt_messages: list[PromptMessage],
        model_parameters: Optional[dict] = None,
        tools: Sequence[PromptMessageTool] | None = None,
        stop: Optional[list[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
        callbacks: Optional[list[Callback]] = None,
    ) -> Union[LLMResult, Generator]: ...

    def invoke_llm(
        self,
        prompt_messages: Sequence[PromptMessage],
        model_parameters: Optional[dict] = None,
        tools: Sequence[PromptMessageTool] | None = None,
        stop: Optional[Sequence[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
        callbacks: Optional[list[Callback]] = None,
    ) -> Union[LLMResult, Generator]:
        """
        Invoke large language model

        :param prompt_messages: prompt messages
        :param model_parameters: model parameters
        :param tools: tools for tool calling
        :param stop: stop words
        :param stream: is stream response
        :param user: unique user id
        :param callbacks: callbacks
        :return: full response or stream response chunk generator result
        """
        if not isinstance(self.model_type_instance, LargeLanguageModel):
            raise Exception("Model type instance is not LargeLanguageModel")

        self.model_type_instance = cast(LargeLanguageModel, self.model_type_instance)
        return cast(
            Union[LLMResult, Generator],
            self._round_robin_invoke(
                function=self.model_type_instance.invoke,
                model=self.model,
                credentials=self.credentials,
                prompt_messages=prompt_messages,
                model_parameters=model_parameters,
                tools=tools,
                stop=stop,
                stream=stream,
                user=user,
                callbacks=callbacks,
            ),
        )

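    # Usage sketch (illustrative comment, not executed): obtaining a ModelInstance via
    # ModelManager (defined below) and invoking an LLM. The tenant id, provider and
    # model names here are hypothetical placeholders.
    #
    #   from core.model_runtime.entities.message_entities import UserPromptMessage
    #
    #   model_instance = ModelManager().get_model_instance(
    #       tenant_id="tenant-1", provider="openai", model_type=ModelType.LLM, model="gpt-4o-mini"
    #   )
    #   # stream=True (the default) returns a Generator of response chunks;
    #   # stream=False returns a single LLMResult.
    #   result = model_instance.invoke_llm(
    #       prompt_messages=[UserPromptMessage(content="Hello")],
    #       model_parameters={"temperature": 0.7},
    #       stream=False,
    #   )
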
    def get_llm_num_tokens(
        self, prompt_messages: list[PromptMessage], tools: Optional[list[PromptMessageTool]] = None
    ) -> int:
        """
        Get number of tokens for llm

        :param prompt_messages: prompt messages
        :param tools: tools for tool calling
        :return:
        """
        if not isinstance(self.model_type_instance, LargeLanguageModel):
            raise Exception("Model type instance is not LargeLanguageModel")

        self.model_type_instance = cast(LargeLanguageModel, self.model_type_instance)
        return cast(
            int,
            self._round_robin_invoke(
                function=self.model_type_instance.get_num_tokens,
                model=self.model,
                credentials=self.credentials,
                prompt_messages=prompt_messages,
                tools=tools,
            ),
        )

    def invoke_text_embedding(
        self, texts: list[str], user: Optional[str] = None, input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model

        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        if not isinstance(self.model_type_instance, TextEmbeddingModel):
            raise Exception("Model type instance is not TextEmbeddingModel")

        self.model_type_instance = cast(TextEmbeddingModel, self.model_type_instance)
        return cast(
            TextEmbeddingResult,
            self._round_robin_invoke(
                function=self.model_type_instance.invoke,
                model=self.model,
                credentials=self.credentials,
                texts=texts,
                user=user,
                input_type=input_type,
            ),
        )

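    # Usage sketch (comment only): embedding a batch of texts with a hypothetical
    # embedding_instance. Query-side lookups can pass input_type=EmbeddingInputType.QUERY;
    # the default DOCUMENT is meant for indexing passages.
    #
    #   result = embedding_instance.invoke_text_embedding(texts=["first passage", "second passage"])
    #   vectors = result.embeddings  # one vector per input text
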
    def get_text_embedding_num_tokens(self, texts: list[str]) -> list[int]:
        """
        Get number of tokens for text embedding

        :param texts: texts to embed
        :return:
        """
        if not isinstance(self.model_type_instance, TextEmbeddingModel):
            raise Exception("Model type instance is not TextEmbeddingModel")

        self.model_type_instance = cast(TextEmbeddingModel, self.model_type_instance)
        return cast(
            list[int],
            self._round_robin_invoke(
                function=self.model_type_instance.get_num_tokens,
                model=self.model,
                credentials=self.credentials,
                texts=texts,
            ),
        )

    def invoke_rerank(
        self,
        query: str,
        docs: list[str],
        score_threshold: Optional[float] = None,
        top_n: Optional[int] = None,
        user: Optional[str] = None,
    ) -> RerankResult:
        """
        Invoke rerank model

        :param query: search query
        :param docs: docs for reranking
        :param score_threshold: score threshold
        :param top_n: top n
        :param user: unique user id
        :return: rerank result
        """
        if not isinstance(self.model_type_instance, RerankModel):
            raise Exception("Model type instance is not RerankModel")

        self.model_type_instance = cast(RerankModel, self.model_type_instance)
        return cast(
            RerankResult,
            self._round_robin_invoke(
                function=self.model_type_instance.invoke,
                model=self.model,
                credentials=self.credentials,
                query=query,
                docs=docs,
                score_threshold=score_threshold,
                top_n=top_n,
                user=user,
            ),
        )

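    # Usage sketch (comment only): reranking retrieved documents against a query with a
    # hypothetical rerank_instance. score_threshold and top_n are optional filters
    # applied by the underlying rerank model.
    #
    #   rerank_result = rerank_instance.invoke_rerank(
    #       query="how do I reset my password?",
    #       docs=["doc a ...", "doc b ..."],
    #       score_threshold=0.5,
    #       top_n=3,
    #   )
    #   # rerank_result.docs holds the scored documents; ordering and scoring are
    #   # provider-dependent.
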
    def invoke_moderation(self, text: str, user: Optional[str] = None) -> bool:
        """
        Invoke moderation model

        :param text: text to moderate
        :param user: unique user id
        :return: false if text is safe, true otherwise
        """
        if not isinstance(self.model_type_instance, ModerationModel):
            raise Exception("Model type instance is not ModerationModel")

        self.model_type_instance = cast(ModerationModel, self.model_type_instance)
        return cast(
            bool,
            self._round_robin_invoke(
                function=self.model_type_instance.invoke,
                model=self.model,
                credentials=self.credentials,
                text=text,
                user=user,
            ),
        )

    def invoke_speech2text(self, file: IO[bytes], user: Optional[str] = None) -> str:
        """
        Invoke speech-to-text model

        :param file: audio file
        :param user: unique user id
        :return: text for given audio file
        """
        if not isinstance(self.model_type_instance, Speech2TextModel):
            raise Exception("Model type instance is not Speech2TextModel")

        self.model_type_instance = cast(Speech2TextModel, self.model_type_instance)
        return cast(
            str,
            self._round_robin_invoke(
                function=self.model_type_instance.invoke,
                model=self.model,
                credentials=self.credentials,
                file=file,
                user=user,
            ),
        )

    def invoke_tts(self, content_text: str, tenant_id: str, voice: str, user: Optional[str] = None) -> Iterable[bytes]:
        """
        Invoke TTS model

        :param content_text: text content to be converted to speech
        :param tenant_id: user tenant id
        :param voice: model timbre
        :param user: unique user id
        :return: audio chunks for the given text
        """
        if not isinstance(self.model_type_instance, TTSModel):
            raise Exception("Model type instance is not TTSModel")

        self.model_type_instance = cast(TTSModel, self.model_type_instance)
        return cast(
            Iterable[bytes],
            self._round_robin_invoke(
                function=self.model_type_instance.invoke,
                model=self.model,
                credentials=self.credentials,
                content_text=content_text,
                user=user,
                tenant_id=tenant_id,
                voice=voice,
            ),
        )

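    # Usage sketch (comment only): invoke_tts yields audio byte chunks that can be
    # streamed to a response or written to a file. The voice name is a hypothetical
    # placeholder; the audio container/codec depends on the provider.
    #
    #   audio_chunks = tts_instance.invoke_tts(
    #       content_text="Hello, world.", tenant_id="tenant-1", voice="voice-1"
    #   )
    #   with open("out.mp3", "wb") as f:
    #       for chunk in audio_chunks:
    #           f.write(chunk)
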
    def _round_robin_invoke(self, function: Callable[..., Any], *args, **kwargs) -> Any:
        """
        Round-robin invoke
        :param function: function to invoke
        :param args: function args
        :param kwargs: function kwargs
        :return:
        """
        if not self.load_balancing_manager:
            return function(*args, **kwargs)

        last_exception: Union[InvokeRateLimitError, InvokeAuthorizationError, InvokeConnectionError, None] = None
        while True:
            lb_config = self.load_balancing_manager.fetch_next()
            if not lb_config:
                if not last_exception:
                    raise ProviderTokenNotInitError("Model credentials are not initialized.")
                else:
                    raise last_exception

            try:
                if "credentials" in kwargs:
                    del kwargs["credentials"]
                return function(*args, **kwargs, credentials=lb_config.credentials)
            except InvokeRateLimitError as e:
                # rate limited: cool this config down for 60 seconds
                self.load_balancing_manager.cooldown(lb_config, expire=60)
                last_exception = e
                continue
            except (InvokeAuthorizationError, InvokeConnectionError) as e:
                # auth/connection failure: cool this config down for 10 seconds
                self.load_balancing_manager.cooldown(lb_config, expire=10)
                last_exception = e
                continue
            except Exception:
                raise

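    # Behavior note: with load balancing enabled, a rate-limited credential is cooled
    # down for 60 seconds, and an auth/connection failure for 10 seconds, before the
    # next config is tried. For example, with configs [A, B] where A is rate-limited,
    # the call transparently retries with B; only once every config is in cooldown is
    # the last caught error re-raised to the caller.
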
    def get_tts_voices(self, language: Optional[str] = None) -> list:
        """
        Get TTS model voices

        :param language: tts language
        :return: tts model voices
        """
        if not isinstance(self.model_type_instance, TTSModel):
            raise Exception("Model type instance is not TTSModel")

        self.model_type_instance = cast(TTSModel, self.model_type_instance)
        return self.model_type_instance.get_tts_model_voices(
            model=self.model, credentials=self.credentials, language=language
        )


class ModelManager:
    def __init__(self) -> None:
        self._provider_manager = ProviderManager()

    def get_model_instance(self, tenant_id: str, provider: str, model_type: ModelType, model: str) -> ModelInstance:
        """
        Get model instance
        :param tenant_id: tenant id
        :param provider: provider name
        :param model_type: model type
        :param model: model name
        :return:
        """
        if not provider:
            return self.get_default_model_instance(tenant_id, model_type)

        provider_model_bundle = self._provider_manager.get_provider_model_bundle(
            tenant_id=tenant_id, provider=provider, model_type=model_type
        )

        return ModelInstance(provider_model_bundle, model)

    def get_default_provider_model_name(self, tenant_id: str, model_type: ModelType) -> tuple[str | None, str | None]:
        """
        Return the first provider and the first model in that provider
        :param tenant_id: tenant id
        :param model_type: model type
        :return: provider name, model name
        """
        return self._provider_manager.get_first_provider_first_model(tenant_id, model_type)

    def get_default_model_instance(self, tenant_id: str, model_type: ModelType) -> ModelInstance:
        """
        Get default model instance
        :param tenant_id: tenant id
        :param model_type: model type
        :return:
        """
        default_model_entity = self._provider_manager.get_default_model(tenant_id=tenant_id, model_type=model_type)

        if not default_model_entity:
            raise ProviderTokenNotInitError(f"Default model not found for {model_type}")

        return self.get_model_instance(
            tenant_id=tenant_id,
            provider=default_model_entity.provider.provider,
            model_type=model_type,
            model=default_model_entity.model,
        )

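# Usage sketch (comment only): ModelManager is the usual entry point for obtaining a
# ModelInstance. Passing an empty provider falls back to the tenant's configured
# default model for the given model type. The tenant id is a hypothetical placeholder.
#
#   manager = ModelManager()
#   llm = manager.get_model_instance(
#       tenant_id="tenant-1", provider="", model_type=ModelType.LLM, model=""
#   )
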

class LBModelManager:
    def __init__(
        self,
        tenant_id: str,
        provider: str,
        model_type: ModelType,
        model: str,
        load_balancing_configs: list[ModelLoadBalancingConfiguration],
        managed_credentials: Optional[dict] = None,
    ) -> None:
        """
        Load balancing model manager
        :param tenant_id: tenant_id
        :param provider: provider
        :param model_type: model_type
        :param model: model name
        :param load_balancing_configs: all load balancing configurations
        :param managed_credentials: credentials if load balancing configuration name is __inherit__
        """
        self._tenant_id = tenant_id
        self._provider = provider
        self._model_type = model_type
        self._model = model
        self._load_balancing_configs = load_balancing_configs

        for load_balancing_config in self._load_balancing_configs[:]:  # iterate over a shallow copy of the list
            if load_balancing_config.name == "__inherit__":
                if not managed_credentials:
                    # remove __inherit__ if managed credentials are not provided
                    self._load_balancing_configs.remove(load_balancing_config)
                else:
                    load_balancing_config.credentials = managed_credentials

    def fetch_next(self) -> Optional[ModelLoadBalancingConfiguration]:
        """
        Get next model load balancing config
        Strategy: Round Robin
        :return:
        """
        cache_key = "model_lb_index:{}:{}:{}:{}".format(
            self._tenant_id, self._provider, self._model_type.value, self._model
        )

        cooldown_load_balancing_configs = []
        max_index = len(self._load_balancing_configs)

        while True:
            current_index = redis_client.incr(cache_key)
            current_index = cast(int, current_index)
            if current_index >= 10000000:
                current_index = 1
                redis_client.set(cache_key, current_index)

            redis_client.expire(cache_key, 3600)
            if current_index > max_index:
                current_index = current_index % max_index

            real_index = current_index - 1
            if real_index > max_index:
                real_index = 0

            config: ModelLoadBalancingConfiguration = self._load_balancing_configs[real_index]

            if self.in_cooldown(config):
                cooldown_load_balancing_configs.append(config)
                if len(cooldown_load_balancing_configs) >= len(self._load_balancing_configs):
                    # all configs are in cooldown
                    return None

                continue

            if dify_config.DEBUG:
                logger.info(
                    f"Model LB\nid: {config.id}\nname:{config.name}\n"
                    f"tenant_id: {self._tenant_id}\nprovider: {self._provider}\n"
                    f"model_type: {self._model_type.value}\nmodel: {self._model}"
                )

            return config

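    # Index walk-through: the counter lives in Redis, so the rotation is shared across
    # workers. With 3 configs, INCR yields 1, 2, 3, 4, 5, 6, ... mapping to real_index
    # 0, 1, 2, 0 (4 % 3 = 1), 1, -1 (6 % 3 = 0); Python resolves index -1 to the last
    # config, so every entry is still visited in turn.
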
    def cooldown(self, config: ModelLoadBalancingConfiguration, expire: int = 60) -> None:
        """
        Cooldown model load balancing config
        :param config: model load balancing config
        :param expire: cooldown time in seconds
        :return:
        """
        cooldown_cache_key = "model_lb_index:cooldown:{}:{}:{}:{}:{}".format(
            self._tenant_id, self._provider, self._model_type.value, self._model, config.id
        )

        redis_client.setex(cooldown_cache_key, expire, "true")

    def in_cooldown(self, config: ModelLoadBalancingConfiguration) -> bool:
        """
        Check if model load balancing config is in cooldown
        :param config: model load balancing config
        :return:
        """
        cooldown_cache_key = "model_lb_index:cooldown:{}:{}:{}:{}:{}".format(
            self._tenant_id, self._provider, self._model_type.value, self._model, config.id
        )

        res: bool = redis_client.exists(cooldown_cache_key)
        return res

    @staticmethod
    def get_config_in_cooldown_and_ttl(
        tenant_id: str, provider: str, model_type: ModelType, model: str, config_id: str
    ) -> tuple[bool, int]:
        """
        Check whether a model load balancing config is in cooldown and get the remaining TTL
        :param tenant_id: workspace id
        :param provider: provider name
        :param model_type: model type
        :param model: model name
        :param config_id: model load balancing config id
        :return:
        """
        cooldown_cache_key = "model_lb_index:cooldown:{}:{}:{}:{}:{}".format(
            tenant_id, provider, model_type.value, model, config_id
        )

        ttl = redis_client.ttl(cooldown_cache_key)
        if ttl == -2:
            return False, 0

        ttl = cast(int, ttl)
        return True, ttl
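
# Usage sketch (comment only): checking the cooldown state of a single load balancing
# config, e.g. to surface the remaining cooldown in an admin view. All identifiers are
# hypothetical placeholders.
#
#   in_cooldown, ttl = LBModelManager.get_config_in_cooldown_and_ttl(
#       tenant_id="tenant-1",
#       provider="openai",
#       model_type=ModelType.LLM,
#       model="gpt-4o-mini",
#       config_id="lb-config-1",
#   )
#   if in_cooldown:
#       print(f"config cooling down for another {ttl}s")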