| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | import os | 
					
						
							| 
									
										
										
										
											2024-04-18 20:24:05 +08:00
										 |  |  | from collections.abc import Generator | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-12 12:34:01 +08:00
										 |  |  | import pytest | 
					
						
							| 
									
										
										
										
											2024-04-18 20:24:05 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-12 12:34:01 +08:00
										 |  |  | from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta | 
					
						
							| 
									
										
										
										
											2024-04-18 20:24:05 +08:00
										 |  |  | from core.model_runtime.entities.message_entities import ( | 
					
						
							|  |  |  |     AssistantPromptMessage, | 
					
						
							|  |  |  |     PromptMessageTool, | 
					
						
							|  |  |  |     SystemPromptMessage, | 
					
						
							|  |  |  |     UserPromptMessage, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | from core.model_runtime.errors.validate import CredentialsValidateFailedError | 
					
						
							|  |  |  | from core.model_runtime.model_providers.xinference.llm.llm import XinferenceAILargeLanguageModel | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """FOR MOCK FIXTURES, DO NOT REMOVE""" | 
					
						
							|  |  |  | from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock | 
					
						
							|  |  |  | from tests.integration_tests.model_runtime.__mock.xinference import setup_xinference_mock | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-12 12:34:01 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-12 18:09:16 +08:00
										 |  |  | @pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("chat", "none")], indirect=True) | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | def test_validate_credentials_for_chat_model(setup_openai_mock, setup_xinference_mock): | 
					
						
							|  |  |  |     model = XinferenceAILargeLanguageModel() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     with pytest.raises(CredentialsValidateFailedError): | 
					
						
							|  |  |  |         model.validate_credentials( | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |             model="ChatGLM3", | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |             credentials={ | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |                 "server_url": os.environ.get("XINFERENCE_SERVER_URL"), | 
					
						
							|  |  |  |                 "model_uid": "www " + os.environ.get("XINFERENCE_CHAT_MODEL_UID"), | 
					
						
							|  |  |  |             }, | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     with pytest.raises(CredentialsValidateFailedError): | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         model.validate_credentials(model="aaaaa", credentials={"server_url": "", "model_uid": ""}) | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     model.validate_credentials( | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         model="ChatGLM3", | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         credentials={ | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |             "server_url": os.environ.get("XINFERENCE_SERVER_URL"), | 
					
						
							|  |  |  |             "model_uid": os.environ.get("XINFERENCE_CHAT_MODEL_UID"), | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-12 18:09:16 +08:00
										 |  |  | @pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("chat", "none")], indirect=True) | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | def test_invoke_chat_model(setup_openai_mock, setup_xinference_mock): | 
					
						
							|  |  |  |     model = XinferenceAILargeLanguageModel() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     response = model.invoke( | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         model="ChatGLM3", | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         credentials={ | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |             "server_url": os.environ.get("XINFERENCE_SERVER_URL"), | 
					
						
							|  |  |  |             "model_uid": os.environ.get("XINFERENCE_CHAT_MODEL_UID"), | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         }, | 
					
						
							|  |  |  |         prompt_messages=[ | 
					
						
							|  |  |  |             SystemPromptMessage( | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |                 content="You are a helpful AI assistant.", | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |             ), | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |             UserPromptMessage(content="Hello World!"), | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         ], | 
					
						
							|  |  |  |         model_parameters={ | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |             "temperature": 0.7, | 
					
						
							|  |  |  |             "top_p": 1.0, | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         stop=["you"], | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         user="abc-123", | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         stream=False, | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     assert isinstance(response, LLMResult) | 
					
						
							|  |  |  |     assert len(response.message.content) > 0 | 
					
						
							|  |  |  |     assert response.usage.total_tokens > 0 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-12 18:09:16 +08:00
										 |  |  | @pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("chat", "none")], indirect=True) | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | def test_invoke_stream_chat_model(setup_openai_mock, setup_xinference_mock): | 
					
						
							|  |  |  |     model = XinferenceAILargeLanguageModel() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     response = model.invoke( | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         model="ChatGLM3", | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         credentials={ | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |             "server_url": os.environ.get("XINFERENCE_SERVER_URL"), | 
					
						
							|  |  |  |             "model_uid": os.environ.get("XINFERENCE_CHAT_MODEL_UID"), | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         }, | 
					
						
							|  |  |  |         prompt_messages=[ | 
					
						
							|  |  |  |             SystemPromptMessage( | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |                 content="You are a helpful AI assistant.", | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |             ), | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |             UserPromptMessage(content="Hello World!"), | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         ], | 
					
						
							|  |  |  |         model_parameters={ | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |             "temperature": 0.7, | 
					
						
							|  |  |  |             "top_p": 1.0, | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         stop=["you"], | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         stream=True, | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         user="abc-123", | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     assert isinstance(response, Generator) | 
					
						
							|  |  |  |     for chunk in response: | 
					
						
							|  |  |  |         assert isinstance(chunk, LLMResultChunk) | 
					
						
							|  |  |  |         assert isinstance(chunk.delta, LLMResultChunkDelta) | 
					
						
							|  |  |  |         assert isinstance(chunk.delta.message, AssistantPromptMessage) | 
					
						
							|  |  |  |         assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | """
 | 
					
						
							| 
									
										
										
										
											2024-09-08 12:14:11 +07:00
										 |  |  |     Function calling in Xinference does not currently support stream mode. | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | """
 | 
					
						
							|  |  |  | # def test_invoke_stream_chat_model_with_functions(): | 
					
						
							|  |  |  | #     model = XinferenceAILargeLanguageModel() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #     response = model.invoke( | 
					
						
							|  |  |  | #         model='ChatGLM3-6b', | 
					
						
							|  |  |  | #         credentials={ | 
					
						
							|  |  |  | #             'server_url': os.environ.get('XINFERENCE_SERVER_URL'), | 
					
						
							|  |  |  | #             'model_type': 'text-generation', | 
					
						
							|  |  |  | #             'model_name': 'ChatGLM3', | 
					
						
							|  |  |  | #             'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID') | 
					
						
							|  |  |  | #         }, | 
					
						
							|  |  |  | #         prompt_messages=[ | 
					
						
							|  |  |  | #             SystemPromptMessage( | 
					
						
							|  |  |  | #                 content='你是一个天气机器人,可以通过调用函数来获取天气信息', | 
					
						
							|  |  |  | #             ), | 
					
						
							|  |  |  | #             UserPromptMessage( | 
					
						
							|  |  |  | #                 content='波士顿天气如何?' | 
					
						
							|  |  |  | #             ) | 
					
						
							|  |  |  | #         ], | 
					
						
							|  |  |  | #         model_parameters={ | 
					
						
							|  |  |  | #             'temperature': 0, | 
					
						
							|  |  |  | #             'top_p': 1.0, | 
					
						
							|  |  |  | #         }, | 
					
						
							|  |  |  | #         stop=['you'], | 
					
						
							|  |  |  | #         user='abc-123', | 
					
						
							|  |  |  | #         stream=True, | 
					
						
							|  |  |  | #         tools=[ | 
					
						
							|  |  |  | #             PromptMessageTool( | 
					
						
							|  |  |  | #                 name='get_current_weather', | 
					
						
							|  |  |  | #                 description='Get the current weather in a given location', | 
					
						
							|  |  |  | #                 parameters={ | 
					
						
							|  |  |  | #                     "type": "object", | 
					
						
							|  |  |  | #                     "properties": { | 
					
						
							|  |  |  | #                         "location": { | 
					
						
							|  |  |  | #                         "type": "string", | 
					
						
							|  |  |  | #                             "description": "The city and state e.g. San Francisco, CA" | 
					
						
							|  |  |  | #                         }, | 
					
						
							|  |  |  | #                         "unit": { | 
					
						
							|  |  |  | #                             "type": "string", | 
					
						
							|  |  |  | #                             "enum": ["celsius", "fahrenheit"] | 
					
						
							|  |  |  | #                         } | 
					
						
							|  |  |  | #                     }, | 
					
						
							|  |  |  | #                     "required": [ | 
					
						
							|  |  |  | #                         "location" | 
					
						
							|  |  |  | #                     ] | 
					
						
							|  |  |  | #                 } | 
					
						
							|  |  |  | #             ) | 
					
						
							|  |  |  | #         ] | 
					
						
							|  |  |  | #     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #     assert isinstance(response, Generator) | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | #     call: LLMResultChunk = None | 
					
						
							|  |  |  | #     chunks = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #     for chunk in response: | 
					
						
							|  |  |  | #         chunks.append(chunk) | 
					
						
							|  |  |  | #         assert isinstance(chunk, LLMResultChunk) | 
					
						
							|  |  |  | #         assert isinstance(chunk.delta, LLMResultChunkDelta) | 
					
						
							|  |  |  | #         assert isinstance(chunk.delta.message, AssistantPromptMessage) | 
					
						
							|  |  |  | #         assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #         if chunk.delta.message.tool_calls and len(chunk.delta.message.tool_calls) > 0: | 
					
						
							|  |  |  | #             call = chunk | 
					
						
							|  |  |  | #             break | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #     assert call is not None | 
					
						
							|  |  |  | #     assert call.delta.message.tool_calls[0].function.name == 'get_current_weather' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # def test_invoke_chat_model_with_functions(): | 
					
						
							|  |  |  | #     model = XinferenceAILargeLanguageModel() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #     response = model.invoke( | 
					
						
							|  |  |  | #         model='ChatGLM3-6b', | 
					
						
							|  |  |  | #         credentials={ | 
					
						
							|  |  |  | #             'server_url': os.environ.get('XINFERENCE_SERVER_URL'), | 
					
						
							|  |  |  | #             'model_type': 'text-generation', | 
					
						
							|  |  |  | #             'model_name': 'ChatGLM3', | 
					
						
							|  |  |  | #             'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID') | 
					
						
							|  |  |  | #         }, | 
					
						
							|  |  |  | #         prompt_messages=[ | 
					
						
							|  |  |  | #             UserPromptMessage( | 
					
						
							|  |  |  | #                 content='What is the weather like in San Francisco?' | 
					
						
							|  |  |  | #             ) | 
					
						
							|  |  |  | #         ], | 
					
						
							|  |  |  | #         model_parameters={ | 
					
						
							|  |  |  | #             'temperature': 0.7, | 
					
						
							|  |  |  | #             'top_p': 1.0, | 
					
						
							|  |  |  | #         }, | 
					
						
							|  |  |  | #         stop=['you'], | 
					
						
							|  |  |  | #         user='abc-123', | 
					
						
							|  |  |  | #         stream=False, | 
					
						
							|  |  |  | #         tools=[ | 
					
						
							|  |  |  | #             PromptMessageTool( | 
					
						
							|  |  |  | #                 name='get_current_weather', | 
					
						
							|  |  |  | #                 description='Get the current weather in a given location', | 
					
						
							|  |  |  | #                 parameters={ | 
					
						
							|  |  |  | #                     "type": "object", | 
					
						
							|  |  |  | #                     "properties": { | 
					
						
							|  |  |  | #                         "location": { | 
					
						
							|  |  |  | #                         "type": "string", | 
					
						
							|  |  |  | #                             "description": "The city and state e.g. San Francisco, CA" | 
					
						
							|  |  |  | #                         }, | 
					
						
							|  |  |  | #                         "unit": { | 
					
						
							|  |  |  | #                             "type": "string", | 
					
						
							|  |  |  | #                             "enum": [ | 
					
						
							|  |  |  | #                                 "c", | 
					
						
							|  |  |  | #                                 "f" | 
					
						
							|  |  |  | #                             ] | 
					
						
							|  |  |  | #                         } | 
					
						
							|  |  |  | #                     }, | 
					
						
							|  |  |  | #                     "required": [ | 
					
						
							|  |  |  | #                         "location" | 
					
						
							|  |  |  | #                     ] | 
					
						
							|  |  |  | #                 } | 
					
						
							|  |  |  | #             ) | 
					
						
							|  |  |  | #         ] | 
					
						
							|  |  |  | #     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #     assert isinstance(response, LLMResult) | 
					
						
							|  |  |  | #     assert len(response.message.content) > 0 | 
					
						
							|  |  |  | #     assert response.usage.total_tokens > 0 | 
					
						
							|  |  |  | #     assert response.message.tool_calls[0].function.name == 'get_current_weather' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-12 18:09:16 +08:00
										 |  |  | @pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("completion", "none")], indirect=True) | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | def test_validate_credentials_for_generation_model(setup_openai_mock, setup_xinference_mock): | 
					
						
							|  |  |  |     model = XinferenceAILargeLanguageModel() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     with pytest.raises(CredentialsValidateFailedError): | 
					
						
							|  |  |  |         model.validate_credentials( | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |             model="alapaca", | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |             credentials={ | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |                 "server_url": os.environ.get("XINFERENCE_SERVER_URL"), | 
					
						
							|  |  |  |                 "model_uid": "www " + os.environ.get("XINFERENCE_GENERATION_MODEL_UID"), | 
					
						
							|  |  |  |             }, | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     with pytest.raises(CredentialsValidateFailedError): | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         model.validate_credentials(model="alapaca", credentials={"server_url": "", "model_uid": ""}) | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     model.validate_credentials( | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         model="alapaca", | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         credentials={ | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |             "server_url": os.environ.get("XINFERENCE_SERVER_URL"), | 
					
						
							|  |  |  |             "model_uid": os.environ.get("XINFERENCE_GENERATION_MODEL_UID"), | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-12 18:09:16 +08:00
										 |  |  | @pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("completion", "none")], indirect=True) | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | def test_invoke_generation_model(setup_openai_mock, setup_xinference_mock): | 
					
						
							|  |  |  |     model = XinferenceAILargeLanguageModel() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     response = model.invoke( | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         model="alapaca", | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         credentials={ | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |             "server_url": os.environ.get("XINFERENCE_SERVER_URL"), | 
					
						
							|  |  |  |             "model_uid": os.environ.get("XINFERENCE_GENERATION_MODEL_UID"), | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         prompt_messages=[UserPromptMessage(content="the United States is")], | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         model_parameters={ | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |             "temperature": 0.7, | 
					
						
							|  |  |  |             "top_p": 1.0, | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         stop=["you"], | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         user="abc-123", | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         stream=False, | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     assert isinstance(response, LLMResult) | 
					
						
							|  |  |  |     assert len(response.message.content) > 0 | 
					
						
							|  |  |  |     assert response.usage.total_tokens > 0 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-12 18:09:16 +08:00
										 |  |  | @pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("completion", "none")], indirect=True) | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | def test_invoke_stream_generation_model(setup_openai_mock, setup_xinference_mock): | 
					
						
							|  |  |  |     model = XinferenceAILargeLanguageModel() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     response = model.invoke( | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         model="alapaca", | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         credentials={ | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |             "server_url": os.environ.get("XINFERENCE_SERVER_URL"), | 
					
						
							|  |  |  |             "model_uid": os.environ.get("XINFERENCE_GENERATION_MODEL_UID"), | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         prompt_messages=[UserPromptMessage(content="the United States is")], | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         model_parameters={ | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |             "temperature": 0.7, | 
					
						
							|  |  |  |             "top_p": 1.0, | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         stop=["you"], | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         stream=True, | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         user="abc-123", | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     assert isinstance(response, Generator) | 
					
						
							|  |  |  |     for chunk in response: | 
					
						
							|  |  |  |         assert isinstance(chunk, LLMResultChunk) | 
					
						
							|  |  |  |         assert isinstance(chunk.delta, LLMResultChunkDelta) | 
					
						
							|  |  |  |         assert isinstance(chunk.delta.message, AssistantPromptMessage) | 
					
						
							|  |  |  |         assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  | def test_get_num_tokens(): | 
					
						
							|  |  |  |     model = XinferenceAILargeLanguageModel() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     num_tokens = model.get_num_tokens( | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         model="ChatGLM3", | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         credentials={ | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |             "server_url": os.environ.get("XINFERENCE_SERVER_URL"), | 
					
						
							|  |  |  |             "model_uid": os.environ.get("XINFERENCE_GENERATION_MODEL_UID"), | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         }, | 
					
						
							|  |  |  |         prompt_messages=[ | 
					
						
							|  |  |  |             SystemPromptMessage( | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |                 content="You are a helpful AI assistant.", | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |             ), | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |             UserPromptMessage(content="Hello World!"), | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         ], | 
					
						
							|  |  |  |         tools=[ | 
					
						
							|  |  |  |             PromptMessageTool( | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |                 name="get_current_weather", | 
					
						
							|  |  |  |                 description="Get the current weather in a given location", | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |                 parameters={ | 
					
						
							|  |  |  |                     "type": "object", | 
					
						
							|  |  |  |                     "properties": { | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |                         "location": {"type": "string", "description": "The city and state e.g. San Francisco, CA"}, | 
					
						
							|  |  |  |                         "unit": {"type": "string", "enum": ["c", "f"]}, | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |                     }, | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |                     "required": ["location"], | 
					
						
							|  |  |  |                 }, | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |             ) | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         ], | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     assert isinstance(num_tokens, int) | 
					
						
							|  |  |  |     assert num_tokens == 77 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     num_tokens = model.get_num_tokens( | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |         model="ChatGLM3", | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         credentials={ | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |             "server_url": os.environ.get("XINFERENCE_SERVER_URL"), | 
					
						
							|  |  |  |             "model_uid": os.environ.get("XINFERENCE_GENERATION_MODEL_UID"), | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         }, | 
					
						
							|  |  |  |         prompt_messages=[ | 
					
						
							|  |  |  |             SystemPromptMessage( | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |                 content="You are a helpful AI assistant.", | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |             ), | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |             UserPromptMessage(content="Hello World!"), | 
					
						
							| 
									
										
										
										
											2024-01-02 23:42:00 +08:00
										 |  |  |         ], | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     assert isinstance(num_tokens, int) | 
					
						
							| 
									
										
										
										
											2024-08-23 23:52:25 +08:00
										 |  |  |     assert num_tokens == 21 |