mirror of
				https://github.com/langgenius/dify.git
				synced 2025-10-25 07:58:40 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			392 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			392 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import os
 | |
| from typing import Generator
 | |
| 
 | |
| import pytest
 | |
| from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
 | |
| from core.model_runtime.entities.message_entities import (AssistantPromptMessage, PromptMessageTool,
 | |
|                                                           SystemPromptMessage, TextPromptMessageContent,
 | |
|                                                           UserPromptMessage)
 | |
| from core.model_runtime.entities.model_entities import AIModelEntity
 | |
| from core.model_runtime.errors.validate import CredentialsValidateFailedError
 | |
| from core.model_runtime.model_providers.xinference.llm.llm import XinferenceAILargeLanguageModel
 | |
| 
 | |
| """FOR MOCK FIXTURES, DO NOT REMOVE"""
 | |
| from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
 | |
| from tests.integration_tests.model_runtime.__mock.xinference import setup_xinference_mock
 | |
| 
 | |
| 
 | |
@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['chat', 'none']], indirect=True)
def test_validate_credentials_for_chat_model(setup_openai_mock, setup_xinference_mock):
    """Check credential validation for the chat model.

    A model UID prefixed with ``'www '`` and a pair of empty credentials must
    each raise ``CredentialsValidateFailedError``; the server URL and chat
    model UID read from the environment must validate without raising.
    """
    llm = XinferenceAILargeLanguageModel()

    # Case 1: real server URL, but a model UID that has been tampered with.
    with pytest.raises(CredentialsValidateFailedError):
        tampered_uid_credentials = {
            'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
            'model_uid': 'www ' + os.environ.get('XINFERENCE_CHAT_MODEL_UID'),
        }
        llm.validate_credentials(model='ChatGLM3', credentials=tampered_uid_credentials)

    # Case 2: both credential fields are empty strings.
    with pytest.raises(CredentialsValidateFailedError):
        empty_credentials = {'server_url': '', 'model_uid': ''}
        llm.validate_credentials(model='aaaaa', credentials=empty_credentials)

    # Case 3: credentials taken straight from the environment must pass.
    env_credentials = {
        'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
        'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID'),
    }
    llm.validate_credentials(model='ChatGLM3', credentials=env_credentials)
 | |
| 
 | |
@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['chat', 'none']], indirect=True)
def test_invoke_chat_model(setup_openai_mock, setup_xinference_mock):
    """Invoke the chat model in blocking mode and sanity-check the result.

    The response must be an ``LLMResult`` with non-empty message content and
    a positive total token count.
    """
    llm = XinferenceAILargeLanguageModel()

    env_credentials = {
        'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
        'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID'),
    }
    conversation = [
        SystemPromptMessage(content='You are a helpful AI assistant.'),
        UserPromptMessage(content='Hello World!'),
    ]
    sampling = {'temperature': 0.7, 'top_p': 1.0}

    result = llm.invoke(
        model='ChatGLM3',
        credentials=env_credentials,
        prompt_messages=conversation,
        model_parameters=sampling,
        stop=['you'],
        user="abc-123",
        stream=False,
    )

    assert isinstance(result, LLMResult)
    assert len(result.message.content) > 0
    assert result.usage.total_tokens > 0
 | |
| 
 | |
@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['chat', 'none']], indirect=True)
def test_invoke_stream_chat_model(setup_openai_mock, setup_xinference_mock):
    """Invoke the chat model in streaming mode and check every chunk.

    The call must return a generator whose items are ``LLMResultChunk``
    objects carrying an ``LLMResultChunkDelta`` with an
    ``AssistantPromptMessage``. Content must be non-empty for every chunk
    whose ``finish_reason`` is still ``None``.
    """
    llm = XinferenceAILargeLanguageModel()

    env_credentials = {
        'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
        'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID'),
    }
    conversation = [
        SystemPromptMessage(content='You are a helpful AI assistant.'),
        UserPromptMessage(content='Hello World!'),
    ]
    sampling = {'temperature': 0.7, 'top_p': 1.0}

    stream = llm.invoke(
        model='ChatGLM3',
        credentials=env_credentials,
        prompt_messages=conversation,
        model_parameters=sampling,
        stop=['you'],
        stream=True,
        user="abc-123",
    )

    assert isinstance(stream, Generator)
    for piece in stream:
        assert isinstance(piece, LLMResultChunk)
        assert isinstance(piece.delta, LLMResultChunkDelta)
        assert isinstance(piece.delta.message, AssistantPromptMessage)
        # Only chunks without a finish reason are required to carry content.
        if piece.delta.finish_reason is None:
            assert len(piece.delta.message.content) > 0
 | |
| """
 | |
|     Function calling of xinference does not support stream mode currently
 | |
| """
 | |
| # def test_invoke_stream_chat_model_with_functions():
 | |
| #     model = XinferenceAILargeLanguageModel()
 | |
| 
 | |
| #     response = model.invoke(
 | |
| #         model='ChatGLM3-6b',
 | |
| #         credentials={
 | |
| #             'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
 | |
| #             'model_type': 'text-generation',
 | |
| #             'model_name': 'ChatGLM3',
 | |
| #             'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
 | |
| #         },
 | |
| #         prompt_messages=[
 | |
| #             SystemPromptMessage(
 | |
| #                 content='你是一个天气机器人,可以通过调用函数来获取天气信息',
 | |
| #             ),
 | |
| #             UserPromptMessage(
 | |
| #                 content='波士顿天气如何?'
 | |
| #             )
 | |
| #         ],
 | |
| #         model_parameters={
 | |
| #             'temperature': 0,
 | |
| #             'top_p': 1.0,
 | |
| #         },
 | |
| #         stop=['you'],
 | |
| #         user='abc-123',
 | |
| #         stream=True,
 | |
| #         tools=[
 | |
| #             PromptMessageTool(
 | |
| #                 name='get_current_weather',
 | |
| #                 description='Get the current weather in a given location',
 | |
| #                 parameters={
 | |
| #                     "type": "object",
 | |
| #                     "properties": {
 | |
| #                         "location": {
 | |
| #                         "type": "string",
 | |
| #                             "description": "The city and state e.g. San Francisco, CA"
 | |
| #                         },
 | |
| #                         "unit": {
 | |
| #                             "type": "string",
 | |
| #                             "enum": ["celsius", "fahrenheit"]
 | |
| #                         }
 | |
| #                     },
 | |
| #                     "required": [
 | |
| #                         "location"
 | |
| #                     ]
 | |
| #                 }
 | |
| #             )
 | |
| #         ]
 | |
| #     )
 | |
| 
 | |
| #     assert isinstance(response, Generator)
 | |
|     
 | |
| #     call: LLMResultChunk = None
 | |
| #     chunks = []
 | |
| 
 | |
| #     for chunk in response:
 | |
| #         chunks.append(chunk)
 | |
| #         assert isinstance(chunk, LLMResultChunk)
 | |
| #         assert isinstance(chunk.delta, LLMResultChunkDelta)
 | |
| #         assert isinstance(chunk.delta.message, AssistantPromptMessage)
 | |
| #         assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
 | |
| 
 | |
| #         if chunk.delta.message.tool_calls and len(chunk.delta.message.tool_calls) > 0:
 | |
| #             call = chunk
 | |
| #             break
 | |
| 
 | |
| #     assert call is not None
 | |
| #     assert call.delta.message.tool_calls[0].function.name == 'get_current_weather'
 | |
| 
 | |
| # def test_invoke_chat_model_with_functions():
 | |
| #     model = XinferenceAILargeLanguageModel()
 | |
| 
 | |
| #     response = model.invoke(
 | |
| #         model='ChatGLM3-6b',
 | |
| #         credentials={
 | |
| #             'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
 | |
| #             'model_type': 'text-generation',
 | |
| #             'model_name': 'ChatGLM3',
 | |
| #             'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
 | |
| #         },
 | |
| #         prompt_messages=[
 | |
| #             UserPromptMessage(
 | |
| #                 content='What is the weather like in San Francisco?'
 | |
| #             )
 | |
| #         ],
 | |
| #         model_parameters={
 | |
| #             'temperature': 0.7,
 | |
| #             'top_p': 1.0,
 | |
| #         },
 | |
| #         stop=['you'],
 | |
| #         user='abc-123',
 | |
| #         stream=False,
 | |
| #         tools=[
 | |
| #             PromptMessageTool(
 | |
| #                 name='get_current_weather',
 | |
| #                 description='Get the current weather in a given location',
 | |
| #                 parameters={
 | |
| #                     "type": "object",
 | |
| #                     "properties": {
 | |
| #                         "location": {
 | |
| #                         "type": "string",
 | |
| #                             "description": "The city and state e.g. San Francisco, CA"
 | |
| #                         },
 | |
| #                         "unit": {
 | |
| #                             "type": "string",
 | |
| #                             "enum": [
 | |
| #                                 "c",
 | |
| #                                 "f"
 | |
| #                             ]
 | |
| #                         }
 | |
| #                     },
 | |
| #                     "required": [
 | |
| #                         "location"
 | |
| #                     ]
 | |
| #                 }
 | |
| #             )
 | |
| #         ]
 | |
| #     )
 | |
| 
 | |
| #     assert isinstance(response, LLMResult)
 | |
| #     assert len(response.message.content) > 0
 | |
| #     assert response.usage.total_tokens > 0
 | |
| #     assert response.message.tool_calls[0].function.name == 'get_current_weather'
 | |
| 
 | |
@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['completion', 'none']], indirect=True)
def test_validate_credentials_for_generation_model(setup_openai_mock, setup_xinference_mock):
    """Check credential validation for the text-generation model.

    A model UID prefixed with ``'www '`` and a pair of empty credentials must
    each raise ``CredentialsValidateFailedError``; the server URL and
    generation model UID read from the environment must validate cleanly.
    """
    llm = XinferenceAILargeLanguageModel()

    # Case 1: real server URL, but a model UID that has been tampered with.
    with pytest.raises(CredentialsValidateFailedError):
        tampered_uid_credentials = {
            'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
            'model_uid': 'www ' + os.environ.get('XINFERENCE_GENERATION_MODEL_UID'),
        }
        llm.validate_credentials(model='alapaca', credentials=tampered_uid_credentials)

    # Case 2: both credential fields are empty strings.
    with pytest.raises(CredentialsValidateFailedError):
        empty_credentials = {'server_url': '', 'model_uid': ''}
        llm.validate_credentials(model='alapaca', credentials=empty_credentials)

    # Case 3: credentials taken straight from the environment must pass.
    env_credentials = {
        'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
        'model_uid': os.environ.get('XINFERENCE_GENERATION_MODEL_UID'),
    }
    llm.validate_credentials(model='alapaca', credentials=env_credentials)
 | |
| 
 | |
@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['completion', 'none']], indirect=True)
def test_invoke_generation_model(setup_openai_mock, setup_xinference_mock):
    """Invoke the text-generation model in blocking mode.

    The response must be an ``LLMResult`` with non-empty message content and
    a positive total token count.
    """
    llm = XinferenceAILargeLanguageModel()

    env_credentials = {
        'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
        'model_uid': os.environ.get('XINFERENCE_GENERATION_MODEL_UID'),
    }
    prompt = [UserPromptMessage(content='the United States is')]
    sampling = {'temperature': 0.7, 'top_p': 1.0}

    result = llm.invoke(
        model='alapaca',
        credentials=env_credentials,
        prompt_messages=prompt,
        model_parameters=sampling,
        stop=['you'],
        user="abc-123",
        stream=False,
    )

    assert isinstance(result, LLMResult)
    assert len(result.message.content) > 0
    assert result.usage.total_tokens > 0
 | |
| 
 | |
@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['completion', 'none']], indirect=True)
def test_invoke_stream_generation_model(setup_openai_mock, setup_xinference_mock):
    """Invoke the text-generation model in streaming mode and check chunks.

    The call must return a generator whose items are ``LLMResultChunk``
    objects carrying an ``LLMResultChunkDelta`` with an
    ``AssistantPromptMessage``. Content must be non-empty for every chunk
    whose ``finish_reason`` is still ``None``.
    """
    llm = XinferenceAILargeLanguageModel()

    env_credentials = {
        'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
        'model_uid': os.environ.get('XINFERENCE_GENERATION_MODEL_UID'),
    }
    prompt = [UserPromptMessage(content='the United States is')]
    sampling = {'temperature': 0.7, 'top_p': 1.0}

    stream = llm.invoke(
        model='alapaca',
        credentials=env_credentials,
        prompt_messages=prompt,
        model_parameters=sampling,
        stop=['you'],
        stream=True,
        user="abc-123",
    )

    assert isinstance(stream, Generator)
    for piece in stream:
        assert isinstance(piece, LLMResultChunk)
        assert isinstance(piece.delta, LLMResultChunkDelta)
        assert isinstance(piece.delta.message, AssistantPromptMessage)
        # Only chunks without a finish reason are required to carry content.
        if piece.delta.finish_reason is None:
            assert len(piece.delta.message.content) > 0
 | |
| 
 | |
def test_get_num_tokens():
    """Check token counting with and without a tool definition.

    The same system + user prompt is counted twice: once together with a
    ``get_current_weather`` tool (expected 77 tokens) and once with no tools
    (expected 21 tokens). Both results must be plain ``int`` values.
    """
    llm = XinferenceAILargeLanguageModel()

    def make_credentials():
        # Built fresh for every call, re-reading the environment each time.
        return {
            'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
            'model_uid': os.environ.get('XINFERENCE_GENERATION_MODEL_UID'),
        }

    def make_conversation():
        # Built fresh for every call so the two counts share no message objects.
        return [
            SystemPromptMessage(content='You are a helpful AI assistant.'),
            UserPromptMessage(content='Hello World!'),
        ]

    weather_tool = PromptMessageTool(
        name='get_current_weather',
        description='Get the current weather in a given location',
        parameters={
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state e.g. San Francisco, CA",
                },
                "unit": {
                    "type": "string",
                    "enum": ["c", "f"],
                },
            },
            "required": ["location"],
        },
    )

    # Prompt plus one tool definition.
    count_with_tool = llm.get_num_tokens(
        model='ChatGLM3',
        credentials=make_credentials(),
        prompt_messages=make_conversation(),
        tools=[weather_tool],
    )
    assert isinstance(count_with_tool, int)
    assert count_with_tool == 77

    # Prompt only, no tools argument supplied.
    count_without_tool = llm.get_num_tokens(
        model='ChatGLM3',
        credentials=make_credentials(),
        prompt_messages=make_conversation(),
    )
    assert isinstance(count_without_tool, int)
    assert count_without_tool == 21
