dify/scripts/stress-test/setup/mock_openai_server.py

#!/usr/bin/env python3
"""Mock OpenAI-compatible API server for Dify stress testing.

Implements just enough of the OpenAI REST surface (models, chat completions,
text completions, embeddings) to stand in for the real service during load
tests. All responses are canned; no API key is required or checked.
"""

import json
import time
import uuid
from collections.abc import Iterator
from typing import Any

from flask import Flask, Response, jsonify, request

app = Flask(__name__)
# Mock models list
MODELS = [
    {
        "id": "gpt-3.5-turbo",
        "object": "model",
        "created": 1677649963,
        "owned_by": "openai",
    },
    {"id": "gpt-4", "object": "model", "created": 1687882411, "owned_by": "openai"},
    {
        "id": "text-embedding-ada-002",
        "object": "model",
        "created": 1671217299,
        "owned_by": "openai-internal",
    },
]

@app.route("/v1/models", methods=["GET"])
def list_models() -> Any:
    """List available models."""
    return jsonify({"object": "list", "data": MODELS})
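
# The chat endpoint below mirrors OpenAI's response shape. With `"stream": true`
# it replies as Server-Sent Events: one `data: <json chunk>` line per delta,
# terminated by `data: [DONE]`, which is the framing OpenAI SDKs expect.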

@app.route("/v1/chat/completions", methods=["POST"])
def chat_completions() -> Any:
    """Handle chat completions."""
    data = request.get_json(silent=True) or {}
    model = data.get("model", "gpt-3.5-turbo")
    messages = data.get("messages", [])
    stream = data.get("stream", False)

    # Generate a canned response that echoes the last user message
    response_content = "This is a mock response from the OpenAI server."
    if messages:
        last_message = messages[-1].get("content", "")
        response_content = f"Mock response to: {last_message[:100]}..."

    if stream:
        # Streaming response: emit OpenAI-style SSE chunks
        def generate() -> Iterator[str]:
            # All chunks of one completion share the same id, as in the real API
            completion_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"
            # Send initial chunk carrying the assistant role
            chunk = {
                "id": completion_id,
                "object": "chat.completion.chunk",
                "created": int(time.time()),
                "model": model,
                "choices": [
                    {
                        "index": 0,
                        "delta": {"role": "assistant", "content": ""},
                        "finish_reason": None,
                    }
                ],
            }
            yield f"data: {json.dumps(chunk)}\n\n"

            # Send content word by word
            words = response_content.split()
            for word in words:
                chunk = {
                    "id": completion_id,
                    "object": "chat.completion.chunk",
                    "created": int(time.time()),
                    "model": model,
                    "choices": [
                        {
                            "index": 0,
                            "delta": {"content": word + " "},
                            "finish_reason": None,
                        }
                    ],
                }
                yield f"data: {json.dumps(chunk)}\n\n"
                time.sleep(0.05)  # Simulate streaming delay

            # Send final chunk with the stop reason, then the [DONE] sentinel
            chunk = {
                "id": completion_id,
                "object": "chat.completion.chunk",
                "created": int(time.time()),
                "model": model,
                "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
            }
            yield f"data: {json.dumps(chunk)}\n\n"
            yield "data: [DONE]\n\n"

        return Response(generate(), mimetype="text/event-stream")
    else:
        # Non-streaming response
        return jsonify(
            {
                "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
                "object": "chat.completion",
                "created": int(time.time()),
                "model": model,
                "choices": [
                    {
                        "index": 0,
                        "message": {"role": "assistant", "content": response_content},
                        "finish_reason": "stop",
                    }
                ],
                # Crude token estimates (character/word counts), good enough for a mock
                "usage": {
                    "prompt_tokens": len(str(messages)),
                    "completion_tokens": len(response_content.split()),
                    "total_tokens": len(str(messages)) + len(response_content.split()),
                },
            }
        )
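
# To poke the streaming path by hand (hypothetical invocation; adjust the port
# if you changed it), `curl -N` keeps the SSE stream unbuffered:
#
#     curl -N http://localhost:5004/v1/chat/completions \
#          -H "Content-Type: application/json" \
#          -d '{"model": "gpt-3.5-turbo", "stream": true,
#               "messages": [{"role": "user", "content": "hi"}]}'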

@app.route("/v1/completions", methods=["POST"])
def completions() -> Any:
    """Handle text completions."""
    data = request.get_json(silent=True) or {}
    model = data.get("model", "gpt-3.5-turbo-instruct")
    prompt = data.get("prompt", "")

    response_text = f"Mock completion for prompt: {prompt[:100]}..."
    return jsonify(
        {
            "id": f"cmpl-{uuid.uuid4().hex[:8]}",
            "object": "text_completion",
            "created": int(time.time()),
            "model": model,
            "choices": [
                {
                    "text": response_text,
                    "index": 0,
                    "logprobs": None,
                    "finish_reason": "stop",
                }
            ],
            "usage": {
                "prompt_tokens": len(prompt.split()),
                "completion_tokens": len(response_text.split()),
                "total_tokens": len(prompt.split()) + len(response_text.split()),
            },
        }
    )
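
# Embeddings are a constant vector, so every input maps to the same point in
# space: enough to exercise the request path under load, but not meaningful
# for similarity search.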

@app.route("/v1/embeddings", methods=["POST"])
def embeddings() -> Any:
    """Handle embeddings requests."""
    data = request.get_json(silent=True) or {}
    model = data.get("model", "text-embedding-ada-002")
    raw_input = data.get("input", "")
    # The OpenAI API accepts a single string or a list of strings;
    # return one vector per input element
    inputs = raw_input if isinstance(raw_input, list) else [raw_input]

    # Generate mock embedding (1536 dimensions for ada-002)
    mock_embedding = [0.1] * 1536
    token_count = sum(len(str(text).split()) for text in inputs)
    return jsonify(
        {
            "object": "list",
            "data": [
                {"object": "embedding", "embedding": mock_embedding, "index": i}
                for i in range(len(inputs))
            ],
            "model": model,
            "usage": {
                "prompt_tokens": token_count,
                "total_tokens": token_count,
            },
        }
    )

@app.route("/v1/models/<model_id>", methods=["GET"])
def get_model(model_id: str) -> tuple[Any, int] | Any:
    """Get specific model details."""
    for model in MODELS:
        if model["id"] == model_id:
            return jsonify(model)
    return jsonify({"error": "Model not found"}), 404

@app.route("/health", methods=["GET"])
def health() -> Any:
    """Health check endpoint."""
    return jsonify({"status": "healthy"})

if __name__ == "__main__":
    print("🚀 Starting Mock OpenAI Server on http://localhost:5004")
    print("Available endpoints:")
    print(" - GET /v1/models")
    print(" - POST /v1/chat/completions")
    print(" - POST /v1/completions")
    print(" - POST /v1/embeddings")
    print(" - GET /v1/models/<model_id>")
    print(" - GET /health")
    # Werkzeug dev server, single process; fine as a local stub, but it will
    # cap throughput well before a production server would
    app.run(host="0.0.0.0", port=5004, debug=True)
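
# Example client usage (a sketch assuming the `openai` Python SDK v1+; the
# mock never checks the key, so any placeholder works):
#
#     from openai import OpenAI
#
#     client = OpenAI(base_url="http://localhost:5004/v1", api_key="mock-key")
#     reply = client.chat.completions.create(
#         model="gpt-3.5-turbo",
#         messages=[{"role": "user", "content": "Hello"}],
#     )
#     print(reply.choices[0].message.content)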