import json
from flask import Flask, request, jsonify
from huggingface_hub import InferenceClient

app = Flask(__name__)


sysx="""你是一个辅助机器人,认真查看用户的所有问题并回复"""
# "You are an assistant"
def generate_response(user_prompt, system_instructions=sysx, model="microsoft/Phi-3-mini-4k-instruct"):
    """Stream a completion from the Hugging Face Inference API and return the generated text."""
    try:
        client = InferenceClient(model)
        generate_kwargs = dict(
            max_new_tokens=100,
            do_sample=True,
        )
        formatted_prompt = f"[SYSTEM] {system_instructions}[QUESTION]{user_prompt}[ANSWER]"
        # Stream only the newly generated tokens; return_full_text is left at its default (False)
        # so the prompt itself is not echoed back and mistaken for a new [QUESTION] turn.
        stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True)
        output = ""

        for response in stream:
            output += response.token.text
            if "[QUESTION]" in output:
                break
        print("output:",output)
        # 移除 [QUESTION] 及其之后的内容
        if "[QUESTION]" in output:
            output = output[:output.index("[QUESTION]")].strip()
        return output
    except Exception as e:
        # On failure, return the error message as the completion text.
        return str(e)
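
# Example of the prompt the function builds (illustrative values):
#   [SYSTEM] You are an assistant[QUESTION]Hello![ANSWER]
# The model continues after [ANSWER]; streaming stops once it begins a new
# [QUESTION] turn, and that marker plus anything after it is stripped.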

# Root route (simple health check)
@app.route("/", methods=["GET", "POST", "PUT", "DELETE"])
def home():
    return "hello, work~~~"

# /completions routes (aliases for OpenAI-style chat-completion clients)
@app.route("/api/v1/chat/completions", methods=["POST"])
@app.route("/hf/v1/chat/completions", methods=["POST"])
@app.route("/chat/completions", methods=["POST"])
@app.route("/chat", methods=["POST"])
@app.route("/completions", methods=["POST"])
def completions():
    data = request.json
    model = data.get("model", "microsoft/Phi-3-mini-4k-instruct")  # optional model override from the request
    messages = data.get("messages", [])

    # Extract the user question and the system message
    user_message = next((msg["content"] for msg in messages if msg["role"] == "user"), "")
    system_instructions = next((msg["content"] for msg in messages if msg["role"] == "system"), "You are an assistant")

    # Generate the response text
    response_text = generate_response(user_message, system_instructions, model)
    # Build an OpenAI-style response payload
    response_data = {
        "choices": [
            {
                "message": {
                    "role": "assistant",
                    "content": response_text
                }
            }
        ]
    }
    # Serialize to a JSON string so non-ASCII characters are kept readable
    response_json = json.dumps(response_data, ensure_ascii=False)

    # Print the serialized response for debugging
    print(response_json)

    # Return the JSON response with Content-Type: application/json
    return app.response_class(response=response_json, mimetype='application/json')

if __name__ == "__main__":
    # Start the Flask app on all interfaces, port 7860
    app.run(host="0.0.0.0", port=7860)
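
# Example client call (illustrative; assumes the server is running locally on
# port 7860 and that the third-party requests library is installed):
#
#   import requests
#   payload = {
#       "model": "microsoft/Phi-3-mini-4k-instruct",
#       "messages": [
#           {"role": "system", "content": "You are an assistant"},
#           {"role": "user", "content": "Hello!"},
#       ],
#   }
#   r = requests.post("http://localhost:7860/hf/v1/chat/completions", json=payload)
#   print(r.json()["choices"][0]["message"]["content"])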