File size: 2,725 Bytes
0bb292e 75ead56 dbe1281 0bb292e 2912f0f d17d4a0 2912f0f dbe1281 6625885 dbe1281 95c86c0 5191811 95c86c0 5191811 95c86c0 3db9312 5191811 a26b6ee dbe1281 95c86c0 0bb292e b0f47a5 75ead56 b0f47a5 72bb84f b0f47a5 5191811 0bb292e 6625885 0bb292e dbe1281 95c86c0 8fc99cd dbe1281 95c86c0 b0f47a5 5191811 de0884c 5191811 0bb292e 95c86c0 75ead56 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import json
from flask import Flask, request, jsonify
from huggingface_hub import InferenceClient
app = Flask(__name__)
# Default system prompt (Chinese). Rough translation: "You are an assistant bot;
# read all of the user's questions carefully and reply."
sysx="""你是一个辅助机器人,认真查看用户的所有问题并回复"""
# "You are an assistant"
def generate_response(user_prompt, system_instructions=sysx, model="microsoft/Phi-3-mini-4k-instruct"):
    """Stream a completion for *user_prompt* from a Hugging Face Inference endpoint.

    Builds a ``[SYSTEM] ... [QUESTION] ... [ANSWER]``-tagged prompt, streams
    generated tokens, and stops as soon as the model starts a new
    ``[QUESTION]`` turn. Returns the accumulated answer text, stripped of the
    stop marker and anything after it.

    On any failure the exception message is returned as the reply string —
    callers (the /completions route) treat the return value as the assistant
    answer, so this is a deliberate best-effort API rather than a 500.
    """
    try:
        client = InferenceClient(model)
        generate_kwargs = dict(
            max_new_tokens=100,
            do_sample=True,
        )
        formatted_prompt = f"[SYSTEM] {system_instructions}[QUESTION]{user_prompt}[ANSWER]"
        # return_full_text must be False here: with True the echoed prompt
        # already contains "[QUESTION]", so the stop check below would fire
        # immediately and truncate the reply before any generated token arrived.
        stream = client.text_generation(
            formatted_prompt,
            **generate_kwargs,
            stream=True,
            details=True,
            return_full_text=False,
        )
        output = ""
        for response in stream:
            output += response.token.text
            # The model tends to hallucinate a follow-up question turn;
            # stop streaming as soon as that marker shows up.
            if "[QUESTION]" in output:
                break
        # Remove the "[QUESTION]" marker and everything after it (single
        # find() instead of the original duplicated membership test).
        marker = output.find("[QUESTION]")
        if marker != -1:
            output = output[:marker].strip()
        return output
    except Exception as e:
        # Surface the error text as the reply rather than crashing the route.
        return str(e)
# Root path
@app.route("/", methods=["GET", "POST", "PUT", "DELETE"])
def home():
    """Health-check endpoint: answers every common HTTP verb with a greeting."""
    greeting = "hello, work~~~"
    return greeting
# /completions routes (several aliases for OpenAI-style clients)
@app.route("/api/v1/chat/completions", methods=["POST"])
@app.route("/hf/v1/chat/completions", methods=["POST"])
@app.route("/chat/completions", methods=["POST"])
@app.route("/chat", methods=["POST"])
@app.route("/completions", methods=["POST"])
def completions():
    """OpenAI-style chat-completions endpoint.

    Expects a JSON body such as
    ``{"model": ..., "messages": [{"role": ..., "content": ...}]}``,
    forwards the first user message (and optional system message) to
    :func:`generate_response`, and returns a minimal OpenAI-compatible
    ``{"choices": [{"message": {...}}]}`` payload.
    """
    # silent=True returns None instead of raising on a missing/invalid JSON
    # body; fall back to {} so the .get() calls below cannot blow up.
    data = request.get_json(silent=True) or {}
    model = data.get("model", "microsoft/Phi-3-mini-4k-instruct")
    messages = data.get("messages", [])
    # First user message is the prompt; the first system message (if any)
    # overrides the default instructions. .get() guards against malformed
    # message dicts that lack "role" or "content" keys.
    user_message = next(
        (msg.get("content", "") for msg in messages if msg.get("role") == "user"),
        "",
    )
    system_instructions = next(
        (msg.get("content", "") for msg in messages if msg.get("role") == "system"),
        "You are an assistant",
    )
    response_text = generate_response(user_message, system_instructions, model)
    response_data = {
        "choices": [
            {
                "message": {
                    "role": "assistant",
                    "content": response_text,
                }
            }
        ]
    }
    # ensure_ascii=False keeps non-ASCII (e.g. Chinese) characters readable
    # in the serialized JSON instead of \uXXXX escapes.
    response_json = json.dumps(response_data, ensure_ascii=False)
    # Explicit response object so the Content-Type is application/json.
    return app.response_class(response=response_json, mimetype='application/json')
if __name__ == "__main__":
    # Start the Flask dev server. host="0.0.0.0" makes it reachable from
    # outside the container; 7860 is the conventional Hugging Face Spaces port.
    app.run(host="0.0.0.0", port=7860)
|