# apin / app.py
# Author: sanbo — "update sth." at 2024-11-15 14:13:53 (commit 0bb292e)
from flask import Flask, request, jsonify, Response
import json
from huggingface_hub import InferenceClient
import threading
import gradio as gr
# Flask application serving the OpenAI-compatible /completions API.
app = Flask(__name__)
# 定义生成文本的核心函数
def generate_response(model_name, messages, stream=False):
    """Call the HuggingFace Inference API and wrap the result in an
    OpenAI-style Flask response.

    Args:
        model_name: HF model repo id, passed straight to InferenceClient.
        messages: OpenAI-style list of {"role": ..., "content": ...} dicts;
            only the first "system" and first "user" entries are used.
        stream: when True, return a streaming Response of newline-delimited
            JSON chunks; otherwise return a single jsonify()'d payload.

    Returns:
        A Flask Response (streaming), a jsonify() result, or on failure a
        (jsonify() error body, 500) tuple.
    """
    # End-of-sequence markers some chat models emit; they are stripped from
    # the output rather than forwarded to the client.
    stop_markers = ("<|assistant|>", "</s>")
    try:
        client = InferenceClient(model_name)  # model name comes from the request
        system_instructions = next(
            (msg["content"] for msg in messages if msg["role"] == "system"),
            "You are a helpful assistant.",
        )
        user_prompt = next(
            (msg["content"] for msg in messages if msg["role"] == "user"), ""
        )
        formatted_prompt = f"[SYSTEM] {system_instructions}[QUESTION]{user_prompt}[ANSWER]"
        generate_kwargs = dict(
            max_new_tokens=100,
            do_sample=True,
        )
        if stream:
            # Stream newline-delimited JSON chunks, one per generated token.
            def stream_response():
                for response in client.text_generation(
                    formatted_prompt,
                    **generate_kwargs,
                    stream=True,
                    details=True,
                    return_full_text=False,
                ):
                    token_text = response.token.text
                    # BUG FIX: the original trimmed end markers from a local
                    # accumulator but still yielded the raw token, so the
                    # markers leaked to the client. Skip them instead.
                    if token_text in stop_markers:
                        continue
                    yield json.dumps(
                        {"choices": [{"delta": {"content": token_text}}]}
                    ) + "\n"

            return Response(stream_response(), content_type="application/json")
        else:
            # BUG FIX: with stream=False the client returns a single details
            # object (not an iterable of tokens), so read generated_text
            # directly instead of iterating token-by-token.
            result = client.text_generation(
                formatted_prompt,
                **generate_kwargs,
                stream=False,
                details=True,
                return_full_text=False,
            )
            output = result.generated_text
            for marker in stop_markers:
                if output.endswith(marker):
                    output = output[: -len(marker)]
            return jsonify(
                {"choices": [{"message": {"role": "assistant", "content": output}}]}
            )
    except Exception as e:
        # BUG FIX: report failures with an explicit 500 instead of a 200
        # that looks like success to API clients.
        return jsonify({"error": str(e)}), 500
# Flask 路由,支持 /completions 格式的 API
# Flask route implementing an OpenAI-style /completions API.
@app.route("/completions", methods=["POST"])
def completions():
    """Handle POST /completions.

    Expects a JSON body with optional "model", "messages" and "stream"
    keys; each falls back to a sensible default when absent.
    """
    # BUG FIX: request.json raises / is None when the body is missing or not
    # valid JSON; get_json(silent=True) lets us fall back to defaults instead.
    data = request.get_json(silent=True) or {}
    model_name = data.get("model", "microsoft/Phi-3-mini-4k-instruct")  # default model
    messages = data.get("messages", [])
    stream = data.get("stream", False)
    return generate_response(model_name, messages, stream)
# Gradio 界面
# Gradio UI: a minimal prompt -> response demo on top of the same backend.
with gr.Blocks() as demo:
    gr.Markdown("## AI Text Generation")
    with gr.Row():
        user_input = gr.Textbox(label="Enter your prompt")
        generate_button = gr.Button("Generate")
    output_display = gr.Textbox(label="Generated Response")

    def gradio_generate(user_prompt):
        """Bridge the Flask-oriented generate_response() to plain text.

        BUG FIX: generate_response() uses jsonify(), which requires a Flask
        application context (absent inside a Gradio callback), and returns a
        Response object a Textbox cannot display. Run it under an app
        context and unpack the JSON payload into a string.
        """
        with app.app_context():
            result = generate_response(
                "microsoft/Phi-3-mini-4k-instruct",
                [{"role": "user", "content": user_prompt}],
            )
        if isinstance(result, tuple):  # (response, status) error shape
            result = result[0]
        payload = result.get_json()
        if "error" in payload:
            return f"Error: {payload['error']}"
        return payload["choices"][0]["message"]["content"]

    generate_button.click(
        fn=gradio_generate,
        inputs=[user_input],
        outputs=output_display,
    )
# 启动 Gradio 和 Flask 应用
def start_gradio():
    """Launch the Gradio UI server (blocking; run on its own thread).

    BUG FIX: Gradio's default port is 7860, which collides with the Flask
    server started in __main__ (app.run(port=7860)) — whichever binds second
    fails. Pin Gradio to 7861 so both servers can start.
    """
    demo.launch(share=True, server_port=7861)
if __name__ == "__main__":
    # Run the Gradio UI on a daemon thread so it cannot keep the process
    # alive after the Flask server in the main thread exits.
    gradio_thread = threading.Thread(target=start_gradio, daemon=True)
    gradio_thread.start()
    # Flask serves the /completions API on port 7860 in the main thread.
    app.run(port=7860)