import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the GGUF model file from the Hugging Face Hub
model_path = hf_hub_download(
    repo_id="muchuan-l/qwen2.5math1.5b-2v2-rev",  # model repository ID
    filename="unsloth.Q4_K_M.gguf",               # model file name
    cache_dir="."                                 # download into the current directory
)

# Load the GGUF model
llm = Llama(
    model_path=model_path,
    n_ctx=2048,    # context length
    n_threads=4    # number of CPU threads
)

# Define the chat function
def chat(input_text):
    # Generate a completion for the user's input
    output = llm(input_text, max_tokens=100)
    response = output["choices"][0]["text"]
    return response

# Create the Gradio interface
interface = gr.Interface(
    fn=chat,                           # chat function
    inputs="text",                     # input type
    outputs="text",                    # output type
    title="Qwen2.5Math1.5B Chat",      # interface title
    description="A chatbot powered by the Qwen2.5Math1.5B model.",   # interface description
    examples=["What is 2 + 2?", "Explain the Pythagorean theorem."]  # example inputs
)

# Launch the interface
interface.launch()
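
To run this app as a Gradio Space, the repository also needs a requirements file declaring the Python dependencies imported above. The list below is a minimal sketch using the standard PyPI package names; it is an assumption, not taken from the original Space, and version pins are omitted.

# requirements.txt (sketch; assumed dependency list, versions unpinned)
gradio
huggingface_hub
llama-cpp-python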