import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Fetch the quantized GGUF weights from the Hugging Face Hub, using the
# current working directory as the cache directory. The call returns the
# local path of the downloaded file.
model_path = hf_hub_download(
    cache_dir=".",
    filename="unsloth.Q4_K_M.gguf",
    repo_id="muchuan-l/qwen2.5math1.5b-2v2-rev",
)

# Load the GGUF model: 4 worker threads, 2048-token context length.
llm = Llama(n_threads=4, n_ctx=2048, model_path=model_path)

# Conversation handler: turns a user prompt into generated text.
def chat(input_text: str) -> str:
    """Generate a completion for ``input_text`` with the module-level model.

    Args:
        input_text: Prompt text entered by the user.

    Returns:
        The text of the first completion choice, limited to 100 generated
        tokens. An empty string is returned when the prompt is empty,
        ``None``, or whitespace-only; the model is not invoked in that case.
    """
    # A blank prompt gives the model nothing to answer, so skip the
    # (slow) inference call instead of returning unrelated text.
    if not input_text or not input_text.strip():
        return ""

    output = llm(input_text, max_tokens=100)
    # The completion result holds a list of choices; only the first is used.
    return output["choices"][0]["text"]

# Build the Gradio UI: a single text box in, a single text box out,
# with `chat` as the callback and two example prompts.
interface = gr.Interface(
    title="Qwen2.5Math1.5B Chat",
    description="A chatbot powered by Qwen2.5Math1.5B model.",
    fn=chat,
    inputs="text",
    outputs="text",
    examples=["What is 2 + 2?", "Explain the Pythagorean theorem."],
)

# Start serving the interface.
interface.launch()