# Gradio demo script for Qwen/Qwen3-0.6B (Hugging Face Spaces app.py)
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Load the Qwen3-0.6B tokenizer and model.
model_name = "Qwen/Qwen3-0.6B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,      # Qwen ships custom modeling code
    torch_dtype=torch.float16,   # half precision to reduce VRAM footprint
    device_map="auto",           # let accelerate pick the device (GPU if present)
)
def generate_text(prompt):
    """Generate a completion for *prompt* with the Qwen3-0.6B model.

    Args:
        prompt: User-supplied text to complete.

    Returns:
        The decoded model output (prompt plus up to 100 new tokens),
        with special tokens stripped.
    """
    # Move inputs to wherever device_map="auto" actually placed the model;
    # hardcoding "cuda" crashes on CPU-only hosts.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=100)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Build the Gradio UI: one input textbox, one output textbox.
prompt_box = gr.Textbox(lines=3, placeholder="输入你的问题...")
answer_box = gr.Textbox(label="Qwen3-0.6B 的回答")
demo = gr.Interface(
    fn=generate_text,
    inputs=prompt_box,
    outputs=answer_box,
    title="Qwen3-0.6B 演示 (Free GPU)",
)
# Start the Gradio server (trailing scrape artifact " |" removed — it was a syntax error).
demo.launch()