import gradio as gr
import torch
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
from transformers.generation import GenerationConfig

device = "cuda" if torch.cuda.is_available() else "cpu"

# The chat-tuned -Instruct checkpoint suits an interactive demo; the base
# Qwen/Qwen2-VL-7B weights are not tuned to follow conversational prompts.
model_id = "Qwen/Qwen2-VL-7B-Instruct"
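
# Qwen2-VL is supported natively in transformers (v4.45+): AutoProcessor
# stands in for AutoTokenizer (it bundles the tokenizer with the image
# preprocessor) and trust_remote_code is no longer needed.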
processor = AutoProcessor.from_pretrained(model_id)
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_id,
    device_map=device,
    torch_dtype="auto",
).eval()

# Reuse the model's stock generation settings, but force greedy decoding so
# replies are deterministic.
model.generation_config = GenerationConfig.from_pretrained(model_id)
model.generation_config.do_sample = False
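
# respond() below rebuilds the chat-template message list on every turn:
# unlike the legacy model.chat() helper the original script called, the
# native generate() API is stateless.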
def respond(image, prompt, history):
    # Replay prior turns as text-only messages; the current turn carries the
    # uploaded image plus the new prompt.
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": [{"type": "text", "text": user_msg}]})
        messages.append({"role": "assistant", "content": [{"type": "text", "text": bot_msg}]})
    messages.append({
        "role": "user",
        "content": [{"type": "image"}, {"type": "text", "text": prompt}],
    })

    # Render the chat template, then tensorize the text together with the PIL image.
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(text=[text], images=[image], return_tensors="pt").to(model.device)

    output_ids = model.generate(**inputs, max_new_tokens=512)  # cap chosen for the demo
    # Decode only the newly generated tokens, not the echoed prompt.
    response = processor.batch_decode(
        output_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True
    )[0]

    # gr.Chatbot renders the full list of (user, assistant) pairs, so the
    # updated history feeds both the chat window and the session state.
    history = history + [(prompt, response)]
    return history, history

with gr.Blocks() as demo:
    gr.Markdown(f"## Qwen2-VL-7B Demo (Model: {model_id})")
    with gr.Row():
        with gr.Column(scale=4):
            image = gr.Image(type="pil", label="Image")
            text_input = gr.Textbox(label="Prompt", placeholder="Enter a prompt")
            submit_button = gr.Button("Submit")
        with gr.Column(scale=6):
            chatbot = gr.Chatbot(label="Chatbot")

    # Per-session chat history; respond() reads it and returns the updated list.
    history = gr.State([])

    submit_button.click(
        respond,
        inputs=[image, text_input, history],
        outputs=[chatbot, history],
    )
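
# queue() serializes concurrent requests; share=True additionally serves the
# demo through a temporary public Gradio link on top of the local server.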
demo.queue().launch(server_name='0.0.0.0', server_port=7860, share=True)