"""Gradio demo: ask Qwen2.5-VL a question about an uploaded image."""

from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from transformers.image_utils import load_image
import gradio as gr

MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"

# Explicit cap on the answer length. Without it, generate() falls back to the
# generation config's default length limit, which can truncate answers.
MAX_NEW_TOKENS = 512

# Load the model and its processor once at import time.
# torch_dtype="auto" keeps the dtype stored in the checkpoint instead of
# up-casting the weights to float32.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(MODEL_ID)


def recognize_and_analyze(image, text_prompt):
    """Run Qwen2.5-VL on one image plus a text prompt and return its answer.

    Args:
        image: The image to analyse. The Gradio interface below passes a file
            path; a URL or an already-loaded PIL image is accepted as well.
        text_prompt: The instruction or question about the image. ``None`` is
            treated as an empty prompt.

    Returns:
        The decoded model answer, without the echoed prompt.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("请先上传图像。")

    # Normalise path / URL / PIL input into an RGB PIL image.
    pil_image = load_image(image)

    messages = [
        {
            "role": "user",
            "content": [
                # Only the placeholder goes into the chat template; the pixel
                # data is handed to the processor separately below.
                {"type": "image"},
                {"type": "text", "text": text_prompt or ""},
            ],
        }
    ]

    # Render the conversation into the prompt string the model expects,
    # including the image placeholder tokens and the assistant turn header.
    prompt = processor.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Tokenise the prompt and preprocess the image together so the image
    # placeholders are expanded to match the visual features.
    inputs = processor(
        text=[prompt],
        images=[pil_image],
        padding=True,
        return_tensors="pt",
    )
    inputs = inputs.to(model.device)

    generated_ids = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)

    # generate() returns prompt tokens followed by the new tokens; drop the
    # prompt so that only the answer is decoded.
    prompt_length = inputs["input_ids"].shape[1]
    answer_ids = generated_ids[:, prompt_length:]

    output_text = processor.batch_decode(
        answer_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return output_text[0]


# Build the Gradio UI. Components are taken from the top-level namespace
# because the legacy gr.inputs / gr.outputs modules were removed from Gradio.
interface = gr.Interface(
    fn=recognize_and_analyze,
    inputs=[
        gr.Image(type="filepath", label="上传图像"),
        gr.Textbox(label="输入描述文本"),
    ],
    outputs=gr.Textbox(label="识别结果"),
    title="Qwen2.5-VL 物体识别与分析",
    description="上传图像并输入描述文本以获取识别和分析结果。",
)

# Launch the Gradio app only when executed as a script.
if __name__ == "__main__":
    interface.launch()