"""Gradio demo: answer a free-text question about an uploaded image with BLIP VQA."""

from typing import Optional

from PIL import Image
import gradio as gr
import requests  # NOTE(review): not referenced in the visible code; kept as-is.
from transformers import AutoProcessor, BlipForQuestionAnswering

# The model weights and the processor are loaded from the same checkpoint,
# so the identifier is kept in one place.
MODEL_ID = "Salesforce/blip-vqa-base"

model = BlipForQuestionAnswering.from_pretrained(MODEL_ID)
processor = AutoProcessor.from_pretrained(MODEL_ID)


def generate_answer(text: str, image: Optional[Image.Image]) -> str:
    """Return the model's answer to ``text`` asked about ``image``.

    Args:
        text: The question typed into the textbox.
        image: The uploaded picture as a PIL image (the image input is
            declared with ``type="pil"``), or ``None`` when the form was
            submitted without an image.

    Returns:
        The decoded answer string with special tokens stripped.

    Raises:
        gr.Error: If no image was supplied; Gradio shows the message to the
            user instead of an opaque failure from inside the model call.
    """
    if image is None:
        # Without an image the processor yields no pixel values and the
        # generate call cannot run, so fail early with a readable message.
        raise gr.Error("Please upload an image before submitting.")

    inputs = processor(images=image, text=text, return_tensors="pt")
    outputs = model.generate(**inputs)
    # generate returns a batch of sequences; a single question/image pair was
    # passed in, so the first sequence is decoded.
    return processor.decode(outputs[0], skip_special_tokens=True)


text_input = gr.Textbox(lines=5, label="Enter text")
image_input = gr.Image(type="pil", label="Upload Image")

iface = gr.Interface(
    fn=generate_answer,
    inputs=[text_input, image_input],
    outputs="text",
    title="DD360-Bot-Multimodal",
    description="Enter text and upload an image",
)

if __name__ == "__main__":
    # Start the web UI only when run as a script, so importing this module
    # (e.g. to reuse generate_answer) does not start a server.
    iface.launch()