"""Gradio demo: visual question answering (VQA) with Salesforce BLIP.

Upload an image and type a free-form question about it; the BLIP VQA
model generates a short text answer.
"""
import os  # NOTE(review): unused in this file — confirm nothing external relies on it, then remove

import gradio as gr
from transformers import AutoProcessor, BlipForQuestionAnswering
from transformers import pipeline  # NOTE(review): unused — consider removing

# Load the processor and model once at module startup so each request
# only pays for inference, not for model loading.
processor = AutoProcessor.from_pretrained("Salesforce/blip-vqa-base")
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")


def launch(pil_image, question):
    """Answer *question* about *pil_image* using the BLIP VQA model.

    Args:
        pil_image: PIL image supplied by the Gradio image widget
            (``type='pil'`` below).
        question: Question text about the image.

    Returns:
        The model's decoded answer as a plain string.
    """
    inputs = processor(pil_image, question, return_tensors="pt")
    out = model.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)


iface = gr.Interface(
    fn=launch,
    inputs=[
        gr.Image(label="Input image", type='pil'),
        gr.Textbox(label="Question", lines=3),
    ],
    outputs=[gr.Textbox(label="Answer", lines=3)],
    title="Image Q&A with Salesforce BLIP",
    description="1. Upload an image.\n2. Type a question.\n3. Press submit button.\n4. Get an answer.",
    allow_flagging="never",
)

# Guard the server start so importing this module (e.g. from tests or
# other tooling) does not launch the web UI as a side effect.
if __name__ == "__main__":
    iface.launch()