| import os | |
| import gradio as gr | |
| from transformers import pipeline | |
| from transformers import AutoProcessor, BlipForQuestionAnswering | |
# Hugging Face Hub identifier shared by the processor and the model so the
# two are always loaded from the same checkpoint.
_BLIP_VQA_CHECKPOINT = "Salesforce/blip-vqa-base"

# Loaded once at import time and reused by every request handled below.
processor = AutoProcessor.from_pretrained(_BLIP_VQA_CHECKPOINT)
model = BlipForQuestionAnswering.from_pretrained(_BLIP_VQA_CHECKPOINT)
def launch(pil_image, question):
    """Answer a free-form question about an image with the BLIP VQA model.

    Args:
        pil_image: Image handed over by the Gradio image input. It is
            ``None`` when the user submits without providing an image.
        question: Question text entered by the user.

    Returns:
        The generated answer decoded to a string, with special tokens
        stripped.

    Raises:
        gr.Error: If no image was provided, so the UI can show a readable
            message instead of an opaque failure from the processor/model.
    """
    if pil_image is None:
        # Fail early with a user-facing message; without an image there is
        # nothing for the processor to encode.
        raise gr.Error("Please upload an image before submitting.")

    # Encode image + question into PyTorch tensors for the model.
    inputs = processor(pil_image, question, return_tensors="pt")
    out = model.generate(**inputs)
    # Only the first generated sequence is used: one question, one answer.
    return processor.decode(out[0], skip_special_tokens=True)
# Input widgets: the image is delivered to `launch` as a PIL image, the
# question as plain text.
image_input = gr.Image(label="Input image", type='pil')
question_input = gr.Textbox(label="Question", lines=3)

# Output widget that displays the string returned by `launch`.
answer_output = gr.Textbox(label="Answer", lines=3)

# Wire the widgets to the handler; flagging is disabled for this demo.
iface = gr.Interface(
    fn=launch,
    inputs=[image_input, question_input],
    outputs=[answer_output],
    title="Image Q&A with Salesforce BLIP",
    description="1. Upload an image.\n2. Type a question.\n3. Press submit button.\n4. Get an answer.",
    allow_flagging="never",
)

# Start the web UI.
iface.launch()