"""Gradio demo: visual question answering with the BLIP VQA base model."""

import os

import gradio as gr
from PIL import Image
from transformers import AutoProcessor
from transformers import BlipForQuestionAnswering

MODEL_ID = "Salesforce/blip-vqa-base"

# Both objects are loaded once at start-up and shared by every request.
# The imports above must precede these calls: the original script used
# AutoProcessor before importing it, which raised NameError.
model = BlipForQuestionAnswering.from_pretrained(MODEL_ID)
processor = AutoProcessor.from_pretrained(MODEL_ID)


def answer_question(image, question):
    """Answer a free-form question about an image with BLIP.

    Args:
        image: The picture to query. May be a ``PIL.Image.Image`` (what the
            Gradio component below delivers), a filesystem path, or an
            array-like accepted by ``PIL.Image.fromarray``.
        question: The natural-language question about the image.

    Returns:
        The decoded answer text with special tokens removed.

    Raises:
        gr.Error: If no image was supplied or the question is blank, so the
            UI shows a readable message instead of a traceback.
    """
    if image is None:
        raise gr.Error("Please upload an image.")
    question = (question or "").strip()
    if not question:
        raise gr.Error("Please enter a question.")

    if not isinstance(image, Image.Image):
        if isinstance(image, (str, os.PathLike)):
            image = Image.open(image)
        else:
            # Array input, e.g. from a gr.Image left at its default type.
            image = Image.fromarray(image)

    # Normalise RGBA / palette / grayscale uploads to three channels.
    image = image.convert("RGB")

    inputs = processor(image, question, return_tensors="pt")
    out = model.generate(**inputs)
    answer = processor.decode(out[0], skip_special_tokens=True)
    return answer


# Create Gradio interface
# type="pil" makes Gradio pass a PIL image rather than its default numpy array.
image_input = gr.Image(label="Upload Image", type="pil")
question_input = gr.Textbox(label="Ask a Question", lines=4)
output = gr.Textbox(label="Answer")

interface = gr.Interface(
    fn=answer_question,
    inputs=[image_input, question_input],
    outputs=output,
    title="Multimodal Question Answering",
    description=" BlipForQuestionAnswering for Question Answering",
)

# Launched at module level, as in the original script.
interface.launch()