import gradio as gr
from transformers import AutoProcessor, AutoModelForCausalLM
from datasets import load_dataset
import torch

model_path = "microsoft/git-base-vqav2"
dataset_name = "Multimodal-Fatima/OK-VQA_train"

# GIT is a causal LM; its processor handles both the image and the question text.
processor = AutoProcessor.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

# OK-VQA samples to browse with the slider (assumes a "train" split with "image"/"question" columns).
dataset = load_dataset(dataset_name, split="train")
questions = dataset["question"]


def answer_question(index):
    # Look up the selected sample, then generate an answer to its question.
    sample = dataset[int(index) - 1]
    image, question = sample["image"], sample["question"]
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    input_ids = processor(text=question, add_special_tokens=False).input_ids
    input_ids = torch.tensor([processor.tokenizer.cls_token_id] + input_ids).unsqueeze(0)
    generated_ids = model.generate(pixel_values=pixel_values, input_ids=input_ids, max_length=50)
    answer = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return image, question, answer


def main():
    demo = gr.Interface(
        fn=answer_question,
        inputs=[gr.Slider(1, len(questions), step=1)],
        outputs=["image", "text", "text"],
    )
    demo.launch(share=True)


if __name__ == "__main__":
    main()