|
import gradio as gr |
|
from transformers import AutoProcessor, AutoModelForVisualQuestionAnswering, AutoModelForCausalLM, AutoTokenizer |
|
from PIL import Image |
|
import torch |
|
|
|
# Hugging Face checkpoint (GIT base, fine-tuned for VQAv2) and the OK-VQA
# training split this demo is meant to browse.
model_path = "microsoft/git-base-vqav2"
dataset_name = "Multimodal-Fatima/OK-VQA_train"

# Load model and tokenizer from the single checkpoint path so the two
# cannot drift apart (the original hard-coded the string twice, loading
# the model before `model_path` was even defined).
# NOTE(review): AutoProcessor / AutoModelForVisualQuestionAnswering are
# imported at the top of the file but never used — confirm whether the
# processor (for image preprocessing) should be instantiated here too.
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
|
|
# NOTE(review): `questions` was referenced by the Slider below but never
# defined, and main() had an empty body (a SyntaxError). These sample
# questions are a stand-in; TODO: load the "Multimodal-Fatima/OK-VQA_train"
# split named in `dataset_name` (e.g. via datasets.load_dataset) and index
# into its images/questions/answers instead.
questions = [
    "What is shown in the image?",
    "What color is the main object?",
    "How many objects are in the picture?",
]


def main(question_index=1):
    """Answer the selected question with the GIT VQA model.

    Parameters
    ----------
    question_index : int
        1-based index from the slider into ``questions``.

    Returns
    -------
    tuple
        ``(image, question, answer)`` — the image shown, the question
        text, and the model's decoded answer — matching the three
        Interface outputs declared below.
    """
    question = questions[int(question_index) - 1]
    # Placeholder image until the real dataset is wired in.
    image = Image.new("RGB", (224, 224), color="white")
    # GIT conditions generation on the question tokens.
    # NOTE(review): for genuine visual QA the image must be encoded via the
    # (currently unused) AutoProcessor and passed as pixel_values — confirm.
    input_ids = tokenizer(question, return_tensors="pt").input_ids
    with torch.no_grad():
        generated_ids = model.generate(input_ids=input_ids, max_length=50)
    answer = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return image, question, answer


# One slider input selects the question; three outputs mirror main()'s
# (image, question, answer) return tuple.
demo = gr.Interface(
    fn=main,
    inputs=[gr.Slider(1, len(questions), step=1)],
    outputs=["image", "text", "text"],
)

# share=True exposes a temporary public URL for the demo.
demo.launch(share=True)
|
|