import sys
sys.path.append(".")

import streamlit as st
import pandas as pd
from PIL import Image
import time

from model_loader import *
from datasets import load_dataset


# Load the sample table. Each row is read below through its 'img_path',
# 'ques' and 'label' columns.
# NOTE: a disabled earlier variant loaded the "HuggingFaceM4/VQAv2" validation
# split via datasets.load_dataset instead of this local JSON file.
df = pd.read_json('vqa_samples.json', orient="columns")

# Without rows there is nothing to select; stop before building a selector
# whose maximum would be below its minimum.
if df.empty:
    st.error("No samples found in vqa_samples.json.")
    st.stop()

# Sidebar selector for the model used for inference.
model_name = st.sidebar.selectbox(
    "Select a model: ",
    ('vilt', 'vilt_finetuned', 'git', 'blip', 'vbert')
)

# Positional row selector. The largest valid position is len(df) - 1;
# allowing len(df) itself would make df.iloc raise IndexError.
image_selector_unspecific = st.number_input(
    "Select an question id: ",
    min_value=0,
    max_value=len(df) - 1,
)

# Look up the chosen sample and display its image. int() guards against the
# widget handing back a non-int numeric value, which iloc would reject.
sample = df.iloc[int(image_selector_unspecific)]
img_path = sample['img_path']
image = Image.open(f'images/{img_path}.jpg')

st.image(image, channels="RGB")
example_question = sample['ques']
label = sample['label']

# Ask for a question; the sample's own question is shown as a hint.
user_question = st.text_input(f"Ask the model a question related to the image: \n"
                               f"(e.g. \"{sample['ques']}\")")
# The text box is empty on first render; fall back to the example question
# rather than sending an empty question to the model.
question = user_question if user_question.strip() else example_question

# Inference. The measured time covers both model loading and answering.
t_begin = time.perf_counter()
args = load_model(model_name)  # TODO: cache the loaded model across reruns
answer = get_answer(args, image, question, model_name)
t_end = time.perf_counter()
st.text(f"Answer by {model_name}: {answer}")
st.text(f"Ground truth (of the example): {label}")
st.text(f"Time consumption: {(t_end-t_begin): .4f} s")