|
import streamlit as st |
|
from datasets import load_dataset |
|
import numpy as np |
|
|
|
# Set the page title shown for this app.
st.set_page_config(
    page_title="High-Level dataset",
)
|
|
|
# Annotation fields offered in the sidebar multiselect (all selected by default).
FIELDS = ["scene", "action", "rationale", "object"]

# Question displayed above a field's annotations when "Questions" is enabled.
# There is deliberately no entry for "object": write_obj never looks it up.
QS = dict(
    scene="Where is the picture taken?",
    action="What is the subject doing?",
    rationale="Why is the subject doing it?",
)

# Dataset splits offered in the sidebar selectbox.
SPLITS = ["test", "train"]

# Baselines subtracted from an item's scores to produce the metric deltas.
AVG_PURITY = 1.10
AVG_DIVERSITY = 0.872819

# Bounds used to rescale an item's raw diversity value before it is displayed.
MIN_DIVERSITY, MAX_DIVERSITY = 0, 100
|
|
|
@st.cache
def load_data(split):
    """Load the HL dataset and index one of its splits by COCO image id.

    Args:
        split: Name of the split to index; used as a key into the object
            returned by ``load_dataset``.

    Returns:
        A ``(dataset, coco2id)`` tuple. ``dataset`` is the full object
        returned by ``load_dataset`` (all splits). ``coco2id`` maps the
        integer id parsed from each row's ``file_name`` to that row's
        position inside ``dataset[split]``.

    Raises:
        ValueError: If a ``file_name`` does not reduce to an integer once the
            ``COCO_train2014_`` prefix and ``.jpg`` suffix are stripped.
    """
    # NOTE(review): ``st.cache`` is deprecated in recent Streamlit releases;
    # consider ``st.cache_resource`` once the minimum supported version
    # allows it -- confirm against the deployed Streamlit version.
    dataset = load_dataset("michelecafagna26/hl")

    # Read only the ``file_name`` column, once. Indexing row by row
    # (``dataset[split][i]``) fetches every field of every row -- including
    # the image -- just to read a single string.
    file_names = dataset[split]["file_name"]

    coco2id = {
        int(name.replace("COCO_train2014_", "").replace(".jpg", "")): row
        for row, name in enumerate(file_names)
    }

    return dataset, coco2id
|
|
|
|
|
def write_obj(dataset, img_id, options, split, list_type="num", show_questions=False,
              show_conf=False):
    """Render one dataset item: its image, summary metrics and annotations.

    Args:
        dataset: Object indexable as ``dataset[split][img_id]``, yielding a
            row with the keys ``image``, ``purity``, ``diversity``,
            ``confidence`` and one key per annotation field.
        img_id: Position of the row inside ``dataset[split]``.
        options: Iterable of annotation field names to display, in order.
        split: Name of the split the row is read from.
        list_type: ``"num"`` prefixes annotations with ``1.``, ``2.``, ...;
            any other value is used verbatim as the prefix of every line.
        show_questions: If true, show the question from ``QS`` under the
            heading of every field except ``"object"``.
        show_conf: If true, show a confidence metric next to each annotation
            of every field except ``"object"``.
    """
    # Fetch the row a single time. The row was previously re-read from the
    # dataset for every metric, every field and every annotation.
    item = dataset[split][img_id]

    st.image(item['image'])

    # Mean of the per-key means of the purity scores.
    purity = item['purity']
    item_purity = np.mean([np.mean(purity[k]) for k in purity])

    # Mean raw diversity, rescaled with the MIN/MAX bounds and inverted
    # (1 - x) before being displayed.
    item_diversity = np.mean(list(item['diversity'].values()))
    item_diversity = 1 - (item_diversity - MIN_DIVERSITY) / (MAX_DIVERSITY - MIN_DIVERSITY)

    diversity_col, purity_col = st.columns(2)

    diversity_col.metric(
        label="Diversity score",
        value=round(item_diversity, 2),
        delta=round(item_diversity - AVG_DIVERSITY, 2),
        help="Item's internal lexical diversity.\n Positive delta means higher then the average")

    purity_col.metric(
        label="Purity score",
        value=round(item_purity, 2),
        delta=round(item_purity - AVG_PURITY, 2),
        help="Item's internal semantic similarity.\n Positive delta means higher then the average")

    for field in options:
        st.markdown(f"## {field.capitalize()}")

        # "object" has neither a question in QS nor confidence scores shown.
        is_object = field == "object"

        if show_questions and not is_object:
            st.markdown(f" Question: _{QS[field]}_")

        for n, annotation in enumerate(item[field]):
            # One two-column line per annotation: text left, confidence right.
            text_col, conf_col = st.columns(2)

            if list_type == "num":
                text_col.markdown(f"{n + 1}. {annotation}")
            else:
                text_col.markdown(f"{list_type} {annotation}")

            if show_conf and not is_object:
                conf_col.metric(label="confidence score",
                                value=item['confidence'][field][n])
|
|
|
|
|
def main():
    """Build the page: sidebar controls, the image picker and the item view."""
    st.title('High-Level Dataset')

    # Sidebar: display toggles, annotation fields and the dataset split.
    sidebar = st.sidebar
    show_questions = sidebar.checkbox('Questions')
    show_conf = sidebar.checkbox('Confidence scores')
    options = sidebar.multiselect('Choose the annotations', FIELDS, default=FIELDS)
    split = sidebar.selectbox('Split', SPLITS)

    dataset, coco2id = load_data(split)

    # The picker lists the COCO ids; the chosen id is mapped back to a row
    # position through coco2id before rendering.
    selected_coco_id = st.selectbox(
        'Select an image',
        list(coco2id),
        help="write a key like: 7603",
    )
    row_index = coco2id[selected_coco_id]

    write_obj(
        dataset,
        row_index,
        options=options,
        split=split,
        list_type="num",
        show_questions=show_questions,
        show_conf=show_conf,
    )
|
|
|
|
|
# Build the page only when this file is run as the entry script.
if __name__ == "__main__":
    main()
|
|