Michele Cafagna
added purity and diversity scores
efe64cb
import streamlit as st
from datasets import load_dataset
import numpy as np
# Page-level Streamlit configuration: sets the page title.
# NOTE(review): Streamlit's documentation asks for set_page_config to run
# before other Streamlit commands, so keep it ahead of the rest — confirm
# against the Streamlit version in use.
st.set_page_config(page_title="High-Level dataset")

# Annotation fields offered in the "Choose the annotations" multiselect.
FIELDS = ["scene", "action", "rationale", "object"]

# Question displayed for a field when questions are enabled.
# "object" has no entry; write_obj skips the lookup for that field.
QS = {
    "scene": "Where is the picture taken?",
    "action": "What is the subject doing?",
    "rationale": "Why is the subject doing it?"
}

# Dataset splits offered in the "Split" selector.
SPLITS = ["test", "train"]

# Reference values subtracted from an item's scores to produce metric deltas.
# NOTE(review): presumably dataset-wide averages computed offline — confirm.
AVG_PURITY = 1.10
AVG_DIVERSITY = 0.872819

# Bounds used by write_obj to min-max normalise the raw diversity value.
MIN_DIVERSITY = 0
MAX_DIVERSITY = 100
# NOTE(review): `st.cache` is deprecated in recent Streamlit releases in
# favour of `st.cache_data` / `st.cache_resource`; it is kept here so the
# minimum Streamlit version does not change — confirm the target version.
@st.cache
def load_data(split):
    """Load the HL dataset and index one split by COCO image id.

    Args:
        split: Name of the split to index (e.g. an entry of ``SPLITS``).

    Returns:
        A ``(dataset, coco2id)`` tuple. ``dataset`` is the object returned by
        ``load_dataset`` (not restricted to ``split``); ``coco2id`` maps the
        integer id parsed from each ``file_name`` in ``split`` to that row's
        index within the split.

    Raises:
        ValueError: If a file name does not reduce to an integer once the
            ``COCO_train2014_`` prefix and ``.jpg`` suffix are removed.
    """
    dataset = load_dataset("michelecafagna26/hl")

    # Read only the ``file_name`` column. Indexing whole rows
    # (``dataset[split][i]``) materialises every column of every row,
    # including the image, just to obtain one string per row.
    file_names = dataset[split]["file_name"]

    coco2id = {
        int(name.replace("COCO_train2014_", "").replace(".jpg", "")): row
        for row, name in enumerate(file_names)
    }
    return dataset, coco2id
def write_obj(dataset, img_id, options, split, list_type="num", show_questions=False,
              show_conf=False):
    """Render one dataset item: image, summary scores and annotations.

    Args:
        dataset: Object indexable as ``dataset[split][img_id]`` yielding a
            row with ``image``, ``purity``, ``diversity``, ``confidence`` and
            the annotation fields.
        img_id: Row index of the item inside ``dataset[split]``.
        options: Annotation field names to display, in display order.
        split: Name of the split the item belongs to.
        list_type: ``"num"`` numbers the annotations (``1.``, ``2.``, ...);
            any other value is used verbatim as the prefix of each line.
        show_questions: When true, show the question from ``QS`` under each
            field heading (never for ``"object"``, which has no question).
        show_conf: When true, show a confidence metric next to each
            annotation (never for ``"object"``).

    Returns:
        None. The function only writes Streamlit elements to the page.
    """
    # Fetch the row once: every ``dataset[split][img_id]`` lookup rebuilds
    # the whole row, and the original repeated it for each score, field and
    # annotation.
    item = dataset[split][img_id]

    st.image(item['image'])

    purity = item['purity']
    item_purity = np.mean([np.mean(purity[k]) for k in purity])
    item_diversity = np.mean(list(item['diversity'].values()))

    # Min-max normalise, then invert so that the displayed score grows as the
    # raw value shrinks.
    item_diversity = 1-(item_diversity-MIN_DIVERSITY)/(MAX_DIVERSITY-MIN_DIVERSITY)

    col1, col2 = st.columns(2)
    col1.metric(label="Diversity score",
                value=round(item_diversity, 2),
                delta=round(item_diversity - AVG_DIVERSITY, 2),
                help="Item's internal lexical diversity.\n Positive delta means higher than the average")
    col2.metric(label="Purity score",
                value=round(item_purity, 2),
                delta=round(item_purity - AVG_PURITY, 2),
                help="Item's internal semantic similarity.\n Positive delta means higher than the average")

    for field in options:
        st.markdown(f"## {field.capitalize()}")

        # "object" has neither a question in QS nor confidence scores.
        has_extras = field != "object"

        if show_questions and has_extras:
            st.markdown(f" Question: _{QS[field]}_")

        for n, annotation in enumerate(item[field]):
            # One row per annotation: text on the left, confidence on the right.
            col1, col2 = st.columns(2)
            if list_type == "num":
                col1.markdown(f"{n + 1}. {annotation}")
            else:
                col1.markdown(f"{list_type} {annotation}")

            if show_conf and has_extras:
                col2.metric(label="confidence score",
                            value=item['confidence'][field][n])
def main():
    """Build the page: sidebar controls, image picker and the chosen item."""
    st.title('High-Level Dataset')

    # Sidebar controls, created in the order they should appear.
    sidebar = st.sidebar
    with_questions = sidebar.checkbox('Questions')
    with_conf = sidebar.checkbox('Confidence scores')
    selected_fields = sidebar.multiselect('Choose the annotations', FIELDS, default=FIELDS)
    active_split = sidebar.selectbox('Split', SPLITS)

    dataset, coco2id = load_data(active_split)

    # Image picker; this selectbox is created on the page itself rather than
    # through the sidebar container.
    coco_ids = list(coco2id)
    picked_id = st.selectbox('Select an image', coco_ids, help="write a key like: 7603")

    # Translate the chosen COCO id back to its row index before rendering.
    row_index = coco2id[picked_id]
    write_obj(
        dataset,
        row_index,
        options=selected_fields,
        split=active_split,
        list_type="num",
        show_questions=with_questions,
        show_conf=with_conf,
    )
# Build the page only when this file is executed as the main module, not
# when it is imported.
if __name__ == "__main__":
    main()