"""Streamlit page for browsing the annotations of the High-Level dataset."""

import streamlit as st
from datasets import load_dataset
import numpy as np

st.set_page_config(page_title="High-Level dataset")

# Annotation columns that can be displayed for an item.
FIELDS = ["scene", "action", "rationale", "object"]

# Question shown above each annotation field; "object" has no question.
QS = {
    "scene": "Where is the picture taken?",
    "action": "What is the subject doing?",
    "rationale": "Why is the subject doing it?",
}

SPLITS = ["test", "train"]

# Baselines subtracted from an item's scores to produce the metric deltas.
# NOTE(review): presumably precomputed averages over the dataset -- confirm.
AVG_PURITY = 1.10
AVG_DIVERSITY = 0.872819

# Bounds used to min-max rescale the raw diversity score.
MIN_DIVERSITY = 0
MAX_DIVERSITY = 100

# ``st.cache`` is the legacy caching decorator; newer Streamlit releases
# provide ``st.cache_resource`` for large shared objects. Prefer the new API
# when it exists and fall back to ``st.cache`` so older installs keep working.
_cache = getattr(st, "cache_resource", None) or st.cache


def _coco_id(file_name):
    """Return the integer id embedded in a ``COCO_train2014_<id>.jpg`` name."""
    return int(file_name.replace("COCO_train2014_", "").replace(".jpg", ""))


@_cache
def load_data(split):
    """Load the dataset and index one split by COCO image id.

    Args:
        split: Name of the split to index (one of ``SPLITS``).

    Returns:
        A ``(dataset, coco2id)`` tuple: the object returned by
        ``load_dataset`` (all splits) and a dict mapping each integer COCO
        id found in ``file_name`` to its row index inside ``split``.
    """
    dataset = load_dataset("michelecafagna26/hl")
    # Read only the ``file_name`` column: indexing the split row by row would
    # materialise every full row (image included) just to get its name.
    file_names = dataset[split]["file_name"]
    coco2id = {_coco_id(name): row for row, name in enumerate(file_names)}
    return dataset, coco2id


def write_obj(dataset, img_id, options, split, list_type="num", show_questions=False, show_conf=False):
    """Render one dataset item: image, summary scores and annotations.

    Args:
        dataset: Mapping from split name to an indexable collection of rows.
        img_id: Row index of the item inside ``dataset[split]``.
        options: Annotation fields to display, in display order.
        split: Name of the split the item belongs to.
        list_type: ``"num"`` to number the annotations (``1.``, ``2.``, ...);
            any other value is used verbatim as the prefix of each one.
        show_questions: If true, show the question from ``QS`` for every
            field except ``"object"``.
        show_conf: If true, show the confidence score next to every
            annotation of every field except ``"object"``.
    """
    # Fetch the row once instead of re-indexing the dataset for every field.
    item = dataset[split][img_id]

    st.image(item['image'])

    purity = item['purity']
    item_purity = np.mean([np.mean(purity[k]) for k in purity])
    item_diversity = np.mean(list(item['diversity'].values()))
    # normalize: min-max rescale, then invert (1 - x)
    item_diversity = 1 - (item_diversity - MIN_DIVERSITY) / (MAX_DIVERSITY - MIN_DIVERSITY)

    col1, col2 = st.columns(2)
    col1.metric(label="Diversity score",
                value=round(item_diversity, 2),
                delta=round(item_diversity - AVG_DIVERSITY, 2),
                help="Item's internal lexical diversity.\n Positive delta means higher then the average")
    col2.metric(label="Purity score",
                value=round(item_purity, 2),
                delta=round(item_purity - AVG_PURITY, 2),
                help="Item's internal semantic similarity.\n Positive delta means higher then the average")

    for field in options:
        st.markdown(f"## {field.capitalize()}")
        if show_questions and field != "object":
            st.markdown(f" Question: _{QS[field]}_")

        for n, annotation in enumerate(item[field]):
            col1, col2 = st.columns(2)
            if list_type == "num":
                col1.markdown(f"{n + 1}. {annotation}")
            else:
                col1.markdown(f"{list_type} {annotation}")

            if show_conf and field != "object":
                col2.metric(label="confidence score",
                            value=item['confidence'][field][n])


def main():
    """Build the page: sidebar controls, image selector and the chosen item."""
    st.title('High-Level Dataset')

    # Sidebar controls.
    show_questions = st.sidebar.checkbox('Questions')
    show_conf = st.sidebar.checkbox('Confidence scores')
    options = st.sidebar.multiselect(
        'Choose the annotations',
        FIELDS,
        default=FIELDS)
    split = st.sidebar.selectbox(
        'Split',
        SPLITS)

    dataset, coco2id = load_data(split)

    # Image selector, rendered in the main page body.
    chosen_image = st.selectbox(
        'Select an image',
        list(coco2id.keys()),
        help="write a key like: 7603"
    )

    write_obj(dataset, coco2id[chosen_image], options=options, split=split, list_type="num",
              show_questions=show_questions, show_conf=show_conf)


if __name__ == "__main__":
    main()