Spaces:

michelecafagna26
/

High-Level-Dataset-explorer

Sleeping

App Files Community

Michele Cafagna committed on Jan 30, 2023

Commit

efe64cb

•

1 Parent(s): cd38731

added purity and diversity scores

Browse files

Files changed (2) hide show

app.py +31 -20
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import streamlit as st
 from datasets import load_dataset
 st.set_page_config(page_title="High-Level dataset")
@@ -11,10 +12,15 @@ QS = {
 }
 SPLITS = ["test", "train"]
 @st.cache
 def load_data(split):
-    #with st.spinner('Loading the data...'):
     dataset = load_dataset("michelecafagna26/hl")
     coco2id = {int(dataset[split][i]['file_name'].replace("COCO_train2014_", "").replace(".jpg", "")): i for i in
@@ -25,20 +31,27 @@ def load_data(split):
 def write_obj(dataset, img_id, options, split, list_type="num", show_questions=False,
               show_conf=False):
     st.image(dataset[split][img_id]['image'])
-    # col1, col2 = st.columns(2)
-    #
-    # col1.metric(label="Diversity score",
-    #             value=round(self_bleu[f"COCO_train2014_{img_id}.jpg"], 2),
-    #             delta=round(self_bleu[f"COCO_train2014_{img_id}.jpg"] - AVG_DIVERSITY, 2),
-    #             help="Normalized complementary 3-way Self-BLEU score. The delta is the difference with the average")
-    #
-    # col2.metric(label="Purity score",
-    #             value=round(bleurt[f"COCO_train2014_{img_id}.jpg"], 2),
-    #             delta=round(bleurt[f"COCO_train2014_{img_id}.jpg"] - AVG_PURITY, 2),
-    #             help="Normalized 3-way Bleurt score. The delta is the difference with the average")
-    #
     for field in options:
         st.markdown(f"## {field.capitalize()}")
@@ -61,7 +74,6 @@ def write_obj(dataset, img_id, options, split, list_type="num", show_questions=F
 def main():
     st.title('High-Level Dataset')
     show_questions = st.sidebar.checkbox('Questions')
@@ -75,19 +87,18 @@ def main():
         'Split',
         SPLITS)
     dataset, coco2id = load_data(split)
     # sidebar
     choosen_image = st.selectbox(
         'Select an image',
         list(coco2id.keys()),
-        help="write a key like: 532"
     )
-    write_obj(dataset, coco2id[choosen_image], options=options, split=split,
-              list_type="num", show_questions=show_questions, show_conf=show_conf)
-if __name__=="__main__":
-    main()

 import streamlit as st
 from datasets import load_dataset
+import numpy as np
 st.set_page_config(page_title="High-Level dataset")
 }
 SPLITS = ["test", "train"]
+AVG_PURITY = 1.10
+AVG_DIVERSITY = 0.872819
+MIN_DIVERSITY = 0
+MAX_DIVERSITY = 100
 @st.cache
 def load_data(split):
     dataset = load_dataset("michelecafagna26/hl")
     coco2id = {int(dataset[split][i]['file_name'].replace("COCO_train2014_", "").replace(".jpg", "")): i for i in
 def write_obj(dataset, img_id, options, split, list_type="num", show_questions=False,
               show_conf=False):
     st.image(dataset[split][img_id]['image'])
+    item_purity = np.mean([np.mean(dataset[split][img_id]['purity'][k]) for k in dataset[split][img_id]['purity']])
+    item_diversity = np.mean(list(dataset[split][img_id]['diversity'].values()))
+    # normalize
+    item_diversity = 1-(item_diversity-MIN_DIVERSITY)/(MAX_DIVERSITY-MIN_DIVERSITY)
+    col1, col2 = st.columns(2)
+    col1.metric(label="Diversity score",
+                value=round(item_diversity, 2),
+                delta=round(item_diversity - AVG_DIVERSITY, 2),
+                help="Item's internal lexical diversity.\n Positive delta means higher then the average")
+    col2.metric(label="Purity score",
+                value=round(item_purity, 2),
+                delta=round(item_purity - AVG_PURITY, 2),
+                help="Item's internal semantic similarity.\n Positive delta means higher then the average")
     for field in options:
         st.markdown(f"## {field.capitalize()}")
 def main():
     st.title('High-Level Dataset')
     show_questions = st.sidebar.checkbox('Questions')
         'Split',
         SPLITS)
     dataset, coco2id = load_data(split)
     # sidebar
     choosen_image = st.selectbox(
         'Select an image',
         list(coco2id.keys()),
+        help="write a key like: 7603"
     )
+    write_obj(dataset, coco2id[choosen_image], options=options, split=split, list_type="num",
+              show_questions=show_questions, show_conf=show_conf)
+if __name__ == "__main__":
+    main()

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 datasets==2.9.0
-streamlit==1.17.0

 datasets==2.9.0
+streamlit==1.17.0
+numpy==1.21.1