Michele Cafagna committed on
Commit
efe64cb
1 Parent(s): cd38731

added purity and diversity scores

Browse files
Files changed (2) hide show
  1. app.py +31 -20
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import streamlit as st
2
  from datasets import load_dataset
 
3
 
4
  st.set_page_config(page_title="High-Level dataset")
5
 
@@ -11,10 +12,15 @@ QS = {
11
  }
12
  SPLITS = ["test", "train"]
13
 
 
 
 
 
 
 
14
  @st.cache
15
  def load_data(split):
16
 
17
- #with st.spinner('Loading the data...'):
18
  dataset = load_dataset("michelecafagna26/hl")
19
 
20
  coco2id = {int(dataset[split][i]['file_name'].replace("COCO_train2014_", "").replace(".jpg", "")): i for i in
@@ -25,20 +31,27 @@ def load_data(split):
25
 
26
  def write_obj(dataset, img_id, options, split, list_type="num", show_questions=False,
27
  show_conf=False):
 
28
  st.image(dataset[split][img_id]['image'])
29
 
30
- # col1, col2 = st.columns(2)
31
- #
32
- # col1.metric(label="Diversity score",
33
- # value=round(self_bleu[f"COCO_train2014_{img_id}.jpg"], 2),
34
- # delta=round(self_bleu[f"COCO_train2014_{img_id}.jpg"] - AVG_DIVERSITY, 2),
35
- # help="Normalized complementary 3-way Self-BLEU score. The delta is the difference with the average")
36
- #
37
- # col2.metric(label="Purity score",
38
- # value=round(bleurt[f"COCO_train2014_{img_id}.jpg"], 2),
39
- # delta=round(bleurt[f"COCO_train2014_{img_id}.jpg"] - AVG_PURITY, 2),
40
- # help="Normalized 3-way Bleurt score. The delta is the difference with the average")
41
- #
 
 
 
 
 
 
42
  for field in options:
43
 
44
  st.markdown(f"## {field.capitalize()}")
@@ -61,7 +74,6 @@ def write_obj(dataset, img_id, options, split, list_type="num", show_questions=F
61
 
62
 
63
  def main():
64
-
65
  st.title('High-Level Dataset')
66
 
67
  show_questions = st.sidebar.checkbox('Questions')
@@ -75,19 +87,18 @@ def main():
75
  'Split',
76
  SPLITS)
77
 
78
-
79
  dataset, coco2id = load_data(split)
80
 
81
  # sidebar
82
  choosen_image = st.selectbox(
83
  'Select an image',
84
  list(coco2id.keys()),
85
- help="write a key like: 532"
86
  )
87
 
88
- write_obj(dataset, coco2id[choosen_image], options=options, split=split,
89
- list_type="num", show_questions=show_questions, show_conf=show_conf)
90
 
91
 
92
- if __name__=="__main__":
93
- main()
 
1
  import streamlit as st
2
  from datasets import load_dataset
3
+ import numpy as np
4
 
5
  st.set_page_config(page_title="High-Level dataset")
6
 
 
12
  }
13
  SPLITS = ["test", "train"]
14
 
15
+ AVG_PURITY = 1.10
16
+
17
+ AVG_DIVERSITY = 0.872819
18
+ MIN_DIVERSITY = 0
19
+ MAX_DIVERSITY = 100
20
+
21
  @st.cache
22
  def load_data(split):
23
 
 
24
  dataset = load_dataset("michelecafagna26/hl")
25
 
26
  coco2id = {int(dataset[split][i]['file_name'].replace("COCO_train2014_", "").replace(".jpg", "")): i for i in
 
31
 
32
  def write_obj(dataset, img_id, options, split, list_type="num", show_questions=False,
33
  show_conf=False):
34
+
35
  st.image(dataset[split][img_id]['image'])
36
 
37
+ item_purity = np.mean([np.mean(dataset[split][img_id]['purity'][k]) for k in dataset[split][img_id]['purity']])
38
+ item_diversity = np.mean(list(dataset[split][img_id]['diversity'].values()))
39
+
40
+ # normalize
41
+ item_diversity = 1-(item_diversity-MIN_DIVERSITY)/(MAX_DIVERSITY-MIN_DIVERSITY)
42
+
43
+ col1, col2 = st.columns(2)
44
+
45
+ col1.metric(label="Diversity score",
46
+ value=round(item_diversity, 2),
47
+ delta=round(item_diversity - AVG_DIVERSITY, 2),
48
+ help="Item's internal lexical diversity.\n Positive delta means higher then the average")
49
+
50
+ col2.metric(label="Purity score",
51
+ value=round(item_purity, 2),
52
+ delta=round(item_purity - AVG_PURITY, 2),
53
+ help="Item's internal semantic similarity.\n Positive delta means higher then the average")
54
+
55
  for field in options:
56
 
57
  st.markdown(f"## {field.capitalize()}")
 
74
 
75
 
76
  def main():
 
77
  st.title('High-Level Dataset')
78
 
79
  show_questions = st.sidebar.checkbox('Questions')
 
87
  'Split',
88
  SPLITS)
89
 
 
90
  dataset, coco2id = load_data(split)
91
 
92
  # sidebar
93
  choosen_image = st.selectbox(
94
  'Select an image',
95
  list(coco2id.keys()),
96
+ help="write a key like: 7603"
97
  )
98
 
99
+ write_obj(dataset, coco2id[choosen_image], options=options, split=split, list_type="num",
100
+ show_questions=show_questions, show_conf=show_conf)
101
 
102
 
103
+ if __name__ == "__main__":
104
+ main()
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  datasets==2.9.0
2
- streamlit==1.17.0
 
 
1
  datasets==2.9.0
2
+ streamlit==1.17.0
3
+ numpy==1.21.1