felix committed on
Commit
e254e41
•
1 Parent(s): 8b8ceb9
Files changed (3) hide show
  1. README.md +3 -1
  2. app.py +32 -35
  3. requirements.txt +1 -0
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🏆
4
  colorFrom: yellow
5
  colorTo: gray
6
  sdk: streamlit
7
- sdk_version: 1.25.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
@@ -13,4 +13,6 @@ license: apache-2.0
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
 
15
  TODO:
 
 
16
 
 
4
  colorFrom: yellow
5
  colorTo: gray
6
  sdk: streamlit
7
+ sdk_version: 1.26.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
 
15
  TODO:
16
+ Add feedback form on the bottom?
17
+
18
 
app.py CHANGED
@@ -11,8 +11,6 @@ st.title('Meta Open LLM leaderboard')
11
 
12
  directories = os.listdir("./data")
13
 
14
- #data_dir = directories[0]
15
-
16
  def format_dir_date(data_dir):
17
  # Extracting date and time information from the path
18
  parsed_date = datetime.strptime(data_dir, "%Y%m%d_%H%M")
@@ -20,12 +18,15 @@ def format_dir_date(data_dir):
20
  # Formatting the parsed date
21
  return parsed_date.strftime("%b %d, %Y %H:%M")
22
 
23
- data_dir = st.selectbox(
24
- 'Select different Date',
25
- directories,
26
- format_func=format_dir_date,
27
- index=len(directories)-1
28
- )
 
 
 
29
 
30
  captions_map = {
31
  "hg_average_to_agentbench_compare.png": "HF to AgentBench compare",
@@ -34,14 +35,13 @@ captions_map = {
34
  "hg_average_to_mosaic_compare.png": "HF to MosaicML compare",
35
  "hg_average_to_alpacaeval_compare.png": "HF to AlpacaEval compare"
36
  }
 
 
37
 
38
- st.write("Generated on: <b>" + format_dir_date(data_dir) + "</b>", unsafe_allow_html=True)
39
- st.divider()
40
 
41
  data_path = './data/' + data_dir
42
 
43
  imgs = glob.glob(os.path.join(data_path, '*.png'))
44
- white_image = './white_image.png'
45
 
46
  # Extracting images that start with "hf_llm_diagram"
47
  hf_llm_diagrams = [img for img in imgs if 'hf_llm_diagram' in os.path.basename(img)]
@@ -93,38 +93,45 @@ def print_model_list(file_name, st, split_into_two=False):
93
  st.write(final_html, unsafe_allow_html=True)
94
 
95
 
 
96
  cols = st.columns(2)
97
 
98
- cols[0].write("HuggingFace Open LLM leaderboard by Model Size")
99
- cols[0].image(hf_llm_diagrams[0], use_column_width="auto")
100
- cols[1].write("<nbsp/>", unsafe_allow_html=True)
101
- cols[1].image(white_image, use_column_width="auto")
102
 
103
  print_model_list(hf_llm_diagrams[0],st, True)
104
- st.divider()
105
 
106
  cols = st.columns(2)
107
- cols[0].image(hf_llm_diagrams[1],caption="Other or commercially permissive licenses only", use_column_width="auto")
108
 
 
109
  print_model_list(hf_llm_diagrams[1],cols[0])
110
 
111
  cols[1].image(hf_llm_diagrams[2],caption="Commercially permissive license only", use_column_width="auto")
112
-
113
  print_model_list(hf_llm_diagrams[2],cols[1])
114
 
115
- st.divider()
 
 
 
 
 
 
 
 
 
116
 
117
- st.write("Big Code Models Leaderboard")
118
  cols = st.columns(2)
119
  cols[0].image(bigcode_diagrams[0], use_column_width="auto")
120
- cols[1].write("<nbsp/>", unsafe_allow_html=True)
121
- cols[1].image(white_image, use_column_width="auto")
122
 
123
  print_model_list(bigcode_diagrams[0],st,True)
124
 
125
- st.divider()
126
- st.write("HuggingFace and Other Leaderboards: A Comparative Model Evaluation")
 
127
  st.caption("Only models evaluated on both leaderboards are included.")
 
128
  cols = st.columns(2)
129
 
130
  for i, img in enumerate(remaining_imgs):
@@ -151,17 +158,7 @@ st.write(
151
  </ul>
152
  """, unsafe_allow_html=True
153
  )
154
- st.divider()
155
 
156
- cols = st.columns(2)
157
- cols[0].write("TruthfulQA at 10% for HuggingFace Open LLM leaderboard by Model Size")
158
- cols[0].image(hf_llm_diagrams[3],use_column_width="auto")
159
- print_model_list(hf_llm_diagrams[3],cols[0],False)
160
-
161
- cols[1].write("ARC at 50% and MMLU at 50% for HuggingFace Open LLM leaderboard by Model Size")
162
- cols[1].image(hf_llm_diagrams[4],use_column_width="auto")
163
- print_model_list(hf_llm_diagrams[4],cols[1],False)
164
 
165
- st.divider()
166
- st.subheader('About')
167
  st.write('This meta leaderboard is built and maintained by Felix Zaslavskiy. For feedback, correction, suggestions please reach out on X at <a href="https://twitter.com/FZaslavskiy" >@FZaslavskiy</a> or here via community discussions.', unsafe_allow_html=True)
 
11
 
12
  directories = os.listdir("./data")
13
 
 
 
14
  def format_dir_date(data_dir):
15
  # Extracting date and time information from the path
16
  parsed_date = datetime.strptime(data_dir, "%Y%m%d_%H%M")
 
18
  # Formatting the parsed date
19
  return parsed_date.strftime("%b %d, %Y %H:%M")
20
 
21
+ col1, col2 = st.columns(2)
22
+
23
+ with col1:
24
+ data_dir = st.selectbox(
25
+ 'Select different data generation date',
26
+ directories,
27
+ format_func=format_dir_date,
28
+ index=len(directories)-1,
29
+ )
30
 
31
  captions_map = {
32
  "hg_average_to_agentbench_compare.png": "HF to AgentBench compare",
 
35
  "hg_average_to_mosaic_compare.png": "HF to MosaicML compare",
36
  "hg_average_to_alpacaeval_compare.png": "HF to AlpacaEval compare"
37
  }
38
+ with col2:
39
+ st.write("<div style=\"text-align: center\" >Generated on: <b>" + format_dir_date(data_dir) + "</b></div>", unsafe_allow_html=True)
40
 
 
 
41
 
42
  data_path = './data/' + data_dir
43
 
44
  imgs = glob.glob(os.path.join(data_path, '*.png'))
 
45
 
46
  # Extracting images that start with "hf_llm_diagram"
47
  hf_llm_diagrams = [img for img in imgs if 'hf_llm_diagram' in os.path.basename(img)]
 
93
  st.write(final_html, unsafe_allow_html=True)
94
 
95
 
96
+ st.header("HuggingFace Open LLM leaderboard by Model Size", divider=True)
97
  cols = st.columns(2)
98
 
99
+ cols[0].image(hf_llm_diagrams[0], caption="Main chart using all the models", use_column_width="auto")
 
 
 
100
 
101
  print_model_list(hf_llm_diagrams[0],st, True)
102
+ st.write("<nbsp/>", unsafe_allow_html=True)
103
 
104
  cols = st.columns(2)
 
105
 
106
+ cols[0].image(hf_llm_diagrams[1],caption="Other or commercially permissive licenses only", use_column_width="auto")
107
  print_model_list(hf_llm_diagrams[1],cols[0])
108
 
109
  cols[1].image(hf_llm_diagrams[2],caption="Commercially permissive license only", use_column_width="auto")
 
110
  print_model_list(hf_llm_diagrams[2],cols[1])
111
 
112
+ st.write("<nbsp/>", unsafe_allow_html=True)
113
+
114
+ cols = st.columns(2)
115
+ cols[0].image(hf_llm_diagrams[3],caption="TruthfulQA at 10% for HuggingFace Open LLM leaderboard by Model Size", use_column_width="auto")
116
+ print_model_list(hf_llm_diagrams[3],cols[0],False)
117
+
118
+ cols[1].image(hf_llm_diagrams[4],caption="ARC at 50% and MMLU at 50% for HuggingFace Open LLM leaderboard by Model Size", use_column_width="auto")
119
+ print_model_list(hf_llm_diagrams[4],cols[1],False)
120
+
121
+
122
 
123
+ st.header("Big Code Models Leaderboard", divider=True)
124
  cols = st.columns(2)
125
  cols[0].image(bigcode_diagrams[0], use_column_width="auto")
126
+
 
127
 
128
  print_model_list(bigcode_diagrams[0],st,True)
129
 
130
+
131
+
132
+ st.header("HuggingFace and Other Leaderboards: A Comparative Model Evaluation", divider=True)
133
  st.caption("Only models evaluated on both leaderboards are included.")
134
+
135
  cols = st.columns(2)
136
 
137
  for i, img in enumerate(remaining_imgs):
 
158
  </ul>
159
  """, unsafe_allow_html=True
160
  )
 
161
 
 
 
 
 
 
 
 
 
162
 
163
+ st.header('About', divider=True)
 
164
  st.write('This meta leaderboard is built and maintained by Felix Zaslavskiy. For feedback, correction, suggestions please reach out on X at <a href="https://twitter.com/FZaslavskiy" >@FZaslavskiy</a> or here via community discussions.', unsafe_allow_html=True)
requirements.txt CHANGED
@@ -0,0 +1 @@
 
 
1
+ streamlit==1.26.0