Spaces:

felixz
/

meta_open_llm_leaderboard

Running

App Files Files Community

felix committed on Sep 9, 2023

Commit

e254e41

•

1 Parent(s): 8b8ceb9

updates

Browse files

Files changed (3) hide show

README.md +3 -1
app.py +32 -35
requirements.txt +1 -0

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 🏆
 colorFrom: yellow
 colorTo: gray
 sdk: streamlit
-sdk_version: 1.25.0
 app_file: app.py
 pinned: false
 license: apache-2.0
@@ -13,4 +13,6 @@ license: apache-2.0
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 TODO:

 colorFrom: yellow
 colorTo: gray
 sdk: streamlit
+sdk_version: 1.26.0
 app_file: app.py
 pinned: false
 license: apache-2.0
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 TODO:
+Add feedback form on the bottom?

app.py CHANGED Viewed

@@ -11,8 +11,6 @@ st.title('Meta Open LLM leaderboard')
 directories = os.listdir("./data")
-#data_dir = directories[0]
 def format_dir_date(data_dir):
     # Extracting date and time information from the path
     parsed_date = datetime.strptime(data_dir, "%Y%m%d_%H%M")
@@ -20,12 +18,15 @@ def format_dir_date(data_dir):
     # Formatting the parsed date
     return  parsed_date.strftime("%b %d, %Y %H:%M")
-data_dir = st.selectbox(
-    'Select different Date',
-    directories,
-    format_func=format_dir_date,
-    index=len(directories)-1
-    )
 captions_map = {
     "hg_average_to_agentbench_compare.png": "HF to AgentBench compare",
@@ -34,14 +35,13 @@ captions_map = {
     "hg_average_to_mosaic_compare.png": "HF to MosaicML compare",
     "hg_average_to_alpacaeval_compare.png": "HF to AlpacaEval compare"
 }
-st.write("Generated on: <b>" + format_dir_date(data_dir) + "</b>", unsafe_allow_html=True)
-st.divider()
 data_path = './data/' + data_dir
 imgs = glob.glob(os.path.join(data_path, '*.png'))
-white_image = './white_image.png'
 # Extracting images that start with "hf_llm_diagram"
 hf_llm_diagrams = [img for img in imgs if 'hf_llm_diagram' in os.path.basename(img)]
@@ -93,38 +93,45 @@ def print_model_list(file_name, st, split_into_two=False):
             st.write(final_html, unsafe_allow_html=True)
 cols = st.columns(2)
-cols[0].write("HuggingFace Open LLM leaderboard by Model Size")
-cols[0].image(hf_llm_diagrams[0], use_column_width="auto")
-cols[1].write("<nbsp/>", unsafe_allow_html=True)
-cols[1].image(white_image, use_column_width="auto")
 print_model_list(hf_llm_diagrams[0],st, True)
-st.divider()
 cols = st.columns(2)
-cols[0].image(hf_llm_diagrams[1],caption="Other or commercially permissive licenses only", use_column_width="auto")
 print_model_list(hf_llm_diagrams[1],cols[0])
 cols[1].image(hf_llm_diagrams[2],caption="Commercially permissive license only", use_column_width="auto")
 print_model_list(hf_llm_diagrams[2],cols[1])
-st.divider()
-st.write("Big Code Models Leaderboard")
 cols = st.columns(2)
 cols[0].image(bigcode_diagrams[0], use_column_width="auto")
-cols[1].write("<nbsp/>", unsafe_allow_html=True)
-cols[1].image(white_image, use_column_width="auto")
 print_model_list(bigcode_diagrams[0],st,True)
-st.divider()
-st.write("HuggingFace and Other Leaderboards: A Comparative Model Evaluation")
 st.caption("Only models evaluated on both leaderboards are included.")
 cols = st.columns(2)
 for i, img in enumerate(remaining_imgs):
@@ -151,17 +158,7 @@ st.write(
         </ul>
     """, unsafe_allow_html=True
 )
-st.divider()
-cols = st.columns(2)
-cols[0].write("TruthfulQA at 10% for HuggingFace Open LLM leaderboard by Model Size")
-cols[0].image(hf_llm_diagrams[3],use_column_width="auto")
-print_model_list(hf_llm_diagrams[3],cols[0],False)
-cols[1].write("ARC at 50% and MMLU at 50% for HuggingFace Open LLM leaderboard by Model Size")
-cols[1].image(hf_llm_diagrams[4],use_column_width="auto")
-print_model_list(hf_llm_diagrams[4],cols[1],False)
-st.divider()
-st.subheader('About')
 st.write('This meta leaderboard is built and maintained by Felix Zaslavskiy. For feedback, correction, suggestions please reach out on X at <a href="https://twitter.com/FZaslavskiy" >@FZaslavskiy</a> or here via community discussions.', unsafe_allow_html=True)

 directories = os.listdir("./data")
 def format_dir_date(data_dir):
     # Extracting date and time information from the path
     parsed_date = datetime.strptime(data_dir, "%Y%m%d_%H%M")
     # Formatting the parsed date
     return  parsed_date.strftime("%b %d, %Y %H:%M")
+col1, col2 = st.columns(2)
+with col1:
+    data_dir = st.selectbox(
+        'Select different data generation date',
+        directories,
+        format_func=format_dir_date,
+        index=len(directories)-1,
+        )
 captions_map = {
     "hg_average_to_agentbench_compare.png": "HF to AgentBench compare",
     "hg_average_to_mosaic_compare.png": "HF to MosaicML compare",
     "hg_average_to_alpacaeval_compare.png": "HF to AlpacaEval compare"
 }
+with col2:
+    st.write("<div style=\"text-align: center\" >Generated on: <b>" + format_dir_date(data_dir) + "</b></div>", unsafe_allow_html=True)
 data_path = './data/' + data_dir
 imgs = glob.glob(os.path.join(data_path, '*.png'))
 # Extracting images that start with "hf_llm_diagram"
 hf_llm_diagrams = [img for img in imgs if 'hf_llm_diagram' in os.path.basename(img)]
             st.write(final_html, unsafe_allow_html=True)
+st.header("HuggingFace Open LLM leaderboard by Model Size", divider=True)
 cols = st.columns(2)
+cols[0].image(hf_llm_diagrams[0], caption="Main chart using all the models", use_column_width="auto")
 print_model_list(hf_llm_diagrams[0],st, True)
+st.write("<nbsp/>", unsafe_allow_html=True)
 cols = st.columns(2)
+cols[0].image(hf_llm_diagrams[1],caption="Other or commercially permissive licenses only", use_column_width="auto")
 print_model_list(hf_llm_diagrams[1],cols[0])
 cols[1].image(hf_llm_diagrams[2],caption="Commercially permissive license only", use_column_width="auto")
 print_model_list(hf_llm_diagrams[2],cols[1])
+st.write("<nbsp/>", unsafe_allow_html=True)
+cols = st.columns(2)
+cols[0].image(hf_llm_diagrams[3],caption="TruthfulQA at 10% for HuggingFace Open LLM leaderboard by Model Size", use_column_width="auto")
+print_model_list(hf_llm_diagrams[3],cols[0],False)
+cols[1].image(hf_llm_diagrams[4],caption="ARC at 50% and MMLU at 50% for HuggingFace Open LLM leaderboard by Model Size", use_column_width="auto")
+print_model_list(hf_llm_diagrams[4],cols[1],False)
+st.header("Big Code Models Leaderboard", divider=True)
 cols = st.columns(2)
 cols[0].image(bigcode_diagrams[0], use_column_width="auto")
 print_model_list(bigcode_diagrams[0],st,True)
+st.header("HuggingFace and Other Leaderboards: A Comparative Model Evaluation", divider=True)
 st.caption("Only models evaluated on both leaderboards are included.")
 cols = st.columns(2)
 for i, img in enumerate(remaining_imgs):
         </ul>
     """, unsafe_allow_html=True
 )
+st.header('About', divider=True)
 st.write('This meta leaderboard is built and maintained by Felix Zaslavskiy. For feedback, correction, suggestions please reach out on X at <a href="https://twitter.com/FZaslavskiy" >@FZaslavskiy</a> or here via community discussions.', unsafe_allow_html=True)

requirements.txt CHANGED Viewed

	@@ -0,0 +1 @@


1	+ streamlit==1.26.0