CoreyMorris committed on
Commit
3ebf7a7
1 Parent(s): 443052d

Updated with new results 11-21

Browse files
Files changed (3) hide show
  1. app.py +2 -2
  2. processed_data_2023-11-21.csv +0 -0
  3. results +1 -1
app.py CHANGED
@@ -112,7 +112,7 @@ def find_top_differences_table(df, target_model, closest_models, num_differences
112
 
113
  # st.title('Model Evaluation Results including MMLU by task')
114
  st.title('Interactive Portal for Analyzing Open Source Large Language Models')
115
- st.markdown("""***Last updated November 18th***""")
116
  st.markdown("""**Models that are suspected to have training data contaminated with evaluation data have been removed.**""")
117
  st.markdown("""
118
  This page provides a way to explore the results for individual tasks and compare models across tasks. Data for the benchmarks hellaswag, arc_challenge, and truthfulQA have also been included for comparison.
@@ -121,7 +121,7 @@ st.markdown("""
121
  """)
122
 
123
  # Load the data into memory
124
- data_path = "processed_data_2023-11-18.csv"
125
  data_df = load_csv_data(data_path)
126
  # drop the column Unnamed: 0
127
  data_df.rename(columns={'Unnamed: 0': "Model Name"}, inplace=True)
 
112
 
113
  # st.title('Model Evaluation Results including MMLU by task')
114
  st.title('Interactive Portal for Analyzing Open Source Large Language Models')
115
+ st.markdown("""***Last updated November 21th***""")
116
  st.markdown("""**Models that are suspected to have training data contaminated with evaluation data have been removed.**""")
117
  st.markdown("""
118
  This page provides a way to explore the results for individual tasks and compare models across tasks. Data for the benchmarks hellaswag, arc_challenge, and truthfulQA have also been included for comparison.
 
121
  """)
122
 
123
  # Load the data into memory
124
+ data_path = "processed_data_2023-11-21.csv"
125
  data_df = load_csv_data(data_path)
126
  # drop the column Unnamed: 0
127
  data_df.rename(columns={'Unnamed: 0': "Model Name"}, inplace=True)
processed_data_2023-11-21.csv ADDED
The diff for this file is too large to render. See raw diff
 
results CHANGED
@@ -1 +1 @@
1
- Subproject commit 0dc0ada6c97e34ae6f89354fe7fe0e3257849914
 
1
+ Subproject commit ed5d75491130aac5d145db63d458f2eaf410745a