Spaces:
Running
Running
update the interface
Browse files- src/display/about.py +11 -1
- src/leaderboard/load_results.py +8 -8
src/display/about.py
CHANGED
@@ -29,7 +29,17 @@ Also check the [SeaBench leaderboard](https://huggingface.co/spaces/SeaLLMs/SeaB
|
|
29 |
|
30 |
# Which evaluations are you running? how can people reproduce what you have?
|
31 |
LLM_BENCHMARKS_TEXT = f"""
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
## Reproducibility
|
35 |
To reproduce our results, here are the commands you can run:
|
|
|
29 |
|
30 |
# Which evaluations are you running? how can people reproduce what you have?
|
31 |
LLM_BENCHMARKS_TEXT = f"""
|
32 |
+
# About
|
33 |
+
Even though large language models (LLMs) have shown impressive performance on various benchmarks for English, their performance on Southeast Asian (SEA) languages is still underexplored. This leaderboard aims to evaluate LLMs on exam-type benchmarks for SEA languages, focusing on world knowledge and reasoning abilities.
|
34 |
+
|
35 |
+
## Datasets
|
36 |
+
The leaderboard evaluates models on the following tasks:
|
37 |
+
- **M3Exam**:
|
38 |
+
- **MMLU**:
|
39 |
+
|
40 |
+
## Evaluation Criteria
|
41 |
+
|
42 |
+
## Results
|
43 |
|
44 |
## Reproducibility
|
45 |
To reproduce our results, here are the commands you can run:
|
src/leaderboard/load_results.py
CHANGED
@@ -25,7 +25,7 @@ def load_data(data_path):
|
|
25 |
df = pd.read_csv(data_path, skiprows=1, header=0).dropna()
|
26 |
|
27 |
columns = ['Model', 'type', 'open?', 'shot', 'en', 'zh', 'id', 'th', 'vi', 'avg', 'avg_sea']
|
28 |
-
columns_sorted = ['rank','type', 'Model',
|
29 |
|
30 |
# Splitting into three separate DataFrames based on the groups M3Exam and MMLU and average
|
31 |
df_m3exam = df.iloc[:, :11] # M3Exam columns
|
@@ -54,15 +54,15 @@ def load_data(data_path):
|
|
54 |
df_mmlu = df_mmlu.sort_values(by='avg_sea', ascending=False)
|
55 |
df_avg = df_avg.sort_values(by='avg_sea', ascending=False)
|
56 |
|
57 |
-
# change the column name from 'avg_sea' to 'avg_sea
|
58 |
-
df_m3exam = df_m3exam.rename(columns={'avg_sea': 'avg_sea
|
59 |
-
df_mmlu = df_mmlu.rename(columns={'avg_sea': 'avg_sea
|
60 |
-
df_avg = df_avg.rename(columns={'avg_sea': 'avg_sea
|
61 |
|
62 |
# map the values in the 'type' column to the following values: {'base': 'Base', 'chat': 'Chat'}
|
63 |
-
df_m3exam['type'] = df_m3exam['type'].map({'base': '🟢', 'chat': '🔶'})
|
64 |
-
df_mmlu['type'] = df_mmlu['type'].map({'base': '🟢', 'chat': '🔶'})
|
65 |
-
df_avg['type'] = df_avg['type'].map({'base': '🟢', 'chat': '🔶'})
|
66 |
|
67 |
return df_m3exam, df_mmlu, df_avg
|
68 |
|
|
|
25 |
df = pd.read_csv(data_path, skiprows=1, header=0).dropna()
|
26 |
|
27 |
columns = ['Model', 'type', 'open?', 'shot', 'en', 'zh', 'id', 'th', 'vi', 'avg', 'avg_sea']
|
28 |
+
columns_sorted = ['rank','type', 'Model', 'avg_sea', 'en', 'zh', 'id', 'th', 'vi', 'avg', 'open?',]
|
29 |
|
30 |
# Splitting into three separate DataFrames based on the groups M3Exam and MMLU and average
|
31 |
df_m3exam = df.iloc[:, :11] # M3Exam columns
|
|
|
54 |
df_mmlu = df_mmlu.sort_values(by='avg_sea', ascending=False)
|
55 |
df_avg = df_avg.sort_values(by='avg_sea', ascending=False)
|
56 |
|
57 |
+
# change the column name from 'avg_sea' to 'avg_sea ⬇️'
|
58 |
+
df_m3exam = df_m3exam.rename(columns={'avg_sea': 'avg_sea ⬇️'})
|
59 |
+
df_mmlu = df_mmlu.rename(columns={'avg_sea': 'avg_sea ⬇️'})
|
60 |
+
df_avg = df_avg.rename(columns={'avg_sea': 'avg_sea ⬇️'})
|
61 |
|
62 |
# map the values in the 'type' column to the following values: {'base': '🟢base', 'chat': '🔶chat'}
|
63 |
+
df_m3exam['type'] = df_m3exam['type'].map({'base': '🟢base', 'chat': '🔶chat'})
|
64 |
+
df_mmlu['type'] = df_mmlu['type'].map({'base': '🟢base', 'chat': '🔶chat'})
|
65 |
+
df_avg['type'] = df_avg['type'].map({'base': '🟢base', 'chat': '🔶chat'})
|
66 |
|
67 |
return df_m3exam, df_mmlu, df_avg
|
68 |
|