lukecq committed on
Commit
01bb2e0
1 Parent(s): cc86cb5

update the interface

Browse files
src/display/about.py CHANGED
@@ -29,7 +29,17 @@ Also check the [SeaBench leaderboard](https://huggingface.co/spaces/SeaLLMs/SeaB
29
 
30
  # Which evaluations are you running? how can people reproduce what you have?
31
  LLM_BENCHMARKS_TEXT = f"""
32
- ## How it works
 
 
 
 
 
 
 
 
 
 
33
 
34
  ## Reproducibility
35
  To reproduce our results, here are the commands you can run:
 
29
 
30
  # Which evaluations are you running? how can people reproduce what you have?
31
  LLM_BENCHMARKS_TEXT = f"""
32
+ # About
33
+ Even though large language models (LLMs) have shown impressive performance on various benchmarks for English, their performance on Southeast Asian (SEA) languages is still underexplored. This leaderboard aims to evaluate LLMs on exam-type benchmarks for SEA languages, focusing on world knowledge and reasoning abilities.
34
+
35
+ ## Datasets
36
+ The leaderboard evaluates models on the following tasks:
37
+ - **M3Exam**:
38
+ - **MMLU**:
39
+
40
+ ## Evaluation Criteria
41
+
42
+ ## Results
43
 
44
  ## Reproducibility
45
  To reproduce our results, here are the commands you can run:
src/leaderboard/load_results.py CHANGED
@@ -25,7 +25,7 @@ def load_data(data_path):
25
  df = pd.read_csv(data_path, skiprows=1, header=0).dropna()
26
 
27
  columns = ['Model', 'type', 'open?', 'shot', 'en', 'zh', 'id', 'th', 'vi', 'avg', 'avg_sea']
28
- columns_sorted = ['rank','type', 'Model', 'open?', 'shot', 'avg', 'avg_sea', 'en', 'zh', 'id', 'th', 'vi']
29
 
30
  # Splitting into three separate DataFrames based on the groups M3Exam and MMLU and average
31
  df_m3exam = df.iloc[:, :11] # M3Exam columns
@@ -54,15 +54,15 @@ def load_data(data_path):
54
  df_mmlu = df_mmlu.sort_values(by='avg_sea', ascending=False)
55
  df_avg = df_avg.sort_values(by='avg_sea', ascending=False)
56
 
57
- # change the column name from 'avg_sea' to 'avg_sea⬆️'
58
- df_m3exam = df_m3exam.rename(columns={'avg_sea': 'avg_sea⬆️'})
59
- df_mmlu = df_mmlu.rename(columns={'avg_sea': 'avg_sea⬆️'})
60
- df_avg = df_avg.rename(columns={'avg_sea': 'avg_sea⬆️'})
61
 
62
  # map the raw 'type' values ('base', 'chat') to their emoji display labels
63
- df_m3exam['type'] = df_m3exam['type'].map({'base': '🟢', 'chat': '🔶'})
64
- df_mmlu['type'] = df_mmlu['type'].map({'base': '🟢', 'chat': '🔶'})
65
- df_avg['type'] = df_avg['type'].map({'base': '🟢', 'chat': '🔶'})
66
 
67
  return df_m3exam, df_mmlu, df_avg
68
 
 
25
  df = pd.read_csv(data_path, skiprows=1, header=0).dropna()
26
 
27
  columns = ['Model', 'type', 'open?', 'shot', 'en', 'zh', 'id', 'th', 'vi', 'avg', 'avg_sea']
28
+ columns_sorted = ['rank','type', 'Model', 'avg_sea', 'en', 'zh', 'id', 'th', 'vi', 'avg', 'open?',]
29
 
30
  # Splitting into three separate DataFrames based on the groups M3Exam and MMLU and average
31
  df_m3exam = df.iloc[:, :11] # M3Exam columns
 
54
  df_mmlu = df_mmlu.sort_values(by='avg_sea', ascending=False)
55
  df_avg = df_avg.sort_values(by='avg_sea', ascending=False)
56
 
57
+ # change the column name from 'avg_sea' to 'avg_sea ⬇️'
58
+ df_m3exam = df_m3exam.rename(columns={'avg_sea': 'avg_sea ⬇️'})
59
+ df_mmlu = df_mmlu.rename(columns={'avg_sea': 'avg_sea ⬇️'})
60
+ df_avg = df_avg.rename(columns={'avg_sea': 'avg_sea ⬇️'})
61
 
62
  # map the raw 'type' values ('base', 'chat') to their emoji display labels
63
+ df_m3exam['type'] = df_m3exam['type'].map({'base': '🟢base', 'chat': '🔶chat'})
64
+ df_mmlu['type'] = df_mmlu['type'].map({'base': '🟢base', 'chat': '🔶chat'})
65
+ df_avg['type'] = df_avg['type'].map({'base': '🟢base', 'chat': '🔶chat'})
66
 
67
  return df_m3exam, df_mmlu, df_avg
68