lukecq commited on
Commit
e608ddc
1 Parent(s): 48d8b0b

update scripts

Browse files
Files changed (2) hide show
  1. app.py +12 -6
  2. results/SeaExam_results_0419.csv +0 -46
app.py CHANGED
@@ -1,12 +1,18 @@
1
  import gradio as gr
2
  import pandas as pd
3
  import os
 
4
 
5
- # # clone / pull the lmeh eval data
6
- # TOKEN = os.environ.get("TOKEN", None)
7
- # RESULTS_REPO = f"lukecq/SeaExam-results"
8
- # CACHE_PATH=os.getenv("HF_HOME", ".")
9
- # EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
 
 
 
 
 
10
 
11
  # Load the CSV file
12
  def load_csv(file_path):
@@ -14,7 +20,7 @@ def load_csv(file_path):
14
  return data
15
 
16
  # Example path to your CSV file
17
- csv_path = './results/SeaExam_results_0419.csv'
18
  data = load_csv(csv_path)
19
 
20
  def show_data():
 
1
  import gradio as gr
2
  import pandas as pd
3
  import os
4
+ from huggingface_hub import snapshot_download
5
 
6
+ # clone / pull the lmeh eval data
7
+ TOKEN = os.environ.get("TOKEN", None)
8
+ RESULTS_REPO = f"lukecq/SeaExam-results"
9
+ CACHE_PATH=os.getenv("HF_HOME", ".")
10
+ EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
11
+ print(EVAL_RESULTS_PATH)
12
+ snapshot_download(
13
+ repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset",
14
+ token=TOKEN
15
+ )
16
 
17
  # Load the CSV file
18
  def load_csv(file_path):
 
20
  return data
21
 
22
  # Example path to your CSV file
23
+ csv_path = f'{EVAL_RESULTS_PATH}/SeaExam_results_0419.csv'
24
  data = load_csv(csv_path)
25
 
26
  def show_data():
results/SeaExam_results_0419.csv DELETED
@@ -1,46 +0,0 @@
1
- ,,,,M3Exam,,,,,,
2
- Model,type,open?,shot,en,zh,id,th,vi,avg,avg_sea
3
- gpt4-1106,chat,N,0,0.877,0.789,0.649,0.69,0.708,0.742,0.682
4
- Meta-Llama-3-70B,base,Y,3,0.844,0.756,0.619,0.662,0.683,0.713,0.654
5
- Meta-Llama-3-70B-Instruct,chat,Y,3,0.863,0.694,0.63,0.643,0.684,0.703,0.652
6
- Qwen1.5-72B,base,Y,3,0.839,0.925,0.587,0.568,0.648,0.713,0.601
7
- claude-3-sonnet-20240229,chat,N,0,0.789,0.683,0.585,0.571,0.626,0.651,0.594
8
- claude-3-haiku-20240307,chat,N,0,0.79,0.652,0.563,0.573,0.631,0.642,0.589
9
- dbrx-base,base,Y,3,0.808,0.689,0.534,0.507,0.605,0.629,0.548
10
- Mixtral-8x22B-v0.1,base,Y,3,0.839,0.696,0.57,0.487,0.601,0.639,0.553
11
- SeaLLM-7B-v2.5,chat,Y,3,0.759,0.602,0.501,0.507,0.618,0.597,0.542
12
- Qwen1.5-14B,base,Y,3,0.797,0.862,0.527,0.478,0.549,0.643,0.518
13
- gemini-1.0-pro,chat,N,0,0.569,0.725,0.44,0.492,0.605,0.566,0.513
14
- gemma-7b,base,Y,3,0.731,0.528,0.465,0.463,0.597,0.557,0.508
15
- gpt-3.5-turbo-0125,chat,N,3,0.751,0.589,0.5,0.389,0.534,0.552,0.474
16
- Mixtral-8x7B-v0.1,base,Y,3,0.771,0.606,0.48,0.435,0.522,0.563,0.479
17
- Llama-2-70b-hf,base,Y,3,0.749,0.599,0.492,0.345,0.559,0.549,0.465
18
- Meta-Llama-3-8B,base,Y,3,0.7,0.54,0.427,0.454,0.509,0.526,0.463
19
- Sailor-7B-Chat,chat,Y,3,0.656,0.651,0.474,0.464,0.512,0.551,0.483
20
- gpt-3.5-turbo-0125,chat,N,0,0.756,0.606,0.493,0.397,0.529,0.556,0.473
21
- Yi-34B,base,Y,3,0.815,0.86,0.541,0.381,0.502,0.62,0.475
22
- Meta-Llama-3-8B-Instruct,chat,Y,3,0.725,0.537,0.466,0.371,0.509,0.522,0.449
23
- SeaLLM-7B-v2,chat,Y,3,0.702,0.516,0.432,0.406,0.515,0.514,0.451
24
- Sailor-7B,base,Y,3,0.611,0.632,0.443,0.41,0.499,0.519,0.451
25
- Qwen1.5-7B-Chat,chat,Y,3,0.646,0.627,0.43,0.398,0.492,0.519,0.44
26
- Yi-9B,base,Y,3,0.775,0.792,0.492,0.357,0.453,0.574,0.434
27
- Qwen1.5-7B,base,Y,3,0.721,0.811,0.441,0.361,0.45,0.557,0.417
28
- Mistral-7B-v0.1,base,Y,3,0.677,0.497,0.422,0.346,0.409,0.47,0.392
29
- gemma-7b-it,chat,Y,3,0.622,0.427,0.373,0.321,0.467,0.442,0.387
30
- Mistral-7B-Instruct-v0.2,chat,Y,3,0.657,0.495,0.404,0.304,0.399,0.452,0.369
31
- Qwen1.5-4B,base,Y,3,0.664,0.772,0.351,0.319,0.389,0.499,0.353
32
- Yi-6B,base,Y,3,0.704,0.809,0.411,0.298,0.37,0.519,0.36
33
- Llama-2-13b-hf,base,Y,3,0.605,0.365,0.384,0.288,0.409,0.41,0.36
34
- Llama-2-13b-chat-hf,chat,Y,3,0.589,0.382,0.372,0.288,0.39,0.404,0.35
35
- Qwen1.5-MoE-A2.7B,base,Y,3,0.628,0.789,0.366,0.254,0.402,0.488,0.341
36
- gemma-2b-it,chat,Y,3,0.439,0.377,0.316,0.284,0.357,0.355,0.319
37
- Llama-2-7b-chat-hf,chat,Y,3,0.566,0.326,0.341,0.268,0.34,0.368,0.317
38
- bloomz-7b1,chat,Y,3,0.431,0.377,0.361,0.256,0.356,0.356,0.325
39
- gemma-2b,base,Y,3,0.417,0.275,0.304,0.286,0.316,0.32,0.302
40
- Llama-2-7b-hf,base,Y,3,0.491,0.323,0.308,0.263,0.317,0.341,0.296
41
- Qwen1.5-1.8B,base,Y,3,0.546,0.713,0.326,0.244,0.324,0.43,0.298
42
- Qwen1.5-0.5B,base,Y,3,0.446,0.61,0.294,0.26,0.297,0.381,0.284
43
- sea-lion-7b-instruct,chat,Y,3,0.27,0.273,0.287,0.264,0.269,0.273,0.273
44
- sea-lion-7b,base,Y,3,0.245,0.228,0.254,0.264,0.241,0.247,0.253
45
- phi-2,base,Y,3,0.582,0.286,0.295,0.21,0.269,0.328,0.258
46
- bloom-7b1,base,Y,3,0.227,0.183,0.253,0.24,0.243,0.229,0.246