Jr23xd23 commited on
Commit
1ae2939
·
verified ·
1 Parent(s): 47b66db

Upload benchmark_data.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. benchmark_data.json +73 -0
benchmark_data.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "methodology": "lm-evaluation-harness v0.4.11, apply_chat_template=True, limit=200, acc_norm preferred",
3
+ "tasks": [
4
+ "copa_ar",
5
+ "arabic_mt_hellaswag",
6
+ "arabic_leaderboard_arabic_mmlu"
7
+ ],
8
+ "models": [
9
+ {
10
+ "name": "RightNow-Arabic-0.5B-Turbo",
11
+ "params_B": 0.518,
12
+ "copa_ar": 58.4,
13
+ "arabic_mt_hellaswag": 26.0,
14
+ "arabic_leaderboard_arabic_mmlu": 23.2,
15
+ "mean": 35.87,
16
+ "category": "ours"
17
+ },
18
+ {
19
+ "name": "Qwen2.5-0.5B-Instruct",
20
+ "params_B": 0.494,
21
+ "copa_ar": 53.9,
22
+ "arabic_mt_hellaswag": 22.5,
23
+ "arabic_leaderboard_arabic_mmlu": 26.0,
24
+ "mean": 34.13,
25
+ "category": "small"
26
+ },
27
+ {
28
+ "name": "Falcon-H1-0.5B-Instruct",
29
+ "params_B": 0.524,
30
+ "copa_ar": 44.9,
31
+ "arabic_mt_hellaswag": 23.0,
32
+ "arabic_leaderboard_arabic_mmlu": 24.2,
33
+ "mean": 30.7,
34
+ "category": "small"
35
+ },
36
+ {
37
+ "name": "Falcon-H1-1.5B-Instruct",
38
+ "params_B": 1.5,
39
+ "copa_ar": 58.4,
40
+ "arabic_mt_hellaswag": 27.5,
41
+ "arabic_leaderboard_arabic_mmlu": 32.7,
42
+ "mean": 39.53,
43
+ "category": "medium"
44
+ },
45
+ {
46
+ "name": "AceGPT-7B-chat",
47
+ "params_B": 7.0,
48
+ "copa_ar": 69.7,
49
+ "arabic_mt_hellaswag": 27.0,
50
+ "arabic_leaderboard_arabic_mmlu": 35.0,
51
+ "mean": 43.9,
52
+ "category": "large"
53
+ },
54
+ {
55
+ "name": "ALLaM-7B-Instruct",
56
+ "params_B": 7.0,
57
+ "copa_ar": 68.5,
58
+ "arabic_mt_hellaswag": 29.0,
59
+ "arabic_leaderboard_arabic_mmlu": 52.2,
60
+ "mean": 49.9,
61
+ "category": "large"
62
+ },
63
+ {
64
+ "name": "SILMA-9B-Instruct",
65
+ "params_B": 9.0,
66
+ "copa_ar": 69.7,
67
+ "arabic_mt_hellaswag": 38.0,
68
+ "arabic_leaderboard_arabic_mmlu": 52.9,
69
+ "mean": 53.53,
70
+ "category": "xlarge"
71
+ }
72
+ ]
73
+ }