zhuohan-7 commited on
Commit
fb8024c
1 Parent(s): a12f124

Upload folder using huggingface_hub

Browse files
Files changed (33) hide show
  1. results/cross_lingual/zero_shot/cross_logiqa.csv +1 -0
  2. results/cross_lingual/zero_shot/cross_mmlu.csv +1 -0
  3. results/cross_lingual/zero_shot/cross_xquad.csv +1 -0
  4. results/cultural_reasoning/zero_shot/cn_eval.csv +1 -0
  5. results/cultural_reasoning/zero_shot/ph_eval.csv +1 -0
  6. results/cultural_reasoning/zero_shot/sg_eval.csv +1 -0
  7. results/cultural_reasoning/zero_shot/sg_eval_v1_cleaned.csv +1 -0
  8. results/cultural_reasoning/zero_shot/sg_eval_v2_mcq.csv +1 -0
  9. results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv +1 -0
  10. results/cultural_reasoning/zero_shot/us_eval.csv +1 -0
  11. results/dialogue/zero_shot/dialogsum.csv +1 -0
  12. results/dialogue/zero_shot/dream.csv +1 -0
  13. results/dialogue/zero_shot/samsum.csv +1 -0
  14. results/emotion/zero_shot/ind_emotion.csv +1 -0
  15. results/emotion/zero_shot/sst2.csv +1 -0
  16. results/flores_translation/zero_shot/ind2eng.csv +1 -0
  17. results/flores_translation/zero_shot/vie2eng.csv +1 -0
  18. results/flores_translation/zero_shot/zho2eng.csv +1 -0
  19. results/flores_translation/zero_shot/zsm2eng.csv +1 -0
  20. results/fundamental_nlp_tasks/zero_shot/c3.csv +1 -0
  21. results/fundamental_nlp_tasks/zero_shot/cola.csv +1 -0
  22. results/fundamental_nlp_tasks/zero_shot/mnli.csv +1 -0
  23. results/fundamental_nlp_tasks/zero_shot/mrpc.csv +1 -0
  24. results/fundamental_nlp_tasks/zero_shot/ocnli.csv +1 -0
  25. results/fundamental_nlp_tasks/zero_shot/qnli.csv +1 -0
  26. results/fundamental_nlp_tasks/zero_shot/qqp.csv +1 -0
  27. results/fundamental_nlp_tasks/zero_shot/rte.csv +1 -0
  28. results/fundamental_nlp_tasks/zero_shot/wnli.csv +1 -0
  29. results/general_reasoning/zero_shot/c_eval.csv +1 -0
  30. results/general_reasoning/zero_shot/cmmlu.csv +1 -0
  31. results/general_reasoning/zero_shot/indommlu.csv +1 -0
  32. results/general_reasoning/zero_shot/mmlu.csv +1 -0
  33. results/general_reasoning/zero_shot/zbench.csv +1 -0
results/cross_lingual/zero_shot/cross_logiqa.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.5551948051948051,0.5142857142857142,0.5339578453833284,0.60
17
  Qwen2_5_72B_Instruct,0.7248376623376623,0.6852272727272727,0.7044761161663122,0.8011363636363636,0.7954545454545454,0.7272727272727273,0.6704545454545454,0.7159090909090909,0.7159090909090909,0.6477272727272727
18
  gemma-2-9b-it,0.6185064935064934,0.5592532467532466,0.5873893507784849,0.6647727272727273,0.6761363636363636,0.5625,0.6193181818181818,0.5795454545454546,0.6420454545454546,0.5852272727272727
19
  Meta-Llama-3-70B-Instruct,0.6306818181818182,0.6186688311688312,0.6246175698800746,0.7102272727272727,0.6875,0.6420454545454546,0.6193181818181818,0.6022727272727273,0.6136363636363636,0.5397727272727273
 
20
  Qwen2_5_14B_Instruct,0.6436688311688312,0.5938311688311688,0.6177464473895627,0.75,0.7386363636363636,0.6306818181818182,0.6420454545454546,0.6136363636363636,0.5965909090909091,0.5340909090909091
21
  sg_llama3_8192_8b,0.49594155844155846,0.468831168831169,0.4820054587349027,0.5795454545454546,0.48295454545454547,0.5397727272727273,0.48863636363636365,0.5,0.48295454545454547,0.3977272727272727
22
  sg_llama3_70b_inst,0.6217532467532468,0.5629870129870129,0.590912649920049,0.7272727272727273,0.6590909090909091,0.6477272727272727,0.6079545454545454,0.6136363636363636,0.5795454545454546,0.5170454545454546
 
17
  Qwen2_5_72B_Instruct,0.7248376623376623,0.6852272727272727,0.7044761161663122,0.8011363636363636,0.7954545454545454,0.7272727272727273,0.6704545454545454,0.7159090909090909,0.7159090909090909,0.6477272727272727
18
  gemma-2-9b-it,0.6185064935064934,0.5592532467532466,0.5873893507784849,0.6647727272727273,0.6761363636363636,0.5625,0.6193181818181818,0.5795454545454546,0.6420454545454546,0.5852272727272727
19
  Meta-Llama-3-70B-Instruct,0.6306818181818182,0.6186688311688312,0.6246175698800746,0.7102272727272727,0.6875,0.6420454545454546,0.6193181818181818,0.6022727272727273,0.6136363636363636,0.5397727272727273
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.45860389610389607,0.46087662337662333,0.4597374508818368,0.5568181818181818,0.48295454545454547,0.4659090909090909,0.4431818181818182,0.4431818181818182,0.4318181818181818,0.38636363636363635
21
  Qwen2_5_14B_Instruct,0.6436688311688312,0.5938311688311688,0.6177464473895627,0.75,0.7386363636363636,0.6306818181818182,0.6420454545454546,0.6136363636363636,0.5965909090909091,0.5340909090909091
22
  sg_llama3_8192_8b,0.49594155844155846,0.468831168831169,0.4820054587349027,0.5795454545454546,0.48295454545454547,0.5397727272727273,0.48863636363636365,0.5,0.48295454545454547,0.3977272727272727
23
  sg_llama3_70b_inst,0.6217532467532468,0.5629870129870129,0.590912649920049,0.7272727272727273,0.6590909090909091,0.6477272727272727,0.6079545454545454,0.6136363636363636,0.5795454545454546,0.5170454545454546
results/cross_lingual/zero_shot/cross_mmlu.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.6628571428571429,0.6135238095238095,0.6372370860992635,0.74
17
  Qwen2_5_72B_Instruct,0.8123809523809525,0.8140952380952383,0.8132371917701643,0.8533333333333334,0.8333333333333334,0.84,0.7933333333333333,0.8066666666666666,0.7733333333333333,0.7866666666666666
18
  gemma-2-9b-it,0.7161904761904762,0.7163809523809525,0.7162857015727578,0.7733333333333333,0.74,0.7066666666666667,0.64,0.7266666666666667,0.6933333333333334,0.7333333333333333
19
  Meta-Llama-3-70B-Instruct,0.758095238095238,0.7316190476190477,0.7446218665971989,0.7933333333333333,0.7466666666666667,0.7733333333333333,0.7466666666666667,0.7733333333333333,0.7333333333333333,0.74
 
20
  Qwen2_5_14B_Instruct,0.7266666666666666,0.680952380952381,0.7030672078887086,0.78,0.7533333333333333,0.7533333333333333,0.7266666666666667,0.7466666666666667,0.68,0.6466666666666666
21
  sg_llama3_8192_8b,0.5876190476190476,0.5207619047619048,0.5521740766611207,0.7,0.58,0.6333333333333333,0.5466666666666666,0.5533333333333333,0.5666666666666667,0.5333333333333333
22
  sg_llama3_70b_inst,0.7342857142857142,0.7079999999999999,0.7209033280007295,0.82,0.6866666666666666,0.7333333333333333,0.6933333333333334,0.78,0.7266666666666667,0.7
 
17
  Qwen2_5_72B_Instruct,0.8123809523809525,0.8140952380952383,0.8132371917701643,0.8533333333333334,0.8333333333333334,0.84,0.7933333333333333,0.8066666666666666,0.7733333333333333,0.7866666666666666
18
  gemma-2-9b-it,0.7161904761904762,0.7163809523809525,0.7162857015727578,0.7733333333333333,0.74,0.7066666666666667,0.64,0.7266666666666667,0.6933333333333334,0.7333333333333333
19
  Meta-Llama-3-70B-Instruct,0.758095238095238,0.7316190476190477,0.7446218665971989,0.7933333333333333,0.7466666666666667,0.7733333333333333,0.7466666666666667,0.7733333333333333,0.7333333333333333,0.74
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.5447619047619048,0.5127619047619049,0.5282777548560428,0.6533333333333333,0.5333333333333333,0.5066666666666667,0.54,0.5466666666666666,0.5133333333333333,0.52
21
  Qwen2_5_14B_Instruct,0.7266666666666666,0.680952380952381,0.7030672078887086,0.78,0.7533333333333333,0.7533333333333333,0.7266666666666667,0.7466666666666667,0.68,0.6466666666666666
22
  sg_llama3_8192_8b,0.5876190476190476,0.5207619047619048,0.5521740766611207,0.7,0.58,0.6333333333333333,0.5466666666666666,0.5533333333333333,0.5666666666666667,0.5333333333333333
23
  sg_llama3_70b_inst,0.7342857142857142,0.7079999999999999,0.7209033280007295,0.82,0.6866666666666666,0.7333333333333333,0.6933333333333334,0.78,0.7266666666666667,0.7
results/cross_lingual/zero_shot/cross_xquad.csv CHANGED
@@ -15,6 +15,7 @@ SeaLLMs-v3-7B-Chat,0.9403361344537815,0.917016806722689,0.9285300818164836,0.953
15
  Qwen2_5_72B_Instruct,0.9682773109243697,0.9632352941176471,0.9657497216354985,0.9714285714285714,0.9638655462184874,0.9680672268907563,0.9697478991596639,,,
16
  gemma-2-9b-it,0.9567226890756303,0.9350840336134454,0.9457796088507574,0.9663865546218487,0.9411764705882353,0.9588235294117647,0.9605042016806723,,,
17
  Meta-Llama-3-70B-Instruct,0.9592436974789916,0.9422268907563025,0.9506591499208973,0.9714285714285714,0.9403361344537815,0.9596638655462185,0.965546218487395,,,
 
18
  Qwen2_5_14B_Instruct,0.9581932773109244,0.9474789915966386,0.9528060148705768,0.965546218487395,0.9529411764705882,0.9571428571428572,0.9571428571428572,,,
19
  sg_llama3_8192_8b,0.899579831932773,0.8611344537815127,0.8799374134072335,0.9201680672268907,0.8831932773109243,0.9117647058823529,0.8831932773109243,,,
20
  sg_llama3_70b_inst,0.9552521008403361,0.9453781512605042,0.9502894779607259,0.9663865546218487,0.9436974789915966,0.957983193277311,0.9529411764705882,,,
 
15
  Qwen2_5_72B_Instruct,0.9682773109243697,0.9632352941176471,0.9657497216354985,0.9714285714285714,0.9638655462184874,0.9680672268907563,0.9697478991596639,,,
16
  gemma-2-9b-it,0.9567226890756303,0.9350840336134454,0.9457796088507574,0.9663865546218487,0.9411764705882353,0.9588235294117647,0.9605042016806723,,,
17
  Meta-Llama-3-70B-Instruct,0.9592436974789916,0.9422268907563025,0.9506591499208973,0.9714285714285714,0.9403361344537815,0.9596638655462185,0.965546218487395,,,
18
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.9382352941176471,0.9071428571428571,0.9224271402050233,0.9445378151260504,0.9369747899159664,0.9445378151260504,0.926890756302521,,,
19
  Qwen2_5_14B_Instruct,0.9581932773109244,0.9474789915966386,0.9528060148705768,0.965546218487395,0.9529411764705882,0.9571428571428572,0.9571428571428572,,,
20
  sg_llama3_8192_8b,0.899579831932773,0.8611344537815127,0.8799374134072335,0.9201680672268907,0.8831932773109243,0.9117647058823529,0.8831932773109243,,,
21
  sg_llama3_70b_inst,0.9552521008403361,0.9453781512605042,0.9502894779607259,0.9663865546218487,0.9436974789915966,0.957983193277311,0.9529411764705882,,,
results/cultural_reasoning/zero_shot/cn_eval.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.819047619047619
17
  Qwen2_5_72B_Instruct,0.8761904761904762
18
  gemma-2-9b-it,0.580952380952381
19
  Meta-Llama-3-70B-Instruct,0.5333333333333333
 
20
  Qwen2_5_14B_Instruct,0.8285714285714286
21
  sg_llama3_8192_8b,0.47619047619047616
22
  sg_llama3_70b_inst,0.5523809523809524
 
17
  Qwen2_5_72B_Instruct,0.8761904761904762
18
  gemma-2-9b-it,0.580952380952381
19
  Meta-Llama-3-70B-Instruct,0.5333333333333333
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.49523809523809526
21
  Qwen2_5_14B_Instruct,0.8285714285714286
22
  sg_llama3_8192_8b,0.47619047619047616
23
  sg_llama3_70b_inst,0.5523809523809524
results/cultural_reasoning/zero_shot/ph_eval.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.47
17
  Qwen2_5_72B_Instruct,0.72
18
  gemma-2-9b-it,0.58
19
  Meta-Llama-3-70B-Instruct,0.63
 
20
  Qwen2_5_14B_Instruct,0.6
21
  sg_llama3_8192_8b,0.57
22
  sg_llama3_70b_inst,0.69
 
17
  Qwen2_5_72B_Instruct,0.72
18
  gemma-2-9b-it,0.58
19
  Meta-Llama-3-70B-Instruct,0.63
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.52
21
  Qwen2_5_14B_Instruct,0.6
22
  sg_llama3_8192_8b,0.57
23
  sg_llama3_70b_inst,0.69
results/cultural_reasoning/zero_shot/sg_eval.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.7184466019417476
17
  Qwen2_5_72B_Instruct,0.7669902912621359
18
  gemma-2-9b-it,0.6699029126213593
19
  Meta-Llama-3-70B-Instruct,0.7087378640776699
 
20
  Qwen2_5_14B_Instruct,0.7669902912621359
21
  sg_llama3_8192_8b,0.5533980582524272
22
  sg_llama3_70b_inst,0.6699029126213593
 
17
  Qwen2_5_72B_Instruct,0.7669902912621359
18
  gemma-2-9b-it,0.6699029126213593
19
  Meta-Llama-3-70B-Instruct,0.7087378640776699
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.6116504854368932
21
  Qwen2_5_14B_Instruct,0.7669902912621359
22
  sg_llama3_8192_8b,0.5533980582524272
23
  sg_llama3_70b_inst,0.6699029126213593
results/cultural_reasoning/zero_shot/sg_eval_v1_cleaned.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.5882352941176471
17
  Qwen2_5_72B_Instruct,0.7205882352941176
18
  gemma-2-9b-it,0.6029411764705882
19
  Meta-Llama-3-70B-Instruct,0.6617647058823529
 
20
  Qwen2_5_14B_Instruct,0.6911764705882353
21
  sg_llama3_8192_8b,0.47058823529411764
22
  sg_llama3_70b_inst,0.6176470588235294
 
17
  Qwen2_5_72B_Instruct,0.7205882352941176
18
  gemma-2-9b-it,0.6029411764705882
19
  Meta-Llama-3-70B-Instruct,0.6617647058823529
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.5735294117647058
21
  Qwen2_5_14B_Instruct,0.6911764705882353
22
  sg_llama3_8192_8b,0.47058823529411764
23
  sg_llama3_70b_inst,0.6176470588235294
results/cultural_reasoning/zero_shot/sg_eval_v2_mcq.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.7836363636363637
17
  Qwen2_5_72B_Instruct,0.8618181818181818
18
  gemma-2-9b-it,0.8036363636363636
19
  Meta-Llama-3-70B-Instruct,0.8381818181818181
 
20
  Qwen2_5_14B_Instruct,0.8345454545454546
21
  sg_llama3_8192_8b,0.76
22
  sg_llama3_70b_inst,0.8436363636363636
 
17
  Qwen2_5_72B_Instruct,0.8618181818181818
18
  gemma-2-9b-it,0.8036363636363636
19
  Meta-Llama-3-70B-Instruct,0.8381818181818181
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.7927272727272727
21
  Qwen2_5_14B_Instruct,0.8345454545454546
22
  sg_llama3_8192_8b,0.76
23
  sg_llama3_70b_inst,0.8436363636363636
results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,55.0
17
  Qwen2_5_72B_Instruct,53.32
18
  gemma-2-9b-it,53.96
19
  Meta-Llama-3-70B-Instruct,50.599999999999994
 
20
  Qwen2_5_14B_Instruct,53.2
21
  sg_llama3_8192_8b,53.4
22
  sg_llama3_70b_inst,51.959999999999994
 
17
  Qwen2_5_72B_Instruct,53.32
18
  gemma-2-9b-it,53.96
19
  Meta-Llama-3-70B-Instruct,50.599999999999994
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,49.32000000000001
21
  Qwen2_5_14B_Instruct,53.2
22
  sg_llama3_8192_8b,53.4
23
  sg_llama3_70b_inst,51.959999999999994
results/cultural_reasoning/zero_shot/us_eval.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.6915887850467289
17
  Qwen2_5_72B_Instruct,0.8598130841121495
18
  gemma-2-9b-it,0.8130841121495327
19
  Meta-Llama-3-70B-Instruct,0.8691588785046729
 
20
  Qwen2_5_14B_Instruct,0.822429906542056
21
  sg_llama3_8192_8b,0.7009345794392523
22
  sg_llama3_70b_inst,0.8598130841121495
 
17
  Qwen2_5_72B_Instruct,0.8598130841121495
18
  gemma-2-9b-it,0.8130841121495327
19
  Meta-Llama-3-70B-Instruct,0.8691588785046729
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.6635514018691588
21
  Qwen2_5_14B_Instruct,0.822429906542056
22
  sg_llama3_8192_8b,0.7009345794392523
23
  sg_llama3_70b_inst,0.8598130841121495
results/dialogue/zero_shot/dialogsum.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.24891094210680076,0.35393482223136147,0.12172072639345373,0
17
  Qwen2_5_72B_Instruct,0.23460549655507293,0.3373580017785426,0.10893746645433498,0.25752102143234123
18
  gemma-2-9b-it,0.2560682231168516,0.36247455000865003,0.12571639767749476,0.2800137216644101
19
  Meta-Llama-3-70B-Instruct,0.2557065499979308,0.36058417323628,0.12758087337786866,0.2789546033796438
 
20
  Qwen2_5_14B_Instruct,0.2343478938479703,0.3386251381162625,0.10742381514017992,0.2569947282874686
21
  sg_llama3_8192_8b,0.2708022468830074,0.3774768588431775,0.1387436961438702,0.2961861856619747
22
  sg_llama3_70b_inst,0.26633840691332344,0.3692028513115729,0.1412505883866801,0.2885617810417173
 
17
  Qwen2_5_72B_Instruct,0.23460549655507293,0.3373580017785426,0.10893746645433498,0.25752102143234123
18
  gemma-2-9b-it,0.2560682231168516,0.36247455000865003,0.12571639767749476,0.2800137216644101
19
  Meta-Llama-3-70B-Instruct,0.2557065499979308,0.36058417323628,0.12758087337786866,0.2789546033796438
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.23822266043844623,0.33277843672963314,0.1255742736802169,0.2563152709054887
21
  Qwen2_5_14B_Instruct,0.2343478938479703,0.3386251381162625,0.10742381514017992,0.2569947282874686
22
  sg_llama3_8192_8b,0.2708022468830074,0.3774768588431775,0.1387436961438702,0.2961861856619747
23
  sg_llama3_70b_inst,0.26633840691332344,0.3692028513115729,0.1412505883866801,0.2885617810417173
results/dialogue/zero_shot/dream.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.9265066144047036
17
  Qwen2_5_72B_Instruct,0.9627633512983832
18
  gemma-2-9b-it,0.9416952474277315
19
  Meta-Llama-3-70B-Instruct,0.9480646741793238
 
20
  Qwen2_5_14B_Instruct,0.9461048505634493
21
  sg_llama3_8192_8b,0.9103380695737384
22
  sg_llama3_70b_inst,0.9524742773150416
 
17
  Qwen2_5_72B_Instruct,0.9627633512983832
18
  gemma-2-9b-it,0.9416952474277315
19
  Meta-Llama-3-70B-Instruct,0.9480646741793238
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.8731014208721215
21
  Qwen2_5_14B_Instruct,0.9461048505634493
22
  sg_llama3_8192_8b,0.9103380695737384
23
  sg_llama3_70b_inst,0.9524742773150416
results/dialogue/zero_shot/samsum.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.2959981719045788,0.4078820748825196,0.16338306782652476,0.3
17
  Qwen2_5_72B_Instruct,0.28852247889830335,0.3996215000271418,0.15494490129237035,0.31100103537539775
18
  gemma-2-9b-it,0.3100514077180449,0.4289412957792292,0.16727050182456474,0.3339424255503407
19
  Meta-Llama-3-70B-Instruct,0.2893525314227379,0.4030746211134018,0.15236139065578,0.3126215824990321
 
20
  Qwen2_5_14B_Instruct,0.2713801253928723,0.3836253496005304,0.13683087953788298,0.2936841470402035
21
  sg_llama3_8192_8b,0.30740523414540055,0.4199805360695743,0.1701793607165699,0.33205580565005743
22
  sg_llama3_70b_inst,0.3146051103643872,0.4271361513564755,0.18238925099430264,0.33428992874238356
 
17
  Qwen2_5_72B_Instruct,0.28852247889830335,0.3996215000271418,0.15494490129237035,0.31100103537539775
18
  gemma-2-9b-it,0.3100514077180449,0.4289412957792292,0.16727050182456474,0.3339424255503407
19
  Meta-Llama-3-70B-Instruct,0.2893525314227379,0.4030746211134018,0.15236139065578,0.3126215824990321
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.2813508829464006,0.38872030551635356,0.1571931255091481,0.29813921781370006
21
  Qwen2_5_14B_Instruct,0.2713801253928723,0.3836253496005304,0.13683087953788298,0.2936841470402035
22
  sg_llama3_8192_8b,0.30740523414540055,0.4199805360695743,0.1701793607165699,0.33205580565005743
23
  sg_llama3_70b_inst,0.3146051103643872,0.4271361513564755,0.18238925099430264,0.33428992874238356
results/emotion/zero_shot/ind_emotion.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.6454545454545455
17
  Qwen2_5_72B_Instruct,0.7068181818181818
18
  gemma-2-9b-it,0.7477272727272727
19
  Meta-Llama-3-70B-Instruct,0.6909090909090909
 
20
  Qwen2_5_14B_Instruct,0.6954545454545454
21
  sg_llama3_8192_8b,0.6045454545454545
22
  sg_llama3_70b_inst,0.7
 
17
  Qwen2_5_72B_Instruct,0.7068181818181818
18
  gemma-2-9b-it,0.7477272727272727
19
  Meta-Llama-3-70B-Instruct,0.6909090909090909
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.6659090909090909
21
  Qwen2_5_14B_Instruct,0.6954545454545454
22
  sg_llama3_8192_8b,0.6045454545454545
23
  sg_llama3_70b_inst,0.7
results/emotion/zero_shot/sst2.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.9403669724770642
17
  Qwen2_5_72B_Instruct,0.9334862385321101
18
  gemma-2-9b-it,0.9311926605504587
19
  Meta-Llama-3-70B-Instruct,0.9495412844036697
 
20
  Qwen2_5_14B_Instruct,0.9311926605504587
21
  sg_llama3_8192_8b,0.9208715596330275
22
  sg_llama3_70b_inst,0.9334862385321101
 
17
  Qwen2_5_72B_Instruct,0.9334862385321101
18
  gemma-2-9b-it,0.9311926605504587
19
  Meta-Llama-3-70B-Instruct,0.9495412844036697
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.911697247706422
21
  Qwen2_5_14B_Instruct,0.9311926605504587
22
  sg_llama3_8192_8b,0.9208715596330275
23
  sg_llama3_70b_inst,0.9334862385321101
results/flores_translation/zero_shot/ind2eng.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.3594829412574955
17
  Qwen2_5_72B_Instruct,0.4215612766585066
18
  gemma-2-9b-it,0.40786563079141763
19
  Meta-Llama-3-70B-Instruct,0.3830092775167675
 
20
  Qwen2_5_14B_Instruct,0.3901044620348051
21
  sg_llama3_8192_8b,0.3758986882788705
22
  sg_llama3_70b_inst,0.4086440304524362
 
17
  Qwen2_5_72B_Instruct,0.4215612766585066
18
  gemma-2-9b-it,0.40786563079141763
19
  Meta-Llama-3-70B-Instruct,0.3830092775167675
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.3778716126842916
21
  Qwen2_5_14B_Instruct,0.3901044620348051
22
  sg_llama3_8192_8b,0.3758986882788705
23
  sg_llama3_70b_inst,0.4086440304524362
results/flores_translation/zero_shot/vie2eng.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.30981028289420137
17
  Qwen2_5_72B_Instruct,0.35733464866179004
18
  gemma-2-9b-it,0.3367700653885
19
  Meta-Llama-3-70B-Instruct,0.3230140263371192
 
20
  Qwen2_5_14B_Instruct,0.32198218156960645
21
  sg_llama3_8192_8b,0.3087032778607667
22
  sg_llama3_70b_inst,0.34258533717783785
 
17
  Qwen2_5_72B_Instruct,0.35733464866179004
18
  gemma-2-9b-it,0.3367700653885
19
  Meta-Llama-3-70B-Instruct,0.3230140263371192
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.2881995374548925
21
  Qwen2_5_14B_Instruct,0.32198218156960645
22
  sg_llama3_8192_8b,0.3087032778607667
23
  sg_llama3_70b_inst,0.34258533717783785
results/flores_translation/zero_shot/zho2eng.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.2516593644617717
17
  Qwen2_5_72B_Instruct,0.2843491241986514
18
  gemma-2-9b-it,0.267527968123433
19
  Meta-Llama-3-70B-Instruct,0.24397819518058994
 
20
  Qwen2_5_14B_Instruct,0.2627781200417998
21
  sg_llama3_8192_8b,0.23778397807613597
22
  sg_llama3_70b_inst,0.26000707510414633
 
17
  Qwen2_5_72B_Instruct,0.2843491241986514
18
  gemma-2-9b-it,0.267527968123433
19
  Meta-Llama-3-70B-Instruct,0.24397819518058994
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.23373987803056884
21
  Qwen2_5_14B_Instruct,0.2627781200417998
22
  sg_llama3_8192_8b,0.23778397807613597
23
  sg_llama3_70b_inst,0.26000707510414633
results/flores_translation/zero_shot/zsm2eng.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.3484133510670942
17
  Qwen2_5_72B_Instruct,0.4237666988692159
18
  gemma-2-9b-it,0.4234100394581857
19
  Meta-Llama-3-70B-Instruct,0.3957287030176054
 
20
  Qwen2_5_14B_Instruct,0.3841042767934729
21
  sg_llama3_8192_8b,0.376818225699898
22
  sg_llama3_70b_inst,0.4163761508073963
 
17
  Qwen2_5_72B_Instruct,0.4237666988692159
18
  gemma-2-9b-it,0.4234100394581857
19
  Meta-Llama-3-70B-Instruct,0.3957287030176054
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.37943052200875504
21
  Qwen2_5_14B_Instruct,0.3841042767934729
22
  sg_llama3_8192_8b,0.376818225699898
23
  sg_llama3_70b_inst,0.4163761508073963
results/fundamental_nlp_tasks/zero_shot/c3.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.9143605086013463
17
  Qwen2_5_72B_Instruct,0.9596110695587136
18
  gemma-2-9b-it,0.9222139117427075
19
  Meta-Llama-3-70B-Instruct,0.9521316379955124
 
20
  Qwen2_5_14B_Instruct,0.9502617801047121
21
  sg_llama3_8192_8b,0.8051608077786089
22
  Meta-Llama-3.1-70B,0.7786088257292446
 
17
  Qwen2_5_72B_Instruct,0.9596110695587136
18
  gemma-2-9b-it,0.9222139117427075
19
  Meta-Llama-3-70B-Instruct,0.9521316379955124
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.8358264771877337
21
  Qwen2_5_14B_Instruct,0.9502617801047121
22
  sg_llama3_8192_8b,0.8051608077786089
23
  Meta-Llama-3.1-70B,0.7786088257292446
results/fundamental_nlp_tasks/zero_shot/cola.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.785234899328859
17
  Qwen2_5_72B_Instruct,0.8571428571428571
18
  gemma-2-9b-it,0.7938638542665388
19
  Meta-Llama-3-70B-Instruct,0.835091083413231
 
20
  Qwen2_5_14B_Instruct,0.8063279002876318
21
  sg_llama3_8192_8b,0.8130393096836049
22
  sg_llama3_70b_inst,0.8696069031639502
 
17
  Qwen2_5_72B_Instruct,0.8571428571428571
18
  gemma-2-9b-it,0.7938638542665388
19
  Meta-Llama-3-70B-Instruct,0.835091083413231
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.7468839884947267
21
  Qwen2_5_14B_Instruct,0.8063279002876318
22
  sg_llama3_8192_8b,0.8130393096836049
23
  sg_llama3_70b_inst,0.8696069031639502
results/fundamental_nlp_tasks/zero_shot/mnli.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.653
17
  Qwen2_5_72B_Instruct,0.8445
18
  gemma-2-9b-it,0.716
19
  Meta-Llama-3-70B-Instruct,0.6709421285692472
 
20
  Qwen2_5_14B_Instruct,0.818
21
  sg_llama3_8192_8b,0.6605
22
  sg_llama3_70b_inst,0.7685
 
17
  Qwen2_5_72B_Instruct,0.8445
18
  gemma-2-9b-it,0.716
19
  Meta-Llama-3-70B-Instruct,0.6709421285692472
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.571
21
  Qwen2_5_14B_Instruct,0.818
22
  sg_llama3_8192_8b,0.6605
23
  sg_llama3_70b_inst,0.7685
results/fundamental_nlp_tasks/zero_shot/mrpc.csv CHANGED
@@ -16,6 +16,7 @@ SeaLLMs-v3-7B-Chat,0.7475490196078431
16
  Qwen2_5_72B_Instruct,0.8014705882352942
17
  gemma-2-9b-it,0.7401960784313726
18
  Meta-Llama-3-70B-Instruct,0.7598039215686274
 
19
  Qwen2_5_14B_Instruct,0.7794117647058824
20
  sg_llama3_8192_8b,0.7254901960784313
21
  sg_llama3_70b_inst,0.7892156862745098
 
16
  Qwen2_5_72B_Instruct,0.8014705882352942
17
  gemma-2-9b-it,0.7401960784313726
18
  Meta-Llama-3-70B-Instruct,0.7598039215686274
19
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.7377450980392157
20
  Qwen2_5_14B_Instruct,0.7794117647058824
21
  sg_llama3_8192_8b,0.7254901960784313
22
  sg_llama3_70b_inst,0.7892156862745098
results/fundamental_nlp_tasks/zero_shot/ocnli.csv CHANGED
@@ -16,6 +16,7 @@ SeaLLMs-v3-7B-Chat,0.5698305084745763
16
  Qwen2_5_72B_Instruct,0.7684745762711864
17
  gemma-2-9b-it,0.6189830508474576
18
  Meta-Llama-3-70B-Instruct,0.5928813559322034
 
19
  Qwen2_5_14B_Instruct,0.7538983050847458
20
  sg_llama3_8192_8b,0.5084745762711864
21
  sg_llama3_70b_inst,0.6420338983050847
 
16
  Qwen2_5_72B_Instruct,0.7684745762711864
17
  gemma-2-9b-it,0.6189830508474576
18
  Meta-Llama-3-70B-Instruct,0.5928813559322034
19
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.5138983050847458
20
  Qwen2_5_14B_Instruct,0.7538983050847458
21
  sg_llama3_8192_8b,0.5084745762711864
22
  sg_llama3_70b_inst,0.6420338983050847
results/fundamental_nlp_tasks/zero_shot/qnli.csv CHANGED
@@ -16,6 +16,7 @@ SeaLLMs-v3-7B-Chat,0.7159070107999268
16
  Qwen2_5_72B_Instruct,0.9082921471718836
17
  gemma-2-9b-it,0.9070107999267801
18
  Meta-Llama-3-70B-Instruct,0.876807614863628
 
19
  Qwen2_5_14B_Instruct,0.9079260479589969
20
  sg_llama3_8192_8b,0.8118250045762402
21
  sg_llama3_70b_inst,0.9004210140948197
 
16
  Qwen2_5_72B_Instruct,0.9082921471718836
17
  gemma-2-9b-it,0.9070107999267801
18
  Meta-Llama-3-70B-Instruct,0.876807614863628
19
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.6829580816401245
20
  Qwen2_5_14B_Instruct,0.9079260479589969
21
  sg_llama3_8192_8b,0.8118250045762402
22
  sg_llama3_70b_inst,0.9004210140948197
results/fundamental_nlp_tasks/zero_shot/qqp.csv CHANGED
@@ -16,6 +16,7 @@ SeaLLMs-v3-7B-Chat,0.7625
16
  Qwen2_5_72B_Instruct,0.8315
17
  gemma-2-9b-it,0.7775
18
  Meta-Llama-3-70B-Instruct,0.7876082117239673
 
19
  Qwen2_5_14B_Instruct,0.8255
20
  sg_llama3_8192_8b,0.8095
21
  sg_llama3_70b_inst,0.804
 
16
  Qwen2_5_72B_Instruct,0.8315
17
  gemma-2-9b-it,0.7775
18
  Meta-Llama-3-70B-Instruct,0.7876082117239673
19
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.767
20
  Qwen2_5_14B_Instruct,0.8255
21
  sg_llama3_8192_8b,0.8095
22
  sg_llama3_70b_inst,0.804
results/fundamental_nlp_tasks/zero_shot/rte.csv CHANGED
@@ -16,6 +16,7 @@ SeaLLMs-v3-7B-Chat,0.7870036101083032
16
  Qwen2_5_72B_Instruct,0.9025270758122743
17
  gemma-2-9b-it,0.7472924187725631
18
  Meta-Llama-3-70B-Instruct,0.8086642599277978
 
19
  Qwen2_5_14B_Instruct,0.8664259927797834
20
  sg_llama3_8192_8b,0.7364620938628159
21
  sg_llama3_70b_inst,0.8916967509025271
 
16
  Qwen2_5_72B_Instruct,0.9025270758122743
17
  gemma-2-9b-it,0.7472924187725631
18
  Meta-Llama-3-70B-Instruct,0.8086642599277978
19
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.7292418772563177
20
  Qwen2_5_14B_Instruct,0.8664259927797834
21
  sg_llama3_8192_8b,0.7364620938628159
22
  sg_llama3_70b_inst,0.8916967509025271
results/fundamental_nlp_tasks/zero_shot/wnli.csv CHANGED
@@ -16,6 +16,7 @@ SeaLLMs-v3-7B-Chat,0.5915492957746479
16
  Qwen2_5_72B_Instruct,0.8169014084507042
17
  gemma-2-9b-it,0.7746478873239436
18
  Meta-Llama-3-70B-Instruct,0.7887323943661971
 
19
  Qwen2_5_14B_Instruct,0.8309859154929577
20
  sg_llama3_8192_8b,0.704225352112676
21
  sg_llama3_70b_inst,0.8309859154929577
 
16
  Qwen2_5_72B_Instruct,0.8169014084507042
17
  gemma-2-9b-it,0.7746478873239436
18
  Meta-Llama-3-70B-Instruct,0.7887323943661971
19
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.5915492957746479
20
  Qwen2_5_14B_Instruct,0.8309859154929577
21
  sg_llama3_8192_8b,0.704225352112676
22
  sg_llama3_70b_inst,0.8309859154929577
results/general_reasoning/zero_shot/c_eval.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.7658779576587795
17
  Qwen2_5_72B_Instruct,0.8325031133250311
18
  gemma-2-9b-it,0.5523038605230386
19
  Meta-Llama-3-70B-Instruct,0.6220423412204235
 
20
  Qwen2_5_14B_Instruct,0.7839352428393525
21
  sg_llama3_8192_8b,0.4825653798256538
22
  sg_llama3_70b_inst,0.5722291407222914
 
17
  Qwen2_5_72B_Instruct,0.8325031133250311
18
  gemma-2-9b-it,0.5523038605230386
19
  Meta-Llama-3-70B-Instruct,0.6220423412204235
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.4800747198007472
21
  Qwen2_5_14B_Instruct,0.7839352428393525
22
  sg_llama3_8192_8b,0.4825653798256538
23
  sg_llama3_70b_inst,0.5722291407222914
results/general_reasoning/zero_shot/cmmlu.csv CHANGED
@@ -16,6 +16,7 @@ SeaLLMs-v3-7B-Chat,0.7684337765498187
16
  Qwen2_5_72B_Instruct,0.8343982041098256
17
  gemma-2-9b-it,0.5700224486271801
18
  Meta-Llama-3-70B-Instruct,0.6494560524952513
 
19
  Qwen2_5_14B_Instruct,0.7807805214988776
20
  sg_llama3_8192_8b,0.49050250388533934
21
  sg_llama3_70b_inst,0.6044724572612675
 
16
  Qwen2_5_72B_Instruct,0.8343982041098256
17
  gemma-2-9b-it,0.5700224486271801
18
  Meta-Llama-3-70B-Instruct,0.6494560524952513
19
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.4750474874805733
20
  Qwen2_5_14B_Instruct,0.7807805214988776
21
  sg_llama3_8192_8b,0.49050250388533934
22
  sg_llama3_70b_inst,0.6044724572612675
results/general_reasoning/zero_shot/indommlu.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.5267374324053675
17
  Qwen2_5_72B_Instruct,0.6380933306629281
18
  gemma-2-9b-it,0.606983109686895
19
  Meta-Llama-3-70B-Instruct,0.6323519594098405
 
20
  Qwen2_5_14B_Instruct,0.6009746979104079
21
  sg_llama3_8192_8b,0.5021697042526203
22
  sg_llama3_70b_inst,0.6394285332799252
 
17
  Qwen2_5_72B_Instruct,0.6380933306629281
18
  gemma-2-9b-it,0.606983109686895
19
  Meta-Llama-3-70B-Instruct,0.6323519594098405
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.5312103611723079
21
  Qwen2_5_14B_Instruct,0.6009746979104079
22
  sg_llama3_8192_8b,0.5021697042526203
23
  sg_llama3_70b_inst,0.6394285332799252
results/general_reasoning/zero_shot/mmlu.csv CHANGED
@@ -16,6 +16,7 @@ SeaLLMs-v3-7B-Chat,0.6670003575259207
16
  Qwen2_5_72B_Instruct,0.8129424383267787
17
  gemma-2-9b-it,0.7100464783696818
18
  Meta-Llama-3-70B-Instruct,0.7649624597783339
 
19
  Qwen2_5_14B_Instruct,0.7542366821594566
20
  sg_llama3_8192_8b,0.6235967107615302
21
  sg_llama3_70b_inst,0.7407937075437969
 
16
  Qwen2_5_72B_Instruct,0.8129424383267787
17
  gemma-2-9b-it,0.7100464783696818
18
  Meta-Llama-3-70B-Instruct,0.7649624597783339
19
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.5775473721844834
20
  Qwen2_5_14B_Instruct,0.7542366821594566
21
  sg_llama3_8192_8b,0.6235967107615302
22
  sg_llama3_70b_inst,0.7407937075437969
results/general_reasoning/zero_shot/zbench.csv CHANGED
@@ -17,6 +17,7 @@ SeaLLMs-v3-7B-Chat,0.5454545454545454
17
  Qwen2_5_72B_Instruct,0.696969696969697
18
  gemma-2-9b-it,0.48484848484848486
19
  Meta-Llama-3-70B-Instruct,0.5151515151515151
 
20
  Qwen2_5_14B_Instruct,0.6666666666666666
21
  sg_llama3_8192_8b,0.30303030303030304
22
  sg_llama3_70b_inst,0.42424242424242425
 
17
  Qwen2_5_72B_Instruct,0.696969696969697
18
  gemma-2-9b-it,0.48484848484848486
19
  Meta-Llama-3-70B-Instruct,0.5151515151515151
20
+ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.36363636363636365
21
  Qwen2_5_14B_Instruct,0.6666666666666666
22
  sg_llama3_8192_8b,0.30303030303030304
23
  sg_llama3_70b_inst,0.42424242424242425