zhuohan-7 commited on
Commit
35d21e5
·
verified ·
1 Parent(s): f8a58e0

Upload folder using huggingface_hub

Browse files
Files changed (33) hide show
  1. results/cross_lingual/zero_shot/cross_logiqa.csv +4 -0
  2. results/cross_lingual/zero_shot/cross_mmlu.csv +4 -0
  3. results/cross_lingual/zero_shot/cross_xquad.csv +4 -0
  4. results/cultural_reasoning/zero_shot/cn_eval.csv +4 -0
  5. results/cultural_reasoning/zero_shot/ph_eval.csv +4 -0
  6. results/cultural_reasoning/zero_shot/sg_eval.csv +4 -0
  7. results/cultural_reasoning/zero_shot/sg_eval_v1_cleaned.csv +4 -0
  8. results/cultural_reasoning/zero_shot/sg_eval_v2_mcq.csv +5 -0
  9. results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv +5 -0
  10. results/cultural_reasoning/zero_shot/us_eval.csv +4 -0
  11. results/dialogue/zero_shot/dialogsum.csv +4 -0
  12. results/dialogue/zero_shot/dream.csv +4 -0
  13. results/dialogue/zero_shot/samsum.csv +4 -0
  14. results/emotion/zero_shot/ind_emotion.csv +4 -0
  15. results/emotion/zero_shot/sst2.csv +4 -0
  16. results/flores_translation/zero_shot/ind2eng.csv +5 -0
  17. results/flores_translation/zero_shot/vie2eng.csv +5 -0
  18. results/flores_translation/zero_shot/zho2eng.csv +5 -0
  19. results/flores_translation/zero_shot/zsm2eng.csv +5 -0
  20. results/fundamental_nlp_tasks/zero_shot/c3.csv +4 -0
  21. results/fundamental_nlp_tasks/zero_shot/cola.csv +4 -0
  22. results/fundamental_nlp_tasks/zero_shot/mnli.csv +4 -0
  23. results/fundamental_nlp_tasks/zero_shot/mrpc.csv +4 -0
  24. results/fundamental_nlp_tasks/zero_shot/ocnli.csv +4 -0
  25. results/fundamental_nlp_tasks/zero_shot/qnli.csv +4 -0
  26. results/fundamental_nlp_tasks/zero_shot/qqp.csv +4 -0
  27. results/fundamental_nlp_tasks/zero_shot/rte.csv +4 -0
  28. results/fundamental_nlp_tasks/zero_shot/wnli.csv +4 -0
  29. results/general_reasoning/zero_shot/c_eval.csv +4 -0
  30. results/general_reasoning/zero_shot/cmmlu.csv +4 -0
  31. results/general_reasoning/zero_shot/indommlu.csv +4 -0
  32. results/general_reasoning/zero_shot/mmlu.csv +4 -0
  33. results/general_reasoning/zero_shot/zbench.csv +4 -0
results/cross_lingual/zero_shot/cross_logiqa.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.4496753246753246,0.49188311688311687,0.46
9
  Meta-Llama-3-8B-Instruct,0.4610389610389611,0.45097402597402597,0.4559509553669637,0.5965909090909091,0.48295454545454547,0.5,0.4602272727272727,0.42045454545454547,0.4034090909090909,0.36363636363636365
10
  Meta-Llama-3.1-70B-Instruct,0.6566558441558442,0.598051948051948,0.6259852839118454,0.7443181818181818,0.7215909090909091,0.6647727272727273,0.6534090909090909,0.6193181818181818,0.625,0.5681818181818182
11
  Qwen2_5_3B_Instruct,0.4878246753246754,0.3594155844155844,0.41388918606681485,0.6079545454545454,0.6420454545454546,0.45454545454545453,0.4602272727272727,0.48295454545454547,0.42045454545454547,0.3465909090909091
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.6542207792207791,0.5862012987012987,0.618346089144296,0.7329545454545454,0.7045454545454546,0.6477272727272727,0.6420454545454546,0.625,0.6647727272727273,0.5625
13
  SeaLLMs-v3-7B-Chat,0.5551948051948051,0.5142857142857142,0.5339578453833284,0.6022727272727273,0.6647727272727273,0.5738636363636364,0.5454545454545454,0.5170454545454546,0.5,0.48295454545454547
14
  Qwen2_5_72B_Instruct,0.7248376623376623,0.6852272727272727,0.7044761161663122,0.8011363636363636,0.7954545454545454,0.7272727272727273,0.6704545454545454,0.7159090909090909,0.7159090909090909,0.6477272727272727
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.6306818181818182,0.6186688311688312,0.62461756988007
17
  Qwen2_5_14B_Instruct,0.6436688311688312,0.5938311688311688,0.6177464473895627,0.75,0.7386363636363636,0.6306818181818182,0.6420454545454546,0.6136363636363636,0.5965909090909091,0.5340909090909091
18
  sg_llama3_8192_8b,0.49594155844155846,0.468831168831169,0.4820054587349027,0.5795454545454546,0.48295454545454547,0.5397727272727273,0.48863636363636365,0.5,0.48295454545454547,0.3977272727272727
19
  sg_llama3_70b_inst,0.6217532467532468,0.5629870129870129,0.590912649920049,0.7272727272727273,0.6590909090909091,0.6477272727272727,0.6079545454545454,0.6136363636363636,0.5795454545454546,0.5170454545454546
 
20
  gemma-2-2b-it,0.4780844155844156,0.4448051948051948,0.46084478401384643,0.5568181818181818,0.5,0.5,0.48863636363636365,0.4375,0.4602272727272727,0.4034090909090909
21
  llama3-8b-cpt-sea-lionv2-instruct,0.48538961038961037,0.4472402597402597,0.46553468284769084,0.5284090909090909,0.5113636363636364,0.5227272727272727,0.5227272727272727,0.48863636363636365,0.44886363636363635,0.375
 
22
  Qwen2_5_0_5B_Instruct,0.3538961038961039,0.1978896103896103,0.25383898238962527,0.45454545454545453,0.39204545454545453,0.3465909090909091,0.375,0.3409090909090909,0.30113636363636365,0.26704545454545453
23
  GPT4o_0513,0.7159090909090909,0.6941558441558444,0.7048646724637749,0.7613636363636364,0.7670454545454546,0.6988636363636364,0.6988636363636364,0.7045454545454546,0.6761363636363636,0.7045454545454546
 
24
  cross_openhermes_llama3_70b_4096_inst,0.6071428571428571,0.5717532467532467,0.5889168666140888,0.6988636363636364,0.6363636363636364,0.625,0.5681818181818182,0.5965909090909091,0.5965909090909091,0.5284090909090909
25
  cross_openhermes_llama3_8b_4096_2_inst,0.48214285714285715,0.4417207792207793,0.4610475185885475,0.5454545454545454,0.4943181818181818,0.4943181818181818,0.4602272727272727,0.48295454545454547,0.5227272727272727,0.375
 
9
  Meta-Llama-3-8B-Instruct,0.4610389610389611,0.45097402597402597,0.4559509553669637,0.5965909090909091,0.48295454545454547,0.5,0.4602272727272727,0.42045454545454547,0.4034090909090909,0.36363636363636365
10
  Meta-Llama-3.1-70B-Instruct,0.6566558441558442,0.598051948051948,0.6259852839118454,0.7443181818181818,0.7215909090909091,0.6647727272727273,0.6534090909090909,0.6193181818181818,0.625,0.5681818181818182
11
  Qwen2_5_3B_Instruct,0.4878246753246754,0.3594155844155844,0.41388918606681485,0.6079545454545454,0.6420454545454546,0.45454545454545453,0.4602272727272727,0.48295454545454547,0.42045454545454547,0.3465909090909091
12
+ cross_openhermes_llama3_8b_16384_inst,0.4878246753246754,0.48051948051948057,0.4841445226527821,0.5738636363636364,0.5056818181818182,0.4943181818181818,0.4602272727272727,0.4772727272727273,0.5227272727272727,0.3806818181818182
13
  cross_openhermes_llama3_70b_4096_inst_2,0.6542207792207791,0.5862012987012987,0.618346089144296,0.7329545454545454,0.7045454545454546,0.6477272727272727,0.6420454545454546,0.625,0.6647727272727273,0.5625
14
  SeaLLMs-v3-7B-Chat,0.5551948051948051,0.5142857142857142,0.5339578453833284,0.6022727272727273,0.6647727272727273,0.5738636363636364,0.5454545454545454,0.5170454545454546,0.5,0.48295454545454547
15
  Qwen2_5_72B_Instruct,0.7248376623376623,0.6852272727272727,0.7044761161663122,0.8011363636363636,0.7954545454545454,0.7272727272727273,0.6704545454545454,0.7159090909090909,0.7159090909090909,0.6477272727272727
 
18
  Qwen2_5_14B_Instruct,0.6436688311688312,0.5938311688311688,0.6177464473895627,0.75,0.7386363636363636,0.6306818181818182,0.6420454545454546,0.6136363636363636,0.5965909090909091,0.5340909090909091
19
  sg_llama3_8192_8b,0.49594155844155846,0.468831168831169,0.4820054587349027,0.5795454545454546,0.48295454545454547,0.5397727272727273,0.48863636363636365,0.5,0.48295454545454547,0.3977272727272727
20
  sg_llama3_70b_inst,0.6217532467532468,0.5629870129870129,0.590912649920049,0.7272727272727273,0.6590909090909091,0.6477272727272727,0.6079545454545454,0.6136363636363636,0.5795454545454546,0.5170454545454546
21
+ cross_openhermes_llama3_8b_2048_inst,0.4659090909090909,0.4407467532467531,0.452978757986988,0.5284090909090909,0.5284090909090909,0.4375,0.4659090909090909,0.4375,0.48863636363636365,0.375
22
  gemma-2-2b-it,0.4780844155844156,0.4448051948051948,0.46084478401384643,0.5568181818181818,0.5,0.5,0.48863636363636365,0.4375,0.4602272727272727,0.4034090909090909
23
  llama3-8b-cpt-sea-lionv2-instruct,0.48538961038961037,0.4472402597402597,0.46553468284769084,0.5284090909090909,0.5113636363636364,0.5227272727272727,0.5227272727272727,0.48863636363636365,0.44886363636363635,0.375
24
+ cross_openhermes_llama3_8b_12288_inst,0.48701298701298695,0.46964285714285725,0.4781702261263516,0.5511363636363636,0.5227272727272727,0.4715909090909091,0.4772727272727273,0.4943181818181818,0.5056818181818182,0.38636363636363635
25
  Qwen2_5_0_5B_Instruct,0.3538961038961039,0.1978896103896103,0.25383898238962527,0.45454545454545453,0.39204545454545453,0.3465909090909091,0.375,0.3409090909090909,0.30113636363636365,0.26704545454545453
26
  GPT4o_0513,0.7159090909090909,0.6941558441558444,0.7048646724637749,0.7613636363636364,0.7670454545454546,0.6988636363636364,0.6988636363636364,0.7045454545454546,0.6761363636363636,0.7045454545454546
27
+ cross_openhermes_llama3_8b_8192_inst,0.476461038961039,0.44724025974025977,0.46138845771372755,0.5454545454545454,0.4772727272727273,0.4772727272727273,0.4715909090909091,0.48863636363636365,0.48295454545454547,0.39204545454545453
28
  cross_openhermes_llama3_70b_4096_inst,0.6071428571428571,0.5717532467532467,0.5889168666140888,0.6988636363636364,0.6363636363636364,0.625,0.5681818181818182,0.5965909090909091,0.5965909090909091,0.5284090909090909
29
  cross_openhermes_llama3_8b_4096_2_inst,0.48214285714285715,0.4417207792207793,0.4610475185885475,0.5454545454545454,0.4943181818181818,0.4943181818181818,0.4602272727272727,0.48295454545454547,0.5227272727272727,0.375
results/cross_lingual/zero_shot/cross_mmlu.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.5523809523809523,0.5384761904761904,0.545
9
  Meta-Llama-3-8B-Instruct,0.5733333333333334,0.4742857142857144,0.5191272726777197,0.7133333333333334,0.5866666666666667,0.5733333333333334,0.5866666666666667,0.5066666666666667,0.5333333333333333,0.5133333333333333
10
  Meta-Llama-3.1-70B-Instruct,0.7638095238095238,0.7716190476190474,0.7676944251955988,0.8,0.74,0.7666666666666667,0.7666666666666667,0.76,0.7666666666666667,0.7466666666666667
11
  Qwen2_5_3B_Instruct,0.5857142857142856,0.48952380952380964,0.533316462053399,0.6933333333333334,0.6666666666666666,0.64,0.5266666666666666,0.6333333333333333,0.5466666666666666,0.3933333333333333
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.7466666666666667,0.7828571428571428,0.7643337483933655,0.8,0.72,0.7333333333333333,0.72,0.7866666666666666,0.7466666666666667,0.72
13
  SeaLLMs-v3-7B-Chat,0.6628571428571429,0.6135238095238095,0.6372370860992635,0.74,0.6933333333333334,0.6933333333333334,0.6466666666666666,0.68,0.6,0.5866666666666667
14
  Qwen2_5_72B_Instruct,0.8123809523809525,0.8140952380952383,0.8132371917701643,0.8533333333333334,0.8333333333333334,0.84,0.7933333333333333,0.8066666666666666,0.7733333333333333,0.7866666666666666
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.758095238095238,0.7316190476190477,0.744621866597198
17
  Qwen2_5_14B_Instruct,0.7266666666666666,0.680952380952381,0.7030672078887086,0.78,0.7533333333333333,0.7533333333333333,0.7266666666666667,0.7466666666666667,0.68,0.6466666666666666
18
  sg_llama3_8192_8b,0.5876190476190476,0.5207619047619048,0.5521740766611207,0.7,0.58,0.6333333333333333,0.5466666666666666,0.5533333333333333,0.5666666666666667,0.5333333333333333
19
  sg_llama3_70b_inst,0.7342857142857142,0.7079999999999999,0.7209033280007295,0.82,0.6866666666666666,0.7333333333333333,0.6933333333333334,0.78,0.7266666666666667,0.7
 
20
  gemma-2-2b-it,0.5780952380952381,0.5480000000000002,0.5626454667971265,0.7,0.5866666666666667,0.5866666666666667,0.5333333333333333,0.5666666666666667,0.5333333333333333,0.54
21
  llama3-8b-cpt-sea-lionv2-instruct,0.6104761904761905,0.5685714285714286,0.5887791368067445,0.72,0.6,0.6133333333333333,0.58,0.6333333333333333,0.5933333333333334,0.5333333333333333
 
22
  Qwen2_5_0_5B_Instruct,0.4228571428571429,0.2436190476190476,0.3091364879297727,0.6133333333333333,0.5,0.4266666666666667,0.4066666666666667,0.3933333333333333,0.3333333333333333,0.2866666666666667
23
  GPT4o_0513,0.8038095238095239,0.8506666666666668,0.8265745643832277,0.8266666666666667,0.7933333333333333,0.8,0.7666666666666667,0.7933333333333333,0.8266666666666667,0.82
 
24
  cross_openhermes_llama3_70b_4096_inst,0.7257142857142858,0.7375238095238097,0.7315713913287366,0.8,0.7066666666666667,0.7466666666666667,0.6733333333333333,0.7666666666666667,0.7333333333333333,0.6533333333333333
25
  cross_openhermes_llama3_8b_4096_2_inst,0.5666666666666667,0.5346666666666665,0.5502017755755465,0.6533333333333333,0.56,0.5266666666666666,0.5533333333333333,0.6,0.5533333333333333,0.52
 
9
  Meta-Llama-3-8B-Instruct,0.5733333333333334,0.4742857142857144,0.5191272726777197,0.7133333333333334,0.5866666666666667,0.5733333333333334,0.5866666666666667,0.5066666666666667,0.5333333333333333,0.5133333333333333
10
  Meta-Llama-3.1-70B-Instruct,0.7638095238095238,0.7716190476190474,0.7676944251955988,0.8,0.74,0.7666666666666667,0.7666666666666667,0.76,0.7666666666666667,0.7466666666666667
11
  Qwen2_5_3B_Instruct,0.5857142857142856,0.48952380952380964,0.533316462053399,0.6933333333333334,0.6666666666666666,0.64,0.5266666666666666,0.6333333333333333,0.5466666666666666,0.3933333333333333
12
+ cross_openhermes_llama3_8b_16384_inst,0.5752380952380952,0.5430476190476191,0.5586795467133899,0.6866666666666666,0.5933333333333334,0.5733333333333334,0.5266666666666666,0.5666666666666667,0.5666666666666667,0.5133333333333333
13
  cross_openhermes_llama3_70b_4096_inst_2,0.7466666666666667,0.7828571428571428,0.7643337483933655,0.8,0.72,0.7333333333333333,0.72,0.7866666666666666,0.7466666666666667,0.72
14
  SeaLLMs-v3-7B-Chat,0.6628571428571429,0.6135238095238095,0.6372370860992635,0.74,0.6933333333333334,0.6933333333333334,0.6466666666666666,0.68,0.6,0.5866666666666667
15
  Qwen2_5_72B_Instruct,0.8123809523809525,0.8140952380952383,0.8132371917701643,0.8533333333333334,0.8333333333333334,0.84,0.7933333333333333,0.8066666666666666,0.7733333333333333,0.7866666666666666
 
18
  Qwen2_5_14B_Instruct,0.7266666666666666,0.680952380952381,0.7030672078887086,0.78,0.7533333333333333,0.7533333333333333,0.7266666666666667,0.7466666666666667,0.68,0.6466666666666666
19
  sg_llama3_8192_8b,0.5876190476190476,0.5207619047619048,0.5521740766611207,0.7,0.58,0.6333333333333333,0.5466666666666666,0.5533333333333333,0.5666666666666667,0.5333333333333333
20
  sg_llama3_70b_inst,0.7342857142857142,0.7079999999999999,0.7209033280007295,0.82,0.6866666666666666,0.7333333333333333,0.6933333333333334,0.78,0.7266666666666667,0.7
21
+ cross_openhermes_llama3_8b_2048_inst,0.5504761904761906,0.528,0.53900388550283,0.6733333333333333,0.5466666666666666,0.52,0.5133333333333333,0.5666666666666667,0.5466666666666666,0.4866666666666667
22
  gemma-2-2b-it,0.5780952380952381,0.5480000000000002,0.5626454667971265,0.7,0.5866666666666667,0.5866666666666667,0.5333333333333333,0.5666666666666667,0.5333333333333333,0.54
23
  llama3-8b-cpt-sea-lionv2-instruct,0.6104761904761905,0.5685714285714286,0.5887791368067445,0.72,0.6,0.6133333333333333,0.58,0.6333333333333333,0.5933333333333334,0.5333333333333333
24
+ cross_openhermes_llama3_8b_12288_inst,0.5733333333333333,0.5337142857142858,0.5528148657449711,0.6666666666666666,0.6,0.5466666666666666,0.5333333333333333,0.5666666666666667,0.5933333333333334,0.5066666666666667
25
  Qwen2_5_0_5B_Instruct,0.4228571428571429,0.2436190476190476,0.3091364879297727,0.6133333333333333,0.5,0.4266666666666667,0.4066666666666667,0.3933333333333333,0.3333333333333333,0.2866666666666667
26
  GPT4o_0513,0.8038095238095239,0.8506666666666668,0.8265745643832277,0.8266666666666667,0.7933333333333333,0.8,0.7666666666666667,0.7933333333333333,0.8266666666666667,0.82
27
+ cross_openhermes_llama3_8b_8192_inst,0.5676190476190476,0.5441904761904762,0.5556579129353294,0.6866666666666666,0.5533333333333333,0.54,0.5666666666666667,0.54,0.5666666666666667,0.52
28
  cross_openhermes_llama3_70b_4096_inst,0.7257142857142858,0.7375238095238097,0.7315713913287366,0.8,0.7066666666666667,0.7466666666666667,0.6733333333333333,0.7666666666666667,0.7333333333333333,0.6533333333333333
29
  cross_openhermes_llama3_8b_4096_2_inst,0.5666666666666667,0.5346666666666665,0.5502017755755465,0.6533333333333333,0.56,0.5266666666666666,0.5533333333333333,0.6,0.5533333333333333,0.52
results/cross_lingual/zero_shot/cross_xquad.csv CHANGED
@@ -8,6 +8,7 @@ cross_openhermes_llama3_8b_4096_inst,0.9052521008403361,0.8705882352941177,0.887
8
  Meta-Llama-3-8B-Instruct,0.9210084033613445,0.880672268907563,0.9003888121913395,0.9411764705882353,0.9033613445378151,0.9260504201680673,0.9134453781512605,,,
9
  Meta-Llama-3.1-70B-Instruct,0.9615546218487395,0.9512605042016806,0.9563798632627071,0.9647058823529412,0.9512605042016806,0.9647058823529412,0.965546218487395,,,
10
  Qwen2_5_3B_Instruct,0.9378151260504202,0.8924369747899159,0.9145635113049859,0.9504201680672268,0.9327731092436975,0.9378151260504202,0.9302521008403362,,,
 
11
  cross_openhermes_llama3_70b_4096_inst_2,0.959873949579832,0.9579831932773109,0.9589276393593623,0.965546218487395,0.9554621848739496,0.9621848739495799,0.9563025210084034,,,
12
  SeaLLMs-v3-7B-Chat,0.9403361344537815,0.917016806722689,0.9285300818164836,0.9537815126050421,0.9378151260504202,0.9394957983193277,0.9302521008403362,,,
13
  Qwen2_5_72B_Instruct,0.9682773109243697,0.9632352941176471,0.9657497216354985,0.9714285714285714,0.9638655462184874,0.9680672268907563,0.9697478991596639,,,
@@ -16,9 +17,12 @@ Meta-Llama-3-70B-Instruct,0.9592436974789916,0.9422268907563025,0.95065914992089
16
  Qwen2_5_14B_Instruct,0.9581932773109244,0.9474789915966386,0.9528060148705768,0.965546218487395,0.9529411764705882,0.9571428571428572,0.9571428571428572,,,
17
  sg_llama3_8192_8b,0.899579831932773,0.8611344537815127,0.8799374134072335,0.9201680672268907,0.8831932773109243,0.9117647058823529,0.8831932773109243,,,
18
  sg_llama3_70b_inst,0.9552521008403361,0.9453781512605042,0.9502894779607259,0.9663865546218487,0.9436974789915966,0.957983193277311,0.9529411764705882,,,
 
19
  gemma-2-2b-it,0.917016806722689,0.8665966386554622,0.8910940700869288,0.934453781512605,0.9025210084033614,0.9193277310924369,0.9117647058823529,,,
20
  llama3-8b-cpt-sea-lionv2-instruct,0.9365546218487395,0.9086134453781513,0.9223724784871395,0.9420168067226891,0.926890756302521,0.9436974789915966,0.9336134453781513,,,
 
21
  Qwen2_5_0_5B_Instruct,0.6584033613445378,0.48172268907563026,0.5563732844778362,0.692436974789916,0.673109243697479,0.653781512605042,0.6142857142857143,,,
22
  GPT4o_0513,0.9605042016806723,0.951890756302521,0.9561780814209724,0.965546218487395,0.9537815126050421,0.9630252100840336,0.9596638655462185,,,
 
23
  cross_openhermes_llama3_70b_4096_inst,0.9510504201680672,0.9464285714285714,0.9487338668359928,0.9621848739495799,0.9512605042016806,0.9487394957983193,0.9420168067226891,,,
24
  cross_openhermes_llama3_8b_4096_2_inst,0.9180672268907563,0.871218487394958,0.8940295385002202,0.9310924369747899,0.9218487394957983,0.9084033613445378,0.9109243697478991,,,
 
8
  Meta-Llama-3-8B-Instruct,0.9210084033613445,0.880672268907563,0.9003888121913395,0.9411764705882353,0.9033613445378151,0.9260504201680673,0.9134453781512605,,,
9
  Meta-Llama-3.1-70B-Instruct,0.9615546218487395,0.9512605042016806,0.9563798632627071,0.9647058823529412,0.9512605042016806,0.9647058823529412,0.965546218487395,,,
10
  Qwen2_5_3B_Instruct,0.9378151260504202,0.8924369747899159,0.9145635113049859,0.9504201680672268,0.9327731092436975,0.9378151260504202,0.9302521008403362,,,
11
+ cross_openhermes_llama3_8b_16384_inst,0.9283613445378152,0.8817226890756302,0.9044411705686984,0.9403361344537815,0.9201680672268907,0.926890756302521,0.9260504201680673,,,
12
  cross_openhermes_llama3_70b_4096_inst_2,0.959873949579832,0.9579831932773109,0.9589276393593623,0.965546218487395,0.9554621848739496,0.9621848739495799,0.9563025210084034,,,
13
  SeaLLMs-v3-7B-Chat,0.9403361344537815,0.917016806722689,0.9285300818164836,0.9537815126050421,0.9378151260504202,0.9394957983193277,0.9302521008403362,,,
14
  Qwen2_5_72B_Instruct,0.9682773109243697,0.9632352941176471,0.9657497216354985,0.9714285714285714,0.9638655462184874,0.9680672268907563,0.9697478991596639,,,
 
17
  Qwen2_5_14B_Instruct,0.9581932773109244,0.9474789915966386,0.9528060148705768,0.965546218487395,0.9529411764705882,0.9571428571428572,0.9571428571428572,,,
18
  sg_llama3_8192_8b,0.899579831932773,0.8611344537815127,0.8799374134072335,0.9201680672268907,0.8831932773109243,0.9117647058823529,0.8831932773109243,,,
19
  sg_llama3_70b_inst,0.9552521008403361,0.9453781512605042,0.9502894779607259,0.9663865546218487,0.9436974789915966,0.957983193277311,0.9529411764705882,,,
20
+ cross_openhermes_llama3_8b_2048_inst,0.9109243697478991,0.8632352941176471,0.886438895223954,0.9277310924369748,0.9050420168067227,0.9033613445378151,0.907563025210084,,,
21
  gemma-2-2b-it,0.917016806722689,0.8665966386554622,0.8910940700869288,0.934453781512605,0.9025210084033614,0.9193277310924369,0.9117647058823529,,,
22
  llama3-8b-cpt-sea-lionv2-instruct,0.9365546218487395,0.9086134453781513,0.9223724784871395,0.9420168067226891,0.926890756302521,0.9436974789915966,0.9336134453781513,,,
23
+ cross_openhermes_llama3_8b_12288_inst,0.9273109243697479,0.8850840336134453,0.9057055579353634,0.9394957983193277,0.9252100840336135,0.9218487394957983,0.9226890756302522,,,
24
  Qwen2_5_0_5B_Instruct,0.6584033613445378,0.48172268907563026,0.5563732844778362,0.692436974789916,0.673109243697479,0.653781512605042,0.6142857142857143,,,
25
  GPT4o_0513,0.9605042016806723,0.951890756302521,0.9561780814209724,0.965546218487395,0.9537815126050421,0.9630252100840336,0.9596638655462185,,,
26
+ cross_openhermes_llama3_8b_8192_inst,0.9218487394957983,0.8758403361344538,0.8982557893523895,0.9336134453781513,0.9176470588235294,0.9193277310924369,0.9168067226890756,,,
27
  cross_openhermes_llama3_70b_4096_inst,0.9510504201680672,0.9464285714285714,0.9487338668359928,0.9621848739495799,0.9512605042016806,0.9487394957983193,0.9420168067226891,,,
28
  cross_openhermes_llama3_8b_4096_2_inst,0.9180672268907563,0.871218487394958,0.8940295385002202,0.9310924369747899,0.9218487394957983,0.9084033613445378,0.9109243697478991,,,
results/cultural_reasoning/zero_shot/cn_eval.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.47619047619047616
9
  Meta-Llama-3-8B-Instruct,0.4666666666666667
10
  Meta-Llama-3.1-70B-Instruct,0.5428571428571428
11
  Qwen2_5_3B_Instruct,0.7142857142857143
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.638095238095238
13
  SeaLLMs-v3-7B-Chat,0.819047619047619
14
  Qwen2_5_72B_Instruct,0.8761904761904762
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.5333333333333333
17
  Qwen2_5_14B_Instruct,0.8285714285714286
18
  sg_llama3_8192_8b,0.47619047619047616
19
  sg_llama3_70b_inst,0.5523809523809524
 
20
  gemma-2-2b-it,0.3619047619047619
21
  llama3-8b-cpt-sea-lionv2-instruct,0.49523809523809526
 
22
  Qwen2_5_0_5B_Instruct,0.3619047619047619
23
  GPT4o_0513,0.8095238095238095
 
24
  cross_openhermes_llama3_70b_4096_inst,0.6095238095238096
25
  cross_openhermes_llama3_8b_4096_2_inst,0.5142857142857142
 
9
  Meta-Llama-3-8B-Instruct,0.4666666666666667
10
  Meta-Llama-3.1-70B-Instruct,0.5428571428571428
11
  Qwen2_5_3B_Instruct,0.7142857142857143
12
+ cross_openhermes_llama3_8b_16384_inst,0.5428571428571428
13
  cross_openhermes_llama3_70b_4096_inst_2,0.638095238095238
14
  SeaLLMs-v3-7B-Chat,0.819047619047619
15
  Qwen2_5_72B_Instruct,0.8761904761904762
 
18
  Qwen2_5_14B_Instruct,0.8285714285714286
19
  sg_llama3_8192_8b,0.47619047619047616
20
  sg_llama3_70b_inst,0.5523809523809524
21
+ cross_openhermes_llama3_8b_2048_inst,0.5142857142857142
22
  gemma-2-2b-it,0.3619047619047619
23
  llama3-8b-cpt-sea-lionv2-instruct,0.49523809523809526
24
+ cross_openhermes_llama3_8b_12288_inst,0.5523809523809524
25
  Qwen2_5_0_5B_Instruct,0.3619047619047619
26
  GPT4o_0513,0.8095238095238095
27
+ cross_openhermes_llama3_8b_8192_inst,0.5142857142857142
28
  cross_openhermes_llama3_70b_4096_inst,0.6095238095238096
29
  cross_openhermes_llama3_8b_4096_2_inst,0.5142857142857142
results/cultural_reasoning/zero_shot/ph_eval.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.47
9
  Meta-Llama-3-8B-Instruct,0.58
10
  Meta-Llama-3.1-70B-Instruct,0.68
11
  Qwen2_5_3B_Instruct,0.4
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.66
13
  SeaLLMs-v3-7B-Chat,0.47
14
  Qwen2_5_72B_Instruct,0.72
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.63
17
  Qwen2_5_14B_Instruct,0.6
18
  sg_llama3_8192_8b,0.57
19
  sg_llama3_70b_inst,0.69
 
20
  gemma-2-2b-it,0.4
21
  llama3-8b-cpt-sea-lionv2-instruct,0.56
 
22
  Qwen2_5_0_5B_Instruct,0.27
23
  GPT4o_0513,0.77
 
24
  cross_openhermes_llama3_70b_4096_inst,0.68
25
  cross_openhermes_llama3_8b_4096_2_inst,0.49
 
9
  Meta-Llama-3-8B-Instruct,0.58
10
  Meta-Llama-3.1-70B-Instruct,0.68
11
  Qwen2_5_3B_Instruct,0.4
12
+ cross_openhermes_llama3_8b_16384_inst,0.52
13
  cross_openhermes_llama3_70b_4096_inst_2,0.66
14
  SeaLLMs-v3-7B-Chat,0.47
15
  Qwen2_5_72B_Instruct,0.72
 
18
  Qwen2_5_14B_Instruct,0.6
19
  sg_llama3_8192_8b,0.57
20
  sg_llama3_70b_inst,0.69
21
+ cross_openhermes_llama3_8b_2048_inst,0.5
22
  gemma-2-2b-it,0.4
23
  llama3-8b-cpt-sea-lionv2-instruct,0.56
24
+ cross_openhermes_llama3_8b_12288_inst,0.52
25
  Qwen2_5_0_5B_Instruct,0.27
26
  GPT4o_0513,0.77
27
+ cross_openhermes_llama3_8b_8192_inst,0.48
28
  cross_openhermes_llama3_70b_4096_inst,0.68
29
  cross_openhermes_llama3_8b_4096_2_inst,0.49
results/cultural_reasoning/zero_shot/sg_eval.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.6019417475728155
9
  Meta-Llama-3-8B-Instruct,0.6504854368932039
10
  Meta-Llama-3.1-70B-Instruct,0.7184466019417476
11
  Qwen2_5_3B_Instruct,0.6310679611650486
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.7087378640776699
13
  SeaLLMs-v3-7B-Chat,0.7184466019417476
14
  Qwen2_5_72B_Instruct,0.7669902912621359
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.7087378640776699
17
  Qwen2_5_14B_Instruct,0.7669902912621359
18
  sg_llama3_8192_8b,0.5533980582524272
19
  sg_llama3_70b_inst,0.6699029126213593
 
20
  gemma-2-2b-it,0.5533980582524272
21
  llama3-8b-cpt-sea-lionv2-instruct,0.6504854368932039
 
22
  Qwen2_5_0_5B_Instruct,0.4077669902912621
23
  GPT4o_0513,0.8446601941747572
 
24
  cross_openhermes_llama3_70b_4096_inst,0.6796116504854369
25
  cross_openhermes_llama3_8b_4096_2_inst,0.6407766990291263
 
9
  Meta-Llama-3-8B-Instruct,0.6504854368932039
10
  Meta-Llama-3.1-70B-Instruct,0.7184466019417476
11
  Qwen2_5_3B_Instruct,0.6310679611650486
12
+ cross_openhermes_llama3_8b_16384_inst,0.6504854368932039
13
  cross_openhermes_llama3_70b_4096_inst_2,0.7087378640776699
14
  SeaLLMs-v3-7B-Chat,0.7184466019417476
15
  Qwen2_5_72B_Instruct,0.7669902912621359
 
18
  Qwen2_5_14B_Instruct,0.7669902912621359
19
  sg_llama3_8192_8b,0.5533980582524272
20
  sg_llama3_70b_inst,0.6699029126213593
21
+ cross_openhermes_llama3_8b_2048_inst,0.6116504854368932
22
  gemma-2-2b-it,0.5533980582524272
23
  llama3-8b-cpt-sea-lionv2-instruct,0.6504854368932039
24
+ cross_openhermes_llama3_8b_12288_inst,0.6310679611650486
25
  Qwen2_5_0_5B_Instruct,0.4077669902912621
26
  GPT4o_0513,0.8446601941747572
27
+ cross_openhermes_llama3_8b_8192_inst,0.6407766990291263
28
  cross_openhermes_llama3_70b_4096_inst,0.6796116504854369
29
  cross_openhermes_llama3_8b_4096_2_inst,0.6407766990291263
results/cultural_reasoning/zero_shot/sg_eval_v1_cleaned.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.6029411764705882
9
  Meta-Llama-3-8B-Instruct,0.5882352941176471
10
  Meta-Llama-3.1-70B-Instruct,0.6617647058823529
11
  Qwen2_5_3B_Instruct,0.5882352941176471
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.6470588235294118
13
  SeaLLMs-v3-7B-Chat,0.5882352941176471
14
  Qwen2_5_72B_Instruct,0.7205882352941176
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.6617647058823529
17
  Qwen2_5_14B_Instruct,0.6911764705882353
18
  sg_llama3_8192_8b,0.47058823529411764
19
  sg_llama3_70b_inst,0.6176470588235294
 
20
  gemma-2-2b-it,0.5147058823529411
21
  llama3-8b-cpt-sea-lionv2-instruct,0.6617647058823529
 
22
  Qwen2_5_0_5B_Instruct,0.36764705882352944
23
  GPT4o_0513,0.8088235294117647
 
24
  cross_openhermes_llama3_70b_4096_inst,0.5882352941176471
25
  cross_openhermes_llama3_8b_4096_2_inst,0.6029411764705882
 
9
  Meta-Llama-3-8B-Instruct,0.5882352941176471
10
  Meta-Llama-3.1-70B-Instruct,0.6617647058823529
11
  Qwen2_5_3B_Instruct,0.5882352941176471
12
+ cross_openhermes_llama3_8b_16384_inst,0.6176470588235294
13
  cross_openhermes_llama3_70b_4096_inst_2,0.6470588235294118
14
  SeaLLMs-v3-7B-Chat,0.5882352941176471
15
  Qwen2_5_72B_Instruct,0.7205882352941176
 
18
  Qwen2_5_14B_Instruct,0.6911764705882353
19
  sg_llama3_8192_8b,0.47058823529411764
20
  sg_llama3_70b_inst,0.6176470588235294
21
+ cross_openhermes_llama3_8b_2048_inst,0.6323529411764706
22
  gemma-2-2b-it,0.5147058823529411
23
  llama3-8b-cpt-sea-lionv2-instruct,0.6617647058823529
24
+ cross_openhermes_llama3_8b_12288_inst,0.6029411764705882
25
  Qwen2_5_0_5B_Instruct,0.36764705882352944
26
  GPT4o_0513,0.8088235294117647
27
+ cross_openhermes_llama3_8b_8192_inst,0.6176470588235294
28
  cross_openhermes_llama3_70b_4096_inst,0.5882352941176471
29
  cross_openhermes_llama3_8b_4096_2_inst,0.6029411764705882
results/cultural_reasoning/zero_shot/sg_eval_v2_mcq.csv CHANGED
@@ -4,14 +4,19 @@ Qwen2_5_32B_Instruct,0.8436363636363636
4
  Qwen2_5_7B_Instruct,0.78
5
  Qwen2_5_1_5B_Instruct,0.6636363636363637
6
  cross_openhermes_llama3_8b_4096_inst,0.7490909090909091
 
7
  Qwen2_5_3B_Instruct,0.72
 
8
  cross_openhermes_llama3_70b_4096_inst_2,0.8618181818181818
9
  Qwen2_5_72B_Instruct,0.8618181818181818
10
  Meta-Llama-3-70B-Instruct,0.8381818181818181
11
  Qwen2_5_14B_Instruct,0.8345454545454546
12
  sg_llama3_8192_8b,0.76
13
  sg_llama3_70b_inst,0.8436363636363636
 
14
  gemma-2-2b-it,0.7163636363636363
 
15
  Qwen2_5_0_5B_Instruct,0.5727272727272728
 
16
  cross_openhermes_llama3_70b_4096_inst,0.8381818181818181
17
  cross_openhermes_llama3_8b_4096_2_inst,0.7654545454545455
 
4
  Qwen2_5_7B_Instruct,0.78
5
  Qwen2_5_1_5B_Instruct,0.6636363636363637
6
  cross_openhermes_llama3_8b_4096_inst,0.7490909090909091
7
+ Meta-Llama-3-8B-Instruct,0.7909090909090909
8
  Qwen2_5_3B_Instruct,0.72
9
+ cross_openhermes_llama3_8b_16384_inst,0.7963636363636364
10
  cross_openhermes_llama3_70b_4096_inst_2,0.8618181818181818
11
  Qwen2_5_72B_Instruct,0.8618181818181818
12
  Meta-Llama-3-70B-Instruct,0.8381818181818181
13
  Qwen2_5_14B_Instruct,0.8345454545454546
14
  sg_llama3_8192_8b,0.76
15
  sg_llama3_70b_inst,0.8436363636363636
16
+ cross_openhermes_llama3_8b_2048_inst,0.7781818181818182
17
  gemma-2-2b-it,0.7163636363636363
18
+ cross_openhermes_llama3_8b_12288_inst,0.7890909090909091
19
  Qwen2_5_0_5B_Instruct,0.5727272727272728
20
+ cross_openhermes_llama3_8b_8192_inst,0.78
21
  cross_openhermes_llama3_70b_4096_inst,0.8381818181818181
22
  cross_openhermes_llama3_8b_4096_2_inst,0.7654545454545455
results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv CHANGED
@@ -4,14 +4,19 @@ Qwen2_5_32B_Instruct,53.2
4
  Qwen2_5_7B_Instruct,50.279999999999994
5
  Qwen2_5_1_5B_Instruct,44.480000000000004
6
  cross_openhermes_llama3_8b_4096_inst,51.6
 
7
  Qwen2_5_3B_Instruct,47.24
 
8
  cross_openhermes_llama3_70b_4096_inst_2,55.8
9
  Qwen2_5_72B_Instruct,53.32
10
  Meta-Llama-3-70B-Instruct,50.599999999999994
11
  Qwen2_5_14B_Instruct,53.2
12
  sg_llama3_8192_8b,53.4
13
  sg_llama3_70b_inst,51.959999999999994
 
14
  gemma-2-2b-it,52.08
 
15
  Qwen2_5_0_5B_Instruct,35.28
 
16
  cross_openhermes_llama3_70b_4096_inst,53.2
17
  cross_openhermes_llama3_8b_4096_2_inst,52.28
 
4
  Qwen2_5_7B_Instruct,50.279999999999994
5
  Qwen2_5_1_5B_Instruct,44.480000000000004
6
  cross_openhermes_llama3_8b_4096_inst,51.6
7
+ Meta-Llama-3-8B-Instruct,51.120000000000005
8
  Qwen2_5_3B_Instruct,47.24
9
+ cross_openhermes_llama3_8b_16384_inst,52.44
10
  cross_openhermes_llama3_70b_4096_inst_2,55.8
11
  Qwen2_5_72B_Instruct,53.32
12
  Meta-Llama-3-70B-Instruct,50.599999999999994
13
  Qwen2_5_14B_Instruct,53.2
14
  sg_llama3_8192_8b,53.4
15
  sg_llama3_70b_inst,51.959999999999994
16
+ cross_openhermes_llama3_8b_2048_inst,52.24
17
  gemma-2-2b-it,52.08
18
+ cross_openhermes_llama3_8b_12288_inst,52.480000000000004
19
  Qwen2_5_0_5B_Instruct,35.28
20
+ cross_openhermes_llama3_8b_8192_inst,53.0
21
  cross_openhermes_llama3_70b_4096_inst,53.2
22
  cross_openhermes_llama3_8b_4096_2_inst,52.28
results/cultural_reasoning/zero_shot/us_eval.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.6448598130841121
9
  Meta-Llama-3-8B-Instruct,0.7009345794392523
10
  Meta-Llama-3.1-70B-Instruct,0.8411214953271028
11
  Qwen2_5_3B_Instruct,0.6728971962616822
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.8598130841121495
13
  SeaLLMs-v3-7B-Chat,0.6915887850467289
14
  Qwen2_5_72B_Instruct,0.8598130841121495
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.8691588785046729
17
  Qwen2_5_14B_Instruct,0.822429906542056
18
  sg_llama3_8192_8b,0.7009345794392523
19
  sg_llama3_70b_inst,0.8598130841121495
 
20
  gemma-2-2b-it,0.6915887850467289
21
  llama3-8b-cpt-sea-lionv2-instruct,0.7009345794392523
 
22
  Qwen2_5_0_5B_Instruct,0.37383177570093457
23
  GPT4o_0513,0.8691588785046729
 
24
  cross_openhermes_llama3_70b_4096_inst,0.8317757009345794
25
  cross_openhermes_llama3_8b_4096_2_inst,0.7289719626168224
 
9
  Meta-Llama-3-8B-Instruct,0.7009345794392523
10
  Meta-Llama-3.1-70B-Instruct,0.8411214953271028
11
  Qwen2_5_3B_Instruct,0.6728971962616822
12
+ cross_openhermes_llama3_8b_16384_inst,0.7663551401869159
13
  cross_openhermes_llama3_70b_4096_inst_2,0.8598130841121495
14
  SeaLLMs-v3-7B-Chat,0.6915887850467289
15
  Qwen2_5_72B_Instruct,0.8598130841121495
 
18
  Qwen2_5_14B_Instruct,0.822429906542056
19
  sg_llama3_8192_8b,0.7009345794392523
20
  sg_llama3_70b_inst,0.8598130841121495
21
+ cross_openhermes_llama3_8b_2048_inst,0.7102803738317757
22
  gemma-2-2b-it,0.6915887850467289
23
  llama3-8b-cpt-sea-lionv2-instruct,0.7009345794392523
24
+ cross_openhermes_llama3_8b_12288_inst,0.7663551401869159
25
  Qwen2_5_0_5B_Instruct,0.37383177570093457
26
  GPT4o_0513,0.8691588785046729
27
+ cross_openhermes_llama3_8b_8192_inst,0.7383177570093458
28
  cross_openhermes_llama3_70b_4096_inst,0.8317757009345794
29
  cross_openhermes_llama3_8b_4096_2_inst,0.7289719626168224
results/dialogue/zero_shot/dialogsum.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.2519360474995096,0.3481981488260775,0.132
9
  Meta-Llama-3-8B-Instruct,0.23978455271183616,0.33971099717559883,0.1203340311564728,0.2593086298034369
10
  Meta-Llama-3.1-70B-Instruct,0.2526239717396146,0.35714386898604744,0.1258832921736473,0.27484475405914904
11
  Qwen2_5_3B_Instruct,0.22107390172674926,0.32206286484028823,0.10065030710901035,0.24050853323094928
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.2696117118557506,0.3743401999085179,0.14161035346358444,0.2928845821951494
13
  SeaLLMs-v3-7B-Chat,0.24891094210680076,0.35393482223136147,0.12172072639345373,0.27107727769558715
14
  Qwen2_5_72B_Instruct,0.23460549655507293,0.3373580017785426,0.10893746645433498,0.25752102143234123
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.2557065499979308,0.36058417323628,0.1275808733778686
17
  Qwen2_5_14B_Instruct,0.2343478938479703,0.3386251381162625,0.10742381514017992,0.2569947282874686
18
  sg_llama3_8192_8b,0.2708022468830074,0.3774768588431775,0.1387436961438702,0.2961861856619747
19
  sg_llama3_70b_inst,0.26633840691332344,0.3692028513115729,0.1412505883866801,0.2885617810417173
 
20
  gemma-2-2b-it,0.2597323674875989,0.36848124762381895,0.12622684440269072,0.2844890104362872
21
  llama3-8b-cpt-sea-lionv2-instruct,0.25777587511641403,0.35911990072292727,0.13269121463917308,0.2815165099871418
 
22
  Qwen2_5_0_5B_Instruct,0.19408176276624156,0.28989753303423227,0.07842728643649079,0.21392046882800164
23
  GPT4o_0513,0.2375730297294346,0.3364674648846549,0.11718194476069822,0.25906967954295057
 
24
  cross_openhermes_llama3_70b_4096_inst,0.2727448037865066,0.3786585439052446,0.14288118221672744,0.2966946852375478
25
  cross_openhermes_llama3_8b_4096_2_inst,0.2606665855752626,0.3625849627075515,0.1356024730798863,0.28381232093835007
 
9
  Meta-Llama-3-8B-Instruct,0.23978455271183616,0.33971099717559883,0.1203340311564728,0.2593086298034369
10
  Meta-Llama-3.1-70B-Instruct,0.2526239717396146,0.35714386898604744,0.1258832921736473,0.27484475405914904
11
  Qwen2_5_3B_Instruct,0.22107390172674926,0.32206286484028823,0.10065030710901035,0.24050853323094928
12
+ cross_openhermes_llama3_8b_16384_inst,0.26804472072851154,0.3719015771017311,0.14057603944113953,0.29165654564266397
13
  cross_openhermes_llama3_70b_4096_inst_2,0.2696117118557506,0.3743401999085179,0.14161035346358444,0.2928845821951494
14
  SeaLLMs-v3-7B-Chat,0.24891094210680076,0.35393482223136147,0.12172072639345373,0.27107727769558715
15
  Qwen2_5_72B_Instruct,0.23460549655507293,0.3373580017785426,0.10893746645433498,0.25752102143234123
 
18
  Qwen2_5_14B_Instruct,0.2343478938479703,0.3386251381162625,0.10742381514017992,0.2569947282874686
19
  sg_llama3_8192_8b,0.2708022468830074,0.3774768588431775,0.1387436961438702,0.2961861856619747
20
  sg_llama3_70b_inst,0.26633840691332344,0.3692028513115729,0.1412505883866801,0.2885617810417173
21
+ cross_openhermes_llama3_8b_2048_inst,0.26075548388204367,0.3613222112557008,0.13618000204638436,0.28476423834404585
22
  gemma-2-2b-it,0.2597323674875989,0.36848124762381895,0.12622684440269072,0.2844890104362872
23
  llama3-8b-cpt-sea-lionv2-instruct,0.25777587511641403,0.35911990072292727,0.13269121463917308,0.2815165099871418
24
+ cross_openhermes_llama3_8b_12288_inst,0.27081377092899106,0.3746700335717668,0.1422316280821482,0.2955396511330582
25
  Qwen2_5_0_5B_Instruct,0.19408176276624156,0.28989753303423227,0.07842728643649079,0.21392046882800164
26
  GPT4o_0513,0.2375730297294346,0.3364674648846549,0.11718194476069822,0.25906967954295057
27
+ cross_openhermes_llama3_8b_8192_inst,0.26572751013608503,0.3688173245167265,0.13924057139568077,0.2891246344958478
28
  cross_openhermes_llama3_70b_4096_inst,0.2727448037865066,0.3786585439052446,0.14288118221672744,0.2966946852375478
29
  cross_openhermes_llama3_8b_4096_2_inst,0.2606665855752626,0.3625849627075515,0.1356024730798863,0.28381232093835007
results/dialogue/zero_shot/dream.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.8613424791768741
9
  Meta-Llama-3-8B-Instruct,0.8946594806467418
10
  Meta-Llama-3.1-70B-Instruct,0.9559039686428221
11
  Qwen2_5_3B_Instruct,0.9029887310142087
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.9549240568348849
13
  SeaLLMs-v3-7B-Chat,0.9265066144047036
14
  Qwen2_5_72B_Instruct,0.9627633512983832
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.9480646741793238
17
  Qwen2_5_14B_Instruct,0.9461048505634493
18
  sg_llama3_8192_8b,0.9103380695737384
19
  sg_llama3_70b_inst,0.9524742773150416
 
20
  gemma-2-2b-it,0.8510534051935326
21
  llama3-8b-cpt-sea-lionv2-instruct,0.8858402743753062
 
22
  Qwen2_5_0_5B_Instruct,0.6526212640862322
23
  GPT4o_0513,0.9583537481626654
 
24
  cross_openhermes_llama3_70b_4096_inst,0.9514943655071043
25
  cross_openhermes_llama3_8b_4096_2_inst,0.876531112199902
 
9
  Meta-Llama-3-8B-Instruct,0.8946594806467418
10
  Meta-Llama-3.1-70B-Instruct,0.9559039686428221
11
  Qwen2_5_3B_Instruct,0.9029887310142087
12
+ cross_openhermes_llama3_8b_16384_inst,0.8853503184713376
13
  cross_openhermes_llama3_70b_4096_inst_2,0.9549240568348849
14
  SeaLLMs-v3-7B-Chat,0.9265066144047036
15
  Qwen2_5_72B_Instruct,0.9627633512983832
 
18
  Qwen2_5_14B_Instruct,0.9461048505634493
19
  sg_llama3_8192_8b,0.9103380695737384
20
  sg_llama3_70b_inst,0.9524742773150416
21
+ cross_openhermes_llama3_8b_2048_inst,0.8642822146006859
22
  gemma-2-2b-it,0.8510534051935326
23
  llama3-8b-cpt-sea-lionv2-instruct,0.8858402743753062
24
+ cross_openhermes_llama3_8b_12288_inst,0.8829005389514943
25
  Qwen2_5_0_5B_Instruct,0.6526212640862322
26
  GPT4o_0513,0.9583537481626654
27
+ cross_openhermes_llama3_8b_8192_inst,0.8833904948554631
28
  cross_openhermes_llama3_70b_4096_inst,0.9514943655071043
29
  cross_openhermes_llama3_8b_4096_2_inst,0.876531112199902
results/dialogue/zero_shot/samsum.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.2961783902880866,0.40739117705606903,0.16
9
  Meta-Llama-3-8B-Instruct,0.2846315092346869,0.39397110152251813,0.154320846916639,0.30560257926490364
10
  Meta-Llama-3.1-70B-Instruct,0.28934874612070227,0.4036295731242805,0.15211190810296196,0.31230475713486433
11
  Qwen2_5_3B_Instruct,0.26935624341081515,0.380865832002109,0.13872106416227833,0.28848183406805816
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.31836805920341904,0.432137300473344,0.18274124314511622,0.34022563399179695
13
  SeaLLMs-v3-7B-Chat,0.2959981719045788,0.4078820748825196,0.16338306782652476,0.316729373004692
14
  Qwen2_5_72B_Instruct,0.28852247889830335,0.3996215000271418,0.15494490129237035,0.31100103537539775
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.2893525314227379,0.4030746211134018,0.15236139065578
17
  Qwen2_5_14B_Instruct,0.2713801253928723,0.3836253496005304,0.13683087953788298,0.2936841470402035
18
  sg_llama3_8192_8b,0.30740523414540055,0.4199805360695743,0.1701793607165699,0.33205580565005743
19
  sg_llama3_70b_inst,0.3146051103643872,0.4271361513564755,0.18238925099430264,0.33428992874238356
 
20
  gemma-2-2b-it,0.31118787136959813,0.4324251755711466,0.16441328335793207,0.33672515517971563
21
  llama3-8b-cpt-sea-lionv2-instruct,0.306997595680581,0.4214048099551701,0.1709790451938523,0.3286089318927205
 
22
  Qwen2_5_0_5B_Instruct,0.20766179544894214,0.3105872033328297,0.08726222085933319,0.22513596215466355
23
  GPT4o_0513,0.27736679291505306,0.386750207633093,0.14889081847621596,0.2964593526358502
 
24
  cross_openhermes_llama3_70b_4096_inst,0.32140993091581,0.43714768967090817,0.18346420469253946,0.3436178983839823
25
  cross_openhermes_llama3_8b_4096_2_inst,0.29120862165554934,0.40224666789707636,0.16041490437445552,0.31096429269511605
 
9
  Meta-Llama-3-8B-Instruct,0.2846315092346869,0.39397110152251813,0.154320846916639,0.30560257926490364
10
  Meta-Llama-3.1-70B-Instruct,0.28934874612070227,0.4036295731242805,0.15211190810296196,0.31230475713486433
11
  Qwen2_5_3B_Instruct,0.26935624341081515,0.380865832002109,0.13872106416227833,0.28848183406805816
12
+ cross_openhermes_llama3_8b_16384_inst,0.3035835775124711,0.4145496028647668,0.1712406649168221,0.32496046475582435
13
  cross_openhermes_llama3_70b_4096_inst_2,0.31836805920341904,0.432137300473344,0.18274124314511622,0.34022563399179695
14
  SeaLLMs-v3-7B-Chat,0.2959981719045788,0.4078820748825196,0.16338306782652476,0.316729373004692
15
  Qwen2_5_72B_Instruct,0.28852247889830335,0.3996215000271418,0.15494490129237035,0.31100103537539775
 
18
  Qwen2_5_14B_Instruct,0.2713801253928723,0.3836253496005304,0.13683087953788298,0.2936841470402035
19
  sg_llama3_8192_8b,0.30740523414540055,0.4199805360695743,0.1701793607165699,0.33205580565005743
20
  sg_llama3_70b_inst,0.3146051103643872,0.4271361513564755,0.18238925099430264,0.33428992874238356
21
+ cross_openhermes_llama3_8b_2048_inst,0.29349656315763506,0.4044332095677647,0.16224920209587818,0.3138072778092623
22
  gemma-2-2b-it,0.31118787136959813,0.4324251755711466,0.16441328335793207,0.33672515517971563
23
  llama3-8b-cpt-sea-lionv2-instruct,0.306997595680581,0.4214048099551701,0.1709790451938523,0.3286089318927205
24
+ cross_openhermes_llama3_8b_12288_inst,0.30043920936284546,0.41309659421156786,0.16636483587009585,0.3218561980068726
25
  Qwen2_5_0_5B_Instruct,0.20766179544894214,0.3105872033328297,0.08726222085933319,0.22513596215466355
26
  GPT4o_0513,0.27736679291505306,0.386750207633093,0.14889081847621596,0.2964593526358502
27
+ cross_openhermes_llama3_8b_8192_inst,0.3006064157801082,0.4136369613192758,0.16732467499775527,0.32085761102329347
28
  cross_openhermes_llama3_70b_4096_inst,0.32140993091581,0.43714768967090817,0.18346420469253946,0.3436178983839823
29
  cross_openhermes_llama3_8b_4096_2_inst,0.29120862165554934,0.40224666789707636,0.16041490437445552,0.31096429269511605
results/emotion/zero_shot/ind_emotion.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.6704545454545454
9
  Meta-Llama-3-8B-Instruct,0.6522727272727272
10
  Meta-Llama-3.1-70B-Instruct,0.7159090909090909
11
  Qwen2_5_3B_Instruct,0.5522727272727272
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.7068181818181818
13
  SeaLLMs-v3-7B-Chat,0.6454545454545455
14
  Qwen2_5_72B_Instruct,0.7068181818181818
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.6909090909090909
17
  Qwen2_5_14B_Instruct,0.6954545454545454
18
  sg_llama3_8192_8b,0.6045454545454545
19
  sg_llama3_70b_inst,0.7
 
20
  gemma-2-2b-it,0.6636363636363637
21
  llama3-8b-cpt-sea-lionv2-instruct,0.6613636363636364
 
22
  Qwen2_5_0_5B_Instruct,0.37727272727272726
23
  GPT4o_0513,0.7068181818181818
 
24
  cross_openhermes_llama3_70b_4096_inst,0.6863636363636364
25
  cross_openhermes_llama3_8b_4096_2_inst,0.6863636363636364
 
9
  Meta-Llama-3-8B-Instruct,0.6522727272727272
10
  Meta-Llama-3.1-70B-Instruct,0.7159090909090909
11
  Qwen2_5_3B_Instruct,0.5522727272727272
12
+ cross_openhermes_llama3_8b_16384_inst,0.6977272727272728
13
  cross_openhermes_llama3_70b_4096_inst_2,0.7068181818181818
14
  SeaLLMs-v3-7B-Chat,0.6454545454545455
15
  Qwen2_5_72B_Instruct,0.7068181818181818
 
18
  Qwen2_5_14B_Instruct,0.6954545454545454
19
  sg_llama3_8192_8b,0.6045454545454545
20
  sg_llama3_70b_inst,0.7
21
+ cross_openhermes_llama3_8b_2048_inst,0.6636363636363637
22
  gemma-2-2b-it,0.6636363636363637
23
  llama3-8b-cpt-sea-lionv2-instruct,0.6613636363636364
24
+ cross_openhermes_llama3_8b_12288_inst,0.7045454545454546
25
  Qwen2_5_0_5B_Instruct,0.37727272727272726
26
  GPT4o_0513,0.7068181818181818
27
+ cross_openhermes_llama3_8b_8192_inst,0.7022727272727273
28
  cross_openhermes_llama3_70b_4096_inst,0.6863636363636364
29
  cross_openhermes_llama3_8b_4096_2_inst,0.6863636363636364
results/emotion/zero_shot/sst2.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.926605504587156
9
  Meta-Llama-3-8B-Instruct,0.8784403669724771
10
  Meta-Llama-3.1-70B-Instruct,0.9529816513761468
11
  Qwen2_5_3B_Instruct,0.8245412844036697
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.9415137614678899
13
  SeaLLMs-v3-7B-Chat,0.9403669724770642
14
  Qwen2_5_72B_Instruct,0.9334862385321101
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.9495412844036697
17
  Qwen2_5_14B_Instruct,0.9311926605504587
18
  sg_llama3_8192_8b,0.9208715596330275
19
  sg_llama3_70b_inst,0.9334862385321101
 
20
  gemma-2-2b-it,0.9243119266055045
21
  llama3-8b-cpt-sea-lionv2-instruct,0.9128440366972477
 
22
  Qwen2_5_0_5B_Instruct,0.7889908256880734
23
  GPT4o_0513,0.9415137614678899
 
24
  cross_openhermes_llama3_70b_4096_inst,0.9380733944954128
25
  cross_openhermes_llama3_8b_4096_2_inst,0.930045871559633
 
9
  Meta-Llama-3-8B-Instruct,0.8784403669724771
10
  Meta-Llama-3.1-70B-Instruct,0.9529816513761468
11
  Qwen2_5_3B_Instruct,0.8245412844036697
12
+ cross_openhermes_llama3_8b_16384_inst,0.9243119266055045
13
  cross_openhermes_llama3_70b_4096_inst_2,0.9415137614678899
14
  SeaLLMs-v3-7B-Chat,0.9403669724770642
15
  Qwen2_5_72B_Instruct,0.9334862385321101
 
18
  Qwen2_5_14B_Instruct,0.9311926605504587
19
  sg_llama3_8192_8b,0.9208715596330275
20
  sg_llama3_70b_inst,0.9334862385321101
21
+ cross_openhermes_llama3_8b_2048_inst,0.9243119266055045
22
  gemma-2-2b-it,0.9243119266055045
23
  llama3-8b-cpt-sea-lionv2-instruct,0.9128440366972477
24
+ cross_openhermes_llama3_8b_12288_inst,0.9288990825688074
25
  Qwen2_5_0_5B_Instruct,0.7889908256880734
26
  GPT4o_0513,0.9415137614678899
27
+ cross_openhermes_llama3_8b_8192_inst,0.9254587155963303
28
  cross_openhermes_llama3_70b_4096_inst,0.9380733944954128
29
  cross_openhermes_llama3_8b_4096_2_inst,0.930045871559633
results/flores_translation/zero_shot/ind2eng.csv CHANGED
@@ -7,15 +7,20 @@ cross_openhermes_llama3_8b_4096_inst,0.37782883404862155
7
  Meta-Llama-3-8B-Instruct,0.33079891679041123
8
  Meta-Llama-3.1-70B-Instruct,0.43366494500251235
9
  Qwen2_5_3B_Instruct,0.3316936422167389
 
10
  cross_openhermes_llama3_70b_4096_inst_2,0.41785038798707536
11
  SeaLLMs-v3-7B-Chat,0.3594829412574955
 
12
  gemma-2-9b-it,0.40786563079141763
13
  Meta-Llama-3-70B-Instruct,0.3830092775167675
14
  Qwen2_5_14B_Instruct,0.3901044620348051
15
  sg_llama3_8192_8b,0.3758986882788705
16
  sg_llama3_70b_inst,0.4086440304524362
 
17
  gemma-2-2b-it,0.3482500758113138
18
  llama3-8b-cpt-sea-lionv2-instruct,0.3916108972514423
 
19
  GPT4o_0513,0.42589589086974855
 
20
  cross_openhermes_llama3_70b_4096_inst,0.4206616934730876
21
  cross_openhermes_llama3_8b_4096_2_inst,0.38791104175399305
 
7
  Meta-Llama-3-8B-Instruct,0.33079891679041123
8
  Meta-Llama-3.1-70B-Instruct,0.43366494500251235
9
  Qwen2_5_3B_Instruct,0.3316936422167389
10
+ cross_openhermes_llama3_8b_16384_inst,0.39095982486662645
11
  cross_openhermes_llama3_70b_4096_inst_2,0.41785038798707536
12
  SeaLLMs-v3-7B-Chat,0.3594829412574955
13
+ Qwen2_5_72B_Instruct,0.4215612766585066
14
  gemma-2-9b-it,0.40786563079141763
15
  Meta-Llama-3-70B-Instruct,0.3830092775167675
16
  Qwen2_5_14B_Instruct,0.3901044620348051
17
  sg_llama3_8192_8b,0.3758986882788705
18
  sg_llama3_70b_inst,0.4086440304524362
19
+ cross_openhermes_llama3_8b_2048_inst,0.3904916300086918
20
  gemma-2-2b-it,0.3482500758113138
21
  llama3-8b-cpt-sea-lionv2-instruct,0.3916108972514423
22
+ cross_openhermes_llama3_8b_12288_inst,0.3900675406718024
23
  GPT4o_0513,0.42589589086974855
24
+ cross_openhermes_llama3_8b_8192_inst,0.3929315974686861
25
  cross_openhermes_llama3_70b_4096_inst,0.4206616934730876
26
  cross_openhermes_llama3_8b_4096_2_inst,0.38791104175399305
results/flores_translation/zero_shot/vie2eng.csv CHANGED
@@ -7,15 +7,20 @@ cross_openhermes_llama3_8b_4096_inst,0.28905588559612455
7
  Meta-Llama-3-8B-Instruct,0.2647448190950291
8
  Meta-Llama-3.1-70B-Instruct,0.37244508311079816
9
  Qwen2_5_3B_Instruct,0.27312609009801636
 
10
  cross_openhermes_llama3_70b_4096_inst_2,0.35523251361264646
11
  SeaLLMs-v3-7B-Chat,0.30981028289420137
 
12
  gemma-2-9b-it,0.3367700653885
13
  Meta-Llama-3-70B-Instruct,0.3230140263371192
14
  Qwen2_5_14B_Instruct,0.32198218156960645
15
  sg_llama3_8192_8b,0.3087032778607667
16
  sg_llama3_70b_inst,0.34258533717783785
 
17
  gemma-2-2b-it,0.27518909199172303
18
  llama3-8b-cpt-sea-lionv2-instruct,0.327781936019637
 
19
  GPT4o_0513,0.36219303373759176
 
20
  cross_openhermes_llama3_70b_4096_inst,0.3538368711937718
21
  cross_openhermes_llama3_8b_4096_2_inst,0.2995657605717809
 
7
  Meta-Llama-3-8B-Instruct,0.2647448190950291
8
  Meta-Llama-3.1-70B-Instruct,0.37244508311079816
9
  Qwen2_5_3B_Instruct,0.27312609009801636
10
+ cross_openhermes_llama3_8b_16384_inst,0.2956191140783817
11
  cross_openhermes_llama3_70b_4096_inst_2,0.35523251361264646
12
  SeaLLMs-v3-7B-Chat,0.30981028289420137
13
+ Qwen2_5_72B_Instruct,0.35733464866179004
14
  gemma-2-9b-it,0.3367700653885
15
  Meta-Llama-3-70B-Instruct,0.3230140263371192
16
  Qwen2_5_14B_Instruct,0.32198218156960645
17
  sg_llama3_8192_8b,0.3087032778607667
18
  sg_llama3_70b_inst,0.34258533717783785
19
+ cross_openhermes_llama3_8b_2048_inst,0.2973194210388712
20
  gemma-2-2b-it,0.27518909199172303
21
  llama3-8b-cpt-sea-lionv2-instruct,0.327781936019637
22
+ cross_openhermes_llama3_8b_12288_inst,0.29952664743728336
23
  GPT4o_0513,0.36219303373759176
24
+ cross_openhermes_llama3_8b_8192_inst,0.29989110440173306
25
  cross_openhermes_llama3_70b_4096_inst,0.3538368711937718
26
  cross_openhermes_llama3_8b_4096_2_inst,0.2995657605717809
results/flores_translation/zero_shot/zho2eng.csv CHANGED
@@ -7,15 +7,20 @@ cross_openhermes_llama3_8b_4096_inst,0.2258901846942186
7
  Meta-Llama-3-8B-Instruct,0.199495011482748
8
  Meta-Llama-3.1-70B-Instruct,0.2832594176173152
9
  Qwen2_5_3B_Instruct,0.2245195134637718
 
10
  cross_openhermes_llama3_70b_4096_inst_2,0.272938440868618
11
  SeaLLMs-v3-7B-Chat,0.2516593644617717
 
12
  gemma-2-9b-it,0.267527968123433
13
  Meta-Llama-3-70B-Instruct,0.24397819518058994
14
  Qwen2_5_14B_Instruct,0.2627781200417998
15
  sg_llama3_8192_8b,0.23778397807613597
16
  sg_llama3_70b_inst,0.26000707510414633
 
17
  gemma-2-2b-it,0.21164036008441425
18
  llama3-8b-cpt-sea-lionv2-instruct,0.2381535278220489
 
19
  GPT4o_0513,0.27722306559544163
 
20
  cross_openhermes_llama3_70b_4096_inst,0.27230844604661014
21
  cross_openhermes_llama3_8b_4096_2_inst,0.24086384403666913
 
7
  Meta-Llama-3-8B-Instruct,0.199495011482748
8
  Meta-Llama-3.1-70B-Instruct,0.2832594176173152
9
  Qwen2_5_3B_Instruct,0.2245195134637718
10
+ cross_openhermes_llama3_8b_16384_inst,0.24205424492731104
11
  cross_openhermes_llama3_70b_4096_inst_2,0.272938440868618
12
  SeaLLMs-v3-7B-Chat,0.2516593644617717
13
+ Qwen2_5_72B_Instruct,0.2843491241986514
14
  gemma-2-9b-it,0.267527968123433
15
  Meta-Llama-3-70B-Instruct,0.24397819518058994
16
  Qwen2_5_14B_Instruct,0.2627781200417998
17
  sg_llama3_8192_8b,0.23778397807613597
18
  sg_llama3_70b_inst,0.26000707510414633
19
+ cross_openhermes_llama3_8b_2048_inst,0.23916426190948417
20
  gemma-2-2b-it,0.21164036008441425
21
  llama3-8b-cpt-sea-lionv2-instruct,0.2381535278220489
22
+ cross_openhermes_llama3_8b_12288_inst,0.2437964546132799
23
  GPT4o_0513,0.27722306559544163
24
+ cross_openhermes_llama3_8b_8192_inst,0.24473214674903845
25
  cross_openhermes_llama3_70b_4096_inst,0.27230844604661014
26
  cross_openhermes_llama3_8b_4096_2_inst,0.24086384403666913
results/flores_translation/zero_shot/zsm2eng.csv CHANGED
@@ -7,15 +7,20 @@ cross_openhermes_llama3_8b_4096_inst,0.37996622288549425
7
  Meta-Llama-3-8B-Instruct,0.31625368345049
8
  Meta-Llama-3.1-70B-Instruct,0.4462132282683508
9
  Qwen2_5_3B_Instruct,0.31056841204320457
 
10
  cross_openhermes_llama3_70b_4096_inst_2,0.439568878723126
11
  SeaLLMs-v3-7B-Chat,0.3484133510670942
 
12
  gemma-2-9b-it,0.4234100394581857
13
  Meta-Llama-3-70B-Instruct,0.3957287030176054
14
  Qwen2_5_14B_Instruct,0.3841042767934729
15
  sg_llama3_8192_8b,0.376818225699898
16
  sg_llama3_70b_inst,0.4163761508073963
 
17
  gemma-2-2b-it,0.33737270487369614
18
  llama3-8b-cpt-sea-lionv2-instruct,0.38799258214381604
 
19
  GPT4o_0513,0.451496635720668
 
20
  cross_openhermes_llama3_70b_4096_inst,0.43447247409976697
21
  cross_openhermes_llama3_8b_4096_2_inst,0.3909715760518667
 
7
  Meta-Llama-3-8B-Instruct,0.31625368345049
8
  Meta-Llama-3.1-70B-Instruct,0.4462132282683508
9
  Qwen2_5_3B_Instruct,0.31056841204320457
10
+ cross_openhermes_llama3_8b_16384_inst,0.3958514861574909
11
  cross_openhermes_llama3_70b_4096_inst_2,0.439568878723126
12
  SeaLLMs-v3-7B-Chat,0.3484133510670942
13
+ Qwen2_5_72B_Instruct,0.4237666988692159
14
  gemma-2-9b-it,0.4234100394581857
15
  Meta-Llama-3-70B-Instruct,0.3957287030176054
16
  Qwen2_5_14B_Instruct,0.3841042767934729
17
  sg_llama3_8192_8b,0.376818225699898
18
  sg_llama3_70b_inst,0.4163761508073963
19
+ cross_openhermes_llama3_8b_2048_inst,0.3904643635616676
20
  gemma-2-2b-it,0.33737270487369614
21
  llama3-8b-cpt-sea-lionv2-instruct,0.38799258214381604
22
+ cross_openhermes_llama3_8b_12288_inst,0.39589080400186966
23
  GPT4o_0513,0.451496635720668
24
+ cross_openhermes_llama3_8b_8192_inst,0.39476822018254265
25
  cross_openhermes_llama3_70b_4096_inst,0.43447247409976697
26
  cross_openhermes_llama3_8b_4096_2_inst,0.3909715760518667
results/fundamental_nlp_tasks/zero_shot/c3.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.7718773373223635
9
  Meta-Llama-3-8B-Instruct,0.8515332834704562
10
  Meta-Llama-3.1-70B-Instruct,0.9603590127150337
11
  Qwen2_5_3B_Instruct,0.8668661181750187
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.9420344053851907
13
  SeaLLMs-v3-7B-Chat,0.9143605086013463
14
  Qwen2_5_72B_Instruct,0.9596110695587136
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.9521316379955124
17
  Qwen2_5_14B_Instruct,0.9502617801047121
18
  sg_llama3_8192_8b,0.8051608077786089
19
  sg_llama3_70b_inst,0.9289454001495886
 
20
  gemma-2-2b-it,0.7700074794315632
21
  llama3-8b-cpt-sea-lionv2-instruct,0.8672400897531788
 
22
  Qwen2_5_0_5B_Instruct,0.612939416604338
23
  GPT4o_0513,0.9648466716529543
 
24
  cross_openhermes_llama3_70b_4096_inst,0.9270755422587883
25
  cross_openhermes_llama3_8b_4096_2_inst,0.8290949887808526
 
9
  Meta-Llama-3-8B-Instruct,0.8515332834704562
10
  Meta-Llama-3.1-70B-Instruct,0.9603590127150337
11
  Qwen2_5_3B_Instruct,0.8668661181750187
12
+ cross_openhermes_llama3_8b_16384_inst,0.8477935676888556
13
  cross_openhermes_llama3_70b_4096_inst_2,0.9420344053851907
14
  SeaLLMs-v3-7B-Chat,0.9143605086013463
15
  Qwen2_5_72B_Instruct,0.9596110695587136
 
18
  Qwen2_5_14B_Instruct,0.9502617801047121
19
  sg_llama3_8192_8b,0.8051608077786089
20
  sg_llama3_70b_inst,0.9289454001495886
21
+ cross_openhermes_llama3_8b_2048_inst,0.8167539267015707
22
  gemma-2-2b-it,0.7700074794315632
23
  llama3-8b-cpt-sea-lionv2-instruct,0.8672400897531788
24
+ cross_openhermes_llama3_8b_12288_inst,0.8485415108451758
25
  Qwen2_5_0_5B_Instruct,0.612939416604338
26
  GPT4o_0513,0.9648466716529543
27
+ cross_openhermes_llama3_8b_8192_inst,0.8444278234854151
28
  cross_openhermes_llama3_70b_4096_inst,0.9270755422587883
29
  cross_openhermes_llama3_8b_4096_2_inst,0.8290949887808526
results/fundamental_nlp_tasks/zero_shot/cola.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.7660594439117929
9
  Meta-Llama-3-8B-Instruct,0.6548418024928092
10
  Meta-Llama-3.1-70B-Instruct,0.850431447746884
11
  Qwen2_5_3B_Instruct,0.6644295302013423
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.8609779482262704
13
  SeaLLMs-v3-7B-Chat,0.785234899328859
14
  Qwen2_5_72B_Instruct,0.8571428571428571
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.835091083413231
17
  Qwen2_5_14B_Instruct,0.8063279002876318
18
  sg_llama3_8192_8b,0.8130393096836049
19
  sg_llama3_70b_inst,0.8696069031639502
 
20
  gemma-2-2b-it,0.6749760306807286
21
  llama3-8b-cpt-sea-lionv2-instruct,0.6078619367209971
 
22
  Qwen2_5_0_5B_Instruct,0.6116970278044104
23
  GPT4o_0513,0.8398849472674976
 
24
  cross_openhermes_llama3_70b_4096_inst,0.8456375838926175
25
  cross_openhermes_llama3_8b_4096_2_inst,0.7651006711409396
 
9
  Meta-Llama-3-8B-Instruct,0.6548418024928092
10
  Meta-Llama-3.1-70B-Instruct,0.850431447746884
11
  Qwen2_5_3B_Instruct,0.6644295302013423
12
+ cross_openhermes_llama3_8b_16384_inst,0.8053691275167785
13
  cross_openhermes_llama3_70b_4096_inst_2,0.8609779482262704
14
  SeaLLMs-v3-7B-Chat,0.785234899328859
15
  Qwen2_5_72B_Instruct,0.8571428571428571
 
18
  Qwen2_5_14B_Instruct,0.8063279002876318
19
  sg_llama3_8192_8b,0.8130393096836049
20
  sg_llama3_70b_inst,0.8696069031639502
21
+ cross_openhermes_llama3_8b_2048_inst,0.7727708533077661
22
  gemma-2-2b-it,0.6749760306807286
23
  llama3-8b-cpt-sea-lionv2-instruct,0.6078619367209971
24
+ cross_openhermes_llama3_8b_12288_inst,0.8207094918504314
25
  Qwen2_5_0_5B_Instruct,0.6116970278044104
26
  GPT4o_0513,0.8398849472674976
27
+ cross_openhermes_llama3_8b_8192_inst,0.7890699904122723
28
  cross_openhermes_llama3_70b_4096_inst,0.8456375838926175
29
  cross_openhermes_llama3_8b_4096_2_inst,0.7651006711409396
results/fundamental_nlp_tasks/zero_shot/mnli.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.57
9
  Meta-Llama-3-8B-Instruct,0.546
10
  Meta-Llama-3.1-70B-Instruct,0.7015
11
  Qwen2_5_3B_Instruct,0.7465
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.7481549345956126
13
  SeaLLMs-v3-7B-Chat,0.653
14
  Qwen2_5_72B_Instruct,0.8445
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.6709421285692472
17
  Qwen2_5_14B_Instruct,0.818
18
  sg_llama3_8192_8b,0.6605
19
  sg_llama3_70b_inst,0.7685
 
20
  gemma-2-2b-it,0.6185
21
  llama3-8b-cpt-sea-lionv2-instruct,0.5765
 
22
  Qwen2_5_0_5B_Instruct,0.5095
23
  GPT4o_0513,0.8335
 
24
  cross_openhermes_llama3_70b_4096_inst,0.743
25
  cross_openhermes_llama3_8b_4096_2_inst,0.619
 
9
  Meta-Llama-3-8B-Instruct,0.546
10
  Meta-Llama-3.1-70B-Instruct,0.7015
11
  Qwen2_5_3B_Instruct,0.7465
12
+ cross_openhermes_llama3_8b_16384_inst,0.656
13
  cross_openhermes_llama3_70b_4096_inst_2,0.7481549345956126
14
  SeaLLMs-v3-7B-Chat,0.653
15
  Qwen2_5_72B_Instruct,0.8445
 
18
  Qwen2_5_14B_Instruct,0.818
19
  sg_llama3_8192_8b,0.6605
20
  sg_llama3_70b_inst,0.7685
21
+ cross_openhermes_llama3_8b_2048_inst,0.57
22
  gemma-2-2b-it,0.6185
23
  llama3-8b-cpt-sea-lionv2-instruct,0.5765
24
+ cross_openhermes_llama3_8b_12288_inst,0.6485
25
  Qwen2_5_0_5B_Instruct,0.5095
26
  GPT4o_0513,0.8335
27
+ cross_openhermes_llama3_8b_8192_inst,0.6455
28
  cross_openhermes_llama3_70b_4096_inst,0.743
29
  cross_openhermes_llama3_8b_4096_2_inst,0.619
results/fundamental_nlp_tasks/zero_shot/mrpc.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.7303921568627451
9
  Meta-Llama-3-8B-Instruct,0.678921568627451
10
  Meta-Llama-3.1-70B-Instruct,0.7696078431372549
11
  Qwen2_5_3B_Instruct,0.5661764705882353
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.7916666666666666
13
  SeaLLMs-v3-7B-Chat,0.7475490196078431
14
  Qwen2_5_72B_Instruct,0.8014705882352942
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.7598039215686274
17
  Qwen2_5_14B_Instruct,0.7794117647058824
18
  sg_llama3_8192_8b,0.7254901960784313
19
  sg_llama3_70b_inst,0.7892156862745098
 
20
  gemma-2-2b-it,0.7083333333333334
21
  llama3-8b-cpt-sea-lionv2-instruct,0.5833333333333334
 
22
  Qwen2_5_0_5B_Instruct,0.5759803921568627
23
  GPT4o_0513,0.7377450980392157
 
24
  cross_openhermes_llama3_70b_4096_inst,0.7818627450980392
25
  cross_openhermes_llama3_8b_4096_2_inst,0.6985294117647058
 
9
  Meta-Llama-3-8B-Instruct,0.678921568627451
10
  Meta-Llama-3.1-70B-Instruct,0.7696078431372549
11
  Qwen2_5_3B_Instruct,0.5661764705882353
12
+ cross_openhermes_llama3_8b_16384_inst,0.7034313725490197
13
  cross_openhermes_llama3_70b_4096_inst_2,0.7916666666666666
14
  SeaLLMs-v3-7B-Chat,0.7475490196078431
15
  Qwen2_5_72B_Instruct,0.8014705882352942
 
18
  Qwen2_5_14B_Instruct,0.7794117647058824
19
  sg_llama3_8192_8b,0.7254901960784313
20
  sg_llama3_70b_inst,0.7892156862745098
21
+ cross_openhermes_llama3_8b_2048_inst,0.7058823529411765
22
  gemma-2-2b-it,0.7083333333333334
23
  llama3-8b-cpt-sea-lionv2-instruct,0.5833333333333334
24
+ cross_openhermes_llama3_8b_12288_inst,0.6985294117647058
25
  Qwen2_5_0_5B_Instruct,0.5759803921568627
26
  GPT4o_0513,0.7377450980392157
27
+ cross_openhermes_llama3_8b_8192_inst,0.696078431372549
28
  cross_openhermes_llama3_70b_4096_inst,0.7818627450980392
29
  cross_openhermes_llama3_8b_4096_2_inst,0.6985294117647058
results/fundamental_nlp_tasks/zero_shot/ocnli.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.5183050847457628
9
  Meta-Llama-3-8B-Instruct,0.44033898305084745
10
  Meta-Llama-3.1-70B-Instruct,0.6423728813559322
11
  Qwen2_5_3B_Instruct,0.6145762711864406
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.6759322033898305
13
  SeaLLMs-v3-7B-Chat,0.5698305084745763
14
  Qwen2_5_72B_Instruct,0.7684745762711864
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.5928813559322034
17
  Qwen2_5_14B_Instruct,0.7538983050847458
18
  sg_llama3_8192_8b,0.5084745762711864
19
  sg_llama3_70b_inst,0.6420338983050847
 
20
  gemma-2-2b-it,0.43322033898305085
21
  llama3-8b-cpt-sea-lionv2-instruct,0.45559322033898303
 
22
  Qwen2_5_0_5B_Instruct,0.3847457627118644
23
  GPT4o_0513,0.7308474576271187
 
24
  cross_openhermes_llama3_70b_4096_inst,0.6647457627118644
25
  cross_openhermes_llama3_8b_4096_2_inst,0.5498305084745763
 
9
  Meta-Llama-3-8B-Instruct,0.44033898305084745
10
  Meta-Llama-3.1-70B-Instruct,0.6423728813559322
11
  Qwen2_5_3B_Instruct,0.6145762711864406
12
+ cross_openhermes_llama3_8b_16384_inst,0.5884745762711865
13
  cross_openhermes_llama3_70b_4096_inst_2,0.6759322033898305
14
  SeaLLMs-v3-7B-Chat,0.5698305084745763
15
  Qwen2_5_72B_Instruct,0.7684745762711864
 
18
  Qwen2_5_14B_Instruct,0.7538983050847458
19
  sg_llama3_8192_8b,0.5084745762711864
20
  sg_llama3_70b_inst,0.6420338983050847
21
+ cross_openhermes_llama3_8b_2048_inst,0.5322033898305085
22
  gemma-2-2b-it,0.43322033898305085
23
  llama3-8b-cpt-sea-lionv2-instruct,0.45559322033898303
24
+ cross_openhermes_llama3_8b_12288_inst,0.5925423728813559
25
  Qwen2_5_0_5B_Instruct,0.3847457627118644
26
  GPT4o_0513,0.7308474576271187
27
+ cross_openhermes_llama3_8b_8192_inst,0.5847457627118644
28
  cross_openhermes_llama3_70b_4096_inst,0.6647457627118644
29
  cross_openhermes_llama3_8b_4096_2_inst,0.5498305084745763
results/fundamental_nlp_tasks/zero_shot/qnli.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.7596558667398865
9
  Meta-Llama-3-8B-Instruct,0.6025993044114956
10
  Meta-Llama-3.1-70B-Instruct,0.9026176093721399
11
  Qwen2_5_3B_Instruct,0.7645982061138569
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.9086582463847702
13
  SeaLLMs-v3-7B-Chat,0.7159070107999268
14
  Qwen2_5_72B_Instruct,0.9082921471718836
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.876807614863628
17
  Qwen2_5_14B_Instruct,0.9079260479589969
18
  sg_llama3_8192_8b,0.8118250045762402
19
  sg_llama3_70b_inst,0.9004210140948197
 
20
  gemma-2-2b-it,0.7792421746293245
21
  llama3-8b-cpt-sea-lionv2-instruct,0.6101043382756727
 
22
  Qwen2_5_0_5B_Instruct,0.5464030752333883
23
  GPT4o_0513,0.9304411495515285
 
24
  cross_openhermes_llama3_70b_4096_inst,0.8943803770821893
25
  cross_openhermes_llama3_8b_4096_2_inst,0.7971810360607725
 
9
  Meta-Llama-3-8B-Instruct,0.6025993044114956
10
  Meta-Llama-3.1-70B-Instruct,0.9026176093721399
11
  Qwen2_5_3B_Instruct,0.7645982061138569
12
+ cross_openhermes_llama3_8b_16384_inst,0.8359875526267618
13
  cross_openhermes_llama3_70b_4096_inst_2,0.9086582463847702
14
  SeaLLMs-v3-7B-Chat,0.7159070107999268
15
  Qwen2_5_72B_Instruct,0.9082921471718836
 
18
  Qwen2_5_14B_Instruct,0.9079260479589969
19
  sg_llama3_8192_8b,0.8118250045762402
20
  sg_llama3_70b_inst,0.9004210140948197
21
+ cross_openhermes_llama3_8b_2048_inst,0.7393373604246751
22
  gemma-2-2b-it,0.7792421746293245
23
  llama3-8b-cpt-sea-lionv2-instruct,0.6101043382756727
24
+ cross_openhermes_llama3_8b_12288_inst,0.8282994691561413
25
  Qwen2_5_0_5B_Instruct,0.5464030752333883
26
  GPT4o_0513,0.9304411495515285
27
+ cross_openhermes_llama3_8b_8192_inst,0.8427603880651656
28
  cross_openhermes_llama3_70b_4096_inst,0.8943803770821893
29
  cross_openhermes_llama3_8b_4096_2_inst,0.7971810360607725
results/fundamental_nlp_tasks/zero_shot/qqp.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.7495
9
  Meta-Llama-3-8B-Instruct,0.563
10
  Meta-Llama-3.1-70B-Instruct,0.815
11
  Qwen2_5_3B_Instruct,0.7415
 
12
  SeaLLMs-v3-7B-Chat,0.7625
13
  Qwen2_5_72B_Instruct,0.8315
14
  gemma-2-9b-it,0.7775
@@ -16,9 +17,12 @@ Meta-Llama-3-70B-Instruct,0.7876082117239673
16
  Qwen2_5_14B_Instruct,0.8255
17
  sg_llama3_8192_8b,0.8095
18
  sg_llama3_70b_inst,0.804
 
19
  gemma-2-2b-it,0.761
20
  llama3-8b-cpt-sea-lionv2-instruct,0.6225
 
21
  Qwen2_5_0_5B_Instruct,0.619
22
  GPT4o_0513,0.8085
 
23
  cross_openhermes_llama3_70b_4096_inst,0.801
24
  cross_openhermes_llama3_8b_4096_2_inst,0.79
 
9
  Meta-Llama-3-8B-Instruct,0.563
10
  Meta-Llama-3.1-70B-Instruct,0.815
11
  Qwen2_5_3B_Instruct,0.7415
12
+ cross_openhermes_llama3_8b_16384_inst,0.792
13
  SeaLLMs-v3-7B-Chat,0.7625
14
  Qwen2_5_72B_Instruct,0.8315
15
  gemma-2-9b-it,0.7775
 
17
  Qwen2_5_14B_Instruct,0.8255
18
  sg_llama3_8192_8b,0.8095
19
  sg_llama3_70b_inst,0.804
20
+ cross_openhermes_llama3_8b_2048_inst,0.722
21
  gemma-2-2b-it,0.761
22
  llama3-8b-cpt-sea-lionv2-instruct,0.6225
23
+ cross_openhermes_llama3_8b_12288_inst,0.792
24
  Qwen2_5_0_5B_Instruct,0.619
25
  GPT4o_0513,0.8085
26
+ cross_openhermes_llama3_8b_8192_inst,0.7895
27
  cross_openhermes_llama3_70b_4096_inst,0.801
28
  cross_openhermes_llama3_8b_4096_2_inst,0.79
results/fundamental_nlp_tasks/zero_shot/rte.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.6967509025270758
9
  Meta-Llama-3-8B-Instruct,0.6173285198555957
10
  Meta-Llama-3.1-70B-Instruct,0.8483754512635379
11
  Qwen2_5_3B_Instruct,0.779783393501805
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.8916967509025271
13
  SeaLLMs-v3-7B-Chat,0.7870036101083032
14
  Qwen2_5_72B_Instruct,0.9025270758122743
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.8086642599277978
17
  Qwen2_5_14B_Instruct,0.8664259927797834
18
  sg_llama3_8192_8b,0.7364620938628159
19
  sg_llama3_70b_inst,0.8916967509025271
 
20
  gemma-2-2b-it,0.7292418772563177
21
  llama3-8b-cpt-sea-lionv2-instruct,0.6859205776173285
 
22
  Qwen2_5_0_5B_Instruct,0.5992779783393501
23
  GPT4o_0513,0.8700361010830325
 
24
  cross_openhermes_llama3_70b_4096_inst,0.8953068592057761
25
  cross_openhermes_llama3_8b_4096_2_inst,0.7545126353790613
 
9
  Meta-Llama-3-8B-Instruct,0.6173285198555957
10
  Meta-Llama-3.1-70B-Instruct,0.8483754512635379
11
  Qwen2_5_3B_Instruct,0.779783393501805
12
+ cross_openhermes_llama3_8b_16384_inst,0.8014440433212996
13
  cross_openhermes_llama3_70b_4096_inst_2,0.8916967509025271
14
  SeaLLMs-v3-7B-Chat,0.7870036101083032
15
  Qwen2_5_72B_Instruct,0.9025270758122743
 
18
  Qwen2_5_14B_Instruct,0.8664259927797834
19
  sg_llama3_8192_8b,0.7364620938628159
20
  sg_llama3_70b_inst,0.8916967509025271
21
+ cross_openhermes_llama3_8b_2048_inst,0.7075812274368231
22
  gemma-2-2b-it,0.7292418772563177
23
  llama3-8b-cpt-sea-lionv2-instruct,0.6859205776173285
24
+ cross_openhermes_llama3_8b_12288_inst,0.8050541516245487
25
  Qwen2_5_0_5B_Instruct,0.5992779783393501
26
  GPT4o_0513,0.8700361010830325
27
+ cross_openhermes_llama3_8b_8192_inst,0.8158844765342961
28
  cross_openhermes_llama3_70b_4096_inst,0.8953068592057761
29
  cross_openhermes_llama3_8b_4096_2_inst,0.7545126353790613
results/fundamental_nlp_tasks/zero_shot/wnli.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.4647887323943662
9
  Meta-Llama-3-8B-Instruct,0.4788732394366197
10
  Meta-Llama-3.1-70B-Instruct,0.8450704225352113
11
  Qwen2_5_3B_Instruct,0.647887323943662
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.8450704225352113
13
  SeaLLMs-v3-7B-Chat,0.5915492957746479
14
  Qwen2_5_72B_Instruct,0.8169014084507042
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.7887323943661971
17
  Qwen2_5_14B_Instruct,0.8309859154929577
18
  sg_llama3_8192_8b,0.704225352112676
19
  sg_llama3_70b_inst,0.8309859154929577
 
20
  gemma-2-2b-it,0.43661971830985913
21
  llama3-8b-cpt-sea-lionv2-instruct,0.5774647887323944
 
22
  Qwen2_5_0_5B_Instruct,0.43661971830985913
23
  GPT4o_0513,0.9295774647887324
 
24
  cross_openhermes_llama3_70b_4096_inst,0.8450704225352113
25
  cross_openhermes_llama3_8b_4096_2_inst,0.43661971830985913
 
9
  Meta-Llama-3-8B-Instruct,0.4788732394366197
10
  Meta-Llama-3.1-70B-Instruct,0.8450704225352113
11
  Qwen2_5_3B_Instruct,0.647887323943662
12
+ cross_openhermes_llama3_8b_16384_inst,0.5492957746478874
13
  cross_openhermes_llama3_70b_4096_inst_2,0.8450704225352113
14
  SeaLLMs-v3-7B-Chat,0.5915492957746479
15
  Qwen2_5_72B_Instruct,0.8169014084507042
 
18
  Qwen2_5_14B_Instruct,0.8309859154929577
19
  sg_llama3_8192_8b,0.704225352112676
20
  sg_llama3_70b_inst,0.8309859154929577
21
+ cross_openhermes_llama3_8b_2048_inst,0.49295774647887325
22
  gemma-2-2b-it,0.43661971830985913
23
  llama3-8b-cpt-sea-lionv2-instruct,0.5774647887323944
24
+ cross_openhermes_llama3_8b_12288_inst,0.5211267605633803
25
  Qwen2_5_0_5B_Instruct,0.43661971830985913
26
  GPT4o_0513,0.9295774647887324
27
+ cross_openhermes_llama3_8b_8192_inst,0.5211267605633803
28
  cross_openhermes_llama3_70b_4096_inst,0.8450704225352113
29
  cross_openhermes_llama3_8b_4096_2_inst,0.43661971830985913
results/general_reasoning/zero_shot/c_eval.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.44707347447073476
9
  Meta-Llama-3-8B-Instruct,0.4775840597758406
10
  Meta-Llama-3.1-70B-Instruct,0.6612702366127023
11
  Qwen2_5_3B_Instruct,0.6537982565379825
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.6133250311332503
13
  SeaLLMs-v3-7B-Chat,0.7658779576587795
14
  Qwen2_5_72B_Instruct,0.8325031133250311
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.6220423412204235
17
  Qwen2_5_14B_Instruct,0.7839352428393525
18
  sg_llama3_8192_8b,0.4825653798256538
19
  sg_llama3_70b_inst,0.5722291407222914
 
20
  gemma-2-2b-it,0.4352428393524284
21
  llama3-8b-cpt-sea-lionv2-instruct,0.49813200498132004
 
22
  Qwen2_5_0_5B_Instruct,0.41718555417185554
23
  GPT4o_0513,0.7073474470734745
 
24
  cross_openhermes_llama3_70b_4096_inst,0.5734744707347447
25
  cross_openhermes_llama3_8b_4096_2_inst,0.4738480697384807
 
9
  Meta-Llama-3-8B-Instruct,0.4775840597758406
10
  Meta-Llama-3.1-70B-Instruct,0.6612702366127023
11
  Qwen2_5_3B_Instruct,0.6537982565379825
12
+ cross_openhermes_llama3_8b_16384_inst,0.48941469489414696
13
  cross_openhermes_llama3_70b_4096_inst_2,0.6133250311332503
14
  SeaLLMs-v3-7B-Chat,0.7658779576587795
15
  Qwen2_5_72B_Instruct,0.8325031133250311
 
18
  Qwen2_5_14B_Instruct,0.7839352428393525
19
  sg_llama3_8192_8b,0.4825653798256538
20
  sg_llama3_70b_inst,0.5722291407222914
21
+ cross_openhermes_llama3_8b_2048_inst,0.4663760896637609
22
  gemma-2-2b-it,0.4352428393524284
23
  llama3-8b-cpt-sea-lionv2-instruct,0.49813200498132004
24
+ cross_openhermes_llama3_8b_12288_inst,0.4863013698630137
25
  Qwen2_5_0_5B_Instruct,0.41718555417185554
26
  GPT4o_0513,0.7073474470734745
27
+ cross_openhermes_llama3_8b_8192_inst,0.4794520547945205
28
  cross_openhermes_llama3_70b_4096_inst,0.5734744707347447
29
  cross_openhermes_llama3_8b_4096_2_inst,0.4738480697384807
results/general_reasoning/zero_shot/cmmlu.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.4547573821447073
9
  Meta-Llama-3-8B-Instruct,0.4839405974788465
10
  Meta-Llama-3.1-70B-Instruct,0.6814885166637886
11
  Qwen2_5_3B_Instruct,0.6621481609393887
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.6392678293904335
13
  SeaLLMs-v3-7B-Chat,0.7684337765498187
14
  Qwen2_5_72B_Instruct,0.8343982041098256
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.6494560524952513
17
  Qwen2_5_14B_Instruct,0.7807805214988776
18
  sg_llama3_8192_8b,0.49050250388533934
19
  sg_llama3_70b_inst,0.6044724572612675
 
20
  gemma-2-2b-it,0.4412882058366431
21
  llama3-8b-cpt-sea-lionv2-instruct,0.48929373165256435
 
22
  Qwen2_5_0_5B_Instruct,0.42056639613192887
23
  GPT4o_0513,0.7414954239336902
 
24
  cross_openhermes_llama3_70b_4096_inst,0.6177689518217925
25
  cross_openhermes_llama3_8b_4096_2_inst,0.4807459851493697
 
9
  Meta-Llama-3-8B-Instruct,0.4839405974788465
10
  Meta-Llama-3.1-70B-Instruct,0.6814885166637886
11
  Qwen2_5_3B_Instruct,0.6621481609393887
12
+ cross_openhermes_llama3_8b_16384_inst,0.4911068900017268
13
  cross_openhermes_llama3_70b_4096_inst_2,0.6392678293904335
14
  SeaLLMs-v3-7B-Chat,0.7684337765498187
15
  Qwen2_5_72B_Instruct,0.8343982041098256
 
18
  Qwen2_5_14B_Instruct,0.7807805214988776
19
  sg_llama3_8192_8b,0.49050250388533934
20
  sg_llama3_70b_inst,0.6044724572612675
21
+ cross_openhermes_llama3_8b_2048_inst,0.47064410291832154
22
  gemma-2-2b-it,0.4412882058366431
23
  llama3-8b-cpt-sea-lionv2-instruct,0.48929373165256435
24
+ cross_openhermes_llama3_8b_12288_inst,0.48877568640994645
25
  Qwen2_5_0_5B_Instruct,0.42056639613192887
26
  GPT4o_0513,0.7414954239336902
27
+ cross_openhermes_llama3_8b_8192_inst,0.4868761871870143
28
  cross_openhermes_llama3_70b_4096_inst,0.6177689518217925
29
  cross_openhermes_llama3_8b_4096_2_inst,0.4807459851493697
results/general_reasoning/zero_shot/indommlu.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.5097803591695039
9
  Meta-Llama-3-8B-Instruct,0.5264703918819681
10
  Meta-Llama-3.1-70B-Instruct,0.6740770411910008
11
  Qwen2_5_3B_Instruct,0.49656185326123237
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.6559182855998398
13
  SeaLLMs-v3-7B-Chat,0.5267374324053675
14
  Qwen2_5_72B_Instruct,0.6380933306629281
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.6323519594098405
17
  Qwen2_5_14B_Instruct,0.6009746979104079
18
  sg_llama3_8192_8b,0.5021697042526203
19
  sg_llama3_70b_inst,0.6394285332799252
 
20
  gemma-2-2b-it,0.48220842512851325
21
  llama3-8b-cpt-sea-lionv2-instruct,0.5252687095266707
 
22
  Qwen2_5_0_5B_Instruct,0.3279925228653448
23
  GPT4o_0513,0.7584618465852193
 
24
  cross_openhermes_llama3_70b_4096_inst,0.6333533613725882
25
  cross_openhermes_llama3_8b_4096_2_inst,0.5434942252486815
 
9
  Meta-Llama-3-8B-Instruct,0.5264703918819681
10
  Meta-Llama-3.1-70B-Instruct,0.6740770411910008
11
  Qwen2_5_3B_Instruct,0.49656185326123237
12
+ cross_openhermes_llama3_8b_16384_inst,0.549769677548568
13
  cross_openhermes_llama3_70b_4096_inst_2,0.6559182855998398
14
  SeaLLMs-v3-7B-Chat,0.5267374324053675
15
  Qwen2_5_72B_Instruct,0.6380933306629281
 
18
  Qwen2_5_14B_Instruct,0.6009746979104079
19
  sg_llama3_8192_8b,0.5021697042526203
20
  sg_llama3_70b_inst,0.6394285332799252
21
+ cross_openhermes_llama3_8b_2048_inst,0.528873756592563
22
  gemma-2-2b-it,0.48220842512851325
23
  llama3-8b-cpt-sea-lionv2-instruct,0.5252687095266707
24
+ cross_openhermes_llama3_8b_12288_inst,0.5533747246144602
25
  Qwen2_5_0_5B_Instruct,0.3279925228653448
26
  GPT4o_0513,0.7584618465852193
27
+ cross_openhermes_llama3_8b_8192_inst,0.5475665932305227
28
  cross_openhermes_llama3_70b_4096_inst,0.6333533613725882
29
  cross_openhermes_llama3_8b_4096_2_inst,0.5434942252486815
results/general_reasoning/zero_shot/mmlu.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.556381837683232
9
  Meta-Llama-3-8B-Instruct,0.6005720414730068
10
  Meta-Llama-3.1-70B-Instruct,0.8058634250983197
11
  Qwen2_5_3B_Instruct,0.6118698605648909
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.7551662495530926
13
  SeaLLMs-v3-7B-Chat,0.6670003575259207
14
  Qwen2_5_72B_Instruct,0.8129424383267787
@@ -17,9 +18,12 @@ Meta-Llama-3-70B-Instruct,0.7649624597783339
17
  Qwen2_5_14B_Instruct,0.7542366821594566
18
  sg_llama3_8192_8b,0.6235967107615302
19
  sg_llama3_70b_inst,0.7407937075437969
 
20
  gemma-2-2b-it,0.5706828745084018
21
  llama3-8b-cpt-sea-lionv2-instruct,0.6130854486950303
 
22
  Qwen2_5_0_5B_Instruct,0.461136932427601
23
  GPT4o_0513,0.8308187343582409
 
24
  cross_openhermes_llama3_70b_4096_inst,0.7400071505184126
25
  cross_openhermes_llama3_8b_4096_2_inst,0.5785484447622453
 
9
  Meta-Llama-3-8B-Instruct,0.6005720414730068
10
  Meta-Llama-3.1-70B-Instruct,0.8058634250983197
11
  Qwen2_5_3B_Instruct,0.6118698605648909
12
+ cross_openhermes_llama3_8b_16384_inst,0.5873435824097247
13
  cross_openhermes_llama3_70b_4096_inst_2,0.7551662495530926
14
  SeaLLMs-v3-7B-Chat,0.6670003575259207
15
  Qwen2_5_72B_Instruct,0.8129424383267787
 
18
  Qwen2_5_14B_Instruct,0.7542366821594566
19
  sg_llama3_8192_8b,0.6235967107615302
20
  sg_llama3_70b_inst,0.7407937075437969
21
+ cross_openhermes_llama3_8b_2048_inst,0.5697533071147658
22
  gemma-2-2b-it,0.5706828745084018
23
  llama3-8b-cpt-sea-lionv2-instruct,0.6130854486950303
24
+ cross_openhermes_llama3_8b_12288_inst,0.5870575616732213
25
  Qwen2_5_0_5B_Instruct,0.461136932427601
26
  GPT4o_0513,0.8308187343582409
27
+ cross_openhermes_llama3_8b_8192_inst,0.5839113335716839
28
  cross_openhermes_llama3_70b_4096_inst,0.7400071505184126
29
  cross_openhermes_llama3_8b_4096_2_inst,0.5785484447622453
results/general_reasoning/zero_shot/zbench.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.3333333333333333
9
  Meta-Llama-3-8B-Instruct,0.3333333333333333
10
  Meta-Llama-3.1-70B-Instruct,0.48484848484848486
11
  Qwen2_5_3B_Instruct,0.5757575757575758
 
12
  cross_openhermes_llama3_70b_4096_inst_2,0.48484848484848486
13
  SeaLLMs-v3-7B-Chat,0.5454545454545454
14
  Qwen2_5_72B_Instruct,0.696969696969697
@@ -17,8 +18,11 @@ Meta-Llama-3-70B-Instruct,0.5151515151515151
17
  Qwen2_5_14B_Instruct,0.6666666666666666
18
  sg_llama3_8192_8b,0.30303030303030304
19
  sg_llama3_70b_inst,0.42424242424242425
 
20
  gemma-2-2b-it,0.24242424242424243
21
  llama3-8b-cpt-sea-lionv2-instruct,0.30303030303030304
 
22
  Qwen2_5_0_5B_Instruct,0.36363636363636365
 
23
  cross_openhermes_llama3_70b_4096_inst,0.42424242424242425
24
  cross_openhermes_llama3_8b_4096_2_inst,0.45454545454545453
 
9
  Meta-Llama-3-8B-Instruct,0.3333333333333333
10
  Meta-Llama-3.1-70B-Instruct,0.48484848484848486
11
  Qwen2_5_3B_Instruct,0.5757575757575758
12
+ cross_openhermes_llama3_8b_16384_inst,0.48484848484848486
13
  cross_openhermes_llama3_70b_4096_inst_2,0.48484848484848486
14
  SeaLLMs-v3-7B-Chat,0.5454545454545454
15
  Qwen2_5_72B_Instruct,0.696969696969697
 
18
  Qwen2_5_14B_Instruct,0.6666666666666666
19
  sg_llama3_8192_8b,0.30303030303030304
20
  sg_llama3_70b_inst,0.42424242424242425
21
+ cross_openhermes_llama3_8b_2048_inst,0.3333333333333333
22
  gemma-2-2b-it,0.24242424242424243
23
  llama3-8b-cpt-sea-lionv2-instruct,0.30303030303030304
24
+ cross_openhermes_llama3_8b_12288_inst,0.42424242424242425
25
  Qwen2_5_0_5B_Instruct,0.36363636363636365
26
+ cross_openhermes_llama3_8b_8192_inst,0.45454545454545453
27
  cross_openhermes_llama3_70b_4096_inst,0.42424242424242425
28
  cross_openhermes_llama3_8b_4096_2_inst,0.45454545454545453