Muennighoff committed on
Commit
ca4f9b6
1 Parent(s): b1799ed
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. 1b121b21b/evaluation/lm1-1b1-21b-oscar-results_lm-eval_global_step52452_2022-12-28-10-34-30.csv +21 -0
  2. 1b121b21b/evaluation/lm1-1b1-21b-oscar-results_lm-eval_global_step52452_2022-12-28-10-34-30.json +87 -0
  3. 1b121b21b/evaluation/lm1-1b1-21b-results_lm-eval_global_step39672_2022-12-01-17-56-56.csv +120 -0
  4. 1b121b21b/evaluation/lm1-1b1-21b-results_lm-eval_global_step39672_2022-12-01-17-56-56.json +427 -0
  5. 1b121b21b/evaluation/lm1-1b1-21b-results_lm-eval_global_step39672_2022-12-03-10-25-38.csv +10 -0
  6. 1b121b21b/evaluation/lm1-1b1-21b-results_lm-eval_global_step39672_2022-12-03-10-25-38.json +39 -0
  7. 1b121b21b/global_step39672/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  8. 1b121b21b/global_step39672/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt +3 -0
  9. 1b121b21b/global_step39672/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt +3 -0
  10. 1b121b21b/global_step39672/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt +3 -0
  11. 1b121b21b/global_step39672/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt +3 -0
  12. 1b121b21b/global_step39672/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt +3 -0
  13. 1b121b21b/global_step39672/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt +3 -0
  14. 1b121b21b/global_step39672/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt +3 -0
  15. 1b121b21b/global_step39672/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt +3 -0
  16. 1b121b21b/global_step39672/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt +3 -0
  17. 1b121b21b/global_step39672/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt +3 -0
  18. 1b121b21b/global_step39672/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
  19. 1b121b21b/global_step39672/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt +3 -0
  20. 1b121b21b/global_step39672/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt +3 -0
  21. 1b121b21b/global_step39672/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt +3 -0
  22. 1b121b21b/global_step39672/bf16_zero_pp_rank_113_mp_rank_00_optim_states.pt +3 -0
  23. 1b121b21b/global_step39672/bf16_zero_pp_rank_114_mp_rank_00_optim_states.pt +3 -0
  24. 1b121b21b/global_step39672/bf16_zero_pp_rank_115_mp_rank_00_optim_states.pt +3 -0
  25. 1b121b21b/global_step39672/bf16_zero_pp_rank_116_mp_rank_00_optim_states.pt +3 -0
  26. 1b121b21b/global_step39672/bf16_zero_pp_rank_117_mp_rank_00_optim_states.pt +3 -0
  27. 1b121b21b/global_step39672/bf16_zero_pp_rank_118_mp_rank_00_optim_states.pt +3 -0
  28. 1b121b21b/global_step39672/bf16_zero_pp_rank_119_mp_rank_00_optim_states.pt +3 -0
  29. 1b121b21b/global_step39672/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +3 -0
  30. 1b121b21b/global_step39672/bf16_zero_pp_rank_120_mp_rank_00_optim_states.pt +3 -0
  31. 1b121b21b/global_step39672/bf16_zero_pp_rank_121_mp_rank_00_optim_states.pt +3 -0
  32. 1b121b21b/global_step39672/bf16_zero_pp_rank_122_mp_rank_00_optim_states.pt +3 -0
  33. 1b121b21b/global_step39672/bf16_zero_pp_rank_123_mp_rank_00_optim_states.pt +3 -0
  34. 1b121b21b/global_step39672/bf16_zero_pp_rank_124_mp_rank_00_optim_states.pt +3 -0
  35. 1b121b21b/global_step39672/bf16_zero_pp_rank_125_mp_rank_00_optim_states.pt +3 -0
  36. 1b121b21b/global_step39672/bf16_zero_pp_rank_126_mp_rank_00_optim_states.pt +3 -0
  37. 1b121b21b/global_step39672/bf16_zero_pp_rank_127_mp_rank_00_optim_states.pt +3 -0
  38. 1b121b21b/global_step39672/bf16_zero_pp_rank_128_mp_rank_00_optim_states.pt +3 -0
  39. 1b121b21b/global_step39672/bf16_zero_pp_rank_129_mp_rank_00_optim_states.pt +3 -0
  40. 1b121b21b/global_step39672/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +3 -0
  41. 1b121b21b/global_step39672/bf16_zero_pp_rank_130_mp_rank_00_optim_states.pt +3 -0
  42. 1b121b21b/global_step39672/bf16_zero_pp_rank_131_mp_rank_00_optim_states.pt +3 -0
  43. 1b121b21b/global_step39672/bf16_zero_pp_rank_132_mp_rank_00_optim_states.pt +3 -0
  44. 1b121b21b/global_step39672/bf16_zero_pp_rank_133_mp_rank_00_optim_states.pt +3 -0
  45. 1b121b21b/global_step39672/bf16_zero_pp_rank_134_mp_rank_00_optim_states.pt +3 -0
  46. 1b121b21b/global_step39672/bf16_zero_pp_rank_135_mp_rank_00_optim_states.pt +3 -0
  47. 1b121b21b/global_step39672/bf16_zero_pp_rank_136_mp_rank_00_optim_states.pt +3 -0
  48. 1b121b21b/global_step39672/bf16_zero_pp_rank_137_mp_rank_00_optim_states.pt +3 -0
  49. 1b121b21b/global_step39672/bf16_zero_pp_rank_138_mp_rank_00_optim_states.pt +3 -0
  50. 1b121b21b/global_step39672/bf16_zero_pp_rank_139_mp_rank_00_optim_states.pt +3 -0
1b121b21b/evaluation/lm1-1b1-21b-oscar-results_lm-eval_global_step52452_2022-12-28-10-34-30.csv ADDED
@@ -0,0 +1,21 @@
+ task,metric,value,err,version
+ anli_r1,acc,0.286,0.014297146862517908,0
+ anli_r2,acc,0.304,0.01455320568795044,0
+ anli_r3,acc,0.3175,0.013443538681348054,0
+ arc_challenge,acc,0.26535836177474403,0.012902554762313969,0
+ arc_challenge,acc_norm,0.2909556313993174,0.013273077865907573,0
+ arc_easy,acc,0.5122053872053872,0.010256726235129016,0
+ arc_easy,acc_norm,0.4877946127946128,0.01025672623512901,0
+ boolq,acc,0.6862385321100918,0.008115773046958279,1
+ cb,acc,0.35714285714285715,0.06460957383809221,1
+ cb,f1,0.27666815942678014,,1
+ copa,acc,0.69,0.04648231987117316,0
+ hellaswag,acc,0.4907388966341366,0.004988925410522774,0
+ hellaswag,acc_norm,0.5834495120493925,0.00491979470467327,0
+ piqa,acc,0.6605005440696409,0.011048455047173918,0
+ piqa,acc_norm,0.6534276387377584,0.011103020320872166,0
+ rte,acc,0.5631768953068592,0.029855247390314945,0
+ sciq,acc,0.905,0.009276910103103324,0
+ sciq,acc_norm,0.872,0.010570133761108658,0
+ storycloze_2016,acc,0.5879208979155531,0.011382271506935862,0
+ winogrande,acc,0.5177584846093133,0.014043619596174966,0
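Note: the evaluation CSVs in this commit all share the task,metric,value,err,version schema shown above. As a minimal sketch (not part of the repository), the snippet below shows one way such a file could be grouped by task once downloaded locally; the filename results.csv is hypothetical.

import csv
from collections import defaultdict

def load_eval_csv(path):
    """Group lm-eval CSV rows (task,metric,value,err,version) by task."""
    results = defaultdict(dict)
    with open(path, newline="") as f:
        for row in csv.DictReader(f):
            # err can be empty (e.g. the cb,f1 row), so guard the float cast
            results[row["task"]][row["metric"]] = {
                "value": float(row["value"]),
                "err": float(row["err"]) if row["err"] else None,
                "version": int(row["version"]),
            }
    return dict(results)

# Example (hypothetical local file): load_eval_csv("results.csv")["boolq"]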
1b121b21b/evaluation/lm1-1b1-21b-oscar-results_lm-eval_global_step52452_2022-12-28-10-34-30.json ADDED
@@ -0,0 +1,87 @@
+ {
+ "results": {
+ "anli_r1": {
+ "acc": 0.286,
+ "acc_stderr": 0.014297146862517908
+ },
+ "anli_r2": {
+ "acc": 0.304,
+ "acc_stderr": 0.01455320568795044
+ },
+ "anli_r3": {
+ "acc": 0.3175,
+ "acc_stderr": 0.013443538681348054
+ },
+ "cb": {
+ "acc": 0.35714285714285715,
+ "acc_stderr": 0.06460957383809221,
+ "f1": 0.27666815942678014
+ },
+ "copa": {
+ "acc": 0.69,
+ "acc_stderr": 0.04648231987117316
+ },
+ "hellaswag": {
+ "acc": 0.4907388966341366,
+ "acc_stderr": 0.004988925410522774,
+ "acc_norm": 0.5834495120493925,
+ "acc_norm_stderr": 0.00491979470467327
+ },
+ "rte": {
+ "acc": 0.5631768953068592,
+ "acc_stderr": 0.029855247390314945
+ },
+ "winogrande": {
+ "acc": 0.5177584846093133,
+ "acc_stderr": 0.014043619596174966
+ },
+ "storycloze_2016": {
+ "acc": 0.5879208979155531,
+ "acc_stderr": 0.011382271506935862
+ },
+ "boolq": {
+ "acc": 0.6862385321100918,
+ "acc_stderr": 0.008115773046958279
+ },
+ "arc_easy": {
+ "acc": 0.5122053872053872,
+ "acc_stderr": 0.010256726235129016,
+ "acc_norm": 0.4877946127946128,
+ "acc_norm_stderr": 0.01025672623512901
+ },
+ "arc_challenge": {
+ "acc": 0.26535836177474403,
+ "acc_stderr": 0.012902554762313969,
+ "acc_norm": 0.2909556313993174,
+ "acc_norm_stderr": 0.013273077865907573
+ },
+ "sciq": {
+ "acc": 0.905,
+ "acc_stderr": 0.009276910103103324,
+ "acc_norm": 0.872,
+ "acc_norm_stderr": 0.010570133761108658
+ },
+ "piqa": {
+ "acc": 0.6605005440696409,
+ "acc_stderr": 0.011048455047173918,
+ "acc_norm": 0.6534276387377584,
+ "acc_norm_stderr": 0.011103020320872166
+ }
+ },
+ "versions": {
+ "anli_r1": 0,
+ "anli_r2": 0,
+ "anli_r3": 0,
+ "cb": 1,
+ "copa": 0,
+ "hellaswag": 0,
+ "rte": 0,
+ "winogrande": 0,
+ "storycloze_2016": 0,
+ "boolq": 1,
+ "arc_easy": 0,
+ "arc_challenge": 0,
+ "sciq": 0,
+ "piqa": 0
+ }
+ }
1b121b21b/evaluation/lm1-1b1-21b-results_lm-eval_global_step39672_2022-12-01-17-56-56.csv ADDED
@@ -0,0 +1,120 @@
+ task,metric,value,err,version
+ copa,acc,0.68,0.04688261722621505,0
+ hendrycksTest-abstract_algebra,acc,0.28,0.04512608598542127,0
+ hendrycksTest-abstract_algebra,acc_norm,0.29,0.045604802157206824,0
+ hendrycksTest-anatomy,acc,0.2074074074074074,0.03502553170678318,0
+ hendrycksTest-anatomy,acc_norm,0.1925925925925926,0.03406542058502651,0
+ hendrycksTest-astronomy,acc,0.21052631578947367,0.03317672787533157,0
+ hendrycksTest-astronomy,acc_norm,0.27631578947368424,0.03639057569952924,0
+ hendrycksTest-business_ethics,acc,0.34,0.04760952285695236,0
+ hendrycksTest-business_ethics,acc_norm,0.29,0.04560480215720683,0
+ hendrycksTest-clinical_knowledge,acc,0.20754716981132076,0.02495991802891127,0
+ hendrycksTest-clinical_knowledge,acc_norm,0.32452830188679244,0.028815615713432115,0
+ hendrycksTest-college_biology,acc,0.25,0.03621034121889507,0
+ hendrycksTest-college_biology,acc_norm,0.2361111111111111,0.03551446610810826,0
+ hendrycksTest-college_chemistry,acc,0.23,0.042295258468165085,0
+ hendrycksTest-college_chemistry,acc_norm,0.31,0.04648231987117316,0
+ hendrycksTest-college_computer_science,acc,0.27,0.0446196043338474,0
+ hendrycksTest-college_computer_science,acc_norm,0.28,0.04512608598542128,0
+ hendrycksTest-college_mathematics,acc,0.26,0.0440844002276808,0
+ hendrycksTest-college_mathematics,acc_norm,0.31,0.04648231987117316,0
+ hendrycksTest-college_medicine,acc,0.2138728323699422,0.031265112061730424,0
+ hendrycksTest-college_medicine,acc_norm,0.26011560693641617,0.03345036916788992,0
+ hendrycksTest-college_physics,acc,0.2549019607843137,0.04336432707993178,0
+ hendrycksTest-college_physics,acc_norm,0.30392156862745096,0.045766654032077636,0
+ hendrycksTest-computer_security,acc,0.29,0.04560480215720684,0
+ hendrycksTest-computer_security,acc_norm,0.33,0.047258156262526045,0
+ hendrycksTest-conceptual_physics,acc,0.23829787234042554,0.027851252973889764,0
+ hendrycksTest-conceptual_physics,acc_norm,0.1574468085106383,0.023809905196619695,0
+ hendrycksTest-econometrics,acc,0.24561403508771928,0.04049339297748142,0
+ hendrycksTest-econometrics,acc_norm,0.24561403508771928,0.04049339297748142,0
+ hendrycksTest-electrical_engineering,acc,0.2896551724137931,0.03780019230438015,0
+ hendrycksTest-electrical_engineering,acc_norm,0.2896551724137931,0.03780019230438014,0
+ hendrycksTest-elementary_mathematics,acc,0.21428571428571427,0.021132859182754433,0
+ hendrycksTest-elementary_mathematics,acc_norm,0.23809523809523808,0.021935878081184756,0
+ hendrycksTest-formal_logic,acc,0.2857142857142857,0.04040610178208841,0
+ hendrycksTest-formal_logic,acc_norm,0.2857142857142857,0.0404061017820884,0
+ hendrycksTest-global_facts,acc,0.18,0.03861229196653694,0
+ hendrycksTest-global_facts,acc_norm,0.19,0.039427724440366234,0
+ hendrycksTest-high_school_biology,acc,0.20967741935483872,0.023157879349083522,0
+ hendrycksTest-high_school_biology,acc_norm,0.2645161290322581,0.02509189237885928,0
+ hendrycksTest-high_school_chemistry,acc,0.16748768472906403,0.026273086047535414,0
+ hendrycksTest-high_school_chemistry,acc_norm,0.2413793103448276,0.03010833071801162,0
+ hendrycksTest-high_school_computer_science,acc,0.23,0.042295258468165044,0
+ hendrycksTest-high_school_computer_science,acc_norm,0.23,0.04229525846816505,0
+ hendrycksTest-high_school_european_history,acc,0.24242424242424243,0.033464098810559534,0
+ hendrycksTest-high_school_european_history,acc_norm,0.26666666666666666,0.03453131801885414,0
+ hendrycksTest-high_school_geography,acc,0.20202020202020202,0.028606204289229872,0
+ hendrycksTest-high_school_geography,acc_norm,0.2727272727272727,0.03173071239071724,0
+ hendrycksTest-high_school_government_and_politics,acc,0.18652849740932642,0.02811209121011748,0
+ hendrycksTest-high_school_government_and_politics,acc_norm,0.23834196891191708,0.030748905363909895,0
+ hendrycksTest-high_school_macroeconomics,acc,0.2282051282051282,0.021278393863586282,0
+ hendrycksTest-high_school_macroeconomics,acc_norm,0.258974358974359,0.02221110681006166,0
+ hendrycksTest-high_school_mathematics,acc,0.2222222222222222,0.025348097468097838,0
+ hendrycksTest-high_school_mathematics,acc_norm,0.3148148148148148,0.028317533496066468,0
+ hendrycksTest-high_school_microeconomics,acc,0.20588235294117646,0.026265024608275882,0
+ hendrycksTest-high_school_microeconomics,acc_norm,0.29411764705882354,0.029597329730978093,0
+ hendrycksTest-high_school_physics,acc,0.24503311258278146,0.03511807571804724,0
+ hendrycksTest-high_school_physics,acc_norm,0.2119205298013245,0.033367670865679766,0
+ hendrycksTest-high_school_psychology,acc,0.22201834862385322,0.017818849564796634,0
+ hendrycksTest-high_school_psychology,acc_norm,0.23486238532110093,0.018175110510343578,0
+ hendrycksTest-high_school_statistics,acc,0.2222222222222222,0.02835321286686346,0
+ hendrycksTest-high_school_statistics,acc_norm,0.24537037037037038,0.029346665094372937,0
+ hendrycksTest-high_school_us_history,acc,0.28431372549019607,0.03166009679399812,0
+ hendrycksTest-high_school_us_history,acc_norm,0.27941176470588236,0.031493281045079556,0
+ hendrycksTest-high_school_world_history,acc,0.25738396624472576,0.028458820991460285,0
+ hendrycksTest-high_school_world_history,acc_norm,0.2742616033755274,0.029041333510598035,0
+ hendrycksTest-human_aging,acc,0.3273542600896861,0.031493846709941306,0
+ hendrycksTest-human_aging,acc_norm,0.2645739910313901,0.029605103217038308,0
+ hendrycksTest-human_sexuality,acc,0.31297709923664124,0.04066962905677697,0
+ hendrycksTest-human_sexuality,acc_norm,0.3282442748091603,0.04118438565806298,0
+ hendrycksTest-international_law,acc,0.1652892561983471,0.03390780612972776,0
+ hendrycksTest-international_law,acc_norm,0.4462809917355372,0.0453793517794788,0
+ hendrycksTest-jurisprudence,acc,0.23148148148148148,0.04077494709252626,0
+ hendrycksTest-jurisprudence,acc_norm,0.4074074074074074,0.047500773411999854,0
+ hendrycksTest-logical_fallacies,acc,0.1901840490797546,0.030833491146281235,0
+ hendrycksTest-logical_fallacies,acc_norm,0.26380368098159507,0.03462419931615623,0
+ hendrycksTest-machine_learning,acc,0.33035714285714285,0.044642857142857144,0
+ hendrycksTest-machine_learning,acc_norm,0.21428571428571427,0.03894641120044792,0
+ hendrycksTest-management,acc,0.17475728155339806,0.037601780060266224,0
+ hendrycksTest-management,acc_norm,0.23300970873786409,0.04185832598928315,0
+ hendrycksTest-marketing,acc,0.2863247863247863,0.02961432369045665,0
+ hendrycksTest-marketing,acc_norm,0.3162393162393162,0.030463656747340244,0
+ hendrycksTest-medical_genetics,acc,0.31,0.04648231987117316,0
+ hendrycksTest-medical_genetics,acc_norm,0.38,0.04878317312145633,0
+ hendrycksTest-miscellaneous,acc,0.25925925925925924,0.015671006009339572,0
+ hendrycksTest-miscellaneous,acc_norm,0.2515964240102171,0.01551732236552963,0
+ hendrycksTest-moral_disputes,acc,0.2630057803468208,0.02370309952525817,0
+ hendrycksTest-moral_disputes,acc_norm,0.3092485549132948,0.02488314057007175,0
+ hendrycksTest-moral_scenarios,acc,0.24022346368715083,0.014288343803925293,0
+ hendrycksTest-moral_scenarios,acc_norm,0.24692737430167597,0.014422292204808835,0
+ hendrycksTest-nutrition,acc,0.24183006535947713,0.024518195641879334,0
+ hendrycksTest-nutrition,acc_norm,0.38235294117647056,0.027826109307283686,0
+ hendrycksTest-philosophy,acc,0.20257234726688103,0.022827317491059682,0
+ hendrycksTest-philosophy,acc_norm,0.28938906752411575,0.025755865922632935,0
+ hendrycksTest-prehistory,acc,0.21604938271604937,0.022899162918445803,0
+ hendrycksTest-prehistory,acc_norm,0.1882716049382716,0.021751866060815875,0
+ hendrycksTest-professional_accounting,acc,0.2375886524822695,0.025389512552729903,0
+ hendrycksTest-professional_accounting,acc_norm,0.24468085106382978,0.025645553622266726,0
+ hendrycksTest-professional_law,acc,0.2561929595827901,0.011149173153110582,0
+ hendrycksTest-professional_law,acc_norm,0.28292046936114734,0.011503891323188976,0
+ hendrycksTest-professional_medicine,acc,0.22058823529411764,0.025187786660227248,0
+ hendrycksTest-professional_medicine,acc_norm,0.21323529411764705,0.024880971512294275,0
+ hendrycksTest-professional_psychology,acc,0.24509803921568626,0.017401816711427667,0
+ hendrycksTest-professional_psychology,acc_norm,0.2679738562091503,0.017917974069594726,0
+ hendrycksTest-public_relations,acc,0.24545454545454545,0.04122066502878285,0
+ hendrycksTest-public_relations,acc_norm,0.2,0.03831305140884603,0
+ hendrycksTest-security_studies,acc,0.2979591836734694,0.02927956741106567,0
+ hendrycksTest-security_studies,acc_norm,0.2571428571428571,0.02797982353874455,0
+ hendrycksTest-sociology,acc,0.31840796019900497,0.03294118479054095,0
+ hendrycksTest-sociology,acc_norm,0.3333333333333333,0.03333333333333334,0
+ hendrycksTest-us_foreign_policy,acc,0.33,0.04725815626252605,0
+ hendrycksTest-us_foreign_policy,acc_norm,0.33,0.047258156262526045,0
+ hendrycksTest-virology,acc,0.3072289156626506,0.03591566797824662,0
+ hendrycksTest-virology,acc_norm,0.25903614457831325,0.03410646614071855,0
+ hendrycksTest-world_religions,acc,0.2982456140350877,0.03508771929824565,0
+ hendrycksTest-world_religions,acc_norm,0.3684210526315789,0.036996580176568775,0
+ piqa,acc,0.6545157780195865,0.011094802893617745,0
+ piqa,acc_norm,0.6605005440696409,0.011048455047173913,0
+ rte,acc,0.5234657039711191,0.03006330041190266,0
+ winogrande,acc,0.4996053670086819,0.014052481306049516,0
1b121b21b/evaluation/lm1-1b1-21b-results_lm-eval_global_step39672_2022-12-01-17-56-56.json ADDED
@@ -0,0 +1,427 @@
+ {
+ "results": {
+ "copa": {
+ "acc": 0.68,
+ "acc_stderr": 0.04688261722621505
+ },
+ "piqa": {
+ "acc": 0.6545157780195865,
+ "acc_stderr": 0.011094802893617745,
+ "acc_norm": 0.6605005440696409,
+ "acc_norm_stderr": 0.011048455047173913
+ },
+ "rte": {
+ "acc": 0.5234657039711191,
+ "acc_stderr": 0.03006330041190266
+ },
+ "winogrande": {
+ "acc": 0.4996053670086819,
+ "acc_stderr": 0.014052481306049516
+ },
+ "hendrycksTest-abstract_algebra": {
+ "acc": 0.28,
+ "acc_stderr": 0.04512608598542127,
+ "acc_norm": 0.29,
+ "acc_norm_stderr": 0.045604802157206824
+ },
+ "hendrycksTest-anatomy": {
+ "acc": 0.2074074074074074,
+ "acc_stderr": 0.03502553170678318,
+ "acc_norm": 0.1925925925925926,
+ "acc_norm_stderr": 0.03406542058502651
+ },
+ "hendrycksTest-astronomy": {
+ "acc": 0.21052631578947367,
+ "acc_stderr": 0.03317672787533157,
+ "acc_norm": 0.27631578947368424,
+ "acc_norm_stderr": 0.03639057569952924
+ },
+ "hendrycksTest-business_ethics": {
+ "acc": 0.34,
+ "acc_stderr": 0.04760952285695236,
+ "acc_norm": 0.29,
+ "acc_norm_stderr": 0.04560480215720683
+ },
+ "hendrycksTest-clinical_knowledge": {
+ "acc": 0.20754716981132076,
+ "acc_stderr": 0.02495991802891127,
+ "acc_norm": 0.32452830188679244,
+ "acc_norm_stderr": 0.028815615713432115
+ },
+ "hendrycksTest-college_biology": {
+ "acc": 0.25,
+ "acc_stderr": 0.03621034121889507,
+ "acc_norm": 0.2361111111111111,
+ "acc_norm_stderr": 0.03551446610810826
+ },
+ "hendrycksTest-college_chemistry": {
+ "acc": 0.23,
+ "acc_stderr": 0.042295258468165085,
+ "acc_norm": 0.31,
+ "acc_norm_stderr": 0.04648231987117316
+ },
+ "hendrycksTest-college_computer_science": {
+ "acc": 0.27,
+ "acc_stderr": 0.0446196043338474,
+ "acc_norm": 0.28,
+ "acc_norm_stderr": 0.04512608598542128
+ },
+ "hendrycksTest-college_mathematics": {
+ "acc": 0.26,
+ "acc_stderr": 0.0440844002276808,
+ "acc_norm": 0.31,
+ "acc_norm_stderr": 0.04648231987117316
+ },
+ "hendrycksTest-college_medicine": {
+ "acc": 0.2138728323699422,
+ "acc_stderr": 0.031265112061730424,
+ "acc_norm": 0.26011560693641617,
+ "acc_norm_stderr": 0.03345036916788992
+ },
+ "hendrycksTest-college_physics": {
+ "acc": 0.2549019607843137,
+ "acc_stderr": 0.04336432707993178,
+ "acc_norm": 0.30392156862745096,
+ "acc_norm_stderr": 0.045766654032077636
+ },
+ "hendrycksTest-computer_security": {
+ "acc": 0.29,
+ "acc_stderr": 0.04560480215720684,
+ "acc_norm": 0.33,
+ "acc_norm_stderr": 0.047258156262526045
+ },
+ "hendrycksTest-conceptual_physics": {
+ "acc": 0.23829787234042554,
+ "acc_stderr": 0.027851252973889764,
+ "acc_norm": 0.1574468085106383,
+ "acc_norm_stderr": 0.023809905196619695
+ },
+ "hendrycksTest-econometrics": {
+ "acc": 0.24561403508771928,
+ "acc_stderr": 0.04049339297748142,
+ "acc_norm": 0.24561403508771928,
+ "acc_norm_stderr": 0.04049339297748142
+ },
+ "hendrycksTest-electrical_engineering": {
+ "acc": 0.2896551724137931,
+ "acc_stderr": 0.03780019230438015,
+ "acc_norm": 0.2896551724137931,
+ "acc_norm_stderr": 0.03780019230438014
+ },
+ "hendrycksTest-elementary_mathematics": {
+ "acc": 0.21428571428571427,
+ "acc_stderr": 0.021132859182754433,
+ "acc_norm": 0.23809523809523808,
+ "acc_norm_stderr": 0.021935878081184756
+ },
+ "hendrycksTest-formal_logic": {
+ "acc": 0.2857142857142857,
+ "acc_stderr": 0.04040610178208841,
+ "acc_norm": 0.2857142857142857,
+ "acc_norm_stderr": 0.0404061017820884
+ },
+ "hendrycksTest-global_facts": {
+ "acc": 0.18,
+ "acc_stderr": 0.03861229196653694,
+ "acc_norm": 0.19,
+ "acc_norm_stderr": 0.039427724440366234
+ },
+ "hendrycksTest-high_school_biology": {
+ "acc": 0.20967741935483872,
+ "acc_stderr": 0.023157879349083522,
+ "acc_norm": 0.2645161290322581,
+ "acc_norm_stderr": 0.02509189237885928
+ },
+ "hendrycksTest-high_school_chemistry": {
+ "acc": 0.16748768472906403,
+ "acc_stderr": 0.026273086047535414,
+ "acc_norm": 0.2413793103448276,
+ "acc_norm_stderr": 0.03010833071801162
+ },
+ "hendrycksTest-high_school_computer_science": {
+ "acc": 0.23,
+ "acc_stderr": 0.042295258468165044,
+ "acc_norm": 0.23,
+ "acc_norm_stderr": 0.04229525846816505
+ },
+ "hendrycksTest-high_school_european_history": {
+ "acc": 0.24242424242424243,
+ "acc_stderr": 0.033464098810559534,
+ "acc_norm": 0.26666666666666666,
+ "acc_norm_stderr": 0.03453131801885414
+ },
+ "hendrycksTest-high_school_geography": {
+ "acc": 0.20202020202020202,
+ "acc_stderr": 0.028606204289229872,
+ "acc_norm": 0.2727272727272727,
+ "acc_norm_stderr": 0.03173071239071724
+ },
+ "hendrycksTest-high_school_government_and_politics": {
+ "acc": 0.18652849740932642,
+ "acc_stderr": 0.02811209121011748,
+ "acc_norm": 0.23834196891191708,
+ "acc_norm_stderr": 0.030748905363909895
+ },
+ "hendrycksTest-high_school_macroeconomics": {
+ "acc": 0.2282051282051282,
+ "acc_stderr": 0.021278393863586282,
+ "acc_norm": 0.258974358974359,
+ "acc_norm_stderr": 0.02221110681006166
+ },
+ "hendrycksTest-high_school_mathematics": {
+ "acc": 0.2222222222222222,
+ "acc_stderr": 0.025348097468097838,
+ "acc_norm": 0.3148148148148148,
+ "acc_norm_stderr": 0.028317533496066468
+ },
+ "hendrycksTest-high_school_microeconomics": {
+ "acc": 0.20588235294117646,
+ "acc_stderr": 0.026265024608275882,
+ "acc_norm": 0.29411764705882354,
+ "acc_norm_stderr": 0.029597329730978093
+ },
+ "hendrycksTest-high_school_physics": {
+ "acc": 0.24503311258278146,
+ "acc_stderr": 0.03511807571804724,
+ "acc_norm": 0.2119205298013245,
+ "acc_norm_stderr": 0.033367670865679766
+ },
+ "hendrycksTest-high_school_psychology": {
+ "acc": 0.22201834862385322,
+ "acc_stderr": 0.017818849564796634,
+ "acc_norm": 0.23486238532110093,
+ "acc_norm_stderr": 0.018175110510343578
+ },
+ "hendrycksTest-high_school_statistics": {
+ "acc": 0.2222222222222222,
+ "acc_stderr": 0.02835321286686346,
+ "acc_norm": 0.24537037037037038,
+ "acc_norm_stderr": 0.029346665094372937
+ },
+ "hendrycksTest-high_school_us_history": {
+ "acc": 0.28431372549019607,
+ "acc_stderr": 0.03166009679399812,
+ "acc_norm": 0.27941176470588236,
+ "acc_norm_stderr": 0.031493281045079556
+ },
+ "hendrycksTest-high_school_world_history": {
+ "acc": 0.25738396624472576,
+ "acc_stderr": 0.028458820991460285,
+ "acc_norm": 0.2742616033755274,
+ "acc_norm_stderr": 0.029041333510598035
+ },
+ "hendrycksTest-human_aging": {
+ "acc": 0.3273542600896861,
+ "acc_stderr": 0.031493846709941306,
+ "acc_norm": 0.2645739910313901,
+ "acc_norm_stderr": 0.029605103217038308
+ },
+ "hendrycksTest-human_sexuality": {
+ "acc": 0.31297709923664124,
+ "acc_stderr": 0.04066962905677697,
+ "acc_norm": 0.3282442748091603,
+ "acc_norm_stderr": 0.04118438565806298
+ },
+ "hendrycksTest-international_law": {
+ "acc": 0.1652892561983471,
+ "acc_stderr": 0.03390780612972776,
+ "acc_norm": 0.4462809917355372,
+ "acc_norm_stderr": 0.0453793517794788
+ },
+ "hendrycksTest-jurisprudence": {
+ "acc": 0.23148148148148148,
+ "acc_stderr": 0.04077494709252626,
+ "acc_norm": 0.4074074074074074,
+ "acc_norm_stderr": 0.047500773411999854
+ },
+ "hendrycksTest-logical_fallacies": {
+ "acc": 0.1901840490797546,
+ "acc_stderr": 0.030833491146281235,
+ "acc_norm": 0.26380368098159507,
+ "acc_norm_stderr": 0.03462419931615623
+ },
+ "hendrycksTest-machine_learning": {
+ "acc": 0.33035714285714285,
+ "acc_stderr": 0.044642857142857144,
+ "acc_norm": 0.21428571428571427,
+ "acc_norm_stderr": 0.03894641120044792
+ },
+ "hendrycksTest-management": {
+ "acc": 0.17475728155339806,
+ "acc_stderr": 0.037601780060266224,
+ "acc_norm": 0.23300970873786409,
+ "acc_norm_stderr": 0.04185832598928315
+ },
+ "hendrycksTest-marketing": {
+ "acc": 0.2863247863247863,
+ "acc_stderr": 0.02961432369045665,
+ "acc_norm": 0.3162393162393162,
+ "acc_norm_stderr": 0.030463656747340244
+ },
+ "hendrycksTest-medical_genetics": {
+ "acc": 0.31,
+ "acc_stderr": 0.04648231987117316,
+ "acc_norm": 0.38,
+ "acc_norm_stderr": 0.04878317312145633
+ },
+ "hendrycksTest-miscellaneous": {
+ "acc": 0.25925925925925924,
+ "acc_stderr": 0.015671006009339572,
+ "acc_norm": 0.2515964240102171,
+ "acc_norm_stderr": 0.01551732236552963
+ },
+ "hendrycksTest-moral_disputes": {
+ "acc": 0.2630057803468208,
+ "acc_stderr": 0.02370309952525817,
+ "acc_norm": 0.3092485549132948,
+ "acc_norm_stderr": 0.02488314057007175
+ },
+ "hendrycksTest-moral_scenarios": {
+ "acc": 0.24022346368715083,
+ "acc_stderr": 0.014288343803925293,
+ "acc_norm": 0.24692737430167597,
+ "acc_norm_stderr": 0.014422292204808835
+ },
+ "hendrycksTest-nutrition": {
+ "acc": 0.24183006535947713,
+ "acc_stderr": 0.024518195641879334,
+ "acc_norm": 0.38235294117647056,
+ "acc_norm_stderr": 0.027826109307283686
+ },
+ "hendrycksTest-philosophy": {
+ "acc": 0.20257234726688103,
+ "acc_stderr": 0.022827317491059682,
+ "acc_norm": 0.28938906752411575,
+ "acc_norm_stderr": 0.025755865922632935
+ },
+ "hendrycksTest-prehistory": {
+ "acc": 0.21604938271604937,
+ "acc_stderr": 0.022899162918445803,
+ "acc_norm": 0.1882716049382716,
+ "acc_norm_stderr": 0.021751866060815875
+ },
+ "hendrycksTest-professional_accounting": {
+ "acc": 0.2375886524822695,
+ "acc_stderr": 0.025389512552729903,
+ "acc_norm": 0.24468085106382978,
+ "acc_norm_stderr": 0.025645553622266726
+ },
+ "hendrycksTest-professional_law": {
+ "acc": 0.2561929595827901,
+ "acc_stderr": 0.011149173153110582,
+ "acc_norm": 0.28292046936114734,
+ "acc_norm_stderr": 0.011503891323188976
+ },
+ "hendrycksTest-professional_medicine": {
+ "acc": 0.22058823529411764,
+ "acc_stderr": 0.025187786660227248,
+ "acc_norm": 0.21323529411764705,
+ "acc_norm_stderr": 0.024880971512294275
+ },
+ "hendrycksTest-professional_psychology": {
+ "acc": 0.24509803921568626,
+ "acc_stderr": 0.017401816711427667,
+ "acc_norm": 0.2679738562091503,
+ "acc_norm_stderr": 0.017917974069594726
+ },
+ "hendrycksTest-public_relations": {
+ "acc": 0.24545454545454545,
+ "acc_stderr": 0.04122066502878285,
+ "acc_norm": 0.2,
+ "acc_norm_stderr": 0.03831305140884603
+ },
+ "hendrycksTest-security_studies": {
+ "acc": 0.2979591836734694,
+ "acc_stderr": 0.02927956741106567,
+ "acc_norm": 0.2571428571428571,
+ "acc_norm_stderr": 0.02797982353874455
+ },
+ "hendrycksTest-sociology": {
+ "acc": 0.31840796019900497,
+ "acc_stderr": 0.03294118479054095,
+ "acc_norm": 0.3333333333333333,
+ "acc_norm_stderr": 0.03333333333333334
+ },
+ "hendrycksTest-us_foreign_policy": {
+ "acc": 0.33,
+ "acc_stderr": 0.04725815626252605,
+ "acc_norm": 0.33,
+ "acc_norm_stderr": 0.047258156262526045
+ },
+ "hendrycksTest-virology": {
+ "acc": 0.3072289156626506,
+ "acc_stderr": 0.03591566797824662,
+ "acc_norm": 0.25903614457831325,
+ "acc_norm_stderr": 0.03410646614071855
+ },
+ "hendrycksTest-world_religions": {
+ "acc": 0.2982456140350877,
+ "acc_stderr": 0.03508771929824565,
+ "acc_norm": 0.3684210526315789,
+ "acc_norm_stderr": 0.036996580176568775
+ }
+ },
+ "versions": {
+ "copa": 0,
+ "piqa": 0,
+ "rte": 0,
+ "winogrande": 0,
+ "hendrycksTest-abstract_algebra": 0,
+ "hendrycksTest-anatomy": 0,
+ "hendrycksTest-astronomy": 0,
+ "hendrycksTest-business_ethics": 0,
+ "hendrycksTest-clinical_knowledge": 0,
+ "hendrycksTest-college_biology": 0,
+ "hendrycksTest-college_chemistry": 0,
+ "hendrycksTest-college_computer_science": 0,
+ "hendrycksTest-college_mathematics": 0,
+ "hendrycksTest-college_medicine": 0,
+ "hendrycksTest-college_physics": 0,
+ "hendrycksTest-computer_security": 0,
+ "hendrycksTest-conceptual_physics": 0,
+ "hendrycksTest-econometrics": 0,
+ "hendrycksTest-electrical_engineering": 0,
+ "hendrycksTest-elementary_mathematics": 0,
+ "hendrycksTest-formal_logic": 0,
+ "hendrycksTest-global_facts": 0,
+ "hendrycksTest-high_school_biology": 0,
+ "hendrycksTest-high_school_chemistry": 0,
+ "hendrycksTest-high_school_computer_science": 0,
+ "hendrycksTest-high_school_european_history": 0,
+ "hendrycksTest-high_school_geography": 0,
+ "hendrycksTest-high_school_government_and_politics": 0,
+ "hendrycksTest-high_school_macroeconomics": 0,
+ "hendrycksTest-high_school_mathematics": 0,
+ "hendrycksTest-high_school_microeconomics": 0,
+ "hendrycksTest-high_school_physics": 0,
+ "hendrycksTest-high_school_psychology": 0,
+ "hendrycksTest-high_school_statistics": 0,
+ "hendrycksTest-high_school_us_history": 0,
+ "hendrycksTest-high_school_world_history": 0,
+ "hendrycksTest-human_aging": 0,
+ "hendrycksTest-human_sexuality": 0,
+ "hendrycksTest-international_law": 0,
+ "hendrycksTest-jurisprudence": 0,
+ "hendrycksTest-logical_fallacies": 0,
+ "hendrycksTest-machine_learning": 0,
+ "hendrycksTest-management": 0,
+ "hendrycksTest-marketing": 0,
+ "hendrycksTest-medical_genetics": 0,
+ "hendrycksTest-miscellaneous": 0,
+ "hendrycksTest-moral_disputes": 0,
+ "hendrycksTest-moral_scenarios": 0,
+ "hendrycksTest-nutrition": 0,
+ "hendrycksTest-philosophy": 0,
+ "hendrycksTest-prehistory": 0,
+ "hendrycksTest-professional_accounting": 0,
+ "hendrycksTest-professional_law": 0,
+ "hendrycksTest-professional_medicine": 0,
+ "hendrycksTest-professional_psychology": 0,
+ "hendrycksTest-public_relations": 0,
+ "hendrycksTest-security_studies": 0,
+ "hendrycksTest-sociology": 0,
+ "hendrycksTest-us_foreign_policy": 0,
+ "hendrycksTest-virology": 0,
+ "hendrycksTest-world_religions": 0
+ }
+ }
1b121b21b/evaluation/lm1-1b1-21b-results_lm-eval_global_step39672_2022-12-03-10-25-38.csv ADDED
@@ -0,0 +1,10 @@
+ task,metric,value,err,version
+ arc_challenge,acc,0.19539249146757678,0.01158690718995291,0
+ arc_challenge,acc_norm,0.24573378839590443,0.012581033453730107,0
+ arc_easy,acc,0.47895622895622897,0.01025069260202259,0
+ arc_easy,acc_norm,0.43013468013468015,0.010159130445178497,0
+ boolq,acc,0.5700305810397553,0.008658853690729258,1
+ hellaswag,acc,0.31099382593108943,0.004619542392006404,0
+ hellaswag,acc_norm,0.35600477992431784,0.004778380758851136,0
+ sciq,acc,0.79,0.012886662332274531,0
+ sciq,acc_norm,0.712,0.01432694179723156,0
1b121b21b/evaluation/lm1-1b1-21b-results_lm-eval_global_step39672_2022-12-03-10-25-38.json ADDED
@@ -0,0 +1,39 @@
+ {
+ "results": {
+ "boolq": {
+ "acc": 0.5700305810397553,
+ "acc_stderr": 0.008658853690729258
+ },
+ "hellaswag": {
+ "acc": 0.31099382593108943,
+ "acc_stderr": 0.004619542392006404,
+ "acc_norm": 0.35600477992431784,
+ "acc_norm_stderr": 0.004778380758851136
+ },
+ "arc_challenge": {
+ "acc": 0.19539249146757678,
+ "acc_stderr": 0.01158690718995291,
+ "acc_norm": 0.24573378839590443,
+ "acc_norm_stderr": 0.012581033453730107
+ },
+ "arc_easy": {
+ "acc": 0.47895622895622897,
+ "acc_stderr": 0.01025069260202259,
+ "acc_norm": 0.43013468013468015,
+ "acc_norm_stderr": 0.010159130445178497
+ },
+ "sciq": {
+ "acc": 0.79,
+ "acc_stderr": 0.012886662332274531,
+ "acc_norm": 0.712,
+ "acc_norm_stderr": 0.01432694179723156
+ }
+ },
+ "versions": {
+ "boolq": 1,
+ "hellaswag": 0,
+ "arc_challenge": 0,
+ "arc_easy": 0,
+ "sciq": 0
+ }
+ }
1b121b21b/global_step39672/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d294520f89b443e0c1dbf95aa9190033bd9a9e3edffa8282aadd78f4d8608d99
+ size 51395415
1b121b21b/global_step39672/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8c7ef7b500a66787eee9db13dd2ca2acddd51faff88156819b22a4c9bd808069
+ size 51395373
1b121b21b/global_step39672/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:140186e714667fdcb1e495e27e1685f226884f7f4c2d84421cb56bac90d3a294
+ size 51395437
1b121b21b/global_step39672/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:52c1aeda2125129811a5d0ec7eabd3ea1ab926f2a7bcb47aed076b93de04a276
+ size 51395501
1b121b21b/global_step39672/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:09c9093b498ba2e9bb98e8a1adc9aff3e852697f47ea2c26b43d5c943f2d1db3
+ size 51395565
1b121b21b/global_step39672/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:acc15255d78cb44ca951211e0bd4ac600054cfaf81f75b211a48b50ebd011c2b
+ size 51395373
1b121b21b/global_step39672/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5fd80e8ae92d6a31863309a041d9e25d2ded3181970c86b0186cfd34583df159
+ size 51395373
1b121b21b/global_step39672/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a3cc3242143f6fbeab5e26f922cdef749866fe8a3899b56d0febb9b4629b9337
+ size 51395437
1b121b21b/global_step39672/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9dafa85566d94a757313d19af7faa2a855b25e07b9e6ec51e4f0cd64589087bf
+ size 51395501
1b121b21b/global_step39672/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:348594104bedc6b6eeba82a559b6cc161bc0dca99a37f77ecf34358e00478a79
+ size 51395437
1b121b21b/global_step39672/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bf277af701839d2e470416f08dd16ace9ea796cc98e6e507166e045aaf0eb747
+ size 51395501
1b121b21b/global_step39672/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2b94a3dafba54629c068eefeecf00d05d5ff3a6440b130062551a2752a3551d9
+ size 51395426
1b121b21b/global_step39672/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:164d1aca796a26d12300d7437a254b3b5e9f0d2b5f201510481dbdb2858fab9b
+ size 51395373
1b121b21b/global_step39672/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:45a0f3fe500129f06e92ada8836f281406c5648891c3b0391ae7948c3422912c
+ size 51395437
1b121b21b/global_step39672/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:86adde93cb07e99a7844fd428f10bfac514e9fda0b387df22eb28efbb12cf6cc
+ size 51395501
1b121b21b/global_step39672/bf16_zero_pp_rank_113_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a5d47a791e190435acc4ea2ac0bdcdae527476dfa5f827ae7f0bb3213a7e75b0
+ size 51395501
1b121b21b/global_step39672/bf16_zero_pp_rank_114_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e22df623e82a1ebd65cf6643256b557af86653bf0173c6e942709d13d18e1388
+ size 51395437
1b121b21b/global_step39672/bf16_zero_pp_rank_115_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7828acf17c56bf95bcba492da5e061ad8a35d8db42a78c1a047a11ca1c7cfb94
+ size 51395437
1b121b21b/global_step39672/bf16_zero_pp_rank_116_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:54aa66977312ecdc2505227db1a42fbed838353f084e6ff9138f0a68b02245db
+ size 51395373
1b121b21b/global_step39672/bf16_zero_pp_rank_117_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b730b9da3ec8003fe292d8cc900ff824b4d80de607853ad10046fa6bd9468b14
+ size 51395373
1b121b21b/global_step39672/bf16_zero_pp_rank_118_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6f399197374a42ac840a9c2721432e7f95497dfe86a6f926da04ac2c28327ba1
+ size 51395437
1b121b21b/global_step39672/bf16_zero_pp_rank_119_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:820ac5f7c61fc6a0007f8bfd86716bc29f012304d9d79dac7995c05ba3f2ac57
+ size 51395373
1b121b21b/global_step39672/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c9d380e3a09fee1058fa07ffb30aabf719fba9d291b1e9af495a2ab62028ab7b
+ size 51395490
1b121b21b/global_step39672/bf16_zero_pp_rank_120_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5a8a85098b0b2411b669575f7d9d11ea99bdb494bd48104c5bb8f7680968b10c
+ size 51395437
1b121b21b/global_step39672/bf16_zero_pp_rank_121_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cb27c99446acb93dc3f4d2cec7cab831c3739de1e15aeeb0f994ac2b1c0ba75a
+ size 51395565
1b121b21b/global_step39672/bf16_zero_pp_rank_122_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:12d7ce0aae359e4188168f1ae5646c32a0f1ce9d9aac4fc04fe9382d11e4f398
+ size 51395437
1b121b21b/global_step39672/bf16_zero_pp_rank_123_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:50280c8f2b0ffe0e2ab6c2c68aab1608fdaeb501676895f66e813a36f00105c0
+ size 51395437
1b121b21b/global_step39672/bf16_zero_pp_rank_124_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:960a591ed3c00c107b90edfa1ac66c62ce28fc25a90503532b90d95b56853b33
+ size 51395373
1b121b21b/global_step39672/bf16_zero_pp_rank_125_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4bf2b314f0fed6a8733e29ee009c8b220e4dccf43fed7f087daac202b7318ee7
+ size 51395373
1b121b21b/global_step39672/bf16_zero_pp_rank_126_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7c0312d6730d1f4b84e9c1632b35330c960ad15ae5e38001274aaa1a15d3bb36
+ size 51395565
1b121b21b/global_step39672/bf16_zero_pp_rank_127_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fc7006d8a5858aabe6c5f4f4edadab0d03a3f50ab4c95f8b1746c564df3d53e3
+ size 51395437
1b121b21b/global_step39672/bf16_zero_pp_rank_128_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2442624048be293eb9690834718d6f2081793c454dda59567a05287fd551db0e
+ size 51395437
1b121b21b/global_step39672/bf16_zero_pp_rank_129_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:728e1964a58af094d1234a652eea5a96c0cd80c7e6eac72e5c644949503c794d
+ size 51395373
1b121b21b/global_step39672/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:97755dba974da4ef314a2ecdd6783e7b3735b9f81360da33a50aedcba12ff9ae
+ size 51395426
1b121b21b/global_step39672/bf16_zero_pp_rank_130_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:99add5a01b34259601a64a70cf75254deed6ed028100b855348667ded7856314
+ size 51395437
1b121b21b/global_step39672/bf16_zero_pp_rank_131_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6a3ed135938c449f46748993f61b834a4f5dbd3fcbd1654ce38113c716cda0c9
+ size 51395565
1b121b21b/global_step39672/bf16_zero_pp_rank_132_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:43737a0579badc97355c31847d806006321ae03112be4ada39169a87ef8e8d76
+ size 51395437
1b121b21b/global_step39672/bf16_zero_pp_rank_133_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:02d0ac6fed95d44a7b9eea4ef454dbef631b8e213e60b46a4efb5489aae79f5b
+ size 51395437
1b121b21b/global_step39672/bf16_zero_pp_rank_134_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4774fe4d97dc073555dc3552c79e39218e8c990ba1149744cf3d84492573150b
+ size 51395437
1b121b21b/global_step39672/bf16_zero_pp_rank_135_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c3dfeb239207be5742bc301fff04a711542e19c0df721a95c7d4d730a3cd0e89
+ size 51395373
1b121b21b/global_step39672/bf16_zero_pp_rank_136_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5c64bd3c4938fe4ff8a63669c929d190fec1933163ff933dacf5f252f7d5aad8
+ size 51395373
1b121b21b/global_step39672/bf16_zero_pp_rank_137_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ef71134fcfd417959e6f86b01134bc9be67c4c95558a78ad76efced88002849a
+ size 51395501
1b121b21b/global_step39672/bf16_zero_pp_rank_138_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c309f359e60685165fab0fcadaf89f2b2f7dc4b203ed9e4d5da5d96033f452bf
+ size 51395437
1b121b21b/global_step39672/bf16_zero_pp_rank_139_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:47b38c1e7c59800ead5254ebf7e644fc706154a8d2b86db7b405f9ac7210b350
+ size 51395373
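Note: each optimizer-state .pt file above is committed as a Git LFS pointer containing only the version, oid, and size fields shown; the actual payload is fetched with git lfs pull. As a minimal sketch (not part of the repository), the snippet below shows one way that pointer metadata could be read from a checked-out pointer file; the local path used in the example is hypothetical.

def read_lfs_pointer(path):
    """Parse a Git LFS pointer file into its version/oid/size fields."""
    fields = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            # Each pointer line is "<key> <value>", e.g. "size 51395415"
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return {
        "version": fields["version"],
        "oid": fields["oid"].removeprefix("sha256:"),
        "size_bytes": int(fields["size"]),
    }

# Example (hypothetical local path):
# read_lfs_pointer("bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt")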