Farouk
Training in progress, step 200
6dbca0b
raw
history blame
No virus
23.4 kB
{
"best_metric": 0.662463366985321,
"best_model_checkpoint": "./output_v2/34bCodellama_CodeLlama-34b-Python-hf_codellama_blob_1/checkpoint-200",
"epoch": 0.1404001404001404,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.0004,
"loss": 1.5667,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 0.0004,
"loss": 0.9201,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 0.0004,
"loss": 0.6019,
"step": 3
},
{
"epoch": 0.0,
"learning_rate": 0.0004,
"loss": 0.861,
"step": 4
},
{
"epoch": 0.0,
"learning_rate": 0.0004,
"loss": 1.064,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 0.0004,
"loss": 1.0007,
"step": 6
},
{
"epoch": 0.0,
"learning_rate": 0.0004,
"loss": 1.0533,
"step": 7
},
{
"epoch": 0.01,
"learning_rate": 0.0004,
"loss": 0.8498,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 0.0004,
"loss": 0.9189,
"step": 9
},
{
"epoch": 0.01,
"learning_rate": 0.0004,
"loss": 0.9369,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 0.0004,
"loss": 1.1375,
"step": 11
},
{
"epoch": 0.01,
"learning_rate": 0.0004,
"loss": 0.8369,
"step": 12
},
{
"epoch": 0.01,
"learning_rate": 0.0004,
"loss": 0.689,
"step": 13
},
{
"epoch": 0.01,
"learning_rate": 0.0004,
"loss": 0.8885,
"step": 14
},
{
"epoch": 0.01,
"learning_rate": 0.0004,
"loss": 0.9666,
"step": 15
},
{
"epoch": 0.01,
"learning_rate": 0.0004,
"loss": 0.744,
"step": 16
},
{
"epoch": 0.01,
"learning_rate": 0.0004,
"loss": 0.8998,
"step": 17
},
{
"epoch": 0.01,
"learning_rate": 0.0004,
"loss": 0.8371,
"step": 18
},
{
"epoch": 0.01,
"learning_rate": 0.0004,
"loss": 0.7615,
"step": 19
},
{
"epoch": 0.01,
"learning_rate": 0.0004,
"loss": 0.6963,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 0.0004,
"loss": 0.5974,
"step": 21
},
{
"epoch": 0.02,
"learning_rate": 0.0004,
"loss": 1.4922,
"step": 22
},
{
"epoch": 0.02,
"learning_rate": 0.0004,
"loss": 1.1272,
"step": 23
},
{
"epoch": 0.02,
"learning_rate": 0.0004,
"loss": 0.4373,
"step": 24
},
{
"epoch": 0.02,
"learning_rate": 0.0004,
"loss": 1.0598,
"step": 25
},
{
"epoch": 0.02,
"learning_rate": 0.0004,
"loss": 1.191,
"step": 26
},
{
"epoch": 0.02,
"learning_rate": 0.0004,
"loss": 0.6499,
"step": 27
},
{
"epoch": 0.02,
"learning_rate": 0.0004,
"loss": 0.7526,
"step": 28
},
{
"epoch": 0.02,
"learning_rate": 0.0004,
"loss": 0.7252,
"step": 29
},
{
"epoch": 0.02,
"learning_rate": 0.0004,
"loss": 0.8555,
"step": 30
},
{
"epoch": 0.02,
"learning_rate": 0.0004,
"loss": 0.479,
"step": 31
},
{
"epoch": 0.02,
"learning_rate": 0.0004,
"loss": 0.5273,
"step": 32
},
{
"epoch": 0.02,
"learning_rate": 0.0004,
"loss": 0.3907,
"step": 33
},
{
"epoch": 0.02,
"learning_rate": 0.0004,
"loss": 0.5284,
"step": 34
},
{
"epoch": 0.02,
"learning_rate": 0.0004,
"loss": 0.8696,
"step": 35
},
{
"epoch": 0.03,
"learning_rate": 0.0004,
"loss": 0.9264,
"step": 36
},
{
"epoch": 0.03,
"learning_rate": 0.0004,
"loss": 0.6812,
"step": 37
},
{
"epoch": 0.03,
"learning_rate": 0.0004,
"loss": 0.4482,
"step": 38
},
{
"epoch": 0.03,
"learning_rate": 0.0004,
"loss": 1.9031,
"step": 39
},
{
"epoch": 0.03,
"learning_rate": 0.0004,
"loss": 1.3295,
"step": 40
},
{
"epoch": 0.03,
"learning_rate": 0.0004,
"loss": 0.7498,
"step": 41
},
{
"epoch": 0.03,
"learning_rate": 0.0004,
"loss": 1.4452,
"step": 42
},
{
"epoch": 0.03,
"learning_rate": 0.0004,
"loss": 1.224,
"step": 43
},
{
"epoch": 0.03,
"learning_rate": 0.0004,
"loss": 0.3272,
"step": 44
},
{
"epoch": 0.03,
"learning_rate": 0.0004,
"loss": 0.7366,
"step": 45
},
{
"epoch": 0.03,
"learning_rate": 0.0004,
"loss": 1.0331,
"step": 46
},
{
"epoch": 0.03,
"learning_rate": 0.0004,
"loss": 0.8471,
"step": 47
},
{
"epoch": 0.03,
"learning_rate": 0.0004,
"loss": 0.8171,
"step": 48
},
{
"epoch": 0.03,
"learning_rate": 0.0004,
"loss": 0.418,
"step": 49
},
{
"epoch": 0.04,
"learning_rate": 0.0004,
"loss": 0.6942,
"step": 50
},
{
"epoch": 0.04,
"learning_rate": 0.0004,
"loss": 0.1712,
"step": 51
},
{
"epoch": 0.04,
"learning_rate": 0.0004,
"loss": 0.5041,
"step": 52
},
{
"epoch": 0.04,
"learning_rate": 0.0004,
"loss": 0.4619,
"step": 53
},
{
"epoch": 0.04,
"learning_rate": 0.0004,
"loss": 0.5011,
"step": 54
},
{
"epoch": 0.04,
"learning_rate": 0.0004,
"loss": 0.5,
"step": 55
},
{
"epoch": 0.04,
"learning_rate": 0.0004,
"loss": 0.917,
"step": 56
},
{
"epoch": 0.04,
"learning_rate": 0.0004,
"loss": 0.8142,
"step": 57
},
{
"epoch": 0.04,
"learning_rate": 0.0004,
"loss": 0.8708,
"step": 58
},
{
"epoch": 0.04,
"learning_rate": 0.0004,
"loss": 0.7987,
"step": 59
},
{
"epoch": 0.04,
"learning_rate": 0.0004,
"loss": 0.9699,
"step": 60
},
{
"epoch": 0.04,
"learning_rate": 0.0004,
"loss": 1.0015,
"step": 61
},
{
"epoch": 0.04,
"learning_rate": 0.0004,
"loss": 0.8052,
"step": 62
},
{
"epoch": 0.04,
"learning_rate": 0.0004,
"loss": 0.8645,
"step": 63
},
{
"epoch": 0.04,
"learning_rate": 0.0004,
"loss": 1.1444,
"step": 64
},
{
"epoch": 0.05,
"learning_rate": 0.0004,
"loss": 1.0087,
"step": 65
},
{
"epoch": 0.05,
"learning_rate": 0.0004,
"loss": 0.5679,
"step": 66
},
{
"epoch": 0.05,
"learning_rate": 0.0004,
"loss": 0.5507,
"step": 67
},
{
"epoch": 0.05,
"learning_rate": 0.0004,
"loss": 1.1764,
"step": 68
},
{
"epoch": 0.05,
"learning_rate": 0.0004,
"loss": 0.6332,
"step": 69
},
{
"epoch": 0.05,
"learning_rate": 0.0004,
"loss": 0.8759,
"step": 70
},
{
"epoch": 0.05,
"learning_rate": 0.0004,
"loss": 0.8385,
"step": 71
},
{
"epoch": 0.05,
"learning_rate": 0.0004,
"loss": 0.7869,
"step": 72
},
{
"epoch": 0.05,
"learning_rate": 0.0004,
"loss": 1.4457,
"step": 73
},
{
"epoch": 0.05,
"learning_rate": 0.0004,
"loss": 0.9331,
"step": 74
},
{
"epoch": 0.05,
"learning_rate": 0.0004,
"loss": 0.8943,
"step": 75
},
{
"epoch": 0.05,
"learning_rate": 0.0004,
"loss": 0.5783,
"step": 76
},
{
"epoch": 0.05,
"learning_rate": 0.0004,
"loss": 0.6433,
"step": 77
},
{
"epoch": 0.05,
"learning_rate": 0.0004,
"loss": 0.7517,
"step": 78
},
{
"epoch": 0.06,
"learning_rate": 0.0004,
"loss": 0.5061,
"step": 79
},
{
"epoch": 0.06,
"learning_rate": 0.0004,
"loss": 0.7332,
"step": 80
},
{
"epoch": 0.06,
"learning_rate": 0.0004,
"loss": 1.6546,
"step": 81
},
{
"epoch": 0.06,
"learning_rate": 0.0004,
"loss": 0.4723,
"step": 82
},
{
"epoch": 0.06,
"learning_rate": 0.0004,
"loss": 0.2814,
"step": 83
},
{
"epoch": 0.06,
"learning_rate": 0.0004,
"loss": 0.4068,
"step": 84
},
{
"epoch": 0.06,
"learning_rate": 0.0004,
"loss": 1.1729,
"step": 85
},
{
"epoch": 0.06,
"learning_rate": 0.0004,
"loss": 0.698,
"step": 86
},
{
"epoch": 0.06,
"learning_rate": 0.0004,
"loss": 0.7746,
"step": 87
},
{
"epoch": 0.06,
"learning_rate": 0.0004,
"loss": 0.5065,
"step": 88
},
{
"epoch": 0.06,
"learning_rate": 0.0004,
"loss": 0.4654,
"step": 89
},
{
"epoch": 0.06,
"learning_rate": 0.0004,
"loss": 0.5724,
"step": 90
},
{
"epoch": 0.06,
"learning_rate": 0.0004,
"loss": 1.1535,
"step": 91
},
{
"epoch": 0.06,
"learning_rate": 0.0004,
"loss": 0.3854,
"step": 92
},
{
"epoch": 0.07,
"learning_rate": 0.0004,
"loss": 1.6848,
"step": 93
},
{
"epoch": 0.07,
"learning_rate": 0.0004,
"loss": 0.6667,
"step": 94
},
{
"epoch": 0.07,
"learning_rate": 0.0004,
"loss": 0.4526,
"step": 95
},
{
"epoch": 0.07,
"learning_rate": 0.0004,
"loss": 0.7112,
"step": 96
},
{
"epoch": 0.07,
"learning_rate": 0.0004,
"loss": 0.6816,
"step": 97
},
{
"epoch": 0.07,
"learning_rate": 0.0004,
"loss": 0.7496,
"step": 98
},
{
"epoch": 0.07,
"learning_rate": 0.0004,
"loss": 1.189,
"step": 99
},
{
"epoch": 0.07,
"learning_rate": 0.0004,
"loss": 1.4239,
"step": 100
},
{
"epoch": 0.07,
"learning_rate": 0.0004,
"loss": 0.4378,
"step": 101
},
{
"epoch": 0.07,
"learning_rate": 0.0004,
"loss": 2.3054,
"step": 102
},
{
"epoch": 0.07,
"learning_rate": 0.0004,
"loss": 0.1593,
"step": 103
},
{
"epoch": 0.07,
"learning_rate": 0.0004,
"loss": 0.2481,
"step": 104
},
{
"epoch": 0.07,
"learning_rate": 0.0004,
"loss": 0.6469,
"step": 105
},
{
"epoch": 0.07,
"learning_rate": 0.0004,
"loss": 0.7417,
"step": 106
},
{
"epoch": 0.08,
"learning_rate": 0.0004,
"loss": 0.7767,
"step": 107
},
{
"epoch": 0.08,
"learning_rate": 0.0004,
"loss": 0.831,
"step": 108
},
{
"epoch": 0.08,
"learning_rate": 0.0004,
"loss": 0.7954,
"step": 109
},
{
"epoch": 0.08,
"learning_rate": 0.0004,
"loss": 0.6376,
"step": 110
},
{
"epoch": 0.08,
"learning_rate": 0.0004,
"loss": 0.6208,
"step": 111
},
{
"epoch": 0.08,
"learning_rate": 0.0004,
"loss": 0.9038,
"step": 112
},
{
"epoch": 0.08,
"learning_rate": 0.0004,
"loss": 0.9994,
"step": 113
},
{
"epoch": 0.08,
"learning_rate": 0.0004,
"loss": 1.0233,
"step": 114
},
{
"epoch": 0.08,
"learning_rate": 0.0004,
"loss": 0.7573,
"step": 115
},
{
"epoch": 0.08,
"learning_rate": 0.0004,
"loss": 0.5333,
"step": 116
},
{
"epoch": 0.08,
"learning_rate": 0.0004,
"loss": 0.4886,
"step": 117
},
{
"epoch": 0.08,
"learning_rate": 0.0004,
"loss": 0.75,
"step": 118
},
{
"epoch": 0.08,
"learning_rate": 0.0004,
"loss": 0.6377,
"step": 119
},
{
"epoch": 0.08,
"learning_rate": 0.0004,
"loss": 0.6518,
"step": 120
},
{
"epoch": 0.08,
"learning_rate": 0.0004,
"loss": 0.6409,
"step": 121
},
{
"epoch": 0.09,
"learning_rate": 0.0004,
"loss": 0.5996,
"step": 122
},
{
"epoch": 0.09,
"learning_rate": 0.0004,
"loss": 0.7964,
"step": 123
},
{
"epoch": 0.09,
"learning_rate": 0.0004,
"loss": 0.5898,
"step": 124
},
{
"epoch": 0.09,
"learning_rate": 0.0004,
"loss": 0.8753,
"step": 125
},
{
"epoch": 0.09,
"learning_rate": 0.0004,
"loss": 0.6304,
"step": 126
},
{
"epoch": 0.09,
"learning_rate": 0.0004,
"loss": 0.5428,
"step": 127
},
{
"epoch": 0.09,
"learning_rate": 0.0004,
"loss": 0.8571,
"step": 128
},
{
"epoch": 0.09,
"learning_rate": 0.0004,
"loss": 0.8339,
"step": 129
},
{
"epoch": 0.09,
"learning_rate": 0.0004,
"loss": 0.6087,
"step": 130
},
{
"epoch": 0.09,
"learning_rate": 0.0004,
"loss": 0.3547,
"step": 131
},
{
"epoch": 0.09,
"learning_rate": 0.0004,
"loss": 0.3494,
"step": 132
},
{
"epoch": 0.09,
"learning_rate": 0.0004,
"loss": 0.5394,
"step": 133
},
{
"epoch": 0.09,
"learning_rate": 0.0004,
"loss": 0.4315,
"step": 134
},
{
"epoch": 0.09,
"learning_rate": 0.0004,
"loss": 0.3575,
"step": 135
},
{
"epoch": 0.1,
"learning_rate": 0.0004,
"loss": 0.4983,
"step": 136
},
{
"epoch": 0.1,
"learning_rate": 0.0004,
"loss": 0.4292,
"step": 137
},
{
"epoch": 0.1,
"learning_rate": 0.0004,
"loss": 0.6315,
"step": 138
},
{
"epoch": 0.1,
"learning_rate": 0.0004,
"loss": 0.9827,
"step": 139
},
{
"epoch": 0.1,
"learning_rate": 0.0004,
"loss": 1.4456,
"step": 140
},
{
"epoch": 0.1,
"learning_rate": 0.0004,
"loss": 0.5151,
"step": 141
},
{
"epoch": 0.1,
"learning_rate": 0.0004,
"loss": 0.4651,
"step": 142
},
{
"epoch": 0.1,
"learning_rate": 0.0004,
"loss": 0.8682,
"step": 143
},
{
"epoch": 0.1,
"learning_rate": 0.0004,
"loss": 1.1543,
"step": 144
},
{
"epoch": 0.1,
"learning_rate": 0.0004,
"loss": 0.3378,
"step": 145
},
{
"epoch": 0.1,
"learning_rate": 0.0004,
"loss": 0.5381,
"step": 146
},
{
"epoch": 0.1,
"learning_rate": 0.0004,
"loss": 0.5189,
"step": 147
},
{
"epoch": 0.1,
"learning_rate": 0.0004,
"loss": 0.3988,
"step": 148
},
{
"epoch": 0.1,
"learning_rate": 0.0004,
"loss": 0.9694,
"step": 149
},
{
"epoch": 0.11,
"learning_rate": 0.0004,
"loss": 1.2404,
"step": 150
},
{
"epoch": 0.11,
"learning_rate": 0.0004,
"loss": 0.3767,
"step": 151
},
{
"epoch": 0.11,
"learning_rate": 0.0004,
"loss": 0.5564,
"step": 152
},
{
"epoch": 0.11,
"learning_rate": 0.0004,
"loss": 0.2665,
"step": 153
},
{
"epoch": 0.11,
"learning_rate": 0.0004,
"loss": 0.172,
"step": 154
},
{
"epoch": 0.11,
"learning_rate": 0.0004,
"loss": 0.6584,
"step": 155
},
{
"epoch": 0.11,
"learning_rate": 0.0004,
"loss": 0.7588,
"step": 156
},
{
"epoch": 0.11,
"learning_rate": 0.0004,
"loss": 0.9431,
"step": 157
},
{
"epoch": 0.11,
"learning_rate": 0.0004,
"loss": 0.9456,
"step": 158
},
{
"epoch": 0.11,
"learning_rate": 0.0004,
"loss": 0.7471,
"step": 159
},
{
"epoch": 0.11,
"learning_rate": 0.0004,
"loss": 0.8477,
"step": 160
},
{
"epoch": 0.11,
"learning_rate": 0.0004,
"loss": 0.7679,
"step": 161
},
{
"epoch": 0.11,
"learning_rate": 0.0004,
"loss": 0.8937,
"step": 162
},
{
"epoch": 0.11,
"learning_rate": 0.0004,
"loss": 1.2858,
"step": 163
},
{
"epoch": 0.12,
"learning_rate": 0.0004,
"loss": 0.7806,
"step": 164
},
{
"epoch": 0.12,
"learning_rate": 0.0004,
"loss": 0.8034,
"step": 165
},
{
"epoch": 0.12,
"learning_rate": 0.0004,
"loss": 0.6372,
"step": 166
},
{
"epoch": 0.12,
"learning_rate": 0.0004,
"loss": 0.8217,
"step": 167
},
{
"epoch": 0.12,
"learning_rate": 0.0004,
"loss": 0.3706,
"step": 168
},
{
"epoch": 0.12,
"learning_rate": 0.0004,
"loss": 1.0851,
"step": 169
},
{
"epoch": 0.12,
"learning_rate": 0.0004,
"loss": 0.6719,
"step": 170
},
{
"epoch": 0.12,
"learning_rate": 0.0004,
"loss": 1.0366,
"step": 171
},
{
"epoch": 0.12,
"learning_rate": 0.0004,
"loss": 0.5603,
"step": 172
},
{
"epoch": 0.12,
"learning_rate": 0.0004,
"loss": 0.5651,
"step": 173
},
{
"epoch": 0.12,
"learning_rate": 0.0004,
"loss": 0.3804,
"step": 174
},
{
"epoch": 0.12,
"learning_rate": 0.0004,
"loss": 0.5946,
"step": 175
},
{
"epoch": 0.12,
"learning_rate": 0.0004,
"loss": 0.7649,
"step": 176
},
{
"epoch": 0.12,
"learning_rate": 0.0004,
"loss": 0.5035,
"step": 177
},
{
"epoch": 0.12,
"learning_rate": 0.0004,
"loss": 0.8066,
"step": 178
},
{
"epoch": 0.13,
"learning_rate": 0.0004,
"loss": 0.8046,
"step": 179
},
{
"epoch": 0.13,
"learning_rate": 0.0004,
"loss": 0.4233,
"step": 180
},
{
"epoch": 0.13,
"learning_rate": 0.0004,
"loss": 0.7945,
"step": 181
},
{
"epoch": 0.13,
"learning_rate": 0.0004,
"loss": 0.5722,
"step": 182
},
{
"epoch": 0.13,
"learning_rate": 0.0004,
"loss": 0.6088,
"step": 183
},
{
"epoch": 0.13,
"learning_rate": 0.0004,
"loss": 0.4229,
"step": 184
},
{
"epoch": 0.13,
"learning_rate": 0.0004,
"loss": 0.8723,
"step": 185
},
{
"epoch": 0.13,
"learning_rate": 0.0004,
"loss": 0.9287,
"step": 186
},
{
"epoch": 0.13,
"learning_rate": 0.0004,
"loss": 0.8082,
"step": 187
},
{
"epoch": 0.13,
"learning_rate": 0.0004,
"loss": 0.4144,
"step": 188
},
{
"epoch": 0.13,
"learning_rate": 0.0004,
"loss": 0.372,
"step": 189
},
{
"epoch": 0.13,
"learning_rate": 0.0004,
"loss": 0.5725,
"step": 190
},
{
"epoch": 0.13,
"learning_rate": 0.0004,
"loss": 0.3837,
"step": 191
},
{
"epoch": 0.13,
"learning_rate": 0.0004,
"loss": 0.5155,
"step": 192
},
{
"epoch": 0.14,
"learning_rate": 0.0004,
"loss": 0.5472,
"step": 193
},
{
"epoch": 0.14,
"learning_rate": 0.0004,
"loss": 0.6488,
"step": 194
},
{
"epoch": 0.14,
"learning_rate": 0.0004,
"loss": 1.4385,
"step": 195
},
{
"epoch": 0.14,
"learning_rate": 0.0004,
"loss": 0.3418,
"step": 196
},
{
"epoch": 0.14,
"learning_rate": 0.0004,
"loss": 0.8811,
"step": 197
},
{
"epoch": 0.14,
"learning_rate": 0.0004,
"loss": 0.8176,
"step": 198
},
{
"epoch": 0.14,
"learning_rate": 0.0004,
"loss": 0.5379,
"step": 199
},
{
"epoch": 0.14,
"learning_rate": 0.0004,
"loss": 1.0277,
"step": 200
},
{
"epoch": 0.14,
"eval_loss": 0.662463366985321,
"eval_runtime": 262.5709,
"eval_samples_per_second": 1.904,
"eval_steps_per_second": 0.952,
"step": 200
},
{
"epoch": 0.14,
"mmlu_eval_accuracy": 0.41276444254447825,
"mmlu_eval_accuracy_abstract_algebra": 0.09090909090909091,
"mmlu_eval_accuracy_anatomy": 0.42857142857142855,
"mmlu_eval_accuracy_astronomy": 0.5,
"mmlu_eval_accuracy_business_ethics": 0.2727272727272727,
"mmlu_eval_accuracy_clinical_knowledge": 0.4482758620689655,
"mmlu_eval_accuracy_college_biology": 0.3125,
"mmlu_eval_accuracy_college_chemistry": 0.375,
"mmlu_eval_accuracy_college_computer_science": 0.5454545454545454,
"mmlu_eval_accuracy_college_mathematics": 0.2727272727272727,
"mmlu_eval_accuracy_college_medicine": 0.6363636363636364,
"mmlu_eval_accuracy_college_physics": 0.2727272727272727,
"mmlu_eval_accuracy_computer_security": 0.36363636363636365,
"mmlu_eval_accuracy_conceptual_physics": 0.3076923076923077,
"mmlu_eval_accuracy_econometrics": 0.4166666666666667,
"mmlu_eval_accuracy_electrical_engineering": 0.5,
"mmlu_eval_accuracy_elementary_mathematics": 0.4146341463414634,
"mmlu_eval_accuracy_formal_logic": 0.5714285714285714,
"mmlu_eval_accuracy_global_facts": 0.2,
"mmlu_eval_accuracy_high_school_biology": 0.1875,
"mmlu_eval_accuracy_high_school_chemistry": 0.4090909090909091,
"mmlu_eval_accuracy_high_school_computer_science": 0.6666666666666666,
"mmlu_eval_accuracy_high_school_european_history": 0.4444444444444444,
"mmlu_eval_accuracy_high_school_geography": 0.5909090909090909,
"mmlu_eval_accuracy_high_school_government_and_politics": 0.5714285714285714,
"mmlu_eval_accuracy_high_school_macroeconomics": 0.32558139534883723,
"mmlu_eval_accuracy_high_school_mathematics": 0.4482758620689655,
"mmlu_eval_accuracy_high_school_microeconomics": 0.5714285714285714,
"mmlu_loss": 0.7435930466353893,
"step": 200
}
],
"max_steps": 30000,
"num_train_epochs": 22,
"total_flos": 3.977831564987597e+16,
"trial_name": null,
"trial_params": null
}