byt5_re_1k / trainer_state.json
Alexziyu's picture
1
4c9a1e6
{
"best_metric": 1.4208089113235474,
"best_model_checkpoint": "AlexWang99/byt5_re_1k/checkpoint-362",
"epoch": 181.0,
"eval_steps": 500,
"global_step": 362,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_loss": 7.775562286376953,
"eval_runtime": 10.6848,
"eval_samples_per_second": 935.909,
"eval_steps_per_second": 1.217,
"step": 2
},
{
"epoch": 2.0,
"eval_loss": 6.888936519622803,
"eval_runtime": 10.8293,
"eval_samples_per_second": 923.42,
"eval_steps_per_second": 1.2,
"step": 4
},
{
"epoch": 3.0,
"eval_loss": 6.1838483810424805,
"eval_runtime": 10.6887,
"eval_samples_per_second": 935.569,
"eval_steps_per_second": 1.216,
"step": 6
},
{
"epoch": 4.0,
"eval_loss": 5.626400947570801,
"eval_runtime": 10.9681,
"eval_samples_per_second": 911.734,
"eval_steps_per_second": 1.185,
"step": 8
},
{
"epoch": 5.0,
"eval_loss": 5.153331279754639,
"eval_runtime": 10.7369,
"eval_samples_per_second": 931.367,
"eval_steps_per_second": 1.211,
"step": 10
},
{
"epoch": 6.0,
"eval_loss": 4.734142780303955,
"eval_runtime": 11.004,
"eval_samples_per_second": 908.762,
"eval_steps_per_second": 1.181,
"step": 12
},
{
"epoch": 7.0,
"eval_loss": 4.333576679229736,
"eval_runtime": 10.7676,
"eval_samples_per_second": 928.708,
"eval_steps_per_second": 1.207,
"step": 14
},
{
"epoch": 8.0,
"eval_loss": 3.938502788543701,
"eval_runtime": 10.782,
"eval_samples_per_second": 927.476,
"eval_steps_per_second": 1.206,
"step": 16
},
{
"epoch": 9.0,
"eval_loss": 3.557016134262085,
"eval_runtime": 11.022,
"eval_samples_per_second": 907.278,
"eval_steps_per_second": 1.179,
"step": 18
},
{
"epoch": 10.0,
"eval_loss": 3.2168831825256348,
"eval_runtime": 10.7843,
"eval_samples_per_second": 927.273,
"eval_steps_per_second": 1.205,
"step": 20
},
{
"epoch": 11.0,
"eval_loss": 2.9309566020965576,
"eval_runtime": 10.8755,
"eval_samples_per_second": 919.501,
"eval_steps_per_second": 1.195,
"step": 22
},
{
"epoch": 12.0,
"eval_loss": 2.7042219638824463,
"eval_runtime": 10.7967,
"eval_samples_per_second": 926.21,
"eval_steps_per_second": 1.204,
"step": 24
},
{
"epoch": 13.0,
"eval_loss": 2.5138416290283203,
"eval_runtime": 10.8666,
"eval_samples_per_second": 920.248,
"eval_steps_per_second": 1.196,
"step": 26
},
{
"epoch": 14.0,
"eval_loss": 2.3652985095977783,
"eval_runtime": 10.7919,
"eval_samples_per_second": 926.619,
"eval_steps_per_second": 1.205,
"step": 28
},
{
"epoch": 15.0,
"eval_loss": 2.248030185699463,
"eval_runtime": 10.8705,
"eval_samples_per_second": 919.923,
"eval_steps_per_second": 1.196,
"step": 30
},
{
"epoch": 16.0,
"eval_loss": 2.13440203666687,
"eval_runtime": 10.7977,
"eval_samples_per_second": 926.12,
"eval_steps_per_second": 1.204,
"step": 32
},
{
"epoch": 17.0,
"eval_loss": 2.0899171829223633,
"eval_runtime": 10.7912,
"eval_samples_per_second": 926.685,
"eval_steps_per_second": 1.205,
"step": 34
},
{
"epoch": 18.0,
"eval_loss": 2.100477457046509,
"eval_runtime": 10.8653,
"eval_samples_per_second": 920.357,
"eval_steps_per_second": 1.196,
"step": 36
},
{
"epoch": 19.0,
"eval_loss": 2.047548532485962,
"eval_runtime": 10.7885,
"eval_samples_per_second": 926.915,
"eval_steps_per_second": 1.205,
"step": 38
},
{
"epoch": 20.0,
"eval_loss": 1.996556043624878,
"eval_runtime": 11.0201,
"eval_samples_per_second": 907.429,
"eval_steps_per_second": 1.18,
"step": 40
},
{
"epoch": 21.0,
"eval_loss": 1.9695481061935425,
"eval_runtime": 10.783,
"eval_samples_per_second": 927.385,
"eval_steps_per_second": 1.206,
"step": 42
},
{
"epoch": 22.0,
"eval_loss": 1.9760867357254028,
"eval_runtime": 10.8889,
"eval_samples_per_second": 918.37,
"eval_steps_per_second": 1.194,
"step": 44
},
{
"epoch": 23.0,
"eval_loss": 1.9658013582229614,
"eval_runtime": 10.7879,
"eval_samples_per_second": 926.962,
"eval_steps_per_second": 1.205,
"step": 46
},
{
"epoch": 24.0,
"eval_loss": 1.9119044542312622,
"eval_runtime": 10.8618,
"eval_samples_per_second": 920.659,
"eval_steps_per_second": 1.197,
"step": 48
},
{
"epoch": 25.0,
"eval_loss": 1.89186429977417,
"eval_runtime": 10.7933,
"eval_samples_per_second": 926.503,
"eval_steps_per_second": 1.204,
"step": 50
},
{
"epoch": 26.0,
"eval_loss": 1.8932195901870728,
"eval_runtime": 10.873,
"eval_samples_per_second": 919.709,
"eval_steps_per_second": 1.196,
"step": 52
},
{
"epoch": 27.0,
"eval_loss": 1.9076615571975708,
"eval_runtime": 10.7917,
"eval_samples_per_second": 926.637,
"eval_steps_per_second": 1.205,
"step": 54
},
{
"epoch": 28.0,
"eval_loss": 1.8671808242797852,
"eval_runtime": 10.7945,
"eval_samples_per_second": 926.402,
"eval_steps_per_second": 1.204,
"step": 56
},
{
"epoch": 29.0,
"eval_loss": 1.8664835691452026,
"eval_runtime": 10.8739,
"eval_samples_per_second": 919.632,
"eval_steps_per_second": 1.196,
"step": 58
},
{
"epoch": 30.0,
"eval_loss": 1.8679856061935425,
"eval_runtime": 10.7966,
"eval_samples_per_second": 926.219,
"eval_steps_per_second": 1.204,
"step": 60
},
{
"epoch": 31.0,
"eval_loss": 1.858332633972168,
"eval_runtime": 10.8742,
"eval_samples_per_second": 919.609,
"eval_steps_per_second": 1.195,
"step": 62
},
{
"epoch": 32.0,
"eval_loss": 1.8586550951004028,
"eval_runtime": 10.7775,
"eval_samples_per_second": 927.859,
"eval_steps_per_second": 1.206,
"step": 64
},
{
"epoch": 33.0,
"eval_loss": 1.836769700050354,
"eval_runtime": 10.8615,
"eval_samples_per_second": 920.686,
"eval_steps_per_second": 1.197,
"step": 66
},
{
"epoch": 34.0,
"eval_loss": 1.8279472589492798,
"eval_runtime": 10.7787,
"eval_samples_per_second": 927.754,
"eval_steps_per_second": 1.206,
"step": 68
},
{
"epoch": 35.0,
"eval_loss": 1.817927360534668,
"eval_runtime": 10.7813,
"eval_samples_per_second": 927.534,
"eval_steps_per_second": 1.206,
"step": 70
},
{
"epoch": 36.0,
"eval_loss": 1.7950810194015503,
"eval_runtime": 11.0203,
"eval_samples_per_second": 907.419,
"eval_steps_per_second": 1.18,
"step": 72
},
{
"epoch": 37.0,
"eval_loss": 1.7667052745819092,
"eval_runtime": 10.7915,
"eval_samples_per_second": 926.655,
"eval_steps_per_second": 1.205,
"step": 74
},
{
"epoch": 38.0,
"eval_loss": 1.7852015495300293,
"eval_runtime": 10.8703,
"eval_samples_per_second": 919.941,
"eval_steps_per_second": 1.196,
"step": 76
},
{
"epoch": 39.0,
"eval_loss": 1.7741429805755615,
"eval_runtime": 10.7837,
"eval_samples_per_second": 927.326,
"eval_steps_per_second": 1.206,
"step": 78
},
{
"epoch": 40.0,
"eval_loss": 1.718443512916565,
"eval_runtime": 10.8729,
"eval_samples_per_second": 919.72,
"eval_steps_per_second": 1.196,
"step": 80
},
{
"epoch": 41.0,
"eval_loss": 1.6899281740188599,
"eval_runtime": 10.789,
"eval_samples_per_second": 926.869,
"eval_steps_per_second": 1.205,
"step": 82
},
{
"epoch": 42.0,
"eval_loss": 1.6984148025512695,
"eval_runtime": 10.8669,
"eval_samples_per_second": 920.229,
"eval_steps_per_second": 1.196,
"step": 84
},
{
"epoch": 43.0,
"eval_loss": 1.68986177444458,
"eval_runtime": 10.7878,
"eval_samples_per_second": 926.976,
"eval_steps_per_second": 1.205,
"step": 86
},
{
"epoch": 44.0,
"eval_loss": 1.643083930015564,
"eval_runtime": 10.7947,
"eval_samples_per_second": 926.384,
"eval_steps_per_second": 1.204,
"step": 88
},
{
"epoch": 45.0,
"eval_loss": 1.6373401880264282,
"eval_runtime": 10.8675,
"eval_samples_per_second": 920.171,
"eval_steps_per_second": 1.196,
"step": 90
},
{
"epoch": 46.0,
"eval_loss": 1.6289523839950562,
"eval_runtime": 10.7853,
"eval_samples_per_second": 927.192,
"eval_steps_per_second": 1.205,
"step": 92
},
{
"epoch": 47.0,
"eval_loss": 1.6165605783462524,
"eval_runtime": 11.0285,
"eval_samples_per_second": 906.741,
"eval_steps_per_second": 1.179,
"step": 94
},
{
"epoch": 48.0,
"eval_loss": 1.6076592206954956,
"eval_runtime": 10.7945,
"eval_samples_per_second": 926.401,
"eval_steps_per_second": 1.204,
"step": 96
},
{
"epoch": 49.0,
"eval_loss": 1.604373574256897,
"eval_runtime": 10.8717,
"eval_samples_per_second": 919.822,
"eval_steps_per_second": 1.196,
"step": 98
},
{
"epoch": 50.0,
"eval_loss": 1.616941213607788,
"eval_runtime": 10.793,
"eval_samples_per_second": 926.53,
"eval_steps_per_second": 1.204,
"step": 100
},
{
"epoch": 51.0,
"eval_loss": 1.6135770082473755,
"eval_runtime": 11.0422,
"eval_samples_per_second": 905.614,
"eval_steps_per_second": 1.177,
"step": 102
},
{
"epoch": 52.0,
"eval_loss": 1.584197759628296,
"eval_runtime": 10.7862,
"eval_samples_per_second": 927.109,
"eval_steps_per_second": 1.205,
"step": 104
},
{
"epoch": 53.0,
"eval_loss": 1.5821086168289185,
"eval_runtime": 11.0219,
"eval_samples_per_second": 907.286,
"eval_steps_per_second": 1.179,
"step": 106
},
{
"epoch": 54.0,
"eval_loss": 1.592121958732605,
"eval_runtime": 10.7783,
"eval_samples_per_second": 927.787,
"eval_steps_per_second": 1.206,
"step": 108
},
{
"epoch": 55.0,
"eval_loss": 1.598443865776062,
"eval_runtime": 11.0215,
"eval_samples_per_second": 907.318,
"eval_steps_per_second": 1.18,
"step": 110
},
{
"epoch": 56.0,
"eval_loss": 1.562904715538025,
"eval_runtime": 10.8689,
"eval_samples_per_second": 920.053,
"eval_steps_per_second": 1.196,
"step": 112
},
{
"epoch": 57.0,
"eval_loss": 1.556736707687378,
"eval_runtime": 10.7877,
"eval_samples_per_second": 926.98,
"eval_steps_per_second": 1.205,
"step": 114
},
{
"epoch": 58.0,
"eval_loss": 1.554081916809082,
"eval_runtime": 10.8724,
"eval_samples_per_second": 919.757,
"eval_steps_per_second": 1.196,
"step": 116
},
{
"epoch": 59.0,
"eval_loss": 1.5780812501907349,
"eval_runtime": 10.7882,
"eval_samples_per_second": 926.94,
"eval_steps_per_second": 1.205,
"step": 118
},
{
"epoch": 60.0,
"eval_loss": 1.5569387674331665,
"eval_runtime": 10.878,
"eval_samples_per_second": 919.289,
"eval_steps_per_second": 1.195,
"step": 120
},
{
"epoch": 61.0,
"eval_loss": 1.5472592115402222,
"eval_runtime": 10.797,
"eval_samples_per_second": 926.18,
"eval_steps_per_second": 1.204,
"step": 122
},
{
"epoch": 62.0,
"eval_loss": 1.5447337627410889,
"eval_runtime": 10.7871,
"eval_samples_per_second": 927.031,
"eval_steps_per_second": 1.205,
"step": 124
},
{
"epoch": 63.0,
"eval_loss": 1.5714285373687744,
"eval_runtime": 11.0266,
"eval_samples_per_second": 906.896,
"eval_steps_per_second": 1.179,
"step": 126
},
{
"epoch": 64.0,
"eval_loss": 1.572940468788147,
"eval_runtime": 10.7931,
"eval_samples_per_second": 926.518,
"eval_steps_per_second": 1.204,
"step": 128
},
{
"epoch": 65.0,
"eval_loss": 1.5434839725494385,
"eval_runtime": 10.8657,
"eval_samples_per_second": 920.328,
"eval_steps_per_second": 1.196,
"step": 130
},
{
"epoch": 66.0,
"eval_loss": 1.5406599044799805,
"eval_runtime": 10.7896,
"eval_samples_per_second": 926.816,
"eval_steps_per_second": 1.205,
"step": 132
},
{
"epoch": 67.0,
"eval_loss": 1.5430865287780762,
"eval_runtime": 10.868,
"eval_samples_per_second": 920.137,
"eval_steps_per_second": 1.196,
"step": 134
},
{
"epoch": 68.0,
"eval_loss": 1.5774503946304321,
"eval_runtime": 10.7888,
"eval_samples_per_second": 926.889,
"eval_steps_per_second": 1.205,
"step": 136
},
{
"epoch": 69.0,
"eval_loss": 1.5680021047592163,
"eval_runtime": 10.8677,
"eval_samples_per_second": 920.161,
"eval_steps_per_second": 1.196,
"step": 138
},
{
"epoch": 70.0,
"eval_loss": 1.5386120080947876,
"eval_runtime": 10.7802,
"eval_samples_per_second": 927.629,
"eval_steps_per_second": 1.206,
"step": 140
},
{
"epoch": 71.0,
"eval_loss": 1.537217378616333,
"eval_runtime": 10.7876,
"eval_samples_per_second": 926.994,
"eval_steps_per_second": 1.205,
"step": 142
},
{
"epoch": 72.0,
"eval_loss": 1.5415935516357422,
"eval_runtime": 10.8784,
"eval_samples_per_second": 919.255,
"eval_steps_per_second": 1.195,
"step": 144
},
{
"epoch": 73.0,
"eval_loss": 1.5851819515228271,
"eval_runtime": 10.7879,
"eval_samples_per_second": 926.962,
"eval_steps_per_second": 1.205,
"step": 146
},
{
"epoch": 74.0,
"eval_loss": 1.5615135431289673,
"eval_runtime": 11.0229,
"eval_samples_per_second": 907.205,
"eval_steps_per_second": 1.179,
"step": 148
},
{
"epoch": 75.0,
"eval_loss": 1.5356107950210571,
"eval_runtime": 10.7892,
"eval_samples_per_second": 926.852,
"eval_steps_per_second": 1.205,
"step": 150
},
{
"epoch": 76.0,
"eval_loss": 1.5345250368118286,
"eval_runtime": 10.8628,
"eval_samples_per_second": 920.574,
"eval_steps_per_second": 1.197,
"step": 152
},
{
"epoch": 77.0,
"eval_loss": 1.5348494052886963,
"eval_runtime": 10.7982,
"eval_samples_per_second": 926.08,
"eval_steps_per_second": 1.204,
"step": 154
},
{
"epoch": 78.0,
"eval_loss": 1.5350173711776733,
"eval_runtime": 11.0234,
"eval_samples_per_second": 907.165,
"eval_steps_per_second": 1.179,
"step": 156
},
{
"epoch": 79.0,
"eval_loss": 1.5344058275222778,
"eval_runtime": 10.7938,
"eval_samples_per_second": 926.459,
"eval_steps_per_second": 1.204,
"step": 158
},
{
"epoch": 80.0,
"eval_loss": 1.5319613218307495,
"eval_runtime": 11.0103,
"eval_samples_per_second": 908.24,
"eval_steps_per_second": 1.181,
"step": 160
},
{
"epoch": 81.0,
"eval_loss": 1.5311990976333618,
"eval_runtime": 10.7993,
"eval_samples_per_second": 925.99,
"eval_steps_per_second": 1.204,
"step": 162
},
{
"epoch": 82.0,
"eval_loss": 1.5308246612548828,
"eval_runtime": 11.0279,
"eval_samples_per_second": 906.787,
"eval_steps_per_second": 1.179,
"step": 164
},
{
"epoch": 83.0,
"eval_loss": 1.530852198600769,
"eval_runtime": 10.8812,
"eval_samples_per_second": 919.019,
"eval_steps_per_second": 1.195,
"step": 166
},
{
"epoch": 84.0,
"eval_loss": 1.528510332107544,
"eval_runtime": 10.7908,
"eval_samples_per_second": 926.72,
"eval_steps_per_second": 1.205,
"step": 168
},
{
"epoch": 85.0,
"eval_loss": 1.5265289545059204,
"eval_runtime": 10.8831,
"eval_samples_per_second": 918.858,
"eval_steps_per_second": 1.195,
"step": 170
},
{
"epoch": 86.0,
"eval_loss": 1.525810956954956,
"eval_runtime": 10.7922,
"eval_samples_per_second": 926.595,
"eval_steps_per_second": 1.205,
"step": 172
},
{
"epoch": 87.0,
"eval_loss": 1.5260074138641357,
"eval_runtime": 10.8658,
"eval_samples_per_second": 920.317,
"eval_steps_per_second": 1.196,
"step": 174
},
{
"epoch": 88.0,
"eval_loss": 1.523621678352356,
"eval_runtime": 10.8062,
"eval_samples_per_second": 925.398,
"eval_steps_per_second": 1.203,
"step": 176
},
{
"epoch": 89.0,
"eval_loss": 1.522126317024231,
"eval_runtime": 10.7844,
"eval_samples_per_second": 927.263,
"eval_steps_per_second": 1.205,
"step": 178
},
{
"epoch": 90.0,
"eval_loss": 1.5219790935516357,
"eval_runtime": 10.8694,
"eval_samples_per_second": 920.018,
"eval_steps_per_second": 1.196,
"step": 180
},
{
"epoch": 91.0,
"eval_loss": 1.5207847356796265,
"eval_runtime": 10.7979,
"eval_samples_per_second": 926.103,
"eval_steps_per_second": 1.204,
"step": 182
},
{
"epoch": 92.0,
"eval_loss": 1.517883062362671,
"eval_runtime": 10.8783,
"eval_samples_per_second": 919.261,
"eval_steps_per_second": 1.195,
"step": 184
},
{
"epoch": 93.0,
"eval_loss": 1.515496015548706,
"eval_runtime": 10.783,
"eval_samples_per_second": 927.387,
"eval_steps_per_second": 1.206,
"step": 186
},
{
"epoch": 94.0,
"eval_loss": 1.5140419006347656,
"eval_runtime": 11.0298,
"eval_samples_per_second": 906.635,
"eval_steps_per_second": 1.179,
"step": 188
},
{
"epoch": 95.0,
"eval_loss": 1.5137407779693604,
"eval_runtime": 10.8014,
"eval_samples_per_second": 925.809,
"eval_steps_per_second": 1.204,
"step": 190
},
{
"epoch": 96.0,
"eval_loss": 1.5128830671310425,
"eval_runtime": 10.8766,
"eval_samples_per_second": 919.402,
"eval_steps_per_second": 1.195,
"step": 192
},
{
"epoch": 97.0,
"eval_loss": 1.5099443197250366,
"eval_runtime": 10.809,
"eval_samples_per_second": 925.158,
"eval_steps_per_second": 1.203,
"step": 194
},
{
"epoch": 98.0,
"eval_loss": 1.506999135017395,
"eval_runtime": 10.9785,
"eval_samples_per_second": 910.872,
"eval_steps_per_second": 1.184,
"step": 196
},
{
"epoch": 99.0,
"eval_loss": 1.5044018030166626,
"eval_runtime": 10.883,
"eval_samples_per_second": 918.868,
"eval_steps_per_second": 1.195,
"step": 198
},
{
"epoch": 100.0,
"eval_loss": 1.5024514198303223,
"eval_runtime": 10.7906,
"eval_samples_per_second": 926.73,
"eval_steps_per_second": 1.205,
"step": 200
},
{
"epoch": 101.0,
"eval_loss": 1.501634120941162,
"eval_runtime": 10.7793,
"eval_samples_per_second": 927.703,
"eval_steps_per_second": 1.206,
"step": 202
},
{
"epoch": 102.0,
"eval_loss": 1.5009431838989258,
"eval_runtime": 10.7085,
"eval_samples_per_second": 933.839,
"eval_steps_per_second": 1.214,
"step": 204
},
{
"epoch": 103.0,
"eval_loss": 1.499057650566101,
"eval_runtime": 10.6874,
"eval_samples_per_second": 935.68,
"eval_steps_per_second": 1.216,
"step": 206
},
{
"epoch": 104.0,
"eval_loss": 1.4963877201080322,
"eval_runtime": 10.6752,
"eval_samples_per_second": 936.753,
"eval_steps_per_second": 1.218,
"step": 208
},
{
"epoch": 105.0,
"eval_loss": 1.4944827556610107,
"eval_runtime": 10.6755,
"eval_samples_per_second": 936.725,
"eval_steps_per_second": 1.218,
"step": 210
},
{
"epoch": 106.0,
"eval_loss": 1.4929250478744507,
"eval_runtime": 10.682,
"eval_samples_per_second": 936.158,
"eval_steps_per_second": 1.217,
"step": 212
},
{
"epoch": 107.0,
"eval_loss": 1.4915369749069214,
"eval_runtime": 10.6838,
"eval_samples_per_second": 935.995,
"eval_steps_per_second": 1.217,
"step": 214
},
{
"epoch": 108.0,
"eval_loss": 1.49076509475708,
"eval_runtime": 10.6769,
"eval_samples_per_second": 936.603,
"eval_steps_per_second": 1.218,
"step": 216
},
{
"epoch": 109.0,
"eval_loss": 1.4891281127929688,
"eval_runtime": 10.6778,
"eval_samples_per_second": 936.521,
"eval_steps_per_second": 1.217,
"step": 218
},
{
"epoch": 110.0,
"eval_loss": 1.486264944076538,
"eval_runtime": 10.67,
"eval_samples_per_second": 937.205,
"eval_steps_per_second": 1.218,
"step": 220
},
{
"epoch": 111.0,
"eval_loss": 1.481858730316162,
"eval_runtime": 10.6684,
"eval_samples_per_second": 937.351,
"eval_steps_per_second": 1.219,
"step": 222
},
{
"epoch": 112.0,
"eval_loss": 1.4790288209915161,
"eval_runtime": 10.6764,
"eval_samples_per_second": 936.646,
"eval_steps_per_second": 1.218,
"step": 224
},
{
"epoch": 113.0,
"eval_loss": 1.477858066558838,
"eval_runtime": 10.672,
"eval_samples_per_second": 937.028,
"eval_steps_per_second": 1.218,
"step": 226
},
{
"epoch": 114.0,
"eval_loss": 1.477932095527649,
"eval_runtime": 10.6779,
"eval_samples_per_second": 936.51,
"eval_steps_per_second": 1.217,
"step": 228
},
{
"epoch": 115.0,
"eval_loss": 1.4761333465576172,
"eval_runtime": 10.6829,
"eval_samples_per_second": 936.078,
"eval_steps_per_second": 1.217,
"step": 230
},
{
"epoch": 116.0,
"eval_loss": 1.472651481628418,
"eval_runtime": 10.6687,
"eval_samples_per_second": 937.324,
"eval_steps_per_second": 1.219,
"step": 232
},
{
"epoch": 117.0,
"eval_loss": 1.4697270393371582,
"eval_runtime": 10.6649,
"eval_samples_per_second": 937.657,
"eval_steps_per_second": 1.219,
"step": 234
},
{
"epoch": 118.0,
"eval_loss": 1.4677116870880127,
"eval_runtime": 10.6723,
"eval_samples_per_second": 937.001,
"eval_steps_per_second": 1.218,
"step": 236
},
{
"epoch": 119.0,
"eval_loss": 1.4658899307250977,
"eval_runtime": 10.6745,
"eval_samples_per_second": 936.811,
"eval_steps_per_second": 1.218,
"step": 238
},
{
"epoch": 120.0,
"eval_loss": 1.4640589952468872,
"eval_runtime": 10.6687,
"eval_samples_per_second": 937.323,
"eval_steps_per_second": 1.219,
"step": 240
},
{
"epoch": 121.0,
"eval_loss": 1.4623686075210571,
"eval_runtime": 10.6735,
"eval_samples_per_second": 936.896,
"eval_steps_per_second": 1.218,
"step": 242
},
{
"epoch": 122.0,
"eval_loss": 1.4619512557983398,
"eval_runtime": 10.6694,
"eval_samples_per_second": 937.256,
"eval_steps_per_second": 1.218,
"step": 244
},
{
"epoch": 123.0,
"eval_loss": 1.4633115530014038,
"eval_runtime": 10.6685,
"eval_samples_per_second": 937.343,
"eval_steps_per_second": 1.219,
"step": 246
},
{
"epoch": 124.0,
"eval_loss": 1.464633584022522,
"eval_runtime": 10.6778,
"eval_samples_per_second": 936.522,
"eval_steps_per_second": 1.217,
"step": 248
},
{
"epoch": 125.0,
"eval_loss": 1.464717984199524,
"eval_runtime": 10.7082,
"eval_samples_per_second": 933.861,
"eval_steps_per_second": 1.214,
"step": 250
},
{
"epoch": 126.0,
"eval_loss": 1.4619494676589966,
"eval_runtime": 10.6743,
"eval_samples_per_second": 936.83,
"eval_steps_per_second": 1.218,
"step": 252
},
{
"epoch": 127.0,
"eval_loss": 1.4600682258605957,
"eval_runtime": 10.6839,
"eval_samples_per_second": 935.989,
"eval_steps_per_second": 1.217,
"step": 254
},
{
"epoch": 128.0,
"eval_loss": 1.460466980934143,
"eval_runtime": 10.6674,
"eval_samples_per_second": 937.434,
"eval_steps_per_second": 1.219,
"step": 256
},
{
"epoch": 129.0,
"eval_loss": 1.4635497331619263,
"eval_runtime": 10.685,
"eval_samples_per_second": 935.888,
"eval_steps_per_second": 1.217,
"step": 258
},
{
"epoch": 130.0,
"eval_loss": 1.4629032611846924,
"eval_runtime": 10.6911,
"eval_samples_per_second": 935.356,
"eval_steps_per_second": 1.216,
"step": 260
},
{
"epoch": 131.0,
"eval_loss": 1.4596558809280396,
"eval_runtime": 10.6919,
"eval_samples_per_second": 935.291,
"eval_steps_per_second": 1.216,
"step": 262
},
{
"epoch": 132.0,
"eval_loss": 1.4560246467590332,
"eval_runtime": 10.6769,
"eval_samples_per_second": 936.603,
"eval_steps_per_second": 1.218,
"step": 264
},
{
"epoch": 133.0,
"eval_loss": 1.453616738319397,
"eval_runtime": 10.6767,
"eval_samples_per_second": 936.622,
"eval_steps_per_second": 1.218,
"step": 266
},
{
"epoch": 134.0,
"eval_loss": 1.4538458585739136,
"eval_runtime": 10.6773,
"eval_samples_per_second": 936.563,
"eval_steps_per_second": 1.218,
"step": 268
},
{
"epoch": 135.0,
"eval_loss": 1.4554544687271118,
"eval_runtime": 10.6749,
"eval_samples_per_second": 936.779,
"eval_steps_per_second": 1.218,
"step": 270
},
{
"epoch": 136.0,
"eval_loss": 1.458116054534912,
"eval_runtime": 10.6729,
"eval_samples_per_second": 936.95,
"eval_steps_per_second": 1.218,
"step": 272
},
{
"epoch": 137.0,
"eval_loss": 1.458404541015625,
"eval_runtime": 10.6614,
"eval_samples_per_second": 937.965,
"eval_steps_per_second": 1.219,
"step": 274
},
{
"epoch": 138.0,
"eval_loss": 1.4573506116867065,
"eval_runtime": 10.6734,
"eval_samples_per_second": 936.913,
"eval_steps_per_second": 1.218,
"step": 276
},
{
"epoch": 139.0,
"eval_loss": 1.4543631076812744,
"eval_runtime": 10.6672,
"eval_samples_per_second": 937.452,
"eval_steps_per_second": 1.219,
"step": 278
},
{
"epoch": 140.0,
"eval_loss": 1.4508144855499268,
"eval_runtime": 10.6794,
"eval_samples_per_second": 936.382,
"eval_steps_per_second": 1.217,
"step": 280
},
{
"epoch": 141.0,
"eval_loss": 1.447538137435913,
"eval_runtime": 10.6758,
"eval_samples_per_second": 936.702,
"eval_steps_per_second": 1.218,
"step": 282
},
{
"epoch": 142.0,
"eval_loss": 1.4447238445281982,
"eval_runtime": 10.6659,
"eval_samples_per_second": 937.568,
"eval_steps_per_second": 1.219,
"step": 284
},
{
"epoch": 143.0,
"eval_loss": 1.4447556734085083,
"eval_runtime": 10.6716,
"eval_samples_per_second": 937.067,
"eval_steps_per_second": 1.218,
"step": 286
},
{
"epoch": 144.0,
"eval_loss": 1.4442577362060547,
"eval_runtime": 10.676,
"eval_samples_per_second": 936.684,
"eval_steps_per_second": 1.218,
"step": 288
},
{
"epoch": 145.0,
"eval_loss": 1.4424697160720825,
"eval_runtime": 10.668,
"eval_samples_per_second": 937.381,
"eval_steps_per_second": 1.219,
"step": 290
},
{
"epoch": 146.0,
"eval_loss": 1.4408490657806396,
"eval_runtime": 10.6762,
"eval_samples_per_second": 936.664,
"eval_steps_per_second": 1.218,
"step": 292
},
{
"epoch": 147.0,
"eval_loss": 1.4410812854766846,
"eval_runtime": 10.6701,
"eval_samples_per_second": 937.198,
"eval_steps_per_second": 1.218,
"step": 294
},
{
"epoch": 148.0,
"eval_loss": 1.4422898292541504,
"eval_runtime": 10.7048,
"eval_samples_per_second": 934.163,
"eval_steps_per_second": 1.214,
"step": 296
},
{
"epoch": 149.0,
"eval_loss": 1.4430429935455322,
"eval_runtime": 10.7029,
"eval_samples_per_second": 934.33,
"eval_steps_per_second": 1.215,
"step": 298
},
{
"epoch": 150.0,
"eval_loss": 1.4431097507476807,
"eval_runtime": 10.701,
"eval_samples_per_second": 934.49,
"eval_steps_per_second": 1.215,
"step": 300
},
{
"epoch": 151.0,
"eval_loss": 1.4416182041168213,
"eval_runtime": 10.7066,
"eval_samples_per_second": 934.002,
"eval_steps_per_second": 1.214,
"step": 302
},
{
"epoch": 152.0,
"eval_loss": 1.4399303197860718,
"eval_runtime": 10.6813,
"eval_samples_per_second": 936.212,
"eval_steps_per_second": 1.217,
"step": 304
},
{
"epoch": 153.0,
"eval_loss": 1.437761664390564,
"eval_runtime": 10.6766,
"eval_samples_per_second": 936.628,
"eval_steps_per_second": 1.218,
"step": 306
},
{
"epoch": 154.0,
"eval_loss": 1.436241865158081,
"eval_runtime": 10.6783,
"eval_samples_per_second": 936.479,
"eval_steps_per_second": 1.217,
"step": 308
},
{
"epoch": 155.0,
"eval_loss": 1.4360020160675049,
"eval_runtime": 10.7009,
"eval_samples_per_second": 934.499,
"eval_steps_per_second": 1.215,
"step": 310
},
{
"epoch": 156.0,
"eval_loss": 1.4338910579681396,
"eval_runtime": 10.6868,
"eval_samples_per_second": 935.733,
"eval_steps_per_second": 1.216,
"step": 312
},
{
"epoch": 157.0,
"eval_loss": 1.4325634241104126,
"eval_runtime": 10.6777,
"eval_samples_per_second": 936.527,
"eval_steps_per_second": 1.217,
"step": 314
},
{
"epoch": 158.0,
"eval_loss": 1.4329997301101685,
"eval_runtime": 10.6692,
"eval_samples_per_second": 937.275,
"eval_steps_per_second": 1.218,
"step": 316
},
{
"epoch": 159.0,
"eval_loss": 1.4324475526809692,
"eval_runtime": 10.6817,
"eval_samples_per_second": 936.178,
"eval_steps_per_second": 1.217,
"step": 318
},
{
"epoch": 160.0,
"eval_loss": 1.4320900440216064,
"eval_runtime": 10.6759,
"eval_samples_per_second": 936.685,
"eval_steps_per_second": 1.218,
"step": 320
},
{
"epoch": 161.0,
"eval_loss": 1.4325189590454102,
"eval_runtime": 10.6696,
"eval_samples_per_second": 937.244,
"eval_steps_per_second": 1.218,
"step": 322
},
{
"epoch": 162.0,
"eval_loss": 1.4319082498550415,
"eval_runtime": 10.6597,
"eval_samples_per_second": 938.116,
"eval_steps_per_second": 1.22,
"step": 324
},
{
"epoch": 163.0,
"eval_loss": 1.432405710220337,
"eval_runtime": 10.6632,
"eval_samples_per_second": 937.807,
"eval_steps_per_second": 1.219,
"step": 326
},
{
"epoch": 164.0,
"eval_loss": 1.4340990781784058,
"eval_runtime": 10.6793,
"eval_samples_per_second": 936.393,
"eval_steps_per_second": 1.217,
"step": 328
},
{
"epoch": 165.0,
"eval_loss": 1.4348700046539307,
"eval_runtime": 10.6745,
"eval_samples_per_second": 936.81,
"eval_steps_per_second": 1.218,
"step": 330
},
{
"epoch": 166.0,
"eval_loss": 1.4340572357177734,
"eval_runtime": 10.6758,
"eval_samples_per_second": 936.694,
"eval_steps_per_second": 1.218,
"step": 332
},
{
"epoch": 167.0,
"eval_loss": 1.432230830192566,
"eval_runtime": 10.6679,
"eval_samples_per_second": 937.39,
"eval_steps_per_second": 1.219,
"step": 334
},
{
"epoch": 168.0,
"eval_loss": 1.4290224313735962,
"eval_runtime": 10.6717,
"eval_samples_per_second": 937.056,
"eval_steps_per_second": 1.218,
"step": 336
},
{
"epoch": 169.0,
"eval_loss": 1.4260591268539429,
"eval_runtime": 10.6646,
"eval_samples_per_second": 937.683,
"eval_steps_per_second": 1.219,
"step": 338
},
{
"epoch": 170.0,
"eval_loss": 1.4241833686828613,
"eval_runtime": 10.6743,
"eval_samples_per_second": 936.827,
"eval_steps_per_second": 1.218,
"step": 340
},
{
"epoch": 171.0,
"eval_loss": 1.4232484102249146,
"eval_runtime": 10.6668,
"eval_samples_per_second": 937.489,
"eval_steps_per_second": 1.219,
"step": 342
},
{
"epoch": 172.0,
"eval_loss": 1.4233402013778687,
"eval_runtime": 10.674,
"eval_samples_per_second": 936.859,
"eval_steps_per_second": 1.218,
"step": 344
},
{
"epoch": 173.0,
"eval_loss": 1.4248952865600586,
"eval_runtime": 10.6819,
"eval_samples_per_second": 936.165,
"eval_steps_per_second": 1.217,
"step": 346
},
{
"epoch": 174.0,
"eval_loss": 1.4270906448364258,
"eval_runtime": 10.6759,
"eval_samples_per_second": 936.692,
"eval_steps_per_second": 1.218,
"step": 348
},
{
"epoch": 175.0,
"eval_loss": 1.4285681247711182,
"eval_runtime": 10.6797,
"eval_samples_per_second": 936.356,
"eval_steps_per_second": 1.217,
"step": 350
},
{
"epoch": 176.0,
"eval_loss": 1.4294575452804565,
"eval_runtime": 10.6749,
"eval_samples_per_second": 936.78,
"eval_steps_per_second": 1.218,
"step": 352
},
{
"epoch": 177.0,
"eval_loss": 1.428400993347168,
"eval_runtime": 10.6621,
"eval_samples_per_second": 937.902,
"eval_steps_per_second": 1.219,
"step": 354
},
{
"epoch": 178.0,
"eval_loss": 1.4263620376586914,
"eval_runtime": 10.6806,
"eval_samples_per_second": 936.281,
"eval_steps_per_second": 1.217,
"step": 356
},
{
"epoch": 179.0,
"eval_loss": 1.4240350723266602,
"eval_runtime": 10.669,
"eval_samples_per_second": 937.296,
"eval_steps_per_second": 1.218,
"step": 358
},
{
"epoch": 180.0,
"eval_loss": 1.4221690893173218,
"eval_runtime": 10.6788,
"eval_samples_per_second": 936.431,
"eval_steps_per_second": 1.217,
"step": 360
},
{
"epoch": 181.0,
"eval_loss": 1.4208089113235474,
"eval_runtime": 10.6801,
"eval_samples_per_second": 936.319,
"eval_steps_per_second": 1.217,
"step": 362
}
],
"logging_steps": 500,
"max_steps": 400,
"num_train_epochs": 200,
"save_steps": 500,
"total_flos": 5196677296128000.0,
"trial_name": null,
"trial_params": null
}