|
{ |
|
"best_metric": 0.9622641509433962, |
|
"best_model_checkpoint": "wav2vec2-2Class-easy-train-test-large/checkpoint-2520", |
|
"epoch": 782.2222222222222, |
|
"eval_steps": 500, |
|
"global_step": 8800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.4088050314465409, |
|
"eval_loss": 0.7003181576728821, |
|
"eval_runtime": 1.8048, |
|
"eval_samples_per_second": 88.1, |
|
"eval_steps_per_second": 5.541, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_accuracy": 0.4088050314465409, |
|
"eval_loss": 0.7001124620437622, |
|
"eval_runtime": 1.7728, |
|
"eval_samples_per_second": 89.69, |
|
"eval_steps_per_second": 5.641, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_accuracy": 0.41509433962264153, |
|
"eval_loss": 0.69970703125, |
|
"eval_runtime": 1.7593, |
|
"eval_samples_per_second": 90.375, |
|
"eval_steps_per_second": 5.684, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.42138364779874216, |
|
"eval_loss": 0.6991450786590576, |
|
"eval_runtime": 1.7582, |
|
"eval_samples_per_second": 90.433, |
|
"eval_steps_per_second": 5.688, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"grad_norm": 0.8353477716445923, |
|
"learning_rate": 1.7045454545454546e-06, |
|
"loss": 0.6976, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"eval_accuracy": 0.4276729559748428, |
|
"eval_loss": 0.6984724998474121, |
|
"eval_runtime": 1.7849, |
|
"eval_samples_per_second": 89.08, |
|
"eval_steps_per_second": 5.603, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"eval_accuracy": 0.44025157232704404, |
|
"eval_loss": 0.697744607925415, |
|
"eval_runtime": 2.127, |
|
"eval_samples_per_second": 74.753, |
|
"eval_steps_per_second": 4.701, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"eval_accuracy": 0.44654088050314467, |
|
"eval_loss": 0.6968724727630615, |
|
"eval_runtime": 2.2513, |
|
"eval_samples_per_second": 70.624, |
|
"eval_steps_per_second": 4.442, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.46540880503144655, |
|
"eval_loss": 0.6957085728645325, |
|
"eval_runtime": 2.1194, |
|
"eval_samples_per_second": 75.021, |
|
"eval_steps_per_second": 4.718, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"grad_norm": 0.45805710554122925, |
|
"learning_rate": 3.409090909090909e-06, |
|
"loss": 0.6952, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"eval_accuracy": 0.46540880503144655, |
|
"eval_loss": 0.6945385932922363, |
|
"eval_runtime": 2.2918, |
|
"eval_samples_per_second": 69.378, |
|
"eval_steps_per_second": 4.363, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"eval_accuracy": 0.4779874213836478, |
|
"eval_loss": 0.6933900117874146, |
|
"eval_runtime": 2.2504, |
|
"eval_samples_per_second": 70.654, |
|
"eval_steps_per_second": 4.444, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 10.93, |
|
"eval_accuracy": 0.49056603773584906, |
|
"eval_loss": 0.692146360874176, |
|
"eval_runtime": 2.1543, |
|
"eval_samples_per_second": 73.804, |
|
"eval_steps_per_second": 4.642, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5471698113207547, |
|
"eval_loss": 0.6906170845031738, |
|
"eval_runtime": 2.0832, |
|
"eval_samples_per_second": 76.326, |
|
"eval_steps_per_second": 4.8, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"eval_accuracy": 0.610062893081761, |
|
"eval_loss": 0.6892228722572327, |
|
"eval_runtime": 2.0269, |
|
"eval_samples_per_second": 78.443, |
|
"eval_steps_per_second": 4.934, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"grad_norm": 0.6493268609046936, |
|
"learning_rate": 5.1136363636363635e-06, |
|
"loss": 0.6911, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 13.96, |
|
"eval_accuracy": 0.6037735849056604, |
|
"eval_loss": 0.6878040432929993, |
|
"eval_runtime": 2.1502, |
|
"eval_samples_per_second": 73.946, |
|
"eval_steps_per_second": 4.651, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"eval_accuracy": 0.5911949685534591, |
|
"eval_loss": 0.6863483190536499, |
|
"eval_runtime": 2.0844, |
|
"eval_samples_per_second": 76.279, |
|
"eval_steps_per_second": 4.797, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5911949685534591, |
|
"eval_loss": 0.6847361326217651, |
|
"eval_runtime": 2.1372, |
|
"eval_samples_per_second": 74.395, |
|
"eval_steps_per_second": 4.679, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"eval_accuracy": 0.5849056603773585, |
|
"eval_loss": 0.6830993294715881, |
|
"eval_runtime": 2.3473, |
|
"eval_samples_per_second": 67.739, |
|
"eval_steps_per_second": 4.26, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 17.78, |
|
"grad_norm": 0.5862739086151123, |
|
"learning_rate": 6.818181818181818e-06, |
|
"loss": 0.6852, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 17.96, |
|
"eval_accuracy": 0.5849056603773585, |
|
"eval_loss": 0.6815393567085266, |
|
"eval_runtime": 2.1307, |
|
"eval_samples_per_second": 74.623, |
|
"eval_steps_per_second": 4.693, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 18.93, |
|
"eval_accuracy": 0.5849056603773585, |
|
"eval_loss": 0.679994523525238, |
|
"eval_runtime": 2.082, |
|
"eval_samples_per_second": 76.37, |
|
"eval_steps_per_second": 4.803, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5849056603773585, |
|
"eval_loss": 0.6782289147377014, |
|
"eval_runtime": 2.1302, |
|
"eval_samples_per_second": 74.641, |
|
"eval_steps_per_second": 4.694, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 20.98, |
|
"eval_accuracy": 0.5849056603773585, |
|
"eval_loss": 0.6765275001525879, |
|
"eval_runtime": 2.0229, |
|
"eval_samples_per_second": 78.601, |
|
"eval_steps_per_second": 4.943, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 21.96, |
|
"eval_accuracy": 0.5849056603773585, |
|
"eval_loss": 0.6749551892280579, |
|
"eval_runtime": 2.0505, |
|
"eval_samples_per_second": 77.542, |
|
"eval_steps_per_second": 4.877, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 22.22, |
|
"grad_norm": 0.10243403911590576, |
|
"learning_rate": 8.522727272727273e-06, |
|
"loss": 0.6783, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 22.93, |
|
"eval_accuracy": 0.5849056603773585, |
|
"eval_loss": 0.6732170581817627, |
|
"eval_runtime": 2.0616, |
|
"eval_samples_per_second": 77.125, |
|
"eval_steps_per_second": 4.851, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5849056603773585, |
|
"eval_loss": 0.6713252067565918, |
|
"eval_runtime": 2.1605, |
|
"eval_samples_per_second": 73.595, |
|
"eval_steps_per_second": 4.629, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 24.98, |
|
"eval_accuracy": 0.5849056603773585, |
|
"eval_loss": 0.6694673895835876, |
|
"eval_runtime": 2.0526, |
|
"eval_samples_per_second": 77.462, |
|
"eval_steps_per_second": 4.872, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 25.96, |
|
"eval_accuracy": 0.5849056603773585, |
|
"eval_loss": 0.6674391031265259, |
|
"eval_runtime": 2.1284, |
|
"eval_samples_per_second": 74.704, |
|
"eval_steps_per_second": 4.698, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"grad_norm": 0.3114006221294403, |
|
"learning_rate": 1.0227272727272727e-05, |
|
"loss": 0.6676, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 26.93, |
|
"eval_accuracy": 0.5849056603773585, |
|
"eval_loss": 0.6654335856437683, |
|
"eval_runtime": 1.9991, |
|
"eval_samples_per_second": 79.535, |
|
"eval_steps_per_second": 5.002, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.5849056603773585, |
|
"eval_loss": 0.6630644202232361, |
|
"eval_runtime": 2.0451, |
|
"eval_samples_per_second": 77.745, |
|
"eval_steps_per_second": 4.89, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 28.98, |
|
"eval_accuracy": 0.5849056603773585, |
|
"eval_loss": 0.6605831980705261, |
|
"eval_runtime": 2.0625, |
|
"eval_samples_per_second": 77.092, |
|
"eval_steps_per_second": 4.849, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 29.96, |
|
"eval_accuracy": 0.5849056603773585, |
|
"eval_loss": 0.6578991413116455, |
|
"eval_runtime": 2.0381, |
|
"eval_samples_per_second": 78.014, |
|
"eval_steps_per_second": 4.907, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 30.93, |
|
"eval_accuracy": 0.5849056603773585, |
|
"eval_loss": 0.6539114713668823, |
|
"eval_runtime": 1.9774, |
|
"eval_samples_per_second": 80.407, |
|
"eval_steps_per_second": 5.057, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 31.11, |
|
"grad_norm": 0.2134709656238556, |
|
"learning_rate": 1.1931818181818181e-05, |
|
"loss": 0.6516, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.5974842767295597, |
|
"eval_loss": 0.6492742896080017, |
|
"eval_runtime": 2.0601, |
|
"eval_samples_per_second": 77.182, |
|
"eval_steps_per_second": 4.854, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 32.98, |
|
"eval_accuracy": 0.610062893081761, |
|
"eval_loss": 0.6441397070884705, |
|
"eval_runtime": 2.0739, |
|
"eval_samples_per_second": 76.667, |
|
"eval_steps_per_second": 4.822, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 33.96, |
|
"eval_accuracy": 0.6226415094339622, |
|
"eval_loss": 0.6348815560340881, |
|
"eval_runtime": 2.1526, |
|
"eval_samples_per_second": 73.865, |
|
"eval_steps_per_second": 4.646, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 34.93, |
|
"eval_accuracy": 0.6289308176100629, |
|
"eval_loss": 0.6257140040397644, |
|
"eval_runtime": 2.0081, |
|
"eval_samples_per_second": 79.179, |
|
"eval_steps_per_second": 4.98, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 35.56, |
|
"grad_norm": 0.8974349498748779, |
|
"learning_rate": 1.3636363636363637e-05, |
|
"loss": 0.6124, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.6415094339622641, |
|
"eval_loss": 0.611738920211792, |
|
"eval_runtime": 1.9854, |
|
"eval_samples_per_second": 80.083, |
|
"eval_steps_per_second": 5.037, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 36.98, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 0.5910706520080566, |
|
"eval_runtime": 2.0618, |
|
"eval_samples_per_second": 77.117, |
|
"eval_steps_per_second": 4.85, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 37.96, |
|
"eval_accuracy": 0.6918238993710691, |
|
"eval_loss": 0.5672016143798828, |
|
"eval_runtime": 2.0402, |
|
"eval_samples_per_second": 77.932, |
|
"eval_steps_per_second": 4.901, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 38.93, |
|
"eval_accuracy": 0.7232704402515723, |
|
"eval_loss": 0.5392354130744934, |
|
"eval_runtime": 2.2936, |
|
"eval_samples_per_second": 69.324, |
|
"eval_steps_per_second": 4.36, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 0.7736309170722961, |
|
"learning_rate": 1.534090909090909e-05, |
|
"loss": 0.5073, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7547169811320755, |
|
"eval_loss": 0.5041937232017517, |
|
"eval_runtime": 2.1247, |
|
"eval_samples_per_second": 74.835, |
|
"eval_steps_per_second": 4.707, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 40.98, |
|
"eval_accuracy": 0.7672955974842768, |
|
"eval_loss": 0.47902750968933105, |
|
"eval_runtime": 2.163, |
|
"eval_samples_per_second": 73.509, |
|
"eval_steps_per_second": 4.623, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 41.96, |
|
"eval_accuracy": 0.779874213836478, |
|
"eval_loss": 0.47594940662384033, |
|
"eval_runtime": 2.1321, |
|
"eval_samples_per_second": 74.574, |
|
"eval_steps_per_second": 4.69, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 42.93, |
|
"eval_accuracy": 0.7987421383647799, |
|
"eval_loss": 0.4369964003562927, |
|
"eval_runtime": 2.1555, |
|
"eval_samples_per_second": 73.765, |
|
"eval_steps_per_second": 4.639, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7987421383647799, |
|
"eval_loss": 0.43516698479652405, |
|
"eval_runtime": 2.032, |
|
"eval_samples_per_second": 78.249, |
|
"eval_steps_per_second": 4.921, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 44.44, |
|
"grad_norm": 0.4976819157600403, |
|
"learning_rate": 1.7045454545454546e-05, |
|
"loss": 0.3489, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 44.98, |
|
"eval_accuracy": 0.7987421383647799, |
|
"eval_loss": 0.4422326385974884, |
|
"eval_runtime": 2.1135, |
|
"eval_samples_per_second": 75.231, |
|
"eval_steps_per_second": 4.732, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 45.96, |
|
"eval_accuracy": 0.8050314465408805, |
|
"eval_loss": 0.41540881991386414, |
|
"eval_runtime": 2.0847, |
|
"eval_samples_per_second": 76.27, |
|
"eval_steps_per_second": 4.797, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 46.93, |
|
"eval_accuracy": 0.8050314465408805, |
|
"eval_loss": 0.4131433367729187, |
|
"eval_runtime": 1.9752, |
|
"eval_samples_per_second": 80.498, |
|
"eval_steps_per_second": 5.063, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.8113207547169812, |
|
"eval_loss": 0.3975575864315033, |
|
"eval_runtime": 2.01, |
|
"eval_samples_per_second": 79.104, |
|
"eval_steps_per_second": 4.975, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 48.89, |
|
"grad_norm": 0.5197520852088928, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.2962, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 48.98, |
|
"eval_accuracy": 0.8113207547169812, |
|
"eval_loss": 0.39397454261779785, |
|
"eval_runtime": 2.0261, |
|
"eval_samples_per_second": 78.474, |
|
"eval_steps_per_second": 4.935, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 49.96, |
|
"eval_accuracy": 0.8238993710691824, |
|
"eval_loss": 0.371494859457016, |
|
"eval_runtime": 2.0246, |
|
"eval_samples_per_second": 78.535, |
|
"eval_steps_per_second": 4.939, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 50.93, |
|
"eval_accuracy": 0.8427672955974843, |
|
"eval_loss": 0.34951409697532654, |
|
"eval_runtime": 2.3286, |
|
"eval_samples_per_second": 68.281, |
|
"eval_steps_per_second": 4.294, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.8364779874213837, |
|
"eval_loss": 0.3481156826019287, |
|
"eval_runtime": 1.9542, |
|
"eval_samples_per_second": 81.362, |
|
"eval_steps_per_second": 5.117, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 52.98, |
|
"eval_accuracy": 0.8176100628930818, |
|
"eval_loss": 0.3817409873008728, |
|
"eval_runtime": 2.0789, |
|
"eval_samples_per_second": 76.484, |
|
"eval_steps_per_second": 4.81, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 53.33, |
|
"grad_norm": 0.5608111023902893, |
|
"learning_rate": 2.0454545454545454e-05, |
|
"loss": 0.2573, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 53.96, |
|
"eval_accuracy": 0.8490566037735849, |
|
"eval_loss": 0.3412492871284485, |
|
"eval_runtime": 2.0746, |
|
"eval_samples_per_second": 76.642, |
|
"eval_steps_per_second": 4.82, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 54.93, |
|
"eval_accuracy": 0.8490566037735849, |
|
"eval_loss": 0.32929155230522156, |
|
"eval_runtime": 1.9991, |
|
"eval_samples_per_second": 79.538, |
|
"eval_steps_per_second": 5.002, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.8427672955974843, |
|
"eval_loss": 0.3547687232494354, |
|
"eval_runtime": 2.1242, |
|
"eval_samples_per_second": 74.851, |
|
"eval_steps_per_second": 4.708, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 56.98, |
|
"eval_accuracy": 0.8427672955974843, |
|
"eval_loss": 0.3044220209121704, |
|
"eval_runtime": 2.0508, |
|
"eval_samples_per_second": 77.532, |
|
"eval_steps_per_second": 4.876, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 57.78, |
|
"grad_norm": 0.894092321395874, |
|
"learning_rate": 2.215909090909091e-05, |
|
"loss": 0.2279, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 57.96, |
|
"eval_accuracy": 0.8490566037735849, |
|
"eval_loss": 0.32347577810287476, |
|
"eval_runtime": 2.2095, |
|
"eval_samples_per_second": 71.963, |
|
"eval_steps_per_second": 4.526, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 58.93, |
|
"eval_accuracy": 0.8490566037735849, |
|
"eval_loss": 0.3371436297893524, |
|
"eval_runtime": 2.1055, |
|
"eval_samples_per_second": 75.518, |
|
"eval_steps_per_second": 4.75, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.8490566037735849, |
|
"eval_loss": 0.31275492906570435, |
|
"eval_runtime": 2.1311, |
|
"eval_samples_per_second": 74.61, |
|
"eval_steps_per_second": 4.692, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 60.98, |
|
"eval_accuracy": 0.8553459119496856, |
|
"eval_loss": 0.32111966609954834, |
|
"eval_runtime": 2.0639, |
|
"eval_samples_per_second": 77.038, |
|
"eval_steps_per_second": 4.845, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 61.96, |
|
"eval_accuracy": 0.8616352201257862, |
|
"eval_loss": 0.302960604429245, |
|
"eval_runtime": 2.0241, |
|
"eval_samples_per_second": 78.552, |
|
"eval_steps_per_second": 4.94, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 62.22, |
|
"grad_norm": 0.4315973222255707, |
|
"learning_rate": 2.3863636363636362e-05, |
|
"loss": 0.2167, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 62.93, |
|
"eval_accuracy": 0.8616352201257862, |
|
"eval_loss": 0.29696550965309143, |
|
"eval_runtime": 2.034, |
|
"eval_samples_per_second": 78.169, |
|
"eval_steps_per_second": 4.916, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.8679245283018868, |
|
"eval_loss": 0.29949402809143066, |
|
"eval_runtime": 2.095, |
|
"eval_samples_per_second": 75.897, |
|
"eval_steps_per_second": 4.773, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 64.98, |
|
"eval_accuracy": 0.8742138364779874, |
|
"eval_loss": 0.2867083251476288, |
|
"eval_runtime": 2.0417, |
|
"eval_samples_per_second": 77.876, |
|
"eval_steps_per_second": 4.898, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 65.96, |
|
"eval_accuracy": 0.8930817610062893, |
|
"eval_loss": 0.26363295316696167, |
|
"eval_runtime": 2.1382, |
|
"eval_samples_per_second": 74.363, |
|
"eval_steps_per_second": 4.677, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"grad_norm": 0.37665870785713196, |
|
"learning_rate": 2.556818181818182e-05, |
|
"loss": 0.207, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 66.93, |
|
"eval_accuracy": 0.8805031446540881, |
|
"eval_loss": 0.28482353687286377, |
|
"eval_runtime": 2.1166, |
|
"eval_samples_per_second": 75.119, |
|
"eval_steps_per_second": 4.724, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.8867924528301887, |
|
"eval_loss": 0.2750767767429352, |
|
"eval_runtime": 2.1981, |
|
"eval_samples_per_second": 72.336, |
|
"eval_steps_per_second": 4.549, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 68.98, |
|
"eval_accuracy": 0.8930817610062893, |
|
"eval_loss": 0.256393700838089, |
|
"eval_runtime": 2.033, |
|
"eval_samples_per_second": 78.211, |
|
"eval_steps_per_second": 4.919, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 69.96, |
|
"eval_accuracy": 0.8930817610062893, |
|
"eval_loss": 0.25443732738494873, |
|
"eval_runtime": 2.0096, |
|
"eval_samples_per_second": 79.121, |
|
"eval_steps_per_second": 4.976, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 70.93, |
|
"eval_accuracy": 0.8742138364779874, |
|
"eval_loss": 0.2954423129558563, |
|
"eval_runtime": 2.1018, |
|
"eval_samples_per_second": 75.649, |
|
"eval_steps_per_second": 4.758, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 71.11, |
|
"grad_norm": 0.7302255630493164, |
|
"learning_rate": 2.7272727272727273e-05, |
|
"loss": 0.1899, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.8930817610062893, |
|
"eval_loss": 0.25169771909713745, |
|
"eval_runtime": 2.041, |
|
"eval_samples_per_second": 77.904, |
|
"eval_steps_per_second": 4.9, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 72.98, |
|
"eval_accuracy": 0.8930817610062893, |
|
"eval_loss": 0.2506076693534851, |
|
"eval_runtime": 2.0257, |
|
"eval_samples_per_second": 78.49, |
|
"eval_steps_per_second": 4.936, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 73.96, |
|
"eval_accuracy": 0.8930817610062893, |
|
"eval_loss": 0.2434261441230774, |
|
"eval_runtime": 2.0325, |
|
"eval_samples_per_second": 78.23, |
|
"eval_steps_per_second": 4.92, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 74.93, |
|
"eval_accuracy": 0.89937106918239, |
|
"eval_loss": 0.23832084238529205, |
|
"eval_runtime": 2.1871, |
|
"eval_samples_per_second": 72.699, |
|
"eval_steps_per_second": 4.572, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 75.56, |
|
"grad_norm": 0.5180615186691284, |
|
"learning_rate": 2.897727272727273e-05, |
|
"loss": 0.1801, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.89937106918239, |
|
"eval_loss": 0.23464229702949524, |
|
"eval_runtime": 2.026, |
|
"eval_samples_per_second": 78.48, |
|
"eval_steps_per_second": 4.936, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 76.98, |
|
"eval_accuracy": 0.89937106918239, |
|
"eval_loss": 0.22975026071071625, |
|
"eval_runtime": 2.0881, |
|
"eval_samples_per_second": 76.147, |
|
"eval_steps_per_second": 4.789, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 77.96, |
|
"eval_accuracy": 0.9056603773584906, |
|
"eval_loss": 0.2403678596019745, |
|
"eval_runtime": 2.075, |
|
"eval_samples_per_second": 76.626, |
|
"eval_steps_per_second": 4.819, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 78.93, |
|
"eval_accuracy": 0.8930817610062893, |
|
"eval_loss": 0.2674010097980499, |
|
"eval_runtime": 2.037, |
|
"eval_samples_per_second": 78.057, |
|
"eval_steps_per_second": 4.909, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 1.2135472297668457, |
|
"learning_rate": 2.9924242424242427e-05, |
|
"loss": 0.1692, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.89937106918239, |
|
"eval_loss": 0.2231501042842865, |
|
"eval_runtime": 2.0398, |
|
"eval_samples_per_second": 77.949, |
|
"eval_steps_per_second": 4.902, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 80.98, |
|
"eval_accuracy": 0.89937106918239, |
|
"eval_loss": 0.2390480935573578, |
|
"eval_runtime": 1.9822, |
|
"eval_samples_per_second": 80.213, |
|
"eval_steps_per_second": 5.045, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 81.96, |
|
"eval_accuracy": 0.8930817610062893, |
|
"eval_loss": 0.20583955943584442, |
|
"eval_runtime": 2.0665, |
|
"eval_samples_per_second": 76.94, |
|
"eval_steps_per_second": 4.839, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 82.93, |
|
"eval_accuracy": 0.9056603773584906, |
|
"eval_loss": 0.2114023119211197, |
|
"eval_runtime": 2.0736, |
|
"eval_samples_per_second": 76.678, |
|
"eval_steps_per_second": 4.823, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.89937106918239, |
|
"eval_loss": 0.24830691516399384, |
|
"eval_runtime": 2.0148, |
|
"eval_samples_per_second": 78.915, |
|
"eval_steps_per_second": 4.963, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 84.44, |
|
"grad_norm": 0.5111488103866577, |
|
"learning_rate": 2.9734848484848486e-05, |
|
"loss": 0.1691, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 84.98, |
|
"eval_accuracy": 0.9119496855345912, |
|
"eval_loss": 0.2259017676115036, |
|
"eval_runtime": 2.2201, |
|
"eval_samples_per_second": 71.618, |
|
"eval_steps_per_second": 4.504, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 85.96, |
|
"eval_accuracy": 0.9119496855345912, |
|
"eval_loss": 0.20239894092082977, |
|
"eval_runtime": 2.0671, |
|
"eval_samples_per_second": 76.918, |
|
"eval_steps_per_second": 4.838, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 86.93, |
|
"eval_accuracy": 0.89937106918239, |
|
"eval_loss": 0.20193150639533997, |
|
"eval_runtime": 2.0416, |
|
"eval_samples_per_second": 77.879, |
|
"eval_steps_per_second": 4.898, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.19625458121299744, |
|
"eval_runtime": 2.0196, |
|
"eval_samples_per_second": 78.73, |
|
"eval_steps_per_second": 4.952, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 88.89, |
|
"grad_norm": 0.4683234989643097, |
|
"learning_rate": 2.9545454545454545e-05, |
|
"loss": 0.1609, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 88.98, |
|
"eval_accuracy": 0.9119496855345912, |
|
"eval_loss": 0.21583892405033112, |
|
"eval_runtime": 2.0254, |
|
"eval_samples_per_second": 78.503, |
|
"eval_steps_per_second": 4.937, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 89.96, |
|
"eval_accuracy": 0.9119496855345912, |
|
"eval_loss": 0.197691410779953, |
|
"eval_runtime": 1.9978, |
|
"eval_samples_per_second": 79.586, |
|
"eval_steps_per_second": 5.005, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 90.93, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.19791610538959503, |
|
"eval_runtime": 2.0853, |
|
"eval_samples_per_second": 76.248, |
|
"eval_steps_per_second": 4.795, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.9119496855345912, |
|
"eval_loss": 0.20358721911907196, |
|
"eval_runtime": 2.1963, |
|
"eval_samples_per_second": 72.393, |
|
"eval_steps_per_second": 4.553, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 92.98, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.19769711792469025, |
|
"eval_runtime": 2.0089, |
|
"eval_samples_per_second": 79.146, |
|
"eval_steps_per_second": 4.978, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 93.33, |
|
"grad_norm": 0.6099847555160522, |
|
"learning_rate": 2.9356060606060604e-05, |
|
"loss": 0.1516, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 93.96, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.1974458247423172, |
|
"eval_runtime": 2.1182, |
|
"eval_samples_per_second": 75.065, |
|
"eval_steps_per_second": 4.721, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 94.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.1993919163942337, |
|
"eval_runtime": 2.0707, |
|
"eval_samples_per_second": 76.787, |
|
"eval_steps_per_second": 4.829, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.9119496855345912, |
|
"eval_loss": 0.1955273449420929, |
|
"eval_runtime": 2.0163, |
|
"eval_samples_per_second": 78.858, |
|
"eval_steps_per_second": 4.96, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 96.98, |
|
"eval_accuracy": 0.9119496855345912, |
|
"eval_loss": 0.19483698904514313, |
|
"eval_runtime": 2.0495, |
|
"eval_samples_per_second": 77.581, |
|
"eval_steps_per_second": 4.879, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 97.78, |
|
"grad_norm": 1.0578981637954712, |
|
"learning_rate": 2.9166666666666666e-05, |
|
"loss": 0.1386, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 97.96, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.19463855028152466, |
|
"eval_runtime": 2.0625, |
|
"eval_samples_per_second": 77.091, |
|
"eval_steps_per_second": 4.849, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 98.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.19323910772800446, |
|
"eval_runtime": 2.0028, |
|
"eval_samples_per_second": 79.389, |
|
"eval_steps_per_second": 4.993, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.1841806173324585, |
|
"eval_runtime": 2.1056, |
|
"eval_samples_per_second": 75.512, |
|
"eval_steps_per_second": 4.749, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 100.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.18839451670646667, |
|
"eval_runtime": 1.9858, |
|
"eval_samples_per_second": 80.07, |
|
"eval_steps_per_second": 5.036, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 101.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.1899903267621994, |
|
"eval_runtime": 2.2196, |
|
"eval_samples_per_second": 71.635, |
|
"eval_steps_per_second": 4.505, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 102.22, |
|
"grad_norm": 0.6229210495948792, |
|
"learning_rate": 2.897727272727273e-05, |
|
"loss": 0.1279, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 102.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.184115469455719, |
|
"eval_runtime": 2.0229, |
|
"eval_samples_per_second": 78.602, |
|
"eval_steps_per_second": 4.944, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.19207227230072021, |
|
"eval_runtime": 1.9639, |
|
"eval_samples_per_second": 80.962, |
|
"eval_steps_per_second": 5.092, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 104.98, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.19926591217517853, |
|
"eval_runtime": 2.0509, |
|
"eval_samples_per_second": 77.526, |
|
"eval_steps_per_second": 4.876, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 105.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.19455212354660034, |
|
"eval_runtime": 2.0496, |
|
"eval_samples_per_second": 77.577, |
|
"eval_steps_per_second": 4.879, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 106.67, |
|
"grad_norm": 1.2741256952285767, |
|
"learning_rate": 2.8787878787878788e-05, |
|
"loss": 0.1258, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 106.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.18963727355003357, |
|
"eval_runtime": 2.0026, |
|
"eval_samples_per_second": 79.395, |
|
"eval_steps_per_second": 4.993, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.1884273737668991, |
|
"eval_runtime": 2.0343, |
|
"eval_samples_per_second": 78.16, |
|
"eval_steps_per_second": 4.916, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 108.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.17940251529216766, |
|
"eval_runtime": 2.1734, |
|
"eval_samples_per_second": 73.156, |
|
"eval_steps_per_second": 4.601, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 109.96, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.18589730560779572, |
|
"eval_runtime": 2.0874, |
|
"eval_samples_per_second": 76.17, |
|
"eval_steps_per_second": 4.791, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 110.93, |
|
"eval_accuracy": 0.9119496855345912, |
|
"eval_loss": 0.2194768339395523, |
|
"eval_runtime": 2.0717, |
|
"eval_samples_per_second": 76.747, |
|
"eval_steps_per_second": 4.827, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 111.11, |
|
"grad_norm": 0.3613344430923462, |
|
"learning_rate": 2.859848484848485e-05, |
|
"loss": 0.1258, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.20826272666454315, |
|
"eval_runtime": 1.9861, |
|
"eval_samples_per_second": 80.057, |
|
"eval_steps_per_second": 5.035, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 112.98, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.21202689409255981, |
|
"eval_runtime": 2.0132, |
|
"eval_samples_per_second": 78.98, |
|
"eval_steps_per_second": 4.967, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 113.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.20663346350193024, |
|
"eval_runtime": 2.02, |
|
"eval_samples_per_second": 78.711, |
|
"eval_steps_per_second": 4.95, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 114.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.1931203156709671, |
|
"eval_runtime": 2.033, |
|
"eval_samples_per_second": 78.208, |
|
"eval_steps_per_second": 4.919, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 115.56, |
|
"grad_norm": 0.7503376007080078, |
|
"learning_rate": 2.8409090909090912e-05, |
|
"loss": 0.1023, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.19000084698200226, |
|
"eval_runtime": 2.0014, |
|
"eval_samples_per_second": 79.446, |
|
"eval_steps_per_second": 4.997, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 116.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.20288796722888947, |
|
"eval_runtime": 2.0774, |
|
"eval_samples_per_second": 76.539, |
|
"eval_steps_per_second": 4.814, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 117.96, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.19505923986434937, |
|
"eval_runtime": 2.0552, |
|
"eval_samples_per_second": 77.366, |
|
"eval_steps_per_second": 4.866, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 118.93, |
|
"eval_accuracy": 0.9119496855345912, |
|
"eval_loss": 0.20838169753551483, |
|
"eval_runtime": 2.2371, |
|
"eval_samples_per_second": 71.074, |
|
"eval_steps_per_second": 4.47, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"grad_norm": 0.2376416176557541, |
|
"learning_rate": 2.821969696969697e-05, |
|
"loss": 0.0997, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.2159019112586975, |
|
"eval_runtime": 2.0579, |
|
"eval_samples_per_second": 77.264, |
|
"eval_steps_per_second": 4.859, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 120.98, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.21662545204162598, |
|
"eval_runtime": 2.0756, |
|
"eval_samples_per_second": 76.605, |
|
"eval_steps_per_second": 4.818, |
|
"step": 1361 |
|
}, |
|
{ |
|
"epoch": 121.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.197323277592659, |
|
"eval_runtime": 2.0227, |
|
"eval_samples_per_second": 78.607, |
|
"eval_steps_per_second": 4.944, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 122.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.18507684767246246, |
|
"eval_runtime": 2.0728, |
|
"eval_samples_per_second": 76.706, |
|
"eval_steps_per_second": 4.824, |
|
"step": 1383 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.20666691660881042, |
|
"eval_runtime": 1.9717, |
|
"eval_samples_per_second": 80.642, |
|
"eval_steps_per_second": 5.072, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 124.44, |
|
"grad_norm": 0.3115290403366089, |
|
"learning_rate": 2.803030303030303e-05, |
|
"loss": 0.1021, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 124.98, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.19534242153167725, |
|
"eval_runtime": 2.0497, |
|
"eval_samples_per_second": 77.571, |
|
"eval_steps_per_second": 4.879, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 125.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.17650572955608368, |
|
"eval_runtime": 2.239, |
|
"eval_samples_per_second": 71.015, |
|
"eval_steps_per_second": 4.466, |
|
"step": 1417 |
|
}, |
|
{ |
|
"epoch": 126.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.18782062828540802, |
|
"eval_runtime": 2.0533, |
|
"eval_samples_per_second": 77.437, |
|
"eval_steps_per_second": 4.87, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.20708344876766205, |
|
"eval_runtime": 2.0414, |
|
"eval_samples_per_second": 77.887, |
|
"eval_steps_per_second": 4.899, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 128.89, |
|
"grad_norm": 1.2413551807403564, |
|
"learning_rate": 2.784090909090909e-05, |
|
"loss": 0.0883, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 128.98, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.2241077572107315, |
|
"eval_runtime": 1.9826, |
|
"eval_samples_per_second": 80.197, |
|
"eval_steps_per_second": 5.044, |
|
"step": 1451 |
|
}, |
|
{ |
|
"epoch": 129.96, |
|
"eval_accuracy": 0.9119496855345912, |
|
"eval_loss": 0.23481474816799164, |
|
"eval_runtime": 1.9747, |
|
"eval_samples_per_second": 80.518, |
|
"eval_steps_per_second": 5.064, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 130.93, |
|
"eval_accuracy": 0.9056603773584906, |
|
"eval_loss": 0.24748335778713226, |
|
"eval_runtime": 1.9737, |
|
"eval_samples_per_second": 80.559, |
|
"eval_steps_per_second": 5.067, |
|
"step": 1473 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.21596243977546692, |
|
"eval_runtime": 2.0455, |
|
"eval_samples_per_second": 77.733, |
|
"eval_steps_per_second": 4.889, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 132.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.20896825194358826, |
|
"eval_runtime": 2.047, |
|
"eval_samples_per_second": 77.675, |
|
"eval_steps_per_second": 4.885, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 133.33, |
|
"grad_norm": 0.56540846824646, |
|
"learning_rate": 2.7651515151515152e-05, |
|
"loss": 0.0769, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 133.96, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.21468934416770935, |
|
"eval_runtime": 1.9936, |
|
"eval_samples_per_second": 79.754, |
|
"eval_steps_per_second": 5.016, |
|
"step": 1507 |
|
}, |
|
{ |
|
"epoch": 134.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.22008037567138672, |
|
"eval_runtime": 2.0857, |
|
"eval_samples_per_second": 76.234, |
|
"eval_steps_per_second": 4.795, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.23723578453063965, |
|
"eval_runtime": 2.1872, |
|
"eval_samples_per_second": 72.695, |
|
"eval_steps_per_second": 4.572, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 136.98, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.21990692615509033, |
|
"eval_runtime": 2.0473, |
|
"eval_samples_per_second": 77.664, |
|
"eval_steps_per_second": 4.885, |
|
"step": 1541 |
|
}, |
|
{ |
|
"epoch": 137.78, |
|
"grad_norm": 1.0245180130004883, |
|
"learning_rate": 2.7462121212121214e-05, |
|
"loss": 0.0786, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 137.96, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.2087443619966507, |
|
"eval_runtime": 2.0577, |
|
"eval_samples_per_second": 77.271, |
|
"eval_steps_per_second": 4.86, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 138.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.18779344856739044, |
|
"eval_runtime": 2.0799, |
|
"eval_samples_per_second": 76.447, |
|
"eval_steps_per_second": 4.808, |
|
"step": 1563 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.1914655864238739, |
|
"eval_runtime": 2.043, |
|
"eval_samples_per_second": 77.827, |
|
"eval_steps_per_second": 4.895, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 140.98, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.23168283700942993, |
|
"eval_runtime": 2.0313, |
|
"eval_samples_per_second": 78.277, |
|
"eval_steps_per_second": 4.923, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 141.96, |
|
"eval_accuracy": 0.8930817610062893, |
|
"eval_loss": 0.2865447700023651, |
|
"eval_runtime": 2.0095, |
|
"eval_samples_per_second": 79.125, |
|
"eval_steps_per_second": 4.976, |
|
"step": 1597 |
|
}, |
|
{ |
|
"epoch": 142.22, |
|
"grad_norm": 1.393044352531433, |
|
"learning_rate": 2.7272727272727273e-05, |
|
"loss": 0.0714, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 142.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.22998519241809845, |
|
"eval_runtime": 2.1842, |
|
"eval_samples_per_second": 72.794, |
|
"eval_steps_per_second": 4.578, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_accuracy": 0.9056603773584906, |
|
"eval_loss": 0.27265357971191406, |
|
"eval_runtime": 2.0318, |
|
"eval_samples_per_second": 78.258, |
|
"eval_steps_per_second": 4.922, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 144.98, |
|
"eval_accuracy": 0.9056603773584906, |
|
"eval_loss": 0.28114742040634155, |
|
"eval_runtime": 2.0949, |
|
"eval_samples_per_second": 75.9, |
|
"eval_steps_per_second": 4.774, |
|
"step": 1631 |
|
}, |
|
{ |
|
"epoch": 145.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.21014899015426636, |
|
"eval_runtime": 2.0829, |
|
"eval_samples_per_second": 76.335, |
|
"eval_steps_per_second": 4.801, |
|
"step": 1642 |
|
}, |
|
{ |
|
"epoch": 146.67, |
|
"grad_norm": 1.1527929306030273, |
|
"learning_rate": 2.7083333333333335e-05, |
|
"loss": 0.0702, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 146.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.20363318920135498, |
|
"eval_runtime": 2.0224, |
|
"eval_samples_per_second": 78.618, |
|
"eval_steps_per_second": 4.945, |
|
"step": 1653 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.22154641151428223, |
|
"eval_runtime": 2.0286, |
|
"eval_samples_per_second": 78.378, |
|
"eval_steps_per_second": 4.929, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 148.98, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.21356013417243958, |
|
"eval_runtime": 1.9745, |
|
"eval_samples_per_second": 80.526, |
|
"eval_steps_per_second": 5.065, |
|
"step": 1676 |
|
}, |
|
{ |
|
"epoch": 149.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.20560431480407715, |
|
"eval_runtime": 2.0343, |
|
"eval_samples_per_second": 78.161, |
|
"eval_steps_per_second": 4.916, |
|
"step": 1687 |
|
}, |
|
{ |
|
"epoch": 150.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.20028233528137207, |
|
"eval_runtime": 2.0476, |
|
"eval_samples_per_second": 77.65, |
|
"eval_steps_per_second": 4.884, |
|
"step": 1698 |
|
}, |
|
{ |
|
"epoch": 151.11, |
|
"grad_norm": 0.6037131547927856, |
|
"learning_rate": 2.6893939393939398e-05, |
|
"loss": 0.0676, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.22495229542255402, |
|
"eval_runtime": 2.0653, |
|
"eval_samples_per_second": 76.985, |
|
"eval_steps_per_second": 4.842, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 152.98, |
|
"eval_accuracy": 0.9559748427672956, |
|
"eval_loss": 0.1910940259695053, |
|
"eval_runtime": 2.2097, |
|
"eval_samples_per_second": 71.955, |
|
"eval_steps_per_second": 4.525, |
|
"step": 1721 |
|
}, |
|
{ |
|
"epoch": 153.96, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.2189728170633316, |
|
"eval_runtime": 2.049, |
|
"eval_samples_per_second": 77.598, |
|
"eval_steps_per_second": 4.88, |
|
"step": 1732 |
|
}, |
|
{ |
|
"epoch": 154.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.1975589245557785, |
|
"eval_runtime": 2.0536, |
|
"eval_samples_per_second": 77.426, |
|
"eval_steps_per_second": 4.87, |
|
"step": 1743 |
|
}, |
|
{ |
|
"epoch": 155.56, |
|
"grad_norm": 0.9841188788414001, |
|
"learning_rate": 2.6704545454545453e-05, |
|
"loss": 0.0674, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.18743836879730225, |
|
"eval_runtime": 2.0593, |
|
"eval_samples_per_second": 77.211, |
|
"eval_steps_per_second": 4.856, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 156.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2022770792245865, |
|
"eval_runtime": 2.0432, |
|
"eval_samples_per_second": 77.821, |
|
"eval_steps_per_second": 4.894, |
|
"step": 1766 |
|
}, |
|
{ |
|
"epoch": 157.96, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.21527531743049622, |
|
"eval_runtime": 1.9951, |
|
"eval_samples_per_second": 79.694, |
|
"eval_steps_per_second": 5.012, |
|
"step": 1777 |
|
}, |
|
{ |
|
"epoch": 158.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.22451625764369965, |
|
"eval_runtime": 2.1442, |
|
"eval_samples_per_second": 74.155, |
|
"eval_steps_per_second": 4.664, |
|
"step": 1788 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"grad_norm": 0.5377254486083984, |
|
"learning_rate": 2.6515151515151516e-05, |
|
"loss": 0.0548, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.2431740015745163, |
|
"eval_runtime": 2.2699, |
|
"eval_samples_per_second": 70.046, |
|
"eval_steps_per_second": 4.405, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 160.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2071038782596588, |
|
"eval_runtime": 2.0506, |
|
"eval_samples_per_second": 77.538, |
|
"eval_steps_per_second": 4.877, |
|
"step": 1811 |
|
}, |
|
{ |
|
"epoch": 161.96, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.18368059396743774, |
|
"eval_runtime": 2.2081, |
|
"eval_samples_per_second": 72.006, |
|
"eval_steps_per_second": 4.529, |
|
"step": 1822 |
|
}, |
|
{ |
|
"epoch": 162.93, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.19161438941955566, |
|
"eval_runtime": 1.9999, |
|
"eval_samples_per_second": 79.505, |
|
"eval_steps_per_second": 5.0, |
|
"step": 1833 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.22212089598178864, |
|
"eval_runtime": 2.0001, |
|
"eval_samples_per_second": 79.497, |
|
"eval_steps_per_second": 5.0, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 164.44, |
|
"grad_norm": 0.5433365702629089, |
|
"learning_rate": 2.6325757575757575e-05, |
|
"loss": 0.0616, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 164.98, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.21204246580600739, |
|
"eval_runtime": 2.035, |
|
"eval_samples_per_second": 78.132, |
|
"eval_steps_per_second": 4.914, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 165.96, |
|
"eval_accuracy": 0.9559748427672956, |
|
"eval_loss": 0.18882697820663452, |
|
"eval_runtime": 2.0581, |
|
"eval_samples_per_second": 77.256, |
|
"eval_steps_per_second": 4.859, |
|
"step": 1867 |
|
}, |
|
{ |
|
"epoch": 166.93, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.19714578986167908, |
|
"eval_runtime": 2.002, |
|
"eval_samples_per_second": 79.422, |
|
"eval_steps_per_second": 4.995, |
|
"step": 1878 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.21613995730876923, |
|
"eval_runtime": 2.0979, |
|
"eval_samples_per_second": 75.789, |
|
"eval_steps_per_second": 4.767, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 168.89, |
|
"grad_norm": 0.4616011083126068, |
|
"learning_rate": 2.6136363636363637e-05, |
|
"loss": 0.0467, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 168.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.22824302315711975, |
|
"eval_runtime": 2.0023, |
|
"eval_samples_per_second": 79.407, |
|
"eval_steps_per_second": 4.994, |
|
"step": 1901 |
|
}, |
|
{ |
|
"epoch": 169.96, |
|
"eval_accuracy": 0.9056603773584906, |
|
"eval_loss": 0.31181007623672485, |
|
"eval_runtime": 2.2272, |
|
"eval_samples_per_second": 71.39, |
|
"eval_steps_per_second": 4.49, |
|
"step": 1912 |
|
}, |
|
{ |
|
"epoch": 170.93, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.23191651701927185, |
|
"eval_runtime": 2.0759, |
|
"eval_samples_per_second": 76.592, |
|
"eval_steps_per_second": 4.817, |
|
"step": 1923 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.27404358983039856, |
|
"eval_runtime": 2.0769, |
|
"eval_samples_per_second": 76.555, |
|
"eval_steps_per_second": 4.815, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 172.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.2666384279727936, |
|
"eval_runtime": 2.1046, |
|
"eval_samples_per_second": 75.548, |
|
"eval_steps_per_second": 4.751, |
|
"step": 1946 |
|
}, |
|
{ |
|
"epoch": 173.33, |
|
"grad_norm": 1.0961925983428955, |
|
"learning_rate": 2.59469696969697e-05, |
|
"loss": 0.0609, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 173.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.23152852058410645, |
|
"eval_runtime": 2.0323, |
|
"eval_samples_per_second": 78.237, |
|
"eval_steps_per_second": 4.921, |
|
"step": 1957 |
|
}, |
|
{ |
|
"epoch": 174.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.22292692959308624, |
|
"eval_runtime": 2.0749, |
|
"eval_samples_per_second": 76.629, |
|
"eval_steps_per_second": 4.819, |
|
"step": 1968 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.21578945219516754, |
|
"eval_runtime": 2.0472, |
|
"eval_samples_per_second": 77.668, |
|
"eval_steps_per_second": 4.885, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 176.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.22257991135120392, |
|
"eval_runtime": 2.1698, |
|
"eval_samples_per_second": 73.278, |
|
"eval_steps_per_second": 4.609, |
|
"step": 1991 |
|
}, |
|
{ |
|
"epoch": 177.78, |
|
"grad_norm": 1.6022953987121582, |
|
"learning_rate": 2.575757575757576e-05, |
|
"loss": 0.0522, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 177.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.22241446375846863, |
|
"eval_runtime": 2.0341, |
|
"eval_samples_per_second": 78.167, |
|
"eval_steps_per_second": 4.916, |
|
"step": 2002 |
|
}, |
|
{ |
|
"epoch": 178.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.21375904977321625, |
|
"eval_runtime": 2.1094, |
|
"eval_samples_per_second": 75.377, |
|
"eval_steps_per_second": 4.741, |
|
"step": 2013 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.21769364178180695, |
|
"eval_runtime": 1.9898, |
|
"eval_samples_per_second": 79.909, |
|
"eval_steps_per_second": 5.026, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 180.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.19169649481773376, |
|
"eval_runtime": 2.1326, |
|
"eval_samples_per_second": 74.558, |
|
"eval_steps_per_second": 4.689, |
|
"step": 2036 |
|
}, |
|
{ |
|
"epoch": 181.96, |
|
"eval_accuracy": 0.9559748427672956, |
|
"eval_loss": 0.19741381704807281, |
|
"eval_runtime": 2.1931, |
|
"eval_samples_per_second": 72.5, |
|
"eval_steps_per_second": 4.56, |
|
"step": 2047 |
|
}, |
|
{ |
|
"epoch": 182.22, |
|
"grad_norm": 0.7399430274963379, |
|
"learning_rate": 2.556818181818182e-05, |
|
"loss": 0.0515, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 182.93, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.21981187164783478, |
|
"eval_runtime": 2.0417, |
|
"eval_samples_per_second": 77.878, |
|
"eval_steps_per_second": 4.898, |
|
"step": 2058 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.24247391521930695, |
|
"eval_runtime": 2.1999, |
|
"eval_samples_per_second": 72.278, |
|
"eval_steps_per_second": 4.546, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 184.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.24488882720470428, |
|
"eval_runtime": 2.0767, |
|
"eval_samples_per_second": 76.565, |
|
"eval_steps_per_second": 4.815, |
|
"step": 2081 |
|
}, |
|
{ |
|
"epoch": 185.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.23463451862335205, |
|
"eval_runtime": 2.0674, |
|
"eval_samples_per_second": 76.907, |
|
"eval_steps_per_second": 4.837, |
|
"step": 2092 |
|
}, |
|
{ |
|
"epoch": 186.67, |
|
"grad_norm": 0.67291659116745, |
|
"learning_rate": 2.5378787878787876e-05, |
|
"loss": 0.045, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 186.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.23308323323726654, |
|
"eval_runtime": 2.2603, |
|
"eval_samples_per_second": 70.346, |
|
"eval_steps_per_second": 4.424, |
|
"step": 2103 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2660614252090454, |
|
"eval_runtime": 2.0509, |
|
"eval_samples_per_second": 77.527, |
|
"eval_steps_per_second": 4.876, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 188.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.22910529375076294, |
|
"eval_runtime": 2.0536, |
|
"eval_samples_per_second": 77.423, |
|
"eval_steps_per_second": 4.869, |
|
"step": 2126 |
|
}, |
|
{ |
|
"epoch": 189.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.23477251827716827, |
|
"eval_runtime": 2.0092, |
|
"eval_samples_per_second": 79.134, |
|
"eval_steps_per_second": 4.977, |
|
"step": 2137 |
|
}, |
|
{ |
|
"epoch": 190.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.23087622225284576, |
|
"eval_runtime": 2.0403, |
|
"eval_samples_per_second": 77.929, |
|
"eval_steps_per_second": 4.901, |
|
"step": 2148 |
|
}, |
|
{ |
|
"epoch": 191.11, |
|
"grad_norm": 0.11660194396972656, |
|
"learning_rate": 2.518939393939394e-05, |
|
"loss": 0.0403, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.27889564633369446, |
|
"eval_runtime": 2.0147, |
|
"eval_samples_per_second": 78.921, |
|
"eval_steps_per_second": 4.964, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 192.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.2540048658847809, |
|
"eval_runtime": 2.1082, |
|
"eval_samples_per_second": 75.42, |
|
"eval_steps_per_second": 4.743, |
|
"step": 2171 |
|
}, |
|
{ |
|
"epoch": 193.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.23720349371433258, |
|
"eval_runtime": 2.1791, |
|
"eval_samples_per_second": 72.966, |
|
"eval_steps_per_second": 4.589, |
|
"step": 2182 |
|
}, |
|
{ |
|
"epoch": 194.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.2507873773574829, |
|
"eval_runtime": 1.986, |
|
"eval_samples_per_second": 80.061, |
|
"eval_steps_per_second": 5.035, |
|
"step": 2193 |
|
}, |
|
{ |
|
"epoch": 195.56, |
|
"grad_norm": 0.8518453240394592, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0476, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.2193620353937149, |
|
"eval_runtime": 2.1819, |
|
"eval_samples_per_second": 72.874, |
|
"eval_steps_per_second": 4.583, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 196.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.23066306114196777, |
|
"eval_runtime": 2.0482, |
|
"eval_samples_per_second": 77.628, |
|
"eval_steps_per_second": 4.882, |
|
"step": 2216 |
|
}, |
|
{ |
|
"epoch": 197.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.2719472646713257, |
|
"eval_runtime": 1.9901, |
|
"eval_samples_per_second": 79.896, |
|
"eval_steps_per_second": 5.025, |
|
"step": 2227 |
|
}, |
|
{ |
|
"epoch": 198.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.28040099143981934, |
|
"eval_runtime": 2.0617, |
|
"eval_samples_per_second": 77.122, |
|
"eval_steps_per_second": 4.85, |
|
"step": 2238 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"grad_norm": 0.09039253741502762, |
|
"learning_rate": 2.481060606060606e-05, |
|
"loss": 0.0457, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.2755438983440399, |
|
"eval_runtime": 2.0773, |
|
"eval_samples_per_second": 76.541, |
|
"eval_steps_per_second": 4.814, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 200.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.2353052794933319, |
|
"eval_runtime": 1.9899, |
|
"eval_samples_per_second": 79.904, |
|
"eval_steps_per_second": 5.025, |
|
"step": 2261 |
|
}, |
|
{ |
|
"epoch": 201.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.21893078088760376, |
|
"eval_runtime": 2.1045, |
|
"eval_samples_per_second": 75.552, |
|
"eval_steps_per_second": 4.752, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 202.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.21625204384326935, |
|
"eval_runtime": 2.0731, |
|
"eval_samples_per_second": 76.697, |
|
"eval_steps_per_second": 4.824, |
|
"step": 2283 |
|
}, |
|
{ |
|
"epoch": 204.0, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.2110479772090912, |
|
"eval_runtime": 2.1463, |
|
"eval_samples_per_second": 74.079, |
|
"eval_steps_per_second": 4.659, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 204.44, |
|
"grad_norm": 0.9943685531616211, |
|
"learning_rate": 2.4621212121212123e-05, |
|
"loss": 0.0393, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 204.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.23164410889148712, |
|
"eval_runtime": 2.0606, |
|
"eval_samples_per_second": 77.162, |
|
"eval_steps_per_second": 4.853, |
|
"step": 2306 |
|
}, |
|
{ |
|
"epoch": 205.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.24650876224040985, |
|
"eval_runtime": 2.0011, |
|
"eval_samples_per_second": 79.455, |
|
"eval_steps_per_second": 4.997, |
|
"step": 2317 |
|
}, |
|
{ |
|
"epoch": 206.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.23763243854045868, |
|
"eval_runtime": 2.0999, |
|
"eval_samples_per_second": 75.719, |
|
"eval_steps_per_second": 4.762, |
|
"step": 2328 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2170635461807251, |
|
"eval_runtime": 2.1575, |
|
"eval_samples_per_second": 73.697, |
|
"eval_steps_per_second": 4.635, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 208.89, |
|
"grad_norm": 0.46173095703125, |
|
"learning_rate": 2.4431818181818185e-05, |
|
"loss": 0.0443, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 208.98, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.23952844738960266, |
|
"eval_runtime": 2.0014, |
|
"eval_samples_per_second": 79.445, |
|
"eval_steps_per_second": 4.997, |
|
"step": 2351 |
|
}, |
|
{ |
|
"epoch": 209.96, |
|
"eval_accuracy": 0.8930817610062893, |
|
"eval_loss": 0.2906019687652588, |
|
"eval_runtime": 2.0133, |
|
"eval_samples_per_second": 78.977, |
|
"eval_steps_per_second": 4.967, |
|
"step": 2362 |
|
}, |
|
{ |
|
"epoch": 210.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2608316242694855, |
|
"eval_runtime": 2.1558, |
|
"eval_samples_per_second": 73.755, |
|
"eval_steps_per_second": 4.639, |
|
"step": 2373 |
|
}, |
|
{ |
|
"epoch": 212.0, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.23210321366786957, |
|
"eval_runtime": 2.0606, |
|
"eval_samples_per_second": 77.161, |
|
"eval_steps_per_second": 4.853, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 212.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.24640053510665894, |
|
"eval_runtime": 2.2148, |
|
"eval_samples_per_second": 71.79, |
|
"eval_steps_per_second": 4.515, |
|
"step": 2396 |
|
}, |
|
{ |
|
"epoch": 213.33, |
|
"grad_norm": 0.94215327501297, |
|
"learning_rate": 2.4242424242424244e-05, |
|
"loss": 0.0539, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 213.96, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.2441636025905609, |
|
"eval_runtime": 2.172, |
|
"eval_samples_per_second": 73.203, |
|
"eval_steps_per_second": 4.604, |
|
"step": 2407 |
|
}, |
|
{ |
|
"epoch": 214.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.2511676847934723, |
|
"eval_runtime": 2.0176, |
|
"eval_samples_per_second": 78.806, |
|
"eval_steps_per_second": 4.956, |
|
"step": 2418 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.22649481892585754, |
|
"eval_runtime": 2.0103, |
|
"eval_samples_per_second": 79.091, |
|
"eval_steps_per_second": 4.974, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 216.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.21274729073047638, |
|
"eval_runtime": 2.0508, |
|
"eval_samples_per_second": 77.529, |
|
"eval_steps_per_second": 4.876, |
|
"step": 2441 |
|
}, |
|
{ |
|
"epoch": 217.78, |
|
"grad_norm": 0.7381362318992615, |
|
"learning_rate": 2.4053030303030303e-05, |
|
"loss": 0.0415, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 217.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.284365177154541, |
|
"eval_runtime": 2.0321, |
|
"eval_samples_per_second": 78.244, |
|
"eval_steps_per_second": 4.921, |
|
"step": 2452 |
|
}, |
|
{ |
|
"epoch": 218.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.24891048669815063, |
|
"eval_runtime": 2.0843, |
|
"eval_samples_per_second": 76.285, |
|
"eval_steps_per_second": 4.798, |
|
"step": 2463 |
|
}, |
|
{ |
|
"epoch": 220.0, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.21200108528137207, |
|
"eval_runtime": 1.9938, |
|
"eval_samples_per_second": 79.748, |
|
"eval_steps_per_second": 5.016, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 220.98, |
|
"eval_accuracy": 0.9559748427672956, |
|
"eval_loss": 0.2015109807252884, |
|
"eval_runtime": 2.2098, |
|
"eval_samples_per_second": 71.951, |
|
"eval_steps_per_second": 4.525, |
|
"step": 2486 |
|
}, |
|
{ |
|
"epoch": 221.96, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.25095799565315247, |
|
"eval_runtime": 2.0817, |
|
"eval_samples_per_second": 76.381, |
|
"eval_steps_per_second": 4.804, |
|
"step": 2497 |
|
}, |
|
{ |
|
"epoch": 222.22, |
|
"grad_norm": 0.3756774961948395, |
|
"learning_rate": 2.3863636363636362e-05, |
|
"loss": 0.0325, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 222.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2875436246395111, |
|
"eval_runtime": 2.0148, |
|
"eval_samples_per_second": 78.915, |
|
"eval_steps_per_second": 4.963, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"eval_accuracy": 0.9622641509433962, |
|
"eval_loss": 0.19936275482177734, |
|
"eval_runtime": 2.0208, |
|
"eval_samples_per_second": 78.682, |
|
"eval_steps_per_second": 4.949, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 224.98, |
|
"eval_accuracy": 0.9622641509433962, |
|
"eval_loss": 0.20330873131752014, |
|
"eval_runtime": 2.1708, |
|
"eval_samples_per_second": 73.243, |
|
"eval_steps_per_second": 4.606, |
|
"step": 2531 |
|
}, |
|
{ |
|
"epoch": 225.96, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.2391451746225357, |
|
"eval_runtime": 1.9988, |
|
"eval_samples_per_second": 79.549, |
|
"eval_steps_per_second": 5.003, |
|
"step": 2542 |
|
}, |
|
{ |
|
"epoch": 226.67, |
|
"grad_norm": 0.6930297017097473, |
|
"learning_rate": 2.3674242424242424e-05, |
|
"loss": 0.0249, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 226.93, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.30440014600753784, |
|
"eval_runtime": 2.0166, |
|
"eval_samples_per_second": 78.847, |
|
"eval_steps_per_second": 4.959, |
|
"step": 2553 |
|
}, |
|
{ |
|
"epoch": 228.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2825218439102173, |
|
"eval_runtime": 2.2235, |
|
"eval_samples_per_second": 71.51, |
|
"eval_steps_per_second": 4.497, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 228.98, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.234725683927536, |
|
"eval_runtime": 2.0151, |
|
"eval_samples_per_second": 78.905, |
|
"eval_steps_per_second": 4.963, |
|
"step": 2576 |
|
}, |
|
{ |
|
"epoch": 229.96, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.24049904942512512, |
|
"eval_runtime": 2.0305, |
|
"eval_samples_per_second": 78.304, |
|
"eval_steps_per_second": 4.925, |
|
"step": 2587 |
|
}, |
|
{ |
|
"epoch": 230.93, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.25367188453674316, |
|
"eval_runtime": 2.1765, |
|
"eval_samples_per_second": 73.054, |
|
"eval_steps_per_second": 4.595, |
|
"step": 2598 |
|
}, |
|
{ |
|
"epoch": 231.11, |
|
"grad_norm": 0.8203662037849426, |
|
"learning_rate": 2.3484848484848487e-05, |
|
"loss": 0.0358, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 232.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.27088040113449097, |
|
"eval_runtime": 2.0677, |
|
"eval_samples_per_second": 76.895, |
|
"eval_steps_per_second": 4.836, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 232.98, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.2444712519645691, |
|
"eval_runtime": 2.123, |
|
"eval_samples_per_second": 74.893, |
|
"eval_steps_per_second": 4.71, |
|
"step": 2621 |
|
}, |
|
{ |
|
"epoch": 233.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.24358882009983063, |
|
"eval_runtime": 2.0612, |
|
"eval_samples_per_second": 77.139, |
|
"eval_steps_per_second": 4.852, |
|
"step": 2632 |
|
}, |
|
{ |
|
"epoch": 234.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.22266939282417297, |
|
"eval_runtime": 2.0145, |
|
"eval_samples_per_second": 78.929, |
|
"eval_steps_per_second": 4.964, |
|
"step": 2643 |
|
}, |
|
{ |
|
"epoch": 235.56, |
|
"grad_norm": 0.7004448771476746, |
|
"learning_rate": 2.3295454545454546e-05, |
|
"loss": 0.0345, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 236.0, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.22081993520259857, |
|
"eval_runtime": 2.0852, |
|
"eval_samples_per_second": 76.252, |
|
"eval_steps_per_second": 4.796, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 236.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.22930140793323517, |
|
"eval_runtime": 2.038, |
|
"eval_samples_per_second": 78.019, |
|
"eval_steps_per_second": 4.907, |
|
"step": 2666 |
|
}, |
|
{ |
|
"epoch": 237.96, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.2159855216741562, |
|
"eval_runtime": 2.2011, |
|
"eval_samples_per_second": 72.236, |
|
"eval_steps_per_second": 4.543, |
|
"step": 2677 |
|
}, |
|
{ |
|
"epoch": 238.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2085605412721634, |
|
"eval_runtime": 2.0845, |
|
"eval_samples_per_second": 76.277, |
|
"eval_steps_per_second": 4.797, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"grad_norm": 1.642115592956543, |
|
"learning_rate": 2.3106060606060608e-05, |
|
"loss": 0.0339, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.26398828625679016, |
|
"eval_runtime": 1.9895, |
|
"eval_samples_per_second": 79.918, |
|
"eval_steps_per_second": 5.026, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 240.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2953893542289734, |
|
"eval_runtime": 2.0677, |
|
"eval_samples_per_second": 76.899, |
|
"eval_steps_per_second": 4.836, |
|
"step": 2711 |
|
}, |
|
{ |
|
"epoch": 241.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2507174611091614, |
|
"eval_runtime": 2.1213, |
|
"eval_samples_per_second": 74.953, |
|
"eval_steps_per_second": 4.714, |
|
"step": 2722 |
|
}, |
|
{ |
|
"epoch": 242.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.227327361702919, |
|
"eval_runtime": 1.9774, |
|
"eval_samples_per_second": 80.407, |
|
"eval_steps_per_second": 5.057, |
|
"step": 2733 |
|
}, |
|
{ |
|
"epoch": 244.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.24215646088123322, |
|
"eval_runtime": 2.0297, |
|
"eval_samples_per_second": 78.336, |
|
"eval_steps_per_second": 4.927, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 244.44, |
|
"grad_norm": 1.2598336935043335, |
|
"learning_rate": 2.2916666666666667e-05, |
|
"loss": 0.0309, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 244.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2931080758571625, |
|
"eval_runtime": 2.1459, |
|
"eval_samples_per_second": 74.093, |
|
"eval_steps_per_second": 4.66, |
|
"step": 2756 |
|
}, |
|
{ |
|
"epoch": 245.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2694746255874634, |
|
"eval_runtime": 2.0392, |
|
"eval_samples_per_second": 77.97, |
|
"eval_steps_per_second": 4.904, |
|
"step": 2767 |
|
}, |
|
{ |
|
"epoch": 246.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.26456066966056824, |
|
"eval_runtime": 2.1011, |
|
"eval_samples_per_second": 75.673, |
|
"eval_steps_per_second": 4.759, |
|
"step": 2778 |
|
}, |
|
{ |
|
"epoch": 248.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.23147591948509216, |
|
"eval_runtime": 2.0349, |
|
"eval_samples_per_second": 78.135, |
|
"eval_steps_per_second": 4.914, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 248.89, |
|
"grad_norm": 1.3385041952133179, |
|
"learning_rate": 2.272727272727273e-05, |
|
"loss": 0.0301, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 248.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2269720882177353, |
|
"eval_runtime": 2.0267, |
|
"eval_samples_per_second": 78.453, |
|
"eval_steps_per_second": 4.934, |
|
"step": 2801 |
|
}, |
|
{ |
|
"epoch": 249.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.244718536734581, |
|
"eval_runtime": 2.0507, |
|
"eval_samples_per_second": 77.533, |
|
"eval_steps_per_second": 4.876, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 250.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2586061358451843, |
|
"eval_runtime": 2.0836, |
|
"eval_samples_per_second": 76.312, |
|
"eval_steps_per_second": 4.799, |
|
"step": 2823 |
|
}, |
|
{ |
|
"epoch": 252.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3038959503173828, |
|
"eval_runtime": 2.0093, |
|
"eval_samples_per_second": 79.132, |
|
"eval_steps_per_second": 4.977, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 252.98, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.27771249413490295, |
|
"eval_runtime": 2.1305, |
|
"eval_samples_per_second": 74.63, |
|
"eval_steps_per_second": 4.694, |
|
"step": 2846 |
|
}, |
|
{ |
|
"epoch": 253.33, |
|
"grad_norm": 0.40545353293418884, |
|
"learning_rate": 2.2537878787878788e-05, |
|
"loss": 0.0335, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 253.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.256588876247406, |
|
"eval_runtime": 2.1001, |
|
"eval_samples_per_second": 75.709, |
|
"eval_steps_per_second": 4.762, |
|
"step": 2857 |
|
}, |
|
{ |
|
"epoch": 254.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.26031869649887085, |
|
"eval_runtime": 2.2094, |
|
"eval_samples_per_second": 71.966, |
|
"eval_steps_per_second": 4.526, |
|
"step": 2868 |
|
}, |
|
{ |
|
"epoch": 256.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.26985806226730347, |
|
"eval_runtime": 1.9916, |
|
"eval_samples_per_second": 79.835, |
|
"eval_steps_per_second": 5.021, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 256.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2838137149810791, |
|
"eval_runtime": 1.9992, |
|
"eval_samples_per_second": 79.532, |
|
"eval_steps_per_second": 5.002, |
|
"step": 2891 |
|
}, |
|
{ |
|
"epoch": 257.78, |
|
"grad_norm": 0.2661449611186981, |
|
"learning_rate": 2.2348484848484847e-05, |
|
"loss": 0.0249, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 257.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2572626769542694, |
|
"eval_runtime": 2.0448, |
|
"eval_samples_per_second": 77.758, |
|
"eval_steps_per_second": 4.89, |
|
"step": 2902 |
|
}, |
|
{ |
|
"epoch": 258.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2652382254600525, |
|
"eval_runtime": 2.0483, |
|
"eval_samples_per_second": 77.627, |
|
"eval_steps_per_second": 4.882, |
|
"step": 2913 |
|
}, |
|
{ |
|
"epoch": 260.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.26221606135368347, |
|
"eval_runtime": 1.9761, |
|
"eval_samples_per_second": 80.461, |
|
"eval_steps_per_second": 5.06, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 260.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2583387494087219, |
|
"eval_runtime": 2.0285, |
|
"eval_samples_per_second": 78.384, |
|
"eval_steps_per_second": 4.93, |
|
"step": 2936 |
|
}, |
|
{ |
|
"epoch": 261.96, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.23241400718688965, |
|
"eval_runtime": 2.1753, |
|
"eval_samples_per_second": 73.094, |
|
"eval_steps_per_second": 4.597, |
|
"step": 2947 |
|
}, |
|
{ |
|
"epoch": 262.22, |
|
"grad_norm": 0.5177292227745056, |
|
"learning_rate": 2.215909090909091e-05, |
|
"loss": 0.0308, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 262.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2781696319580078, |
|
"eval_runtime": 2.0731, |
|
"eval_samples_per_second": 76.695, |
|
"eval_steps_per_second": 4.824, |
|
"step": 2958 |
|
}, |
|
{ |
|
"epoch": 264.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2519301474094391, |
|
"eval_runtime": 2.1326, |
|
"eval_samples_per_second": 74.556, |
|
"eval_steps_per_second": 4.689, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 264.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2634475529193878, |
|
"eval_runtime": 2.0868, |
|
"eval_samples_per_second": 76.194, |
|
"eval_steps_per_second": 4.792, |
|
"step": 2981 |
|
}, |
|
{ |
|
"epoch": 265.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2647358775138855, |
|
"eval_runtime": 2.023, |
|
"eval_samples_per_second": 78.596, |
|
"eval_steps_per_second": 4.943, |
|
"step": 2992 |
|
}, |
|
{ |
|
"epoch": 266.67, |
|
"grad_norm": 0.311382532119751, |
|
"learning_rate": 2.1969696969696972e-05, |
|
"loss": 0.0282, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 266.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.25880536437034607, |
|
"eval_runtime": 2.0166, |
|
"eval_samples_per_second": 78.845, |
|
"eval_steps_per_second": 4.959, |
|
"step": 3003 |
|
}, |
|
{ |
|
"epoch": 268.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.23151087760925293, |
|
"eval_runtime": 2.1955, |
|
"eval_samples_per_second": 72.42, |
|
"eval_steps_per_second": 4.555, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 268.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.22928977012634277, |
|
"eval_runtime": 2.1352, |
|
"eval_samples_per_second": 74.465, |
|
"eval_steps_per_second": 4.683, |
|
"step": 3026 |
|
}, |
|
{ |
|
"epoch": 269.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.23751527070999146, |
|
"eval_runtime": 2.0031, |
|
"eval_samples_per_second": 79.378, |
|
"eval_steps_per_second": 4.992, |
|
"step": 3037 |
|
}, |
|
{ |
|
"epoch": 270.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.24385805428028107, |
|
"eval_runtime": 2.109, |
|
"eval_samples_per_second": 75.392, |
|
"eval_steps_per_second": 4.742, |
|
"step": 3048 |
|
}, |
|
{ |
|
"epoch": 271.11, |
|
"grad_norm": 0.8252888321876526, |
|
"learning_rate": 2.178030303030303e-05, |
|
"loss": 0.0347, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 272.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2541854679584503, |
|
"eval_runtime": 2.1279, |
|
"eval_samples_per_second": 74.722, |
|
"eval_steps_per_second": 4.699, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 272.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.24015812575817108, |
|
"eval_runtime": 1.9697, |
|
"eval_samples_per_second": 80.724, |
|
"eval_steps_per_second": 5.077, |
|
"step": 3071 |
|
}, |
|
{ |
|
"epoch": 273.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2365039885044098, |
|
"eval_runtime": 2.1369, |
|
"eval_samples_per_second": 74.406, |
|
"eval_steps_per_second": 4.68, |
|
"step": 3082 |
|
}, |
|
{ |
|
"epoch": 274.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2757132053375244, |
|
"eval_runtime": 2.0094, |
|
"eval_samples_per_second": 79.128, |
|
"eval_steps_per_second": 4.977, |
|
"step": 3093 |
|
}, |
|
{ |
|
"epoch": 275.56, |
|
"grad_norm": 0.06441498547792435, |
|
"learning_rate": 2.1590909090909093e-05, |
|
"loss": 0.0211, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 276.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.25078749656677246, |
|
"eval_runtime": 2.0059, |
|
"eval_samples_per_second": 79.266, |
|
"eval_steps_per_second": 4.985, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 276.98, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.23951387405395508, |
|
"eval_runtime": 2.174, |
|
"eval_samples_per_second": 73.137, |
|
"eval_steps_per_second": 4.6, |
|
"step": 3116 |
|
}, |
|
{ |
|
"epoch": 277.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.25363460183143616, |
|
"eval_runtime": 2.0281, |
|
"eval_samples_per_second": 78.399, |
|
"eval_steps_per_second": 4.931, |
|
"step": 3127 |
|
}, |
|
{ |
|
"epoch": 278.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.26847586035728455, |
|
"eval_runtime": 2.2802, |
|
"eval_samples_per_second": 69.729, |
|
"eval_steps_per_second": 4.385, |
|
"step": 3138 |
|
}, |
|
{ |
|
"epoch": 280.0, |
|
"grad_norm": 0.5554720759391785, |
|
"learning_rate": 2.1401515151515152e-05, |
|
"loss": 0.0248, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 280.0, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.2974900007247925, |
|
"eval_runtime": 2.0423, |
|
"eval_samples_per_second": 77.852, |
|
"eval_steps_per_second": 4.896, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 280.98, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.3234010636806488, |
|
"eval_runtime": 2.0793, |
|
"eval_samples_per_second": 76.469, |
|
"eval_steps_per_second": 4.809, |
|
"step": 3161 |
|
}, |
|
{ |
|
"epoch": 281.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2707124352455139, |
|
"eval_runtime": 2.0919, |
|
"eval_samples_per_second": 76.007, |
|
"eval_steps_per_second": 4.78, |
|
"step": 3172 |
|
}, |
|
{ |
|
"epoch": 282.93, |
|
"eval_accuracy": 0.9559748427672956, |
|
"eval_loss": 0.22501063346862793, |
|
"eval_runtime": 1.9726, |
|
"eval_samples_per_second": 80.606, |
|
"eval_steps_per_second": 5.07, |
|
"step": 3183 |
|
}, |
|
{ |
|
"epoch": 284.0, |
|
"eval_accuracy": 0.9559748427672956, |
|
"eval_loss": 0.23188871145248413, |
|
"eval_runtime": 1.9745, |
|
"eval_samples_per_second": 80.526, |
|
"eval_steps_per_second": 5.065, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 284.44, |
|
"grad_norm": 0.20468498766422272, |
|
"learning_rate": 2.121212121212121e-05, |
|
"loss": 0.0243, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 284.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.25254714488983154, |
|
"eval_runtime": 2.1319, |
|
"eval_samples_per_second": 74.582, |
|
"eval_steps_per_second": 4.691, |
|
"step": 3206 |
|
}, |
|
{ |
|
"epoch": 285.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.26610061526298523, |
|
"eval_runtime": 2.0326, |
|
"eval_samples_per_second": 78.226, |
|
"eval_steps_per_second": 4.92, |
|
"step": 3217 |
|
}, |
|
{ |
|
"epoch": 286.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.28444719314575195, |
|
"eval_runtime": 2.0467, |
|
"eval_samples_per_second": 77.687, |
|
"eval_steps_per_second": 4.886, |
|
"step": 3228 |
|
}, |
|
{ |
|
"epoch": 288.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2571127116680145, |
|
"eval_runtime": 2.1631, |
|
"eval_samples_per_second": 73.504, |
|
"eval_steps_per_second": 4.623, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 288.89, |
|
"grad_norm": 1.0598843097686768, |
|
"learning_rate": 2.1022727272727274e-05, |
|
"loss": 0.0223, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 288.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.251703679561615, |
|
"eval_runtime": 2.09, |
|
"eval_samples_per_second": 76.075, |
|
"eval_steps_per_second": 4.785, |
|
"step": 3251 |
|
}, |
|
{ |
|
"epoch": 289.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.2636191248893738, |
|
"eval_runtime": 2.0348, |
|
"eval_samples_per_second": 78.14, |
|
"eval_steps_per_second": 4.914, |
|
"step": 3262 |
|
}, |
|
{ |
|
"epoch": 290.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.26941102743148804, |
|
"eval_runtime": 2.0598, |
|
"eval_samples_per_second": 77.193, |
|
"eval_steps_per_second": 4.855, |
|
"step": 3273 |
|
}, |
|
{ |
|
"epoch": 292.0, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.23060773313045502, |
|
"eval_runtime": 2.0528, |
|
"eval_samples_per_second": 77.454, |
|
"eval_steps_per_second": 4.871, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 292.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.23769862949848175, |
|
"eval_runtime": 2.0936, |
|
"eval_samples_per_second": 75.945, |
|
"eval_steps_per_second": 4.776, |
|
"step": 3296 |
|
}, |
|
{ |
|
"epoch": 293.33, |
|
"grad_norm": 0.6022414565086365, |
|
"learning_rate": 2.0833333333333333e-05, |
|
"loss": 0.0234, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 293.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.26981261372566223, |
|
"eval_runtime": 2.0959, |
|
"eval_samples_per_second": 75.861, |
|
"eval_steps_per_second": 4.771, |
|
"step": 3307 |
|
}, |
|
{ |
|
"epoch": 294.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.28393277525901794, |
|
"eval_runtime": 2.0125, |
|
"eval_samples_per_second": 79.007, |
|
"eval_steps_per_second": 4.969, |
|
"step": 3318 |
|
}, |
|
{ |
|
"epoch": 296.0, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.25016099214553833, |
|
"eval_runtime": 2.1941, |
|
"eval_samples_per_second": 72.467, |
|
"eval_steps_per_second": 4.558, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 296.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.27042049169540405, |
|
"eval_runtime": 2.0192, |
|
"eval_samples_per_second": 78.742, |
|
"eval_steps_per_second": 4.952, |
|
"step": 3341 |
|
}, |
|
{ |
|
"epoch": 297.78, |
|
"grad_norm": 0.03581221029162407, |
|
"learning_rate": 2.0643939393939395e-05, |
|
"loss": 0.0256, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 297.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.28789857029914856, |
|
"eval_runtime": 2.1148, |
|
"eval_samples_per_second": 75.183, |
|
"eval_steps_per_second": 4.729, |
|
"step": 3352 |
|
}, |
|
{ |
|
"epoch": 298.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3078269362449646, |
|
"eval_runtime": 2.0062, |
|
"eval_samples_per_second": 79.253, |
|
"eval_steps_per_second": 4.984, |
|
"step": 3363 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.31602492928504944, |
|
"eval_runtime": 2.0641, |
|
"eval_samples_per_second": 77.031, |
|
"eval_steps_per_second": 4.845, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 300.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2705954313278198, |
|
"eval_runtime": 2.0316, |
|
"eval_samples_per_second": 78.263, |
|
"eval_steps_per_second": 4.922, |
|
"step": 3386 |
|
}, |
|
{ |
|
"epoch": 301.96, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.2504004240036011, |
|
"eval_runtime": 2.1492, |
|
"eval_samples_per_second": 73.982, |
|
"eval_steps_per_second": 4.653, |
|
"step": 3397 |
|
}, |
|
{ |
|
"epoch": 302.22, |
|
"grad_norm": 2.553766965866089, |
|
"learning_rate": 2.0454545454545454e-05, |
|
"loss": 0.0224, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 302.93, |
|
"eval_accuracy": 0.9559748427672956, |
|
"eval_loss": 0.24540336430072784, |
|
"eval_runtime": 2.0269, |
|
"eval_samples_per_second": 78.443, |
|
"eval_steps_per_second": 4.934, |
|
"step": 3408 |
|
}, |
|
{ |
|
"epoch": 304.0, |
|
"eval_accuracy": 0.9559748427672956, |
|
"eval_loss": 0.24798454344272614, |
|
"eval_runtime": 2.0863, |
|
"eval_samples_per_second": 76.213, |
|
"eval_steps_per_second": 4.793, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 304.98, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.2511013150215149, |
|
"eval_runtime": 2.0476, |
|
"eval_samples_per_second": 77.651, |
|
"eval_steps_per_second": 4.884, |
|
"step": 3431 |
|
}, |
|
{ |
|
"epoch": 305.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2796252369880676, |
|
"eval_runtime": 2.1539, |
|
"eval_samples_per_second": 73.819, |
|
"eval_steps_per_second": 4.643, |
|
"step": 3442 |
|
}, |
|
{ |
|
"epoch": 306.67, |
|
"grad_norm": 0.41460466384887695, |
|
"learning_rate": 2.0265151515151516e-05, |
|
"loss": 0.0155, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 306.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.29322367906570435, |
|
"eval_runtime": 2.093, |
|
"eval_samples_per_second": 75.966, |
|
"eval_steps_per_second": 4.778, |
|
"step": 3453 |
|
}, |
|
{ |
|
"epoch": 308.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.2996874153614044, |
|
"eval_runtime": 2.0951, |
|
"eval_samples_per_second": 75.893, |
|
"eval_steps_per_second": 4.773, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 308.98, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3044210970401764, |
|
"eval_runtime": 1.9749, |
|
"eval_samples_per_second": 80.512, |
|
"eval_steps_per_second": 5.064, |
|
"step": 3476 |
|
}, |
|
{ |
|
"epoch": 309.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3255678415298462, |
|
"eval_runtime": 2.0175, |
|
"eval_samples_per_second": 78.81, |
|
"eval_steps_per_second": 4.957, |
|
"step": 3487 |
|
}, |
|
{ |
|
"epoch": 310.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3400976359844208, |
|
"eval_runtime": 2.0285, |
|
"eval_samples_per_second": 78.381, |
|
"eval_steps_per_second": 4.93, |
|
"step": 3498 |
|
}, |
|
{ |
|
"epoch": 311.11, |
|
"grad_norm": 0.5975369811058044, |
|
"learning_rate": 2.007575757575758e-05, |
|
"loss": 0.0226, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 312.0, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.30681127309799194, |
|
"eval_runtime": 2.0805, |
|
"eval_samples_per_second": 76.424, |
|
"eval_steps_per_second": 4.807, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 312.98, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.30169352889060974, |
|
"eval_runtime": 2.1998, |
|
"eval_samples_per_second": 72.279, |
|
"eval_steps_per_second": 4.546, |
|
"step": 3521 |
|
}, |
|
{ |
|
"epoch": 313.96, |
|
"eval_accuracy": 0.9119496855345912, |
|
"eval_loss": 0.29409661889076233, |
|
"eval_runtime": 2.1625, |
|
"eval_samples_per_second": 73.527, |
|
"eval_steps_per_second": 4.624, |
|
"step": 3532 |
|
}, |
|
{ |
|
"epoch": 314.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.2840117812156677, |
|
"eval_runtime": 2.0614, |
|
"eval_samples_per_second": 77.134, |
|
"eval_steps_per_second": 4.851, |
|
"step": 3543 |
|
}, |
|
{ |
|
"epoch": 315.56, |
|
"grad_norm": 0.4768455922603607, |
|
"learning_rate": 1.9886363636363634e-05, |
|
"loss": 0.0153, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 316.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.28999558091163635, |
|
"eval_runtime": 2.0423, |
|
"eval_samples_per_second": 77.855, |
|
"eval_steps_per_second": 4.897, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 316.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.29232266545295715, |
|
"eval_runtime": 2.0108, |
|
"eval_samples_per_second": 79.073, |
|
"eval_steps_per_second": 4.973, |
|
"step": 3566 |
|
}, |
|
{ |
|
"epoch": 317.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.2964979112148285, |
|
"eval_runtime": 1.9633, |
|
"eval_samples_per_second": 80.988, |
|
"eval_steps_per_second": 5.094, |
|
"step": 3577 |
|
}, |
|
{ |
|
"epoch": 318.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3201989531517029, |
|
"eval_runtime": 2.0683, |
|
"eval_samples_per_second": 76.876, |
|
"eval_steps_per_second": 4.835, |
|
"step": 3588 |
|
}, |
|
{ |
|
"epoch": 320.0, |
|
"grad_norm": 0.01774447225034237, |
|
"learning_rate": 1.9696969696969697e-05, |
|
"loss": 0.0183, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 320.0, |
|
"eval_accuracy": 0.9119496855345912, |
|
"eval_loss": 0.33252981305122375, |
|
"eval_runtime": 1.9991, |
|
"eval_samples_per_second": 79.534, |
|
"eval_steps_per_second": 5.002, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 320.98, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.34411394596099854, |
|
"eval_runtime": 1.9595, |
|
"eval_samples_per_second": 81.143, |
|
"eval_steps_per_second": 5.103, |
|
"step": 3611 |
|
}, |
|
{ |
|
"epoch": 321.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3004206120967865, |
|
"eval_runtime": 2.102, |
|
"eval_samples_per_second": 75.644, |
|
"eval_steps_per_second": 4.757, |
|
"step": 3622 |
|
}, |
|
{ |
|
"epoch": 322.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3022076487541199, |
|
"eval_runtime": 2.1248, |
|
"eval_samples_per_second": 74.83, |
|
"eval_steps_per_second": 4.706, |
|
"step": 3633 |
|
}, |
|
{ |
|
"epoch": 324.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.29579004645347595, |
|
"eval_runtime": 2.073, |
|
"eval_samples_per_second": 76.702, |
|
"eval_steps_per_second": 4.824, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 324.44, |
|
"grad_norm": 0.43064549565315247, |
|
"learning_rate": 1.950757575757576e-05, |
|
"loss": 0.0257, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 324.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2942567765712738, |
|
"eval_runtime": 2.08, |
|
"eval_samples_per_second": 76.442, |
|
"eval_steps_per_second": 4.808, |
|
"step": 3656 |
|
}, |
|
{ |
|
"epoch": 325.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.2944892942905426, |
|
"eval_runtime": 1.9313, |
|
"eval_samples_per_second": 82.326, |
|
"eval_steps_per_second": 5.178, |
|
"step": 3667 |
|
}, |
|
{ |
|
"epoch": 326.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.29099544882774353, |
|
"eval_runtime": 2.085, |
|
"eval_samples_per_second": 76.26, |
|
"eval_steps_per_second": 4.796, |
|
"step": 3678 |
|
}, |
|
{ |
|
"epoch": 328.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.2856423258781433, |
|
"eval_runtime": 2.1029, |
|
"eval_samples_per_second": 75.609, |
|
"eval_steps_per_second": 4.755, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 328.89, |
|
"grad_norm": 0.7020539045333862, |
|
"learning_rate": 1.9318181818181818e-05, |
|
"loss": 0.0164, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 328.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.25798845291137695, |
|
"eval_runtime": 2.0372, |
|
"eval_samples_per_second": 78.047, |
|
"eval_steps_per_second": 4.909, |
|
"step": 3701 |
|
}, |
|
{ |
|
"epoch": 329.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2566261291503906, |
|
"eval_runtime": 2.1479, |
|
"eval_samples_per_second": 74.027, |
|
"eval_steps_per_second": 4.656, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 330.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2538098394870758, |
|
"eval_runtime": 2.0665, |
|
"eval_samples_per_second": 76.941, |
|
"eval_steps_per_second": 4.839, |
|
"step": 3723 |
|
}, |
|
{ |
|
"epoch": 332.0, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.24481499195098877, |
|
"eval_runtime": 2.0898, |
|
"eval_samples_per_second": 76.084, |
|
"eval_steps_per_second": 4.785, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 332.98, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.2543666958808899, |
|
"eval_runtime": 2.035, |
|
"eval_samples_per_second": 78.134, |
|
"eval_steps_per_second": 4.914, |
|
"step": 3746 |
|
}, |
|
{ |
|
"epoch": 333.33, |
|
"grad_norm": 0.9068632125854492, |
|
"learning_rate": 1.912878787878788e-05, |
|
"loss": 0.0222, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 333.96, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3075094223022461, |
|
"eval_runtime": 2.101, |
|
"eval_samples_per_second": 75.678, |
|
"eval_steps_per_second": 4.76, |
|
"step": 3757 |
|
}, |
|
{ |
|
"epoch": 334.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.27574771642684937, |
|
"eval_runtime": 2.0253, |
|
"eval_samples_per_second": 78.507, |
|
"eval_steps_per_second": 4.938, |
|
"step": 3768 |
|
}, |
|
{ |
|
"epoch": 336.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2714598774909973, |
|
"eval_runtime": 2.2265, |
|
"eval_samples_per_second": 71.412, |
|
"eval_steps_per_second": 4.491, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 336.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3330034911632538, |
|
"eval_runtime": 2.0552, |
|
"eval_samples_per_second": 77.365, |
|
"eval_steps_per_second": 4.866, |
|
"step": 3791 |
|
}, |
|
{ |
|
"epoch": 337.78, |
|
"grad_norm": 0.03231671825051308, |
|
"learning_rate": 1.893939393939394e-05, |
|
"loss": 0.0212, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 337.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.35598525404930115, |
|
"eval_runtime": 2.0188, |
|
"eval_samples_per_second": 78.762, |
|
"eval_steps_per_second": 4.954, |
|
"step": 3802 |
|
}, |
|
{ |
|
"epoch": 338.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.28320637345314026, |
|
"eval_runtime": 2.1352, |
|
"eval_samples_per_second": 74.467, |
|
"eval_steps_per_second": 4.683, |
|
"step": 3813 |
|
}, |
|
{ |
|
"epoch": 340.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.2855217754840851, |
|
"eval_runtime": 2.1886, |
|
"eval_samples_per_second": 72.648, |
|
"eval_steps_per_second": 4.569, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 340.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.30631041526794434, |
|
"eval_runtime": 2.0061, |
|
"eval_samples_per_second": 79.26, |
|
"eval_steps_per_second": 4.985, |
|
"step": 3836 |
|
}, |
|
{ |
|
"epoch": 341.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.29151424765586853, |
|
"eval_runtime": 2.0201, |
|
"eval_samples_per_second": 78.71, |
|
"eval_steps_per_second": 4.95, |
|
"step": 3847 |
|
}, |
|
{ |
|
"epoch": 342.22, |
|
"grad_norm": 0.07481174916028976, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.016, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 342.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.28358563780784607, |
|
"eval_runtime": 1.9309, |
|
"eval_samples_per_second": 82.344, |
|
"eval_steps_per_second": 5.179, |
|
"step": 3858 |
|
}, |
|
{ |
|
"epoch": 344.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.28052231669425964, |
|
"eval_runtime": 1.9926, |
|
"eval_samples_per_second": 79.797, |
|
"eval_steps_per_second": 5.019, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 344.98, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.26776131987571716, |
|
"eval_runtime": 2.1218, |
|
"eval_samples_per_second": 74.936, |
|
"eval_steps_per_second": 4.713, |
|
"step": 3881 |
|
}, |
|
{ |
|
"epoch": 345.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2839824855327606, |
|
"eval_runtime": 2.0764, |
|
"eval_samples_per_second": 76.575, |
|
"eval_steps_per_second": 4.816, |
|
"step": 3892 |
|
}, |
|
{ |
|
"epoch": 346.67, |
|
"grad_norm": 1.4776334762573242, |
|
"learning_rate": 1.856060606060606e-05, |
|
"loss": 0.0163, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 346.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3048093914985657, |
|
"eval_runtime": 2.1233, |
|
"eval_samples_per_second": 74.885, |
|
"eval_steps_per_second": 4.71, |
|
"step": 3903 |
|
}, |
|
{ |
|
"epoch": 348.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.27605798840522766, |
|
"eval_runtime": 1.9601, |
|
"eval_samples_per_second": 81.117, |
|
"eval_steps_per_second": 5.102, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 348.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.30447614192962646, |
|
"eval_runtime": 2.0457, |
|
"eval_samples_per_second": 77.724, |
|
"eval_steps_per_second": 4.888, |
|
"step": 3926 |
|
}, |
|
{ |
|
"epoch": 349.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.26728910207748413, |
|
"eval_runtime": 2.0205, |
|
"eval_samples_per_second": 78.692, |
|
"eval_steps_per_second": 4.949, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 350.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.2830033600330353, |
|
"eval_runtime": 2.0741, |
|
"eval_samples_per_second": 76.66, |
|
"eval_steps_per_second": 4.821, |
|
"step": 3948 |
|
}, |
|
{ |
|
"epoch": 351.11, |
|
"grad_norm": 0.30603834986686707, |
|
"learning_rate": 1.837121212121212e-05, |
|
"loss": 0.0185, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 352.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.31495675444602966, |
|
"eval_runtime": 2.0088, |
|
"eval_samples_per_second": 79.152, |
|
"eval_steps_per_second": 4.978, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 352.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.2967083156108856, |
|
"eval_runtime": 2.0921, |
|
"eval_samples_per_second": 75.999, |
|
"eval_steps_per_second": 4.78, |
|
"step": 3971 |
|
}, |
|
{ |
|
"epoch": 353.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.2917640507221222, |
|
"eval_runtime": 2.1439, |
|
"eval_samples_per_second": 74.165, |
|
"eval_steps_per_second": 4.664, |
|
"step": 3982 |
|
}, |
|
{ |
|
"epoch": 354.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2848517894744873, |
|
"eval_runtime": 2.0244, |
|
"eval_samples_per_second": 78.541, |
|
"eval_steps_per_second": 4.94, |
|
"step": 3993 |
|
}, |
|
{ |
|
"epoch": 355.56, |
|
"grad_norm": 0.6905023455619812, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.0189, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 356.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.28043246269226074, |
|
"eval_runtime": 2.0697, |
|
"eval_samples_per_second": 76.823, |
|
"eval_steps_per_second": 4.832, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 356.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.29090604186058044, |
|
"eval_runtime": 2.3048, |
|
"eval_samples_per_second": 68.987, |
|
"eval_steps_per_second": 4.339, |
|
"step": 4016 |
|
}, |
|
{ |
|
"epoch": 357.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3029940724372864, |
|
"eval_runtime": 2.0213, |
|
"eval_samples_per_second": 78.661, |
|
"eval_steps_per_second": 4.947, |
|
"step": 4027 |
|
}, |
|
{ |
|
"epoch": 358.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.316310852766037, |
|
"eval_runtime": 2.0126, |
|
"eval_samples_per_second": 79.004, |
|
"eval_steps_per_second": 4.969, |
|
"step": 4038 |
|
}, |
|
{ |
|
"epoch": 360.0, |
|
"grad_norm": 0.09516480565071106, |
|
"learning_rate": 1.799242424242424e-05, |
|
"loss": 0.0153, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 360.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.32167917490005493, |
|
"eval_runtime": 1.9486, |
|
"eval_samples_per_second": 81.598, |
|
"eval_steps_per_second": 5.132, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 360.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3025132715702057, |
|
"eval_runtime": 2.0179, |
|
"eval_samples_per_second": 78.794, |
|
"eval_steps_per_second": 4.956, |
|
"step": 4061 |
|
}, |
|
{ |
|
"epoch": 361.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.297443687915802, |
|
"eval_runtime": 1.9969, |
|
"eval_samples_per_second": 79.622, |
|
"eval_steps_per_second": 5.008, |
|
"step": 4072 |
|
}, |
|
{ |
|
"epoch": 362.93, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.28664350509643555, |
|
"eval_runtime": 2.0131, |
|
"eval_samples_per_second": 78.984, |
|
"eval_steps_per_second": 4.968, |
|
"step": 4083 |
|
}, |
|
{ |
|
"epoch": 364.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.32455363869667053, |
|
"eval_runtime": 2.1216, |
|
"eval_samples_per_second": 74.943, |
|
"eval_steps_per_second": 4.713, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 364.44, |
|
"grad_norm": 0.14960724115371704, |
|
"learning_rate": 1.7803030303030303e-05, |
|
"loss": 0.0169, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 364.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2801210880279541, |
|
"eval_runtime": 1.87, |
|
"eval_samples_per_second": 85.025, |
|
"eval_steps_per_second": 5.347, |
|
"step": 4106 |
|
}, |
|
{ |
|
"epoch": 365.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.31326618790626526, |
|
"eval_runtime": 1.8975, |
|
"eval_samples_per_second": 83.793, |
|
"eval_steps_per_second": 5.27, |
|
"step": 4117 |
|
}, |
|
{ |
|
"epoch": 366.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3283620774745941, |
|
"eval_runtime": 1.8154, |
|
"eval_samples_per_second": 87.585, |
|
"eval_steps_per_second": 5.509, |
|
"step": 4128 |
|
}, |
|
{ |
|
"epoch": 368.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2716998755931854, |
|
"eval_runtime": 1.7785, |
|
"eval_samples_per_second": 89.401, |
|
"eval_steps_per_second": 5.623, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 368.89, |
|
"grad_norm": 1.529534935951233, |
|
"learning_rate": 1.7613636363636366e-05, |
|
"loss": 0.0207, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 368.98, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.26920509338378906, |
|
"eval_runtime": 1.777, |
|
"eval_samples_per_second": 89.477, |
|
"eval_steps_per_second": 5.627, |
|
"step": 4151 |
|
}, |
|
{ |
|
"epoch": 369.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2673673927783966, |
|
"eval_runtime": 1.8105, |
|
"eval_samples_per_second": 87.819, |
|
"eval_steps_per_second": 5.523, |
|
"step": 4162 |
|
}, |
|
{ |
|
"epoch": 370.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.26433154940605164, |
|
"eval_runtime": 1.8098, |
|
"eval_samples_per_second": 87.857, |
|
"eval_steps_per_second": 5.526, |
|
"step": 4173 |
|
}, |
|
{ |
|
"epoch": 372.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2969939410686493, |
|
"eval_runtime": 1.7874, |
|
"eval_samples_per_second": 88.954, |
|
"eval_steps_per_second": 5.595, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 372.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2687932550907135, |
|
"eval_runtime": 1.9292, |
|
"eval_samples_per_second": 82.418, |
|
"eval_steps_per_second": 5.184, |
|
"step": 4196 |
|
}, |
|
{ |
|
"epoch": 373.33, |
|
"grad_norm": 0.41630563139915466, |
|
"learning_rate": 1.7424242424242425e-05, |
|
"loss": 0.0213, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 373.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2765069603919983, |
|
"eval_runtime": 1.9392, |
|
"eval_samples_per_second": 81.994, |
|
"eval_steps_per_second": 5.157, |
|
"step": 4207 |
|
}, |
|
{ |
|
"epoch": 374.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.28704383969306946, |
|
"eval_runtime": 1.8427, |
|
"eval_samples_per_second": 86.287, |
|
"eval_steps_per_second": 5.427, |
|
"step": 4218 |
|
}, |
|
{ |
|
"epoch": 376.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.30059266090393066, |
|
"eval_runtime": 1.8146, |
|
"eval_samples_per_second": 87.624, |
|
"eval_steps_per_second": 5.511, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 376.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2943706512451172, |
|
"eval_runtime": 1.7941, |
|
"eval_samples_per_second": 88.625, |
|
"eval_steps_per_second": 5.574, |
|
"step": 4241 |
|
}, |
|
{ |
|
"epoch": 377.78, |
|
"grad_norm": 1.3894481658935547, |
|
"learning_rate": 1.7234848484848487e-05, |
|
"loss": 0.02, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 377.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3019978106021881, |
|
"eval_runtime": 1.8046, |
|
"eval_samples_per_second": 88.107, |
|
"eval_steps_per_second": 5.541, |
|
"step": 4252 |
|
}, |
|
{ |
|
"epoch": 378.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3074227571487427, |
|
"eval_runtime": 1.7835, |
|
"eval_samples_per_second": 89.152, |
|
"eval_steps_per_second": 5.607, |
|
"step": 4263 |
|
}, |
|
{ |
|
"epoch": 380.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.29427269101142883, |
|
"eval_runtime": 1.8177, |
|
"eval_samples_per_second": 87.473, |
|
"eval_steps_per_second": 5.501, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 380.98, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.2825266420841217, |
|
"eval_runtime": 1.8911, |
|
"eval_samples_per_second": 84.077, |
|
"eval_steps_per_second": 5.288, |
|
"step": 4286 |
|
}, |
|
{ |
|
"epoch": 381.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2760971188545227, |
|
"eval_runtime": 1.9521, |
|
"eval_samples_per_second": 81.451, |
|
"eval_steps_per_second": 5.123, |
|
"step": 4297 |
|
}, |
|
{ |
|
"epoch": 382.22, |
|
"grad_norm": 0.021462175995111465, |
|
"learning_rate": 1.7045454545454546e-05, |
|
"loss": 0.0143, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 382.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.29204800724983215, |
|
"eval_runtime": 1.9261, |
|
"eval_samples_per_second": 82.551, |
|
"eval_steps_per_second": 5.192, |
|
"step": 4308 |
|
}, |
|
{ |
|
"epoch": 384.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.29515865445137024, |
|
"eval_runtime": 1.8478, |
|
"eval_samples_per_second": 86.046, |
|
"eval_steps_per_second": 5.412, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 384.98, |
|
"eval_accuracy": 0.949685534591195, |
|
"eval_loss": 0.3164711594581604, |
|
"eval_runtime": 1.7929, |
|
"eval_samples_per_second": 88.684, |
|
"eval_steps_per_second": 5.578, |
|
"step": 4331 |
|
}, |
|
{ |
|
"epoch": 385.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2803152799606323, |
|
"eval_runtime": 1.8039, |
|
"eval_samples_per_second": 88.141, |
|
"eval_steps_per_second": 5.543, |
|
"step": 4342 |
|
}, |
|
{ |
|
"epoch": 386.67, |
|
"grad_norm": 0.4159376621246338, |
|
"learning_rate": 1.6856060606060605e-05, |
|
"loss": 0.0196, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 386.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.28756093978881836, |
|
"eval_runtime": 1.7845, |
|
"eval_samples_per_second": 89.1, |
|
"eval_steps_per_second": 5.604, |
|
"step": 4353 |
|
}, |
|
{ |
|
"epoch": 388.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2759377956390381, |
|
"eval_runtime": 1.8441, |
|
"eval_samples_per_second": 86.221, |
|
"eval_steps_per_second": 5.423, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 388.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2701479494571686, |
|
"eval_runtime": 1.7826, |
|
"eval_samples_per_second": 89.198, |
|
"eval_steps_per_second": 5.61, |
|
"step": 4376 |
|
}, |
|
{ |
|
"epoch": 389.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2951464354991913, |
|
"eval_runtime": 1.9039, |
|
"eval_samples_per_second": 83.514, |
|
"eval_steps_per_second": 5.252, |
|
"step": 4387 |
|
}, |
|
{ |
|
"epoch": 390.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2950435280799866, |
|
"eval_runtime": 1.8731, |
|
"eval_samples_per_second": 84.885, |
|
"eval_steps_per_second": 5.339, |
|
"step": 4398 |
|
}, |
|
{ |
|
"epoch": 391.11, |
|
"grad_norm": 0.057938866317272186, |
|
"learning_rate": 1.6670454545454544e-05, |
|
"loss": 0.0234, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 392.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.29603102803230286, |
|
"eval_runtime": 1.9831, |
|
"eval_samples_per_second": 80.176, |
|
"eval_steps_per_second": 5.043, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 392.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3337320387363434, |
|
"eval_runtime": 1.847, |
|
"eval_samples_per_second": 86.084, |
|
"eval_steps_per_second": 5.414, |
|
"step": 4421 |
|
}, |
|
{ |
|
"epoch": 393.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.33828112483024597, |
|
"eval_runtime": 1.8496, |
|
"eval_samples_per_second": 85.964, |
|
"eval_steps_per_second": 5.407, |
|
"step": 4432 |
|
}, |
|
{ |
|
"epoch": 394.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3078320026397705, |
|
"eval_runtime": 1.8258, |
|
"eval_samples_per_second": 87.084, |
|
"eval_steps_per_second": 5.477, |
|
"step": 4443 |
|
}, |
|
{ |
|
"epoch": 395.56, |
|
"grad_norm": 0.39662787318229675, |
|
"learning_rate": 1.6481060606060606e-05, |
|
"loss": 0.0161, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 396.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3138676881790161, |
|
"eval_runtime": 1.7627, |
|
"eval_samples_per_second": 90.205, |
|
"eval_steps_per_second": 5.673, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 396.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.31875431537628174, |
|
"eval_runtime": 1.7584, |
|
"eval_samples_per_second": 90.422, |
|
"eval_steps_per_second": 5.687, |
|
"step": 4466 |
|
}, |
|
{ |
|
"epoch": 397.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3307281732559204, |
|
"eval_runtime": 1.7976, |
|
"eval_samples_per_second": 88.452, |
|
"eval_steps_per_second": 5.563, |
|
"step": 4477 |
|
}, |
|
{ |
|
"epoch": 398.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.31634414196014404, |
|
"eval_runtime": 1.8551, |
|
"eval_samples_per_second": 85.711, |
|
"eval_steps_per_second": 5.391, |
|
"step": 4488 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"grad_norm": 0.7240819931030273, |
|
"learning_rate": 1.6291666666666665e-05, |
|
"loss": 0.0162, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3018243908882141, |
|
"eval_runtime": 1.9085, |
|
"eval_samples_per_second": 83.313, |
|
"eval_steps_per_second": 5.24, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 400.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2813258469104767, |
|
"eval_runtime": 2.0304, |
|
"eval_samples_per_second": 78.308, |
|
"eval_steps_per_second": 4.925, |
|
"step": 4511 |
|
}, |
|
{ |
|
"epoch": 401.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3019176125526428, |
|
"eval_runtime": 1.8259, |
|
"eval_samples_per_second": 87.08, |
|
"eval_steps_per_second": 5.477, |
|
"step": 4522 |
|
}, |
|
{ |
|
"epoch": 402.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.28099265694618225, |
|
"eval_runtime": 1.7238, |
|
"eval_samples_per_second": 92.239, |
|
"eval_steps_per_second": 5.801, |
|
"step": 4533 |
|
}, |
|
{ |
|
"epoch": 404.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2745566666126251, |
|
"eval_runtime": 1.7857, |
|
"eval_samples_per_second": 89.039, |
|
"eval_steps_per_second": 5.6, |
|
"step": 4545 |
|
}, |
|
{ |
|
"epoch": 404.44, |
|
"grad_norm": 0.8649039268493652, |
|
"learning_rate": 1.6102272727272727e-05, |
|
"loss": 0.023, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 404.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2850847542285919, |
|
"eval_runtime": 1.8274, |
|
"eval_samples_per_second": 87.011, |
|
"eval_steps_per_second": 5.472, |
|
"step": 4556 |
|
}, |
|
{ |
|
"epoch": 405.96, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.31582126021385193, |
|
"eval_runtime": 1.742, |
|
"eval_samples_per_second": 91.274, |
|
"eval_steps_per_second": 5.741, |
|
"step": 4567 |
|
}, |
|
{ |
|
"epoch": 406.93, |
|
"eval_accuracy": 0.9119496855345912, |
|
"eval_loss": 0.34668126702308655, |
|
"eval_runtime": 1.8815, |
|
"eval_samples_per_second": 84.506, |
|
"eval_steps_per_second": 5.315, |
|
"step": 4578 |
|
}, |
|
{ |
|
"epoch": 408.0, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.34958958625793457, |
|
"eval_runtime": 2.0856, |
|
"eval_samples_per_second": 76.236, |
|
"eval_steps_per_second": 4.795, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 408.89, |
|
"grad_norm": 1.8184185028076172, |
|
"learning_rate": 1.591287878787879e-05, |
|
"loss": 0.0164, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 408.98, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.33241006731987, |
|
"eval_runtime": 2.1278, |
|
"eval_samples_per_second": 74.727, |
|
"eval_steps_per_second": 4.7, |
|
"step": 4601 |
|
}, |
|
{ |
|
"epoch": 409.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.32462239265441895, |
|
"eval_runtime": 2.221, |
|
"eval_samples_per_second": 71.589, |
|
"eval_steps_per_second": 4.502, |
|
"step": 4612 |
|
}, |
|
{ |
|
"epoch": 410.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3765309154987335, |
|
"eval_runtime": 2.0273, |
|
"eval_samples_per_second": 78.43, |
|
"eval_steps_per_second": 4.933, |
|
"step": 4623 |
|
}, |
|
{ |
|
"epoch": 412.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3543161451816559, |
|
"eval_runtime": 2.0351, |
|
"eval_samples_per_second": 78.129, |
|
"eval_steps_per_second": 4.914, |
|
"step": 4635 |
|
}, |
|
{ |
|
"epoch": 412.98, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3280029594898224, |
|
"eval_runtime": 2.1541, |
|
"eval_samples_per_second": 73.813, |
|
"eval_steps_per_second": 4.642, |
|
"step": 4646 |
|
}, |
|
{ |
|
"epoch": 413.33, |
|
"grad_norm": 1.7262401580810547, |
|
"learning_rate": 1.572348484848485e-05, |
|
"loss": 0.0189, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 413.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.30754944682121277, |
|
"eval_runtime": 1.987, |
|
"eval_samples_per_second": 80.018, |
|
"eval_steps_per_second": 5.033, |
|
"step": 4657 |
|
}, |
|
{ |
|
"epoch": 414.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3012823462486267, |
|
"eval_runtime": 2.084, |
|
"eval_samples_per_second": 76.297, |
|
"eval_steps_per_second": 4.799, |
|
"step": 4668 |
|
}, |
|
{ |
|
"epoch": 416.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3047963082790375, |
|
"eval_runtime": 2.1147, |
|
"eval_samples_per_second": 75.187, |
|
"eval_steps_per_second": 4.729, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 416.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.297464519739151, |
|
"eval_runtime": 2.0439, |
|
"eval_samples_per_second": 77.791, |
|
"eval_steps_per_second": 4.893, |
|
"step": 4691 |
|
}, |
|
{ |
|
"epoch": 417.78, |
|
"grad_norm": 0.03005032427608967, |
|
"learning_rate": 1.553409090909091e-05, |
|
"loss": 0.018, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 417.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.30111947655677795, |
|
"eval_runtime": 2.0823, |
|
"eval_samples_per_second": 76.356, |
|
"eval_steps_per_second": 4.802, |
|
"step": 4702 |
|
}, |
|
{ |
|
"epoch": 418.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3059113621711731, |
|
"eval_runtime": 2.0164, |
|
"eval_samples_per_second": 78.853, |
|
"eval_steps_per_second": 4.959, |
|
"step": 4713 |
|
}, |
|
{ |
|
"epoch": 420.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3002815544605255, |
|
"eval_runtime": 2.0599, |
|
"eval_samples_per_second": 77.187, |
|
"eval_steps_per_second": 4.855, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 420.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.2898853123188019, |
|
"eval_runtime": 2.1653, |
|
"eval_samples_per_second": 73.43, |
|
"eval_steps_per_second": 4.618, |
|
"step": 4736 |
|
}, |
|
{ |
|
"epoch": 421.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.27394920587539673, |
|
"eval_runtime": 1.976, |
|
"eval_samples_per_second": 80.464, |
|
"eval_steps_per_second": 5.061, |
|
"step": 4747 |
|
}, |
|
{ |
|
"epoch": 422.22, |
|
"grad_norm": 0.05734672769904137, |
|
"learning_rate": 1.534469696969697e-05, |
|
"loss": 0.014, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 422.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.28232210874557495, |
|
"eval_runtime": 2.0336, |
|
"eval_samples_per_second": 78.186, |
|
"eval_steps_per_second": 4.917, |
|
"step": 4758 |
|
}, |
|
{ |
|
"epoch": 424.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3002234697341919, |
|
"eval_runtime": 2.1015, |
|
"eval_samples_per_second": 75.661, |
|
"eval_steps_per_second": 4.759, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 424.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.31039535999298096, |
|
"eval_runtime": 2.0591, |
|
"eval_samples_per_second": 77.218, |
|
"eval_steps_per_second": 4.856, |
|
"step": 4781 |
|
}, |
|
{ |
|
"epoch": 425.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2992786467075348, |
|
"eval_runtime": 2.219, |
|
"eval_samples_per_second": 71.652, |
|
"eval_steps_per_second": 4.506, |
|
"step": 4792 |
|
}, |
|
{ |
|
"epoch": 426.67, |
|
"grad_norm": 0.20316560566425323, |
|
"learning_rate": 1.5155303030303031e-05, |
|
"loss": 0.0161, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 426.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.28384503722190857, |
|
"eval_runtime": 2.3212, |
|
"eval_samples_per_second": 68.5, |
|
"eval_steps_per_second": 4.308, |
|
"step": 4803 |
|
}, |
|
{ |
|
"epoch": 428.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.303459495306015, |
|
"eval_runtime": 2.0531, |
|
"eval_samples_per_second": 77.442, |
|
"eval_steps_per_second": 4.871, |
|
"step": 4815 |
|
}, |
|
{ |
|
"epoch": 428.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.31719303131103516, |
|
"eval_runtime": 2.0034, |
|
"eval_samples_per_second": 79.365, |
|
"eval_steps_per_second": 4.992, |
|
"step": 4826 |
|
}, |
|
{ |
|
"epoch": 429.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2884739935398102, |
|
"eval_runtime": 2.1854, |
|
"eval_samples_per_second": 72.756, |
|
"eval_steps_per_second": 4.576, |
|
"step": 4837 |
|
}, |
|
{ |
|
"epoch": 430.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2915368676185608, |
|
"eval_runtime": 2.0672, |
|
"eval_samples_per_second": 76.914, |
|
"eval_steps_per_second": 4.837, |
|
"step": 4848 |
|
}, |
|
{ |
|
"epoch": 431.11, |
|
"grad_norm": 0.1926555037498474, |
|
"learning_rate": 1.496590909090909e-05, |
|
"loss": 0.0181, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 432.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.32380226254463196, |
|
"eval_runtime": 2.0107, |
|
"eval_samples_per_second": 79.076, |
|
"eval_steps_per_second": 4.973, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 432.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3051411807537079, |
|
"eval_runtime": 2.0979, |
|
"eval_samples_per_second": 75.789, |
|
"eval_steps_per_second": 4.767, |
|
"step": 4871 |
|
}, |
|
{ |
|
"epoch": 433.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2746570408344269, |
|
"eval_runtime": 2.0978, |
|
"eval_samples_per_second": 75.795, |
|
"eval_steps_per_second": 4.767, |
|
"step": 4882 |
|
}, |
|
{ |
|
"epoch": 434.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.27779048681259155, |
|
"eval_runtime": 2.1681, |
|
"eval_samples_per_second": 73.336, |
|
"eval_steps_per_second": 4.612, |
|
"step": 4893 |
|
}, |
|
{ |
|
"epoch": 435.56, |
|
"grad_norm": 0.2639506757259369, |
|
"learning_rate": 1.4776515151515152e-05, |
|
"loss": 0.0152, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 436.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3142688274383545, |
|
"eval_runtime": 2.0074, |
|
"eval_samples_per_second": 79.208, |
|
"eval_steps_per_second": 4.982, |
|
"step": 4905 |
|
}, |
|
{ |
|
"epoch": 436.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.29534852504730225, |
|
"eval_runtime": 2.0119, |
|
"eval_samples_per_second": 79.031, |
|
"eval_steps_per_second": 4.97, |
|
"step": 4916 |
|
}, |
|
{ |
|
"epoch": 437.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2987271249294281, |
|
"eval_runtime": 2.0466, |
|
"eval_samples_per_second": 77.691, |
|
"eval_steps_per_second": 4.886, |
|
"step": 4927 |
|
}, |
|
{ |
|
"epoch": 438.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3240003287792206, |
|
"eval_runtime": 2.1303, |
|
"eval_samples_per_second": 74.638, |
|
"eval_steps_per_second": 4.694, |
|
"step": 4938 |
|
}, |
|
{ |
|
"epoch": 440.0, |
|
"grad_norm": 1.0273933410644531, |
|
"learning_rate": 1.4587121212121213e-05, |
|
"loss": 0.0233, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 440.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2931964099407196, |
|
"eval_runtime": 2.0028, |
|
"eval_samples_per_second": 79.388, |
|
"eval_steps_per_second": 4.993, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 440.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.30667683482170105, |
|
"eval_runtime": 2.1028, |
|
"eval_samples_per_second": 75.614, |
|
"eval_steps_per_second": 4.756, |
|
"step": 4961 |
|
}, |
|
{ |
|
"epoch": 441.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.31695908308029175, |
|
"eval_runtime": 2.1429, |
|
"eval_samples_per_second": 74.198, |
|
"eval_steps_per_second": 4.667, |
|
"step": 4972 |
|
}, |
|
{ |
|
"epoch": 442.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.33484575152397156, |
|
"eval_runtime": 2.2487, |
|
"eval_samples_per_second": 70.709, |
|
"eval_steps_per_second": 4.447, |
|
"step": 4983 |
|
}, |
|
{ |
|
"epoch": 444.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3350779116153717, |
|
"eval_runtime": 2.2089, |
|
"eval_samples_per_second": 71.981, |
|
"eval_steps_per_second": 4.527, |
|
"step": 4995 |
|
}, |
|
{ |
|
"epoch": 444.44, |
|
"grad_norm": 0.05571739375591278, |
|
"learning_rate": 1.4397727272727274e-05, |
|
"loss": 0.0134, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 444.98, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.33779439330101013, |
|
"eval_runtime": 2.155, |
|
"eval_samples_per_second": 73.781, |
|
"eval_steps_per_second": 4.64, |
|
"step": 5006 |
|
}, |
|
{ |
|
"epoch": 445.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.32037729024887085, |
|
"eval_runtime": 2.1415, |
|
"eval_samples_per_second": 74.247, |
|
"eval_steps_per_second": 4.67, |
|
"step": 5017 |
|
}, |
|
{ |
|
"epoch": 446.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.30960965156555176, |
|
"eval_runtime": 2.0664, |
|
"eval_samples_per_second": 76.947, |
|
"eval_steps_per_second": 4.839, |
|
"step": 5028 |
|
}, |
|
{ |
|
"epoch": 448.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3135194480419159, |
|
"eval_runtime": 2.1609, |
|
"eval_samples_per_second": 73.581, |
|
"eval_steps_per_second": 4.628, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 448.89, |
|
"grad_norm": 1.2499555349349976, |
|
"learning_rate": 1.4208333333333333e-05, |
|
"loss": 0.0185, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 448.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.32047778367996216, |
|
"eval_runtime": 2.0116, |
|
"eval_samples_per_second": 79.04, |
|
"eval_steps_per_second": 4.971, |
|
"step": 5051 |
|
}, |
|
{ |
|
"epoch": 449.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3151703476905823, |
|
"eval_runtime": 1.9982, |
|
"eval_samples_per_second": 79.571, |
|
"eval_steps_per_second": 5.004, |
|
"step": 5062 |
|
}, |
|
{ |
|
"epoch": 450.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.32720035314559937, |
|
"eval_runtime": 2.0554, |
|
"eval_samples_per_second": 77.357, |
|
"eval_steps_per_second": 4.865, |
|
"step": 5073 |
|
}, |
|
{ |
|
"epoch": 452.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.31637299060821533, |
|
"eval_runtime": 2.0655, |
|
"eval_samples_per_second": 76.978, |
|
"eval_steps_per_second": 4.841, |
|
"step": 5085 |
|
}, |
|
{ |
|
"epoch": 452.98, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3297300934791565, |
|
"eval_runtime": 2.0194, |
|
"eval_samples_per_second": 78.737, |
|
"eval_steps_per_second": 4.952, |
|
"step": 5096 |
|
}, |
|
{ |
|
"epoch": 453.33, |
|
"grad_norm": 0.4623982012271881, |
|
"learning_rate": 1.4018939393939395e-05, |
|
"loss": 0.0149, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 453.96, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3299054801464081, |
|
"eval_runtime": 2.027, |
|
"eval_samples_per_second": 78.441, |
|
"eval_steps_per_second": 4.933, |
|
"step": 5107 |
|
}, |
|
{ |
|
"epoch": 454.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34270188212394714, |
|
"eval_runtime": 2.0462, |
|
"eval_samples_per_second": 77.705, |
|
"eval_steps_per_second": 4.887, |
|
"step": 5118 |
|
}, |
|
{ |
|
"epoch": 456.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3775523006916046, |
|
"eval_runtime": 2.0532, |
|
"eval_samples_per_second": 77.442, |
|
"eval_steps_per_second": 4.871, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 456.98, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.376447468996048, |
|
"eval_runtime": 2.0839, |
|
"eval_samples_per_second": 76.298, |
|
"eval_steps_per_second": 4.799, |
|
"step": 5141 |
|
}, |
|
{ |
|
"epoch": 457.78, |
|
"grad_norm": 0.23284748196601868, |
|
"learning_rate": 1.3829545454545456e-05, |
|
"loss": 0.0099, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 457.96, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.3852477967739105, |
|
"eval_runtime": 2.0765, |
|
"eval_samples_per_second": 76.569, |
|
"eval_steps_per_second": 4.816, |
|
"step": 5152 |
|
}, |
|
{ |
|
"epoch": 458.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.35552406311035156, |
|
"eval_runtime": 2.0834, |
|
"eval_samples_per_second": 76.318, |
|
"eval_steps_per_second": 4.8, |
|
"step": 5163 |
|
}, |
|
{ |
|
"epoch": 460.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3497180640697479, |
|
"eval_runtime": 2.1727, |
|
"eval_samples_per_second": 73.182, |
|
"eval_steps_per_second": 4.603, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 460.98, |
|
"eval_accuracy": 0.9119496855345912, |
|
"eval_loss": 0.3959099054336548, |
|
"eval_runtime": 2.2063, |
|
"eval_samples_per_second": 72.066, |
|
"eval_steps_per_second": 4.532, |
|
"step": 5186 |
|
}, |
|
{ |
|
"epoch": 461.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3428646922111511, |
|
"eval_runtime": 2.0667, |
|
"eval_samples_per_second": 76.934, |
|
"eval_steps_per_second": 4.839, |
|
"step": 5197 |
|
}, |
|
{ |
|
"epoch": 462.22, |
|
"grad_norm": 0.01973637193441391, |
|
"learning_rate": 1.3640151515151516e-05, |
|
"loss": 0.0123, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 462.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3277600407600403, |
|
"eval_runtime": 2.0262, |
|
"eval_samples_per_second": 78.472, |
|
"eval_steps_per_second": 4.935, |
|
"step": 5208 |
|
}, |
|
{ |
|
"epoch": 464.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.307450532913208, |
|
"eval_runtime": 2.1318, |
|
"eval_samples_per_second": 74.586, |
|
"eval_steps_per_second": 4.691, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 464.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.30191025137901306, |
|
"eval_runtime": 2.0236, |
|
"eval_samples_per_second": 78.574, |
|
"eval_steps_per_second": 4.942, |
|
"step": 5231 |
|
}, |
|
{ |
|
"epoch": 465.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3069049119949341, |
|
"eval_runtime": 1.9794, |
|
"eval_samples_per_second": 80.326, |
|
"eval_steps_per_second": 5.052, |
|
"step": 5242 |
|
}, |
|
{ |
|
"epoch": 466.67, |
|
"grad_norm": 1.7077068090438843, |
|
"learning_rate": 1.3450757575757575e-05, |
|
"loss": 0.0169, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 466.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3036327362060547, |
|
"eval_runtime": 2.2515, |
|
"eval_samples_per_second": 70.62, |
|
"eval_steps_per_second": 4.442, |
|
"step": 5253 |
|
}, |
|
{ |
|
"epoch": 468.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.32558977603912354, |
|
"eval_runtime": 2.0075, |
|
"eval_samples_per_second": 79.202, |
|
"eval_steps_per_second": 4.981, |
|
"step": 5265 |
|
}, |
|
{ |
|
"epoch": 468.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3241185247898102, |
|
"eval_runtime": 2.079, |
|
"eval_samples_per_second": 76.48, |
|
"eval_steps_per_second": 4.81, |
|
"step": 5276 |
|
}, |
|
{ |
|
"epoch": 469.96, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.32361313700675964, |
|
"eval_runtime": 2.2276, |
|
"eval_samples_per_second": 71.378, |
|
"eval_steps_per_second": 4.489, |
|
"step": 5287 |
|
}, |
|
{ |
|
"epoch": 470.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.32213094830513, |
|
"eval_runtime": 2.0555, |
|
"eval_samples_per_second": 77.353, |
|
"eval_steps_per_second": 4.865, |
|
"step": 5298 |
|
}, |
|
{ |
|
"epoch": 471.11, |
|
"grad_norm": 2.2473459243774414, |
|
"learning_rate": 1.3261363636363636e-05, |
|
"loss": 0.0114, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 472.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2958085536956787, |
|
"eval_runtime": 2.1042, |
|
"eval_samples_per_second": 75.563, |
|
"eval_steps_per_second": 4.752, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 472.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.2994365692138672, |
|
"eval_runtime": 2.0536, |
|
"eval_samples_per_second": 77.424, |
|
"eval_steps_per_second": 4.869, |
|
"step": 5321 |
|
}, |
|
{ |
|
"epoch": 473.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.29937687516212463, |
|
"eval_runtime": 2.0807, |
|
"eval_samples_per_second": 76.417, |
|
"eval_steps_per_second": 4.806, |
|
"step": 5332 |
|
}, |
|
{ |
|
"epoch": 474.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.4239935576915741, |
|
"eval_runtime": 2.0885, |
|
"eval_samples_per_second": 76.13, |
|
"eval_steps_per_second": 4.788, |
|
"step": 5343 |
|
}, |
|
{ |
|
"epoch": 475.56, |
|
"grad_norm": 0.01770736277103424, |
|
"learning_rate": 1.3071969696969698e-05, |
|
"loss": 0.0148, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 476.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.32858237624168396, |
|
"eval_runtime": 2.0527, |
|
"eval_samples_per_second": 77.46, |
|
"eval_steps_per_second": 4.872, |
|
"step": 5355 |
|
}, |
|
{ |
|
"epoch": 476.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2954269051551819, |
|
"eval_runtime": 2.1594, |
|
"eval_samples_per_second": 73.63, |
|
"eval_steps_per_second": 4.631, |
|
"step": 5366 |
|
}, |
|
{ |
|
"epoch": 477.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.29593905806541443, |
|
"eval_runtime": 2.1654, |
|
"eval_samples_per_second": 73.426, |
|
"eval_steps_per_second": 4.618, |
|
"step": 5377 |
|
}, |
|
{ |
|
"epoch": 478.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2928108274936676, |
|
"eval_runtime": 2.2433, |
|
"eval_samples_per_second": 70.877, |
|
"eval_steps_per_second": 4.458, |
|
"step": 5388 |
|
}, |
|
{ |
|
"epoch": 480.0, |
|
"grad_norm": 1.7406607866287231, |
|
"learning_rate": 1.2882575757575757e-05, |
|
"loss": 0.0171, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 480.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.2977100610733032, |
|
"eval_runtime": 2.0243, |
|
"eval_samples_per_second": 78.544, |
|
"eval_steps_per_second": 4.94, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 480.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.30747535824775696, |
|
"eval_runtime": 2.0298, |
|
"eval_samples_per_second": 78.334, |
|
"eval_steps_per_second": 4.927, |
|
"step": 5411 |
|
}, |
|
{ |
|
"epoch": 481.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3572753071784973, |
|
"eval_runtime": 2.0524, |
|
"eval_samples_per_second": 77.47, |
|
"eval_steps_per_second": 4.872, |
|
"step": 5422 |
|
}, |
|
{ |
|
"epoch": 482.93, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.3878822326660156, |
|
"eval_runtime": 2.0986, |
|
"eval_samples_per_second": 75.766, |
|
"eval_steps_per_second": 4.765, |
|
"step": 5433 |
|
}, |
|
{ |
|
"epoch": 484.0, |
|
"eval_accuracy": 0.9119496855345912, |
|
"eval_loss": 0.3886529803276062, |
|
"eval_runtime": 2.078, |
|
"eval_samples_per_second": 76.517, |
|
"eval_steps_per_second": 4.812, |
|
"step": 5445 |
|
}, |
|
{ |
|
"epoch": 484.44, |
|
"grad_norm": 0.06283226609230042, |
|
"learning_rate": 1.2693181818181818e-05, |
|
"loss": 0.0166, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 484.98, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.3698625862598419, |
|
"eval_runtime": 2.094, |
|
"eval_samples_per_second": 75.932, |
|
"eval_steps_per_second": 4.776, |
|
"step": 5456 |
|
}, |
|
{ |
|
"epoch": 485.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.351385235786438, |
|
"eval_runtime": 2.0668, |
|
"eval_samples_per_second": 76.93, |
|
"eval_steps_per_second": 4.838, |
|
"step": 5467 |
|
}, |
|
{ |
|
"epoch": 486.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34395086765289307, |
|
"eval_runtime": 2.12, |
|
"eval_samples_per_second": 74.999, |
|
"eval_steps_per_second": 4.717, |
|
"step": 5478 |
|
}, |
|
{ |
|
"epoch": 488.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.31205570697784424, |
|
"eval_runtime": 2.2336, |
|
"eval_samples_per_second": 71.184, |
|
"eval_steps_per_second": 4.477, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 488.89, |
|
"grad_norm": 1.9271873235702515, |
|
"learning_rate": 1.2503787878787879e-05, |
|
"loss": 0.0169, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 488.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3185611069202423, |
|
"eval_runtime": 1.9966, |
|
"eval_samples_per_second": 79.635, |
|
"eval_steps_per_second": 5.008, |
|
"step": 5501 |
|
}, |
|
{ |
|
"epoch": 489.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3383605182170868, |
|
"eval_runtime": 2.0874, |
|
"eval_samples_per_second": 76.17, |
|
"eval_steps_per_second": 4.791, |
|
"step": 5512 |
|
}, |
|
{ |
|
"epoch": 490.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.35870400071144104, |
|
"eval_runtime": 2.2491, |
|
"eval_samples_per_second": 70.694, |
|
"eval_steps_per_second": 4.446, |
|
"step": 5523 |
|
}, |
|
{ |
|
"epoch": 492.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3265625238418579, |
|
"eval_runtime": 2.0134, |
|
"eval_samples_per_second": 78.971, |
|
"eval_steps_per_second": 4.967, |
|
"step": 5535 |
|
}, |
|
{ |
|
"epoch": 492.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3273981213569641, |
|
"eval_runtime": 2.0972, |
|
"eval_samples_per_second": 75.815, |
|
"eval_steps_per_second": 4.768, |
|
"step": 5546 |
|
}, |
|
{ |
|
"epoch": 493.33, |
|
"grad_norm": 0.3140685260295868, |
|
"learning_rate": 1.2314393939393941e-05, |
|
"loss": 0.0162, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 493.96, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3433980345726013, |
|
"eval_runtime": 4.4757, |
|
"eval_samples_per_second": 35.525, |
|
"eval_steps_per_second": 2.234, |
|
"step": 5557 |
|
}, |
|
{ |
|
"epoch": 494.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3295518755912781, |
|
"eval_runtime": 2.0317, |
|
"eval_samples_per_second": 78.259, |
|
"eval_steps_per_second": 4.922, |
|
"step": 5568 |
|
}, |
|
{ |
|
"epoch": 496.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.31786414980888367, |
|
"eval_runtime": 2.1435, |
|
"eval_samples_per_second": 74.179, |
|
"eval_steps_per_second": 4.665, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 496.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.32228994369506836, |
|
"eval_runtime": 2.0036, |
|
"eval_samples_per_second": 79.357, |
|
"eval_steps_per_second": 4.991, |
|
"step": 5591 |
|
}, |
|
{ |
|
"epoch": 497.78, |
|
"grad_norm": 1.7739616632461548, |
|
"learning_rate": 1.2125e-05, |
|
"loss": 0.0128, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 497.96, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.3525673747062683, |
|
"eval_runtime": 2.0848, |
|
"eval_samples_per_second": 76.266, |
|
"eval_steps_per_second": 4.797, |
|
"step": 5602 |
|
}, |
|
{ |
|
"epoch": 498.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3345227539539337, |
|
"eval_runtime": 2.0597, |
|
"eval_samples_per_second": 77.194, |
|
"eval_steps_per_second": 4.855, |
|
"step": 5613 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3081194758415222, |
|
"eval_runtime": 2.14, |
|
"eval_samples_per_second": 74.297, |
|
"eval_steps_per_second": 4.673, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 500.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3136290907859802, |
|
"eval_runtime": 2.0866, |
|
"eval_samples_per_second": 76.201, |
|
"eval_steps_per_second": 4.793, |
|
"step": 5636 |
|
}, |
|
{ |
|
"epoch": 501.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.31603533029556274, |
|
"eval_runtime": 2.272, |
|
"eval_samples_per_second": 69.983, |
|
"eval_steps_per_second": 4.401, |
|
"step": 5647 |
|
}, |
|
{ |
|
"epoch": 502.22, |
|
"grad_norm": 0.024508927017450333, |
|
"learning_rate": 1.193560606060606e-05, |
|
"loss": 0.0089, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 502.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3217502236366272, |
|
"eval_runtime": 2.2568, |
|
"eval_samples_per_second": 70.454, |
|
"eval_steps_per_second": 4.431, |
|
"step": 5658 |
|
}, |
|
{ |
|
"epoch": 504.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3330020606517792, |
|
"eval_runtime": 2.1528, |
|
"eval_samples_per_second": 73.857, |
|
"eval_steps_per_second": 4.645, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 504.98, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3610976040363312, |
|
"eval_runtime": 2.1981, |
|
"eval_samples_per_second": 72.335, |
|
"eval_steps_per_second": 4.549, |
|
"step": 5681 |
|
}, |
|
{ |
|
"epoch": 505.96, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3819771111011505, |
|
"eval_runtime": 2.0605, |
|
"eval_samples_per_second": 77.167, |
|
"eval_steps_per_second": 4.853, |
|
"step": 5692 |
|
}, |
|
{ |
|
"epoch": 506.67, |
|
"grad_norm": 0.13250546157360077, |
|
"learning_rate": 1.1746212121212121e-05, |
|
"loss": 0.0168, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 506.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3471725881099701, |
|
"eval_runtime": 2.0816, |
|
"eval_samples_per_second": 76.384, |
|
"eval_steps_per_second": 4.804, |
|
"step": 5703 |
|
}, |
|
{ |
|
"epoch": 508.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3075188100337982, |
|
"eval_runtime": 2.1057, |
|
"eval_samples_per_second": 75.51, |
|
"eval_steps_per_second": 4.749, |
|
"step": 5715 |
|
}, |
|
{ |
|
"epoch": 508.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.30466988682746887, |
|
"eval_runtime": 2.1027, |
|
"eval_samples_per_second": 75.617, |
|
"eval_steps_per_second": 4.756, |
|
"step": 5726 |
|
}, |
|
{ |
|
"epoch": 509.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.314418226480484, |
|
"eval_runtime": 2.1578, |
|
"eval_samples_per_second": 73.686, |
|
"eval_steps_per_second": 4.634, |
|
"step": 5737 |
|
}, |
|
{ |
|
"epoch": 510.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3144315183162689, |
|
"eval_runtime": 2.2895, |
|
"eval_samples_per_second": 69.447, |
|
"eval_steps_per_second": 4.368, |
|
"step": 5748 |
|
}, |
|
{ |
|
"epoch": 511.11, |
|
"grad_norm": 1.371584415435791, |
|
"learning_rate": 1.1556818181818184e-05, |
|
"loss": 0.0143, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 512.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.30977222323417664, |
|
"eval_runtime": 2.0905, |
|
"eval_samples_per_second": 76.059, |
|
"eval_steps_per_second": 4.784, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 512.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.31324854493141174, |
|
"eval_runtime": 2.2018, |
|
"eval_samples_per_second": 72.212, |
|
"eval_steps_per_second": 4.542, |
|
"step": 5771 |
|
}, |
|
{ |
|
"epoch": 513.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3324536979198456, |
|
"eval_runtime": 2.096, |
|
"eval_samples_per_second": 75.859, |
|
"eval_steps_per_second": 4.771, |
|
"step": 5782 |
|
}, |
|
{ |
|
"epoch": 514.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.32093632221221924, |
|
"eval_runtime": 2.049, |
|
"eval_samples_per_second": 77.599, |
|
"eval_steps_per_second": 4.88, |
|
"step": 5793 |
|
}, |
|
{ |
|
"epoch": 515.56, |
|
"grad_norm": 1.4226562976837158, |
|
"learning_rate": 1.1367424242424243e-05, |
|
"loss": 0.014, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 516.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3191947937011719, |
|
"eval_runtime": 2.0898, |
|
"eval_samples_per_second": 76.083, |
|
"eval_steps_per_second": 4.785, |
|
"step": 5805 |
|
}, |
|
{ |
|
"epoch": 516.98, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.311814546585083, |
|
"eval_runtime": 2.0315, |
|
"eval_samples_per_second": 78.269, |
|
"eval_steps_per_second": 4.923, |
|
"step": 5816 |
|
}, |
|
{ |
|
"epoch": 517.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.31416967511177063, |
|
"eval_runtime": 2.0132, |
|
"eval_samples_per_second": 78.978, |
|
"eval_steps_per_second": 4.967, |
|
"step": 5827 |
|
}, |
|
{ |
|
"epoch": 518.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3255424201488495, |
|
"eval_runtime": 2.4361, |
|
"eval_samples_per_second": 65.269, |
|
"eval_steps_per_second": 4.105, |
|
"step": 5838 |
|
}, |
|
{ |
|
"epoch": 520.0, |
|
"grad_norm": 0.1621515154838562, |
|
"learning_rate": 1.1178030303030303e-05, |
|
"loss": 0.0111, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 520.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.32208895683288574, |
|
"eval_runtime": 2.0821, |
|
"eval_samples_per_second": 76.364, |
|
"eval_steps_per_second": 4.803, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 520.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3211723566055298, |
|
"eval_runtime": 2.0312, |
|
"eval_samples_per_second": 78.28, |
|
"eval_steps_per_second": 4.923, |
|
"step": 5861 |
|
}, |
|
{ |
|
"epoch": 521.96, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.32905757427215576, |
|
"eval_runtime": 2.0294, |
|
"eval_samples_per_second": 78.349, |
|
"eval_steps_per_second": 4.928, |
|
"step": 5872 |
|
}, |
|
{ |
|
"epoch": 522.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.33144110441207886, |
|
"eval_runtime": 2.032, |
|
"eval_samples_per_second": 78.249, |
|
"eval_steps_per_second": 4.921, |
|
"step": 5883 |
|
}, |
|
{ |
|
"epoch": 524.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3268250823020935, |
|
"eval_runtime": 2.0687, |
|
"eval_samples_per_second": 76.859, |
|
"eval_steps_per_second": 4.834, |
|
"step": 5895 |
|
}, |
|
{ |
|
"epoch": 524.44, |
|
"grad_norm": 0.008243849501013756, |
|
"learning_rate": 1.0988636363636364e-05, |
|
"loss": 0.0107, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 524.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3351696729660034, |
|
"eval_runtime": 2.155, |
|
"eval_samples_per_second": 73.782, |
|
"eval_steps_per_second": 4.64, |
|
"step": 5906 |
|
}, |
|
{ |
|
"epoch": 525.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.34242841601371765, |
|
"eval_runtime": 2.0063, |
|
"eval_samples_per_second": 79.249, |
|
"eval_steps_per_second": 4.984, |
|
"step": 5917 |
|
}, |
|
{ |
|
"epoch": 526.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.33888906240463257, |
|
"eval_runtime": 2.2365, |
|
"eval_samples_per_second": 71.093, |
|
"eval_steps_per_second": 4.471, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 528.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3547358810901642, |
|
"eval_runtime": 2.0755, |
|
"eval_samples_per_second": 76.609, |
|
"eval_steps_per_second": 4.818, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 528.89, |
|
"grad_norm": 0.47511938214302063, |
|
"learning_rate": 1.0799242424242423e-05, |
|
"loss": 0.01, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 528.98, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.34747716784477234, |
|
"eval_runtime": 2.0823, |
|
"eval_samples_per_second": 76.358, |
|
"eval_steps_per_second": 4.802, |
|
"step": 5951 |
|
}, |
|
{ |
|
"epoch": 529.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.35945838689804077, |
|
"eval_runtime": 2.0524, |
|
"eval_samples_per_second": 77.469, |
|
"eval_steps_per_second": 4.872, |
|
"step": 5962 |
|
}, |
|
{ |
|
"epoch": 530.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3673442602157593, |
|
"eval_runtime": 2.0276, |
|
"eval_samples_per_second": 78.419, |
|
"eval_steps_per_second": 4.932, |
|
"step": 5973 |
|
}, |
|
{ |
|
"epoch": 532.0, |
|
"eval_accuracy": 0.9119496855345912, |
|
"eval_loss": 0.41652363538742065, |
|
"eval_runtime": 2.0573, |
|
"eval_samples_per_second": 77.285, |
|
"eval_steps_per_second": 4.861, |
|
"step": 5985 |
|
}, |
|
{ |
|
"epoch": 532.98, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.42472416162490845, |
|
"eval_runtime": 2.1003, |
|
"eval_samples_per_second": 75.704, |
|
"eval_steps_per_second": 4.761, |
|
"step": 5996 |
|
}, |
|
{ |
|
"epoch": 533.33, |
|
"grad_norm": 0.15851238369941711, |
|
"learning_rate": 1.0609848484848485e-05, |
|
"loss": 0.0126, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 533.96, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.4061521589756012, |
|
"eval_runtime": 2.0889, |
|
"eval_samples_per_second": 76.116, |
|
"eval_steps_per_second": 4.787, |
|
"step": 6007 |
|
}, |
|
{ |
|
"epoch": 534.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3752112090587616, |
|
"eval_runtime": 2.0476, |
|
"eval_samples_per_second": 77.651, |
|
"eval_steps_per_second": 4.884, |
|
"step": 6018 |
|
}, |
|
{ |
|
"epoch": 536.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.35743284225463867, |
|
"eval_runtime": 2.2159, |
|
"eval_samples_per_second": 71.753, |
|
"eval_steps_per_second": 4.513, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 536.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3824201226234436, |
|
"eval_runtime": 2.0455, |
|
"eval_samples_per_second": 77.732, |
|
"eval_steps_per_second": 4.889, |
|
"step": 6041 |
|
}, |
|
{ |
|
"epoch": 537.78, |
|
"grad_norm": 0.0922364741563797, |
|
"learning_rate": 1.0420454545454546e-05, |
|
"loss": 0.0126, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 537.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3730430006980896, |
|
"eval_runtime": 2.1192, |
|
"eval_samples_per_second": 75.028, |
|
"eval_steps_per_second": 4.719, |
|
"step": 6052 |
|
}, |
|
{ |
|
"epoch": 538.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3703514337539673, |
|
"eval_runtime": 2.2056, |
|
"eval_samples_per_second": 72.091, |
|
"eval_steps_per_second": 4.534, |
|
"step": 6063 |
|
}, |
|
{ |
|
"epoch": 540.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.38142630457878113, |
|
"eval_runtime": 2.0818, |
|
"eval_samples_per_second": 76.376, |
|
"eval_steps_per_second": 4.804, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 540.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3648853302001953, |
|
"eval_runtime": 2.2199, |
|
"eval_samples_per_second": 71.625, |
|
"eval_steps_per_second": 4.505, |
|
"step": 6086 |
|
}, |
|
{ |
|
"epoch": 541.96, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3810517489910126, |
|
"eval_runtime": 2.0826, |
|
"eval_samples_per_second": 76.345, |
|
"eval_steps_per_second": 4.802, |
|
"step": 6097 |
|
}, |
|
{ |
|
"epoch": 542.22, |
|
"grad_norm": 0.04241061210632324, |
|
"learning_rate": 1.0231060606060607e-05, |
|
"loss": 0.012, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 542.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3544082045555115, |
|
"eval_runtime": 2.08, |
|
"eval_samples_per_second": 76.442, |
|
"eval_steps_per_second": 4.808, |
|
"step": 6108 |
|
}, |
|
{ |
|
"epoch": 544.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3614555597305298, |
|
"eval_runtime": 2.2123, |
|
"eval_samples_per_second": 71.871, |
|
"eval_steps_per_second": 4.52, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 544.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.35575661063194275, |
|
"eval_runtime": 2.1324, |
|
"eval_samples_per_second": 74.564, |
|
"eval_steps_per_second": 4.69, |
|
"step": 6131 |
|
}, |
|
{ |
|
"epoch": 545.96, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.34816914796829224, |
|
"eval_runtime": 2.0819, |
|
"eval_samples_per_second": 76.371, |
|
"eval_steps_per_second": 4.803, |
|
"step": 6142 |
|
}, |
|
{ |
|
"epoch": 546.67, |
|
"grad_norm": 0.5738076567649841, |
|
"learning_rate": 1.0041666666666666e-05, |
|
"loss": 0.0135, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 546.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.36677080392837524, |
|
"eval_runtime": 2.1421, |
|
"eval_samples_per_second": 74.226, |
|
"eval_steps_per_second": 4.668, |
|
"step": 6153 |
|
}, |
|
{ |
|
"epoch": 548.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.34037116169929504, |
|
"eval_runtime": 2.0657, |
|
"eval_samples_per_second": 76.972, |
|
"eval_steps_per_second": 4.841, |
|
"step": 6165 |
|
}, |
|
{ |
|
"epoch": 548.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.33401021361351013, |
|
"eval_runtime": 2.0325, |
|
"eval_samples_per_second": 78.229, |
|
"eval_steps_per_second": 4.92, |
|
"step": 6176 |
|
}, |
|
{ |
|
"epoch": 549.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3377488851547241, |
|
"eval_runtime": 2.1646, |
|
"eval_samples_per_second": 73.456, |
|
"eval_steps_per_second": 4.62, |
|
"step": 6187 |
|
}, |
|
{ |
|
"epoch": 550.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3406839966773987, |
|
"eval_runtime": 2.1382, |
|
"eval_samples_per_second": 74.36, |
|
"eval_steps_per_second": 4.677, |
|
"step": 6198 |
|
}, |
|
{ |
|
"epoch": 551.11, |
|
"grad_norm": 0.34322044253349304, |
|
"learning_rate": 9.852272727272728e-06, |
|
"loss": 0.0101, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 552.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.33890071511268616, |
|
"eval_runtime": 2.0917, |
|
"eval_samples_per_second": 76.015, |
|
"eval_steps_per_second": 4.781, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 552.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.33053550124168396, |
|
"eval_runtime": 2.2779, |
|
"eval_samples_per_second": 69.8, |
|
"eval_steps_per_second": 4.39, |
|
"step": 6221 |
|
}, |
|
{ |
|
"epoch": 553.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.31986501812934875, |
|
"eval_runtime": 1.9669, |
|
"eval_samples_per_second": 80.836, |
|
"eval_steps_per_second": 5.084, |
|
"step": 6232 |
|
}, |
|
{ |
|
"epoch": 554.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.33377256989479065, |
|
"eval_runtime": 2.0395, |
|
"eval_samples_per_second": 77.96, |
|
"eval_steps_per_second": 4.903, |
|
"step": 6243 |
|
}, |
|
{ |
|
"epoch": 555.56, |
|
"grad_norm": 0.1416609138250351, |
|
"learning_rate": 9.662878787878789e-06, |
|
"loss": 0.0175, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 556.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.33228349685668945, |
|
"eval_runtime": 2.1542, |
|
"eval_samples_per_second": 73.811, |
|
"eval_steps_per_second": 4.642, |
|
"step": 6255 |
|
}, |
|
{ |
|
"epoch": 556.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.340250164270401, |
|
"eval_runtime": 2.0563, |
|
"eval_samples_per_second": 77.325, |
|
"eval_steps_per_second": 4.863, |
|
"step": 6266 |
|
}, |
|
{ |
|
"epoch": 557.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34735485911369324, |
|
"eval_runtime": 2.0285, |
|
"eval_samples_per_second": 78.384, |
|
"eval_steps_per_second": 4.93, |
|
"step": 6277 |
|
}, |
|
{ |
|
"epoch": 558.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34990042448043823, |
|
"eval_runtime": 2.1967, |
|
"eval_samples_per_second": 72.38, |
|
"eval_steps_per_second": 4.552, |
|
"step": 6288 |
|
}, |
|
{ |
|
"epoch": 560.0, |
|
"grad_norm": 0.09764547646045685, |
|
"learning_rate": 9.473484848484848e-06, |
|
"loss": 0.0108, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 560.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.342894971370697, |
|
"eval_runtime": 2.026, |
|
"eval_samples_per_second": 78.479, |
|
"eval_steps_per_second": 4.936, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 560.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3396158218383789, |
|
"eval_runtime": 2.3052, |
|
"eval_samples_per_second": 68.976, |
|
"eval_steps_per_second": 4.338, |
|
"step": 6311 |
|
}, |
|
{ |
|
"epoch": 561.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3467164933681488, |
|
"eval_runtime": 2.0425, |
|
"eval_samples_per_second": 77.846, |
|
"eval_steps_per_second": 4.896, |
|
"step": 6322 |
|
}, |
|
{ |
|
"epoch": 562.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3349219858646393, |
|
"eval_runtime": 2.0651, |
|
"eval_samples_per_second": 76.992, |
|
"eval_steps_per_second": 4.842, |
|
"step": 6333 |
|
}, |
|
{ |
|
"epoch": 564.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3380991518497467, |
|
"eval_runtime": 2.111, |
|
"eval_samples_per_second": 75.32, |
|
"eval_steps_per_second": 4.737, |
|
"step": 6345 |
|
}, |
|
{ |
|
"epoch": 564.44, |
|
"grad_norm": 0.021107789129018784, |
|
"learning_rate": 9.284090909090908e-06, |
|
"loss": 0.0139, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 564.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.32741737365722656, |
|
"eval_runtime": 2.1143, |
|
"eval_samples_per_second": 75.203, |
|
"eval_steps_per_second": 4.73, |
|
"step": 6356 |
|
}, |
|
{ |
|
"epoch": 565.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3318650722503662, |
|
"eval_runtime": 1.9953, |
|
"eval_samples_per_second": 79.688, |
|
"eval_steps_per_second": 5.012, |
|
"step": 6367 |
|
}, |
|
{ |
|
"epoch": 566.93, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.33214500546455383, |
|
"eval_runtime": 2.0923, |
|
"eval_samples_per_second": 75.992, |
|
"eval_steps_per_second": 4.779, |
|
"step": 6378 |
|
}, |
|
{ |
|
"epoch": 568.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3546938896179199, |
|
"eval_runtime": 2.1191, |
|
"eval_samples_per_second": 75.033, |
|
"eval_steps_per_second": 4.719, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 568.89, |
|
"grad_norm": 1.3278522491455078, |
|
"learning_rate": 9.09469696969697e-06, |
|
"loss": 0.0138, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 568.98, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.366202175617218, |
|
"eval_runtime": 2.0849, |
|
"eval_samples_per_second": 76.261, |
|
"eval_steps_per_second": 4.796, |
|
"step": 6401 |
|
}, |
|
{ |
|
"epoch": 569.96, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.34554189443588257, |
|
"eval_runtime": 2.2433, |
|
"eval_samples_per_second": 70.878, |
|
"eval_steps_per_second": 4.458, |
|
"step": 6412 |
|
}, |
|
{ |
|
"epoch": 570.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3477872908115387, |
|
"eval_runtime": 2.0921, |
|
"eval_samples_per_second": 76.0, |
|
"eval_steps_per_second": 4.78, |
|
"step": 6423 |
|
}, |
|
{ |
|
"epoch": 572.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3400007486343384, |
|
"eval_runtime": 2.0746, |
|
"eval_samples_per_second": 76.641, |
|
"eval_steps_per_second": 4.82, |
|
"step": 6435 |
|
}, |
|
{ |
|
"epoch": 572.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3512841463088989, |
|
"eval_runtime": 2.0975, |
|
"eval_samples_per_second": 75.803, |
|
"eval_steps_per_second": 4.767, |
|
"step": 6446 |
|
}, |
|
{ |
|
"epoch": 573.33, |
|
"grad_norm": 0.1855485886335373, |
|
"learning_rate": 8.905303030303031e-06, |
|
"loss": 0.0095, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 573.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3461546301841736, |
|
"eval_runtime": 2.067, |
|
"eval_samples_per_second": 76.921, |
|
"eval_steps_per_second": 4.838, |
|
"step": 6457 |
|
}, |
|
{ |
|
"epoch": 574.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.33488187193870544, |
|
"eval_runtime": 2.0691, |
|
"eval_samples_per_second": 76.846, |
|
"eval_steps_per_second": 4.833, |
|
"step": 6468 |
|
}, |
|
{ |
|
"epoch": 576.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.337620347738266, |
|
"eval_runtime": 2.018, |
|
"eval_samples_per_second": 78.793, |
|
"eval_steps_per_second": 4.956, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 576.98, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.33732709288597107, |
|
"eval_runtime": 2.0922, |
|
"eval_samples_per_second": 75.996, |
|
"eval_steps_per_second": 4.78, |
|
"step": 6491 |
|
}, |
|
{ |
|
"epoch": 577.78, |
|
"grad_norm": 0.9204933643341064, |
|
"learning_rate": 8.71590909090909e-06, |
|
"loss": 0.0138, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 577.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3310604989528656, |
|
"eval_runtime": 2.1334, |
|
"eval_samples_per_second": 74.528, |
|
"eval_steps_per_second": 4.687, |
|
"step": 6502 |
|
}, |
|
{ |
|
"epoch": 578.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.33120694756507874, |
|
"eval_runtime": 2.1395, |
|
"eval_samples_per_second": 74.316, |
|
"eval_steps_per_second": 4.674, |
|
"step": 6513 |
|
}, |
|
{ |
|
"epoch": 580.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3291258215904236, |
|
"eval_runtime": 2.1193, |
|
"eval_samples_per_second": 75.024, |
|
"eval_steps_per_second": 4.719, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 580.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3441867232322693, |
|
"eval_runtime": 2.081, |
|
"eval_samples_per_second": 76.405, |
|
"eval_steps_per_second": 4.805, |
|
"step": 6536 |
|
}, |
|
{ |
|
"epoch": 581.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3806348145008087, |
|
"eval_runtime": 2.08, |
|
"eval_samples_per_second": 76.443, |
|
"eval_steps_per_second": 4.808, |
|
"step": 6547 |
|
}, |
|
{ |
|
"epoch": 582.22, |
|
"grad_norm": 1.3162257671356201, |
|
"learning_rate": 8.526515151515151e-06, |
|
"loss": 0.0163, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 582.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.39340561628341675, |
|
"eval_runtime": 2.0419, |
|
"eval_samples_per_second": 77.868, |
|
"eval_steps_per_second": 4.897, |
|
"step": 6558 |
|
}, |
|
{ |
|
"epoch": 584.0, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3990216851234436, |
|
"eval_runtime": 2.049, |
|
"eval_samples_per_second": 77.599, |
|
"eval_steps_per_second": 4.88, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 584.98, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.353302925825119, |
|
"eval_runtime": 2.1595, |
|
"eval_samples_per_second": 73.629, |
|
"eval_steps_per_second": 4.631, |
|
"step": 6581 |
|
}, |
|
{ |
|
"epoch": 585.96, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.34103333950042725, |
|
"eval_runtime": 2.2099, |
|
"eval_samples_per_second": 71.948, |
|
"eval_steps_per_second": 4.525, |
|
"step": 6592 |
|
}, |
|
{ |
|
"epoch": 586.67, |
|
"grad_norm": 0.35993504524230957, |
|
"learning_rate": 8.337121212121213e-06, |
|
"loss": 0.0152, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 586.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3351433575153351, |
|
"eval_runtime": 2.2699, |
|
"eval_samples_per_second": 70.046, |
|
"eval_steps_per_second": 4.405, |
|
"step": 6603 |
|
}, |
|
{ |
|
"epoch": 588.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3369242250919342, |
|
"eval_runtime": 2.117, |
|
"eval_samples_per_second": 75.106, |
|
"eval_steps_per_second": 4.724, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 588.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.35417425632476807, |
|
"eval_runtime": 2.144, |
|
"eval_samples_per_second": 74.161, |
|
"eval_steps_per_second": 4.664, |
|
"step": 6626 |
|
}, |
|
{ |
|
"epoch": 589.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3728938102722168, |
|
"eval_runtime": 2.0531, |
|
"eval_samples_per_second": 77.443, |
|
"eval_steps_per_second": 4.871, |
|
"step": 6637 |
|
}, |
|
{ |
|
"epoch": 590.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.34069618582725525, |
|
"eval_runtime": 2.1327, |
|
"eval_samples_per_second": 74.555, |
|
"eval_steps_per_second": 4.689, |
|
"step": 6648 |
|
}, |
|
{ |
|
"epoch": 591.11, |
|
"grad_norm": 0.19336657226085663, |
|
"learning_rate": 8.147727272727274e-06, |
|
"loss": 0.017, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 592.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3440462052822113, |
|
"eval_runtime": 2.0686, |
|
"eval_samples_per_second": 76.865, |
|
"eval_steps_per_second": 4.834, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 592.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3493140935897827, |
|
"eval_runtime": 2.0648, |
|
"eval_samples_per_second": 77.004, |
|
"eval_steps_per_second": 4.843, |
|
"step": 6671 |
|
}, |
|
{ |
|
"epoch": 593.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.37120524048805237, |
|
"eval_runtime": 2.2033, |
|
"eval_samples_per_second": 72.165, |
|
"eval_steps_per_second": 4.539, |
|
"step": 6682 |
|
}, |
|
{ |
|
"epoch": 594.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.36460721492767334, |
|
"eval_runtime": 2.2563, |
|
"eval_samples_per_second": 70.47, |
|
"eval_steps_per_second": 4.432, |
|
"step": 6693 |
|
}, |
|
{ |
|
"epoch": 595.56, |
|
"grad_norm": 0.017406007274985313, |
|
"learning_rate": 7.958333333333333e-06, |
|
"loss": 0.0113, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 596.0, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.36630791425704956, |
|
"eval_runtime": 2.0788, |
|
"eval_samples_per_second": 76.486, |
|
"eval_steps_per_second": 4.81, |
|
"step": 6705 |
|
}, |
|
{ |
|
"epoch": 596.98, |
|
"eval_accuracy": 0.9245283018867925, |
|
"eval_loss": 0.3725621700286865, |
|
"eval_runtime": 2.226, |
|
"eval_samples_per_second": 71.429, |
|
"eval_steps_per_second": 4.492, |
|
"step": 6716 |
|
}, |
|
{ |
|
"epoch": 597.96, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.35295018553733826, |
|
"eval_runtime": 2.16, |
|
"eval_samples_per_second": 73.611, |
|
"eval_steps_per_second": 4.63, |
|
"step": 6727 |
|
}, |
|
{ |
|
"epoch": 598.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3451589047908783, |
|
"eval_runtime": 2.0598, |
|
"eval_samples_per_second": 77.193, |
|
"eval_steps_per_second": 4.855, |
|
"step": 6738 |
|
}, |
|
{ |
|
"epoch": 600.0, |
|
"grad_norm": 0.1029694527387619, |
|
"learning_rate": 7.768939393939394e-06, |
|
"loss": 0.0115, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 600.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3340095281600952, |
|
"eval_runtime": 2.1945, |
|
"eval_samples_per_second": 72.455, |
|
"eval_steps_per_second": 4.557, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 600.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.34892547130584717, |
|
"eval_runtime": 2.1247, |
|
"eval_samples_per_second": 74.836, |
|
"eval_steps_per_second": 4.707, |
|
"step": 6761 |
|
}, |
|
{ |
|
"epoch": 601.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3408372402191162, |
|
"eval_runtime": 2.1827, |
|
"eval_samples_per_second": 72.846, |
|
"eval_steps_per_second": 4.582, |
|
"step": 6772 |
|
}, |
|
{ |
|
"epoch": 602.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3423627018928528, |
|
"eval_runtime": 2.2182, |
|
"eval_samples_per_second": 71.68, |
|
"eval_steps_per_second": 4.508, |
|
"step": 6783 |
|
}, |
|
{ |
|
"epoch": 604.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34804755449295044, |
|
"eval_runtime": 2.1754, |
|
"eval_samples_per_second": 73.091, |
|
"eval_steps_per_second": 4.597, |
|
"step": 6795 |
|
}, |
|
{ |
|
"epoch": 604.44, |
|
"grad_norm": 0.7808576822280884, |
|
"learning_rate": 7.579545454545454e-06, |
|
"loss": 0.0132, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 604.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34386932849884033, |
|
"eval_runtime": 2.0311, |
|
"eval_samples_per_second": 78.283, |
|
"eval_steps_per_second": 4.923, |
|
"step": 6806 |
|
}, |
|
{ |
|
"epoch": 605.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3530921936035156, |
|
"eval_runtime": 2.102, |
|
"eval_samples_per_second": 75.641, |
|
"eval_steps_per_second": 4.757, |
|
"step": 6817 |
|
}, |
|
{ |
|
"epoch": 606.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3807942271232605, |
|
"eval_runtime": 2.1101, |
|
"eval_samples_per_second": 75.351, |
|
"eval_steps_per_second": 4.739, |
|
"step": 6828 |
|
}, |
|
{ |
|
"epoch": 608.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3441016674041748, |
|
"eval_runtime": 2.1163, |
|
"eval_samples_per_second": 75.133, |
|
"eval_steps_per_second": 4.725, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 608.89, |
|
"grad_norm": 0.31322968006134033, |
|
"learning_rate": 7.390151515151515e-06, |
|
"loss": 0.014, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 608.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3534349203109741, |
|
"eval_runtime": 2.0731, |
|
"eval_samples_per_second": 76.696, |
|
"eval_steps_per_second": 4.824, |
|
"step": 6851 |
|
}, |
|
{ |
|
"epoch": 609.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3583095371723175, |
|
"eval_runtime": 2.1365, |
|
"eval_samples_per_second": 74.419, |
|
"eval_steps_per_second": 4.68, |
|
"step": 6862 |
|
}, |
|
{ |
|
"epoch": 610.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3640231490135193, |
|
"eval_runtime": 2.3226, |
|
"eval_samples_per_second": 68.457, |
|
"eval_steps_per_second": 4.305, |
|
"step": 6873 |
|
}, |
|
{ |
|
"epoch": 612.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3587685227394104, |
|
"eval_runtime": 2.0532, |
|
"eval_samples_per_second": 77.44, |
|
"eval_steps_per_second": 4.87, |
|
"step": 6885 |
|
}, |
|
{ |
|
"epoch": 612.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3662501275539398, |
|
"eval_runtime": 2.1672, |
|
"eval_samples_per_second": 73.368, |
|
"eval_steps_per_second": 4.614, |
|
"step": 6896 |
|
}, |
|
{ |
|
"epoch": 613.33, |
|
"grad_norm": 1.508801817893982, |
|
"learning_rate": 7.200757575757576e-06, |
|
"loss": 0.0089, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 613.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3788923919200897, |
|
"eval_runtime": 2.0361, |
|
"eval_samples_per_second": 78.092, |
|
"eval_steps_per_second": 4.911, |
|
"step": 6907 |
|
}, |
|
{ |
|
"epoch": 614.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.378842294216156, |
|
"eval_runtime": 2.0538, |
|
"eval_samples_per_second": 77.417, |
|
"eval_steps_per_second": 4.869, |
|
"step": 6918 |
|
}, |
|
{ |
|
"epoch": 616.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3528358042240143, |
|
"eval_runtime": 2.0973, |
|
"eval_samples_per_second": 75.811, |
|
"eval_steps_per_second": 4.768, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 616.98, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.3626009523868561, |
|
"eval_runtime": 2.1285, |
|
"eval_samples_per_second": 74.701, |
|
"eval_steps_per_second": 4.698, |
|
"step": 6941 |
|
}, |
|
{ |
|
"epoch": 617.78, |
|
"grad_norm": 0.027906352654099464, |
|
"learning_rate": 7.0113636363636365e-06, |
|
"loss": 0.0135, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 617.96, |
|
"eval_accuracy": 0.9182389937106918, |
|
"eval_loss": 0.3760795593261719, |
|
"eval_runtime": 2.0573, |
|
"eval_samples_per_second": 77.285, |
|
"eval_steps_per_second": 4.861, |
|
"step": 6952 |
|
}, |
|
{ |
|
"epoch": 618.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3911431133747101, |
|
"eval_runtime": 2.3187, |
|
"eval_samples_per_second": 68.573, |
|
"eval_steps_per_second": 4.313, |
|
"step": 6963 |
|
}, |
|
{ |
|
"epoch": 620.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3900914192199707, |
|
"eval_runtime": 2.1186, |
|
"eval_samples_per_second": 75.049, |
|
"eval_steps_per_second": 4.72, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 620.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.4003194272518158, |
|
"eval_runtime": 2.1007, |
|
"eval_samples_per_second": 75.689, |
|
"eval_steps_per_second": 4.76, |
|
"step": 6986 |
|
}, |
|
{ |
|
"epoch": 621.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.36526620388031006, |
|
"eval_runtime": 2.1753, |
|
"eval_samples_per_second": 73.093, |
|
"eval_steps_per_second": 4.597, |
|
"step": 6997 |
|
}, |
|
{ |
|
"epoch": 622.22, |
|
"grad_norm": 0.05157339572906494, |
|
"learning_rate": 6.821969696969697e-06, |
|
"loss": 0.0071, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 622.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.33499374985694885, |
|
"eval_runtime": 2.11, |
|
"eval_samples_per_second": 75.356, |
|
"eval_steps_per_second": 4.739, |
|
"step": 7008 |
|
}, |
|
{ |
|
"epoch": 624.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3353654444217682, |
|
"eval_runtime": 2.0902, |
|
"eval_samples_per_second": 76.069, |
|
"eval_steps_per_second": 4.784, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 624.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.37156394124031067, |
|
"eval_runtime": 2.0375, |
|
"eval_samples_per_second": 78.038, |
|
"eval_steps_per_second": 4.908, |
|
"step": 7031 |
|
}, |
|
{ |
|
"epoch": 625.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3520486354827881, |
|
"eval_runtime": 2.0907, |
|
"eval_samples_per_second": 76.051, |
|
"eval_steps_per_second": 4.783, |
|
"step": 7042 |
|
}, |
|
{ |
|
"epoch": 626.67, |
|
"grad_norm": 0.5914948582649231, |
|
"learning_rate": 6.632575757575758e-06, |
|
"loss": 0.0129, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 626.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3306971490383148, |
|
"eval_runtime": 2.0739, |
|
"eval_samples_per_second": 76.667, |
|
"eval_steps_per_second": 4.822, |
|
"step": 7053 |
|
}, |
|
{ |
|
"epoch": 628.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.33053889870643616, |
|
"eval_runtime": 2.2479, |
|
"eval_samples_per_second": 70.731, |
|
"eval_steps_per_second": 4.449, |
|
"step": 7065 |
|
}, |
|
{ |
|
"epoch": 628.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3301643431186676, |
|
"eval_runtime": 1.9998, |
|
"eval_samples_per_second": 79.509, |
|
"eval_steps_per_second": 5.001, |
|
"step": 7076 |
|
}, |
|
{ |
|
"epoch": 629.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3290785253047943, |
|
"eval_runtime": 2.0356, |
|
"eval_samples_per_second": 78.11, |
|
"eval_steps_per_second": 4.913, |
|
"step": 7087 |
|
}, |
|
{ |
|
"epoch": 630.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3329908847808838, |
|
"eval_runtime": 2.0116, |
|
"eval_samples_per_second": 79.04, |
|
"eval_steps_per_second": 4.971, |
|
"step": 7098 |
|
}, |
|
{ |
|
"epoch": 631.11, |
|
"grad_norm": 1.9344037771224976, |
|
"learning_rate": 6.4431818181818185e-06, |
|
"loss": 0.0091, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 632.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3331502079963684, |
|
"eval_runtime": 2.1322, |
|
"eval_samples_per_second": 74.572, |
|
"eval_steps_per_second": 4.69, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 632.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.33215317130088806, |
|
"eval_runtime": 2.0089, |
|
"eval_samples_per_second": 79.146, |
|
"eval_steps_per_second": 4.978, |
|
"step": 7121 |
|
}, |
|
{ |
|
"epoch": 633.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3437711000442505, |
|
"eval_runtime": 2.1614, |
|
"eval_samples_per_second": 73.562, |
|
"eval_steps_per_second": 4.627, |
|
"step": 7132 |
|
}, |
|
{ |
|
"epoch": 634.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.36110153794288635, |
|
"eval_runtime": 2.1038, |
|
"eval_samples_per_second": 75.577, |
|
"eval_steps_per_second": 4.753, |
|
"step": 7143 |
|
}, |
|
{ |
|
"epoch": 635.56, |
|
"grad_norm": 0.008998346514999866, |
|
"learning_rate": 6.253787878787879e-06, |
|
"loss": 0.0107, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 636.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.34894272685050964, |
|
"eval_runtime": 2.1178, |
|
"eval_samples_per_second": 75.077, |
|
"eval_steps_per_second": 4.722, |
|
"step": 7155 |
|
}, |
|
{ |
|
"epoch": 636.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3357524573802948, |
|
"eval_runtime": 2.1256, |
|
"eval_samples_per_second": 74.803, |
|
"eval_steps_per_second": 4.705, |
|
"step": 7166 |
|
}, |
|
{ |
|
"epoch": 637.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3372538983821869, |
|
"eval_runtime": 2.0938, |
|
"eval_samples_per_second": 75.939, |
|
"eval_steps_per_second": 4.776, |
|
"step": 7177 |
|
}, |
|
{ |
|
"epoch": 638.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3444075584411621, |
|
"eval_runtime": 2.1377, |
|
"eval_samples_per_second": 74.379, |
|
"eval_steps_per_second": 4.678, |
|
"step": 7188 |
|
}, |
|
{ |
|
"epoch": 640.0, |
|
"grad_norm": 0.753413736820221, |
|
"learning_rate": 6.06439393939394e-06, |
|
"loss": 0.0125, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 640.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.36328038573265076, |
|
"eval_runtime": 2.0555, |
|
"eval_samples_per_second": 77.354, |
|
"eval_steps_per_second": 4.865, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 640.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3562980592250824, |
|
"eval_runtime": 2.0343, |
|
"eval_samples_per_second": 78.159, |
|
"eval_steps_per_second": 4.916, |
|
"step": 7211 |
|
}, |
|
{ |
|
"epoch": 641.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.35727426409721375, |
|
"eval_runtime": 2.0513, |
|
"eval_samples_per_second": 77.513, |
|
"eval_steps_per_second": 4.875, |
|
"step": 7222 |
|
}, |
|
{ |
|
"epoch": 642.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3534907400608063, |
|
"eval_runtime": 2.109, |
|
"eval_samples_per_second": 75.393, |
|
"eval_steps_per_second": 4.742, |
|
"step": 7233 |
|
}, |
|
{ |
|
"epoch": 644.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34685295820236206, |
|
"eval_runtime": 2.1171, |
|
"eval_samples_per_second": 75.104, |
|
"eval_steps_per_second": 4.724, |
|
"step": 7245 |
|
}, |
|
{ |
|
"epoch": 644.44, |
|
"grad_norm": 0.040267378091812134, |
|
"learning_rate": 5.8750000000000005e-06, |
|
"loss": 0.0071, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 644.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.34481677412986755, |
|
"eval_runtime": 2.2433, |
|
"eval_samples_per_second": 70.878, |
|
"eval_steps_per_second": 4.458, |
|
"step": 7256 |
|
}, |
|
{ |
|
"epoch": 645.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3445126414299011, |
|
"eval_runtime": 2.09, |
|
"eval_samples_per_second": 76.075, |
|
"eval_steps_per_second": 4.785, |
|
"step": 7267 |
|
}, |
|
{ |
|
"epoch": 646.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3418070077896118, |
|
"eval_runtime": 2.1179, |
|
"eval_samples_per_second": 75.074, |
|
"eval_steps_per_second": 4.722, |
|
"step": 7278 |
|
}, |
|
{ |
|
"epoch": 648.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3541422188282013, |
|
"eval_runtime": 2.0491, |
|
"eval_samples_per_second": 77.596, |
|
"eval_steps_per_second": 4.88, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 648.89, |
|
"grad_norm": 0.02006547898054123, |
|
"learning_rate": 5.685606060606061e-06, |
|
"loss": 0.0076, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 648.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34063196182250977, |
|
"eval_runtime": 2.1334, |
|
"eval_samples_per_second": 74.528, |
|
"eval_steps_per_second": 4.687, |
|
"step": 7301 |
|
}, |
|
{ |
|
"epoch": 649.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3326892852783203, |
|
"eval_runtime": 2.0215, |
|
"eval_samples_per_second": 78.656, |
|
"eval_steps_per_second": 4.947, |
|
"step": 7312 |
|
}, |
|
{ |
|
"epoch": 650.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3381519019603729, |
|
"eval_runtime": 2.1234, |
|
"eval_samples_per_second": 74.878, |
|
"eval_steps_per_second": 4.709, |
|
"step": 7323 |
|
}, |
|
{ |
|
"epoch": 652.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3574288785457611, |
|
"eval_runtime": 2.2212, |
|
"eval_samples_per_second": 71.583, |
|
"eval_steps_per_second": 4.502, |
|
"step": 7335 |
|
}, |
|
{ |
|
"epoch": 652.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3462476134300232, |
|
"eval_runtime": 2.3846, |
|
"eval_samples_per_second": 66.678, |
|
"eval_steps_per_second": 4.194, |
|
"step": 7346 |
|
}, |
|
{ |
|
"epoch": 653.33, |
|
"grad_norm": 0.1632642298936844, |
|
"learning_rate": 5.5e-06, |
|
"loss": 0.0131, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 653.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.33882516622543335, |
|
"eval_runtime": 2.0171, |
|
"eval_samples_per_second": 78.826, |
|
"eval_steps_per_second": 4.958, |
|
"step": 7357 |
|
}, |
|
{ |
|
"epoch": 654.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.337929904460907, |
|
"eval_runtime": 2.1283, |
|
"eval_samples_per_second": 74.708, |
|
"eval_steps_per_second": 4.699, |
|
"step": 7368 |
|
}, |
|
{ |
|
"epoch": 656.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3396049737930298, |
|
"eval_runtime": 2.0868, |
|
"eval_samples_per_second": 76.193, |
|
"eval_steps_per_second": 4.792, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 656.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3436720371246338, |
|
"eval_runtime": 2.0283, |
|
"eval_samples_per_second": 78.391, |
|
"eval_steps_per_second": 4.93, |
|
"step": 7391 |
|
}, |
|
{ |
|
"epoch": 657.78, |
|
"grad_norm": 1.5342937707901, |
|
"learning_rate": 5.3106060606060605e-06, |
|
"loss": 0.0086, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 657.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3466395139694214, |
|
"eval_runtime": 2.1077, |
|
"eval_samples_per_second": 75.438, |
|
"eval_steps_per_second": 4.745, |
|
"step": 7402 |
|
}, |
|
{ |
|
"epoch": 658.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3453463315963745, |
|
"eval_runtime": 2.0776, |
|
"eval_samples_per_second": 76.532, |
|
"eval_steps_per_second": 4.813, |
|
"step": 7413 |
|
}, |
|
{ |
|
"epoch": 660.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3420422077178955, |
|
"eval_runtime": 2.0546, |
|
"eval_samples_per_second": 77.386, |
|
"eval_steps_per_second": 4.867, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 660.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.33706873655319214, |
|
"eval_runtime": 2.1267, |
|
"eval_samples_per_second": 74.764, |
|
"eval_steps_per_second": 4.702, |
|
"step": 7436 |
|
}, |
|
{ |
|
"epoch": 661.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.34426021575927734, |
|
"eval_runtime": 2.0904, |
|
"eval_samples_per_second": 76.061, |
|
"eval_steps_per_second": 4.784, |
|
"step": 7447 |
|
}, |
|
{ |
|
"epoch": 662.22, |
|
"grad_norm": 0.16996954381465912, |
|
"learning_rate": 5.121212121212121e-06, |
|
"loss": 0.0123, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 662.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3473140299320221, |
|
"eval_runtime": 2.0509, |
|
"eval_samples_per_second": 77.526, |
|
"eval_steps_per_second": 4.876, |
|
"step": 7458 |
|
}, |
|
{ |
|
"epoch": 664.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3424939215183258, |
|
"eval_runtime": 2.0641, |
|
"eval_samples_per_second": 77.031, |
|
"eval_steps_per_second": 4.845, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 664.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.345442533493042, |
|
"eval_runtime": 2.0612, |
|
"eval_samples_per_second": 77.138, |
|
"eval_steps_per_second": 4.851, |
|
"step": 7481 |
|
}, |
|
{ |
|
"epoch": 665.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3493753969669342, |
|
"eval_runtime": 1.9848, |
|
"eval_samples_per_second": 80.108, |
|
"eval_steps_per_second": 5.038, |
|
"step": 7492 |
|
}, |
|
{ |
|
"epoch": 666.67, |
|
"grad_norm": 0.08370883017778397, |
|
"learning_rate": 4.931818181818182e-06, |
|
"loss": 0.0083, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 666.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.35356974601745605, |
|
"eval_runtime": 2.1097, |
|
"eval_samples_per_second": 75.368, |
|
"eval_steps_per_second": 4.74, |
|
"step": 7503 |
|
}, |
|
{ |
|
"epoch": 668.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.34760990738868713, |
|
"eval_runtime": 2.1147, |
|
"eval_samples_per_second": 75.188, |
|
"eval_steps_per_second": 4.729, |
|
"step": 7515 |
|
}, |
|
{ |
|
"epoch": 668.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.34870967268943787, |
|
"eval_runtime": 2.0331, |
|
"eval_samples_per_second": 78.206, |
|
"eval_steps_per_second": 4.919, |
|
"step": 7526 |
|
}, |
|
{ |
|
"epoch": 669.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.35328802466392517, |
|
"eval_runtime": 2.4514, |
|
"eval_samples_per_second": 64.861, |
|
"eval_steps_per_second": 4.079, |
|
"step": 7537 |
|
}, |
|
{ |
|
"epoch": 670.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.35539668798446655, |
|
"eval_runtime": 2.1199, |
|
"eval_samples_per_second": 75.003, |
|
"eval_steps_per_second": 4.717, |
|
"step": 7548 |
|
}, |
|
{ |
|
"epoch": 671.11, |
|
"grad_norm": 2.100541353225708, |
|
"learning_rate": 4.7424242424242426e-06, |
|
"loss": 0.0079, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 672.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3482361435890198, |
|
"eval_runtime": 2.1456, |
|
"eval_samples_per_second": 74.104, |
|
"eval_steps_per_second": 4.661, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 672.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.34814804792404175, |
|
"eval_runtime": 2.0856, |
|
"eval_samples_per_second": 76.239, |
|
"eval_steps_per_second": 4.795, |
|
"step": 7571 |
|
}, |
|
{ |
|
"epoch": 673.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.344621866941452, |
|
"eval_runtime": 2.2762, |
|
"eval_samples_per_second": 69.852, |
|
"eval_steps_per_second": 4.393, |
|
"step": 7582 |
|
}, |
|
{ |
|
"epoch": 674.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3432255983352661, |
|
"eval_runtime": 2.0507, |
|
"eval_samples_per_second": 77.533, |
|
"eval_steps_per_second": 4.876, |
|
"step": 7593 |
|
}, |
|
{ |
|
"epoch": 675.56, |
|
"grad_norm": 0.598809003829956, |
|
"learning_rate": 4.553030303030303e-06, |
|
"loss": 0.0111, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 676.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34696489572525024, |
|
"eval_runtime": 2.1457, |
|
"eval_samples_per_second": 74.102, |
|
"eval_steps_per_second": 4.66, |
|
"step": 7605 |
|
}, |
|
{ |
|
"epoch": 676.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.33925533294677734, |
|
"eval_runtime": 2.134, |
|
"eval_samples_per_second": 74.507, |
|
"eval_steps_per_second": 4.686, |
|
"step": 7616 |
|
}, |
|
{ |
|
"epoch": 677.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3386417627334595, |
|
"eval_runtime": 2.0634, |
|
"eval_samples_per_second": 77.059, |
|
"eval_steps_per_second": 4.846, |
|
"step": 7627 |
|
}, |
|
{ |
|
"epoch": 678.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3310278058052063, |
|
"eval_runtime": 2.0308, |
|
"eval_samples_per_second": 78.293, |
|
"eval_steps_per_second": 4.924, |
|
"step": 7638 |
|
}, |
|
{ |
|
"epoch": 680.0, |
|
"grad_norm": 0.0734761655330658, |
|
"learning_rate": 4.363636363636364e-06, |
|
"loss": 0.0107, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 680.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.329887717962265, |
|
"eval_runtime": 2.214, |
|
"eval_samples_per_second": 71.816, |
|
"eval_steps_per_second": 4.517, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 680.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.33161696791648865, |
|
"eval_runtime": 2.0168, |
|
"eval_samples_per_second": 78.839, |
|
"eval_steps_per_second": 4.958, |
|
"step": 7661 |
|
}, |
|
{ |
|
"epoch": 681.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.33317527174949646, |
|
"eval_runtime": 2.1533, |
|
"eval_samples_per_second": 73.84, |
|
"eval_steps_per_second": 4.644, |
|
"step": 7672 |
|
}, |
|
{ |
|
"epoch": 682.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3443678021430969, |
|
"eval_runtime": 2.1824, |
|
"eval_samples_per_second": 72.855, |
|
"eval_steps_per_second": 4.582, |
|
"step": 7683 |
|
}, |
|
{ |
|
"epoch": 684.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3444632291793823, |
|
"eval_runtime": 1.9773, |
|
"eval_samples_per_second": 80.414, |
|
"eval_steps_per_second": 5.058, |
|
"step": 7695 |
|
}, |
|
{ |
|
"epoch": 684.44, |
|
"grad_norm": 1.5188406705856323, |
|
"learning_rate": 4.1742424242424246e-06, |
|
"loss": 0.0091, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 684.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3443754017353058, |
|
"eval_runtime": 2.0477, |
|
"eval_samples_per_second": 77.647, |
|
"eval_steps_per_second": 4.883, |
|
"step": 7706 |
|
}, |
|
{ |
|
"epoch": 685.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.34085437655448914, |
|
"eval_runtime": 2.2252, |
|
"eval_samples_per_second": 71.453, |
|
"eval_steps_per_second": 4.494, |
|
"step": 7717 |
|
}, |
|
{ |
|
"epoch": 686.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.34413453936576843, |
|
"eval_runtime": 2.1451, |
|
"eval_samples_per_second": 74.121, |
|
"eval_steps_per_second": 4.662, |
|
"step": 7728 |
|
}, |
|
{ |
|
"epoch": 688.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.35173678398132324, |
|
"eval_runtime": 2.0413, |
|
"eval_samples_per_second": 77.89, |
|
"eval_steps_per_second": 4.899, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 688.89, |
|
"grad_norm": 1.0382517576217651, |
|
"learning_rate": 3.984848484848484e-06, |
|
"loss": 0.0081, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 688.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3521307110786438, |
|
"eval_runtime": 2.0937, |
|
"eval_samples_per_second": 75.942, |
|
"eval_steps_per_second": 4.776, |
|
"step": 7751 |
|
}, |
|
{ |
|
"epoch": 689.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.350664883852005, |
|
"eval_runtime": 2.1003, |
|
"eval_samples_per_second": 75.703, |
|
"eval_steps_per_second": 4.761, |
|
"step": 7762 |
|
}, |
|
{ |
|
"epoch": 690.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3460524082183838, |
|
"eval_runtime": 2.0791, |
|
"eval_samples_per_second": 76.475, |
|
"eval_steps_per_second": 4.81, |
|
"step": 7773 |
|
}, |
|
{ |
|
"epoch": 692.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.349832683801651, |
|
"eval_runtime": 2.0457, |
|
"eval_samples_per_second": 77.724, |
|
"eval_steps_per_second": 4.888, |
|
"step": 7785 |
|
}, |
|
{ |
|
"epoch": 692.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.35444310307502747, |
|
"eval_runtime": 2.1547, |
|
"eval_samples_per_second": 73.793, |
|
"eval_steps_per_second": 4.641, |
|
"step": 7796 |
|
}, |
|
{ |
|
"epoch": 693.33, |
|
"grad_norm": 0.36742502450942993, |
|
"learning_rate": 3.795454545454546e-06, |
|
"loss": 0.009, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 693.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.35569891333580017, |
|
"eval_runtime": 2.0236, |
|
"eval_samples_per_second": 78.575, |
|
"eval_steps_per_second": 4.942, |
|
"step": 7807 |
|
}, |
|
{ |
|
"epoch": 694.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.35327550768852234, |
|
"eval_runtime": 2.057, |
|
"eval_samples_per_second": 77.297, |
|
"eval_steps_per_second": 4.861, |
|
"step": 7818 |
|
}, |
|
{ |
|
"epoch": 696.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3559163212776184, |
|
"eval_runtime": 2.203, |
|
"eval_samples_per_second": 72.173, |
|
"eval_steps_per_second": 4.539, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 696.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.35951152443885803, |
|
"eval_runtime": 2.0835, |
|
"eval_samples_per_second": 76.315, |
|
"eval_steps_per_second": 4.8, |
|
"step": 7841 |
|
}, |
|
{ |
|
"epoch": 697.78, |
|
"grad_norm": 0.10021142661571503, |
|
"learning_rate": 3.606060606060606e-06, |
|
"loss": 0.0078, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 697.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3617425560951233, |
|
"eval_runtime": 2.0937, |
|
"eval_samples_per_second": 75.941, |
|
"eval_steps_per_second": 4.776, |
|
"step": 7852 |
|
}, |
|
{ |
|
"epoch": 698.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3614467978477478, |
|
"eval_runtime": 2.2589, |
|
"eval_samples_per_second": 70.389, |
|
"eval_steps_per_second": 4.427, |
|
"step": 7863 |
|
}, |
|
{ |
|
"epoch": 700.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.34519079327583313, |
|
"eval_runtime": 2.046, |
|
"eval_samples_per_second": 77.712, |
|
"eval_steps_per_second": 4.888, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 700.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.34312644600868225, |
|
"eval_runtime": 2.143, |
|
"eval_samples_per_second": 74.196, |
|
"eval_steps_per_second": 4.666, |
|
"step": 7886 |
|
}, |
|
{ |
|
"epoch": 701.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.34687530994415283, |
|
"eval_runtime": 2.1317, |
|
"eval_samples_per_second": 74.59, |
|
"eval_steps_per_second": 4.691, |
|
"step": 7897 |
|
}, |
|
{ |
|
"epoch": 702.22, |
|
"grad_norm": 0.013305970467627048, |
|
"learning_rate": 3.416666666666667e-06, |
|
"loss": 0.0102, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 702.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3564489483833313, |
|
"eval_runtime": 2.0468, |
|
"eval_samples_per_second": 77.682, |
|
"eval_steps_per_second": 4.886, |
|
"step": 7908 |
|
}, |
|
{ |
|
"epoch": 704.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.35935157537460327, |
|
"eval_runtime": 2.0233, |
|
"eval_samples_per_second": 78.584, |
|
"eval_steps_per_second": 4.942, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 704.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3517804443836212, |
|
"eval_runtime": 2.2107, |
|
"eval_samples_per_second": 71.924, |
|
"eval_steps_per_second": 4.524, |
|
"step": 7931 |
|
}, |
|
{ |
|
"epoch": 705.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3444287180900574, |
|
"eval_runtime": 2.0396, |
|
"eval_samples_per_second": 77.958, |
|
"eval_steps_per_second": 4.903, |
|
"step": 7942 |
|
}, |
|
{ |
|
"epoch": 706.67, |
|
"grad_norm": 1.1949517726898193, |
|
"learning_rate": 3.2272727272727275e-06, |
|
"loss": 0.008, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 706.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.34264177083969116, |
|
"eval_runtime": 2.0811, |
|
"eval_samples_per_second": 76.402, |
|
"eval_steps_per_second": 4.805, |
|
"step": 7953 |
|
}, |
|
{ |
|
"epoch": 708.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.34593525528907776, |
|
"eval_runtime": 2.1049, |
|
"eval_samples_per_second": 75.537, |
|
"eval_steps_per_second": 4.751, |
|
"step": 7965 |
|
}, |
|
{ |
|
"epoch": 708.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3511156439781189, |
|
"eval_runtime": 2.0385, |
|
"eval_samples_per_second": 77.999, |
|
"eval_steps_per_second": 4.906, |
|
"step": 7976 |
|
}, |
|
{ |
|
"epoch": 709.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.35437288880348206, |
|
"eval_runtime": 2.0421, |
|
"eval_samples_per_second": 77.862, |
|
"eval_steps_per_second": 4.897, |
|
"step": 7987 |
|
}, |
|
{ |
|
"epoch": 710.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3566732704639435, |
|
"eval_runtime": 2.2624, |
|
"eval_samples_per_second": 70.28, |
|
"eval_steps_per_second": 4.42, |
|
"step": 7998 |
|
}, |
|
{ |
|
"epoch": 711.11, |
|
"grad_norm": 0.8354963660240173, |
|
"learning_rate": 3.0378787878787878e-06, |
|
"loss": 0.0053, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 712.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3673837184906006, |
|
"eval_runtime": 2.0161, |
|
"eval_samples_per_second": 78.866, |
|
"eval_steps_per_second": 4.96, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 712.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3630300760269165, |
|
"eval_runtime": 2.0691, |
|
"eval_samples_per_second": 76.844, |
|
"eval_steps_per_second": 4.833, |
|
"step": 8021 |
|
}, |
|
{ |
|
"epoch": 713.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3602018654346466, |
|
"eval_runtime": 2.0814, |
|
"eval_samples_per_second": 76.389, |
|
"eval_steps_per_second": 4.804, |
|
"step": 8032 |
|
}, |
|
{ |
|
"epoch": 714.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.35657405853271484, |
|
"eval_runtime": 2.0547, |
|
"eval_samples_per_second": 77.384, |
|
"eval_steps_per_second": 4.867, |
|
"step": 8043 |
|
}, |
|
{ |
|
"epoch": 715.56, |
|
"grad_norm": 0.17041368782520294, |
|
"learning_rate": 2.8484848484848484e-06, |
|
"loss": 0.0071, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 716.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3645796477794647, |
|
"eval_runtime": 2.0104, |
|
"eval_samples_per_second": 79.087, |
|
"eval_steps_per_second": 4.974, |
|
"step": 8055 |
|
}, |
|
{ |
|
"epoch": 716.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.364641398191452, |
|
"eval_runtime": 2.0723, |
|
"eval_samples_per_second": 76.725, |
|
"eval_steps_per_second": 4.825, |
|
"step": 8066 |
|
}, |
|
{ |
|
"epoch": 717.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3593458831310272, |
|
"eval_runtime": 2.017, |
|
"eval_samples_per_second": 78.83, |
|
"eval_steps_per_second": 4.958, |
|
"step": 8077 |
|
}, |
|
{ |
|
"epoch": 718.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3625403344631195, |
|
"eval_runtime": 2.1034, |
|
"eval_samples_per_second": 75.591, |
|
"eval_steps_per_second": 4.754, |
|
"step": 8088 |
|
}, |
|
{ |
|
"epoch": 720.0, |
|
"grad_norm": 0.7891609072685242, |
|
"learning_rate": 2.659090909090909e-06, |
|
"loss": 0.0071, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 720.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.36099299788475037, |
|
"eval_runtime": 2.0137, |
|
"eval_samples_per_second": 78.958, |
|
"eval_steps_per_second": 4.966, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 720.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.35885581374168396, |
|
"eval_runtime": 2.0236, |
|
"eval_samples_per_second": 78.572, |
|
"eval_steps_per_second": 4.942, |
|
"step": 8111 |
|
}, |
|
{ |
|
"epoch": 721.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3528722822666168, |
|
"eval_runtime": 2.0499, |
|
"eval_samples_per_second": 77.565, |
|
"eval_steps_per_second": 4.878, |
|
"step": 8122 |
|
}, |
|
{ |
|
"epoch": 722.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.34843915700912476, |
|
"eval_runtime": 2.0515, |
|
"eval_samples_per_second": 77.504, |
|
"eval_steps_per_second": 4.874, |
|
"step": 8133 |
|
}, |
|
{ |
|
"epoch": 724.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3468559682369232, |
|
"eval_runtime": 2.0267, |
|
"eval_samples_per_second": 78.452, |
|
"eval_steps_per_second": 4.934, |
|
"step": 8145 |
|
}, |
|
{ |
|
"epoch": 724.44, |
|
"grad_norm": 0.013204416260123253, |
|
"learning_rate": 2.46969696969697e-06, |
|
"loss": 0.0098, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 724.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.34806957840919495, |
|
"eval_runtime": 2.0094, |
|
"eval_samples_per_second": 79.126, |
|
"eval_steps_per_second": 4.976, |
|
"step": 8156 |
|
}, |
|
{ |
|
"epoch": 725.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.34636813402175903, |
|
"eval_runtime": 2.1662, |
|
"eval_samples_per_second": 73.4, |
|
"eval_steps_per_second": 4.616, |
|
"step": 8167 |
|
}, |
|
{ |
|
"epoch": 726.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.34824779629707336, |
|
"eval_runtime": 2.0311, |
|
"eval_samples_per_second": 78.282, |
|
"eval_steps_per_second": 4.923, |
|
"step": 8178 |
|
}, |
|
{ |
|
"epoch": 728.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.34667864441871643, |
|
"eval_runtime": 2.2582, |
|
"eval_samples_per_second": 70.411, |
|
"eval_steps_per_second": 4.428, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 728.89, |
|
"grad_norm": 1.7239004373550415, |
|
"learning_rate": 2.2803030303030305e-06, |
|
"loss": 0.0159, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 728.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.346113383769989, |
|
"eval_runtime": 2.0824, |
|
"eval_samples_per_second": 76.353, |
|
"eval_steps_per_second": 4.802, |
|
"step": 8201 |
|
}, |
|
{ |
|
"epoch": 729.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3437664210796356, |
|
"eval_runtime": 2.0394, |
|
"eval_samples_per_second": 77.966, |
|
"eval_steps_per_second": 4.904, |
|
"step": 8212 |
|
}, |
|
{ |
|
"epoch": 730.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.33936139941215515, |
|
"eval_runtime": 2.0701, |
|
"eval_samples_per_second": 76.809, |
|
"eval_steps_per_second": 4.831, |
|
"step": 8223 |
|
}, |
|
{ |
|
"epoch": 732.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3355594277381897, |
|
"eval_runtime": 2.1359, |
|
"eval_samples_per_second": 74.442, |
|
"eval_steps_per_second": 4.682, |
|
"step": 8235 |
|
}, |
|
{ |
|
"epoch": 732.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3355758488178253, |
|
"eval_runtime": 2.0241, |
|
"eval_samples_per_second": 78.553, |
|
"eval_steps_per_second": 4.94, |
|
"step": 8246 |
|
}, |
|
{ |
|
"epoch": 733.33, |
|
"grad_norm": 1.1134917736053467, |
|
"learning_rate": 2.090909090909091e-06, |
|
"loss": 0.0128, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 733.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.337179034948349, |
|
"eval_runtime": 2.162, |
|
"eval_samples_per_second": 73.543, |
|
"eval_steps_per_second": 4.625, |
|
"step": 8257 |
|
}, |
|
{ |
|
"epoch": 734.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3391708731651306, |
|
"eval_runtime": 2.0183, |
|
"eval_samples_per_second": 78.778, |
|
"eval_steps_per_second": 4.955, |
|
"step": 8268 |
|
}, |
|
{ |
|
"epoch": 736.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3454706072807312, |
|
"eval_runtime": 2.037, |
|
"eval_samples_per_second": 78.056, |
|
"eval_steps_per_second": 4.909, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 736.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.34865179657936096, |
|
"eval_runtime": 2.1268, |
|
"eval_samples_per_second": 74.76, |
|
"eval_steps_per_second": 4.702, |
|
"step": 8291 |
|
}, |
|
{ |
|
"epoch": 737.78, |
|
"grad_norm": 0.008208476938307285, |
|
"learning_rate": 1.9015151515151518e-06, |
|
"loss": 0.0086, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 737.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3467850983142853, |
|
"eval_runtime": 2.1854, |
|
"eval_samples_per_second": 72.756, |
|
"eval_steps_per_second": 4.576, |
|
"step": 8302 |
|
}, |
|
{ |
|
"epoch": 738.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.344488263130188, |
|
"eval_runtime": 2.0623, |
|
"eval_samples_per_second": 77.099, |
|
"eval_steps_per_second": 4.849, |
|
"step": 8313 |
|
}, |
|
{ |
|
"epoch": 740.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.34248578548431396, |
|
"eval_runtime": 2.0582, |
|
"eval_samples_per_second": 77.254, |
|
"eval_steps_per_second": 4.859, |
|
"step": 8325 |
|
}, |
|
{ |
|
"epoch": 740.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3452531397342682, |
|
"eval_runtime": 2.1556, |
|
"eval_samples_per_second": 73.762, |
|
"eval_steps_per_second": 4.639, |
|
"step": 8336 |
|
}, |
|
{ |
|
"epoch": 741.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.34475868940353394, |
|
"eval_runtime": 2.0516, |
|
"eval_samples_per_second": 77.5, |
|
"eval_steps_per_second": 4.874, |
|
"step": 8347 |
|
}, |
|
{ |
|
"epoch": 742.22, |
|
"grad_norm": 0.2444353848695755, |
|
"learning_rate": 1.712121212121212e-06, |
|
"loss": 0.011, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 742.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.34120240807533264, |
|
"eval_runtime": 2.0936, |
|
"eval_samples_per_second": 75.945, |
|
"eval_steps_per_second": 4.776, |
|
"step": 8358 |
|
}, |
|
{ |
|
"epoch": 744.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.33924660086631775, |
|
"eval_runtime": 2.2099, |
|
"eval_samples_per_second": 71.948, |
|
"eval_steps_per_second": 4.525, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 744.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3390309512615204, |
|
"eval_runtime": 1.9925, |
|
"eval_samples_per_second": 79.801, |
|
"eval_steps_per_second": 5.019, |
|
"step": 8381 |
|
}, |
|
{ |
|
"epoch": 745.96, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3395291268825531, |
|
"eval_runtime": 1.9807, |
|
"eval_samples_per_second": 80.274, |
|
"eval_steps_per_second": 5.049, |
|
"step": 8392 |
|
}, |
|
{ |
|
"epoch": 746.67, |
|
"grad_norm": 0.8103430867195129, |
|
"learning_rate": 1.5227272727272727e-06, |
|
"loss": 0.0074, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 746.93, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3383350074291229, |
|
"eval_runtime": 2.2625, |
|
"eval_samples_per_second": 70.276, |
|
"eval_steps_per_second": 4.42, |
|
"step": 8403 |
|
}, |
|
{ |
|
"epoch": 748.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.33776676654815674, |
|
"eval_runtime": 2.0087, |
|
"eval_samples_per_second": 79.157, |
|
"eval_steps_per_second": 4.978, |
|
"step": 8415 |
|
}, |
|
{ |
|
"epoch": 748.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3348415195941925, |
|
"eval_runtime": 2.0796, |
|
"eval_samples_per_second": 76.457, |
|
"eval_steps_per_second": 4.809, |
|
"step": 8426 |
|
}, |
|
{ |
|
"epoch": 749.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.33347979187965393, |
|
"eval_runtime": 2.1871, |
|
"eval_samples_per_second": 72.698, |
|
"eval_steps_per_second": 4.572, |
|
"step": 8437 |
|
}, |
|
{ |
|
"epoch": 750.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.33422428369522095, |
|
"eval_runtime": 2.069, |
|
"eval_samples_per_second": 76.849, |
|
"eval_steps_per_second": 4.833, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 751.11, |
|
"grad_norm": 1.5617446899414062, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 0.0087, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 752.0, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.33466464281082153, |
|
"eval_runtime": 2.0175, |
|
"eval_samples_per_second": 78.81, |
|
"eval_steps_per_second": 4.957, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 752.98, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.33632901310920715, |
|
"eval_runtime": 2.2613, |
|
"eval_samples_per_second": 70.315, |
|
"eval_steps_per_second": 4.422, |
|
"step": 8471 |
|
}, |
|
{ |
|
"epoch": 753.96, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3377835154533386, |
|
"eval_runtime": 2.0093, |
|
"eval_samples_per_second": 79.131, |
|
"eval_steps_per_second": 4.977, |
|
"step": 8482 |
|
}, |
|
{ |
|
"epoch": 754.93, |
|
"eval_accuracy": 0.9433962264150944, |
|
"eval_loss": 0.3383637070655823, |
|
"eval_runtime": 2.0348, |
|
"eval_samples_per_second": 78.139, |
|
"eval_steps_per_second": 4.914, |
|
"step": 8493 |
|
}, |
|
{ |
|
"epoch": 755.56, |
|
"grad_norm": 1.2671109437942505, |
|
"learning_rate": 1.143939393939394e-06, |
|
"loss": 0.0061, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 756.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3406466245651245, |
|
"eval_runtime": 2.0595, |
|
"eval_samples_per_second": 77.203, |
|
"eval_steps_per_second": 4.856, |
|
"step": 8505 |
|
}, |
|
{ |
|
"epoch": 756.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.34400761127471924, |
|
"eval_runtime": 1.9798, |
|
"eval_samples_per_second": 80.313, |
|
"eval_steps_per_second": 5.051, |
|
"step": 8516 |
|
}, |
|
{ |
|
"epoch": 757.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34409239888191223, |
|
"eval_runtime": 2.0569, |
|
"eval_samples_per_second": 77.301, |
|
"eval_steps_per_second": 4.862, |
|
"step": 8527 |
|
}, |
|
{ |
|
"epoch": 758.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34241315722465515, |
|
"eval_runtime": 2.0733, |
|
"eval_samples_per_second": 76.691, |
|
"eval_steps_per_second": 4.823, |
|
"step": 8538 |
|
}, |
|
{ |
|
"epoch": 760.0, |
|
"grad_norm": 2.0512726306915283, |
|
"learning_rate": 9.545454545454546e-07, |
|
"loss": 0.0119, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 760.0, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3426421582698822, |
|
"eval_runtime": 2.0315, |
|
"eval_samples_per_second": 78.268, |
|
"eval_steps_per_second": 4.922, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 760.98, |
|
"eval_accuracy": 0.9371069182389937, |
|
"eval_loss": 0.3427829444408417, |
|
"eval_runtime": 2.1633, |
|
"eval_samples_per_second": 73.499, |
|
"eval_steps_per_second": 4.623, |
|
"step": 8561 |
|
}, |
|
{ |
|
"epoch": 761.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34399789571762085, |
|
"eval_runtime": 2.1363, |
|
"eval_samples_per_second": 74.428, |
|
"eval_steps_per_second": 4.681, |
|
"step": 8572 |
|
}, |
|
{ |
|
"epoch": 762.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3443286418914795, |
|
"eval_runtime": 2.0533, |
|
"eval_samples_per_second": 77.437, |
|
"eval_steps_per_second": 4.87, |
|
"step": 8583 |
|
}, |
|
{ |
|
"epoch": 764.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.345469206571579, |
|
"eval_runtime": 1.9651, |
|
"eval_samples_per_second": 80.911, |
|
"eval_steps_per_second": 5.089, |
|
"step": 8595 |
|
}, |
|
{ |
|
"epoch": 764.44, |
|
"grad_norm": 0.15614187717437744, |
|
"learning_rate": 7.651515151515152e-07, |
|
"loss": 0.0056, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 764.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34602606296539307, |
|
"eval_runtime": 2.0712, |
|
"eval_samples_per_second": 76.769, |
|
"eval_steps_per_second": 4.828, |
|
"step": 8606 |
|
}, |
|
{ |
|
"epoch": 765.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3463137745857239, |
|
"eval_runtime": 1.9634, |
|
"eval_samples_per_second": 80.983, |
|
"eval_steps_per_second": 5.093, |
|
"step": 8617 |
|
}, |
|
{ |
|
"epoch": 766.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34662124514579773, |
|
"eval_runtime": 2.0264, |
|
"eval_samples_per_second": 78.466, |
|
"eval_steps_per_second": 4.935, |
|
"step": 8628 |
|
}, |
|
{ |
|
"epoch": 768.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3465888202190399, |
|
"eval_runtime": 2.1276, |
|
"eval_samples_per_second": 74.732, |
|
"eval_steps_per_second": 4.7, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 768.89, |
|
"grad_norm": 0.13273529708385468, |
|
"learning_rate": 5.757575757575757e-07, |
|
"loss": 0.0094, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 768.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34740516543388367, |
|
"eval_runtime": 1.986, |
|
"eval_samples_per_second": 80.062, |
|
"eval_steps_per_second": 5.035, |
|
"step": 8651 |
|
}, |
|
{ |
|
"epoch": 769.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3476426601409912, |
|
"eval_runtime": 2.2993, |
|
"eval_samples_per_second": 69.152, |
|
"eval_steps_per_second": 4.349, |
|
"step": 8662 |
|
}, |
|
{ |
|
"epoch": 770.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34822559356689453, |
|
"eval_runtime": 2.054, |
|
"eval_samples_per_second": 77.411, |
|
"eval_steps_per_second": 4.869, |
|
"step": 8673 |
|
}, |
|
{ |
|
"epoch": 772.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.348609060049057, |
|
"eval_runtime": 2.0775, |
|
"eval_samples_per_second": 76.533, |
|
"eval_steps_per_second": 4.813, |
|
"step": 8685 |
|
}, |
|
{ |
|
"epoch": 772.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34849491715431213, |
|
"eval_runtime": 1.9848, |
|
"eval_samples_per_second": 80.11, |
|
"eval_steps_per_second": 5.038, |
|
"step": 8696 |
|
}, |
|
{ |
|
"epoch": 773.33, |
|
"grad_norm": 2.092862606048584, |
|
"learning_rate": 3.8636363636363636e-07, |
|
"loss": 0.014, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 773.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3478315770626068, |
|
"eval_runtime": 2.0318, |
|
"eval_samples_per_second": 78.257, |
|
"eval_steps_per_second": 4.922, |
|
"step": 8707 |
|
}, |
|
{ |
|
"epoch": 774.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.347221702337265, |
|
"eval_runtime": 2.0723, |
|
"eval_samples_per_second": 76.726, |
|
"eval_steps_per_second": 4.826, |
|
"step": 8718 |
|
}, |
|
{ |
|
"epoch": 776.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34651002287864685, |
|
"eval_runtime": 1.9895, |
|
"eval_samples_per_second": 79.92, |
|
"eval_steps_per_second": 5.026, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 776.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3461478352546692, |
|
"eval_runtime": 2.0438, |
|
"eval_samples_per_second": 77.796, |
|
"eval_steps_per_second": 4.893, |
|
"step": 8741 |
|
}, |
|
{ |
|
"epoch": 777.78, |
|
"grad_norm": 0.42866629362106323, |
|
"learning_rate": 1.9696969696969696e-07, |
|
"loss": 0.0126, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 777.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3467194736003876, |
|
"eval_runtime": 2.0767, |
|
"eval_samples_per_second": 76.564, |
|
"eval_steps_per_second": 4.815, |
|
"step": 8752 |
|
}, |
|
{ |
|
"epoch": 778.93, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3471050262451172, |
|
"eval_runtime": 2.0846, |
|
"eval_samples_per_second": 76.272, |
|
"eval_steps_per_second": 4.797, |
|
"step": 8763 |
|
}, |
|
{ |
|
"epoch": 780.0, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.34714454412460327, |
|
"eval_runtime": 2.1516, |
|
"eval_samples_per_second": 73.897, |
|
"eval_steps_per_second": 4.648, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 780.98, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3471665382385254, |
|
"eval_runtime": 2.0781, |
|
"eval_samples_per_second": 76.511, |
|
"eval_steps_per_second": 4.812, |
|
"step": 8786 |
|
}, |
|
{ |
|
"epoch": 781.96, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3471100628376007, |
|
"eval_runtime": 2.0029, |
|
"eval_samples_per_second": 79.386, |
|
"eval_steps_per_second": 4.993, |
|
"step": 8797 |
|
}, |
|
{ |
|
"epoch": 782.22, |
|
"grad_norm": 0.060126595199108124, |
|
"learning_rate": 7.575757575757576e-09, |
|
"loss": 0.0048, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 782.22, |
|
"eval_accuracy": 0.9308176100628931, |
|
"eval_loss": 0.3471885025501251, |
|
"eval_runtime": 2.0337, |
|
"eval_samples_per_second": 78.181, |
|
"eval_steps_per_second": 4.917, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 782.22, |
|
"step": 8800, |
|
"total_flos": 4.912188447589224e+18, |
|
"train_loss": 0.0709631282125007, |
|
"train_runtime": 5794.2307, |
|
"train_samples_per_second": 98.995, |
|
"train_steps_per_second": 1.519 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 8800, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 800, |
|
"save_steps": 500, |
|
"total_flos": 4.912188447589224e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|