|
{ |
|
"best_metric": 0.08489208633093526, |
|
"best_model_checkpoint": "resnet-50-finetuned-pokemon-finetuned-pokemon/checkpoint-2486", |
|
"epoch": 99.34640522875817, |
|
"eval_steps": 500, |
|
"global_step": 3800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.3157894736842106e-06, |
|
"loss": 1.1424, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.631578947368421e-06, |
|
"loss": 1.1313, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.9473684210526315e-06, |
|
"loss": 1.1894, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.013669064748201438, |
|
"eval_loss": 9.211542129516602, |
|
"eval_runtime": 5.8886, |
|
"eval_samples_per_second": 236.048, |
|
"eval_steps_per_second": 7.472, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5.263157894736842e-06, |
|
"loss": 1.1934, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.578947368421053e-06, |
|
"loss": 1.1333, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 7.894736842105263e-06, |
|
"loss": 1.0844, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 9.210526315789474e-06, |
|
"loss": 1.1389, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.012949640287769784, |
|
"eval_loss": 9.252067565917969, |
|
"eval_runtime": 5.8088, |
|
"eval_samples_per_second": 239.291, |
|
"eval_steps_per_second": 7.575, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.0526315789473684e-05, |
|
"loss": 1.1388, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1842105263157895e-05, |
|
"loss": 1.1188, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.3157894736842106e-05, |
|
"loss": 1.1029, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.4473684210526317e-05, |
|
"loss": 1.0432, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_accuracy": 0.014388489208633094, |
|
"eval_loss": 9.476502418518066, |
|
"eval_runtime": 5.8873, |
|
"eval_samples_per_second": 236.102, |
|
"eval_steps_per_second": 7.474, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.5789473684210526e-05, |
|
"loss": 1.124, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.7105263157894737e-05, |
|
"loss": 1.062, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 1.8421052631578947e-05, |
|
"loss": 1.1051, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 1.9736842105263158e-05, |
|
"loss": 1.0625, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.013669064748201438, |
|
"eval_loss": 9.7667818069458, |
|
"eval_runtime": 5.875, |
|
"eval_samples_per_second": 236.595, |
|
"eval_steps_per_second": 7.489, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 2.105263157894737e-05, |
|
"loss": 1.1224, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 2.236842105263158e-05, |
|
"loss": 1.0557, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 2.368421052631579e-05, |
|
"loss": 1.0433, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.0805, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.013669064748201438, |
|
"eval_loss": 10.252641677856445, |
|
"eval_runtime": 5.9184, |
|
"eval_samples_per_second": 234.862, |
|
"eval_steps_per_second": 7.434, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 2.6315789473684212e-05, |
|
"loss": 1.0663, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 2.7631578947368426e-05, |
|
"loss": 1.0406, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 2.8947368421052634e-05, |
|
"loss": 1.0353, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_accuracy": 0.012949640287769784, |
|
"eval_loss": 10.323753356933594, |
|
"eval_runtime": 5.9005, |
|
"eval_samples_per_second": 235.572, |
|
"eval_steps_per_second": 7.457, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 3.0263157894736844e-05, |
|
"loss": 1.0401, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 3.157894736842105e-05, |
|
"loss": 1.0144, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 3.289473684210527e-05, |
|
"loss": 1.0139, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 3.421052631578947e-05, |
|
"loss": 0.9747, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_accuracy": 0.016546762589928057, |
|
"eval_loss": 10.577895164489746, |
|
"eval_runtime": 6.0278, |
|
"eval_samples_per_second": 230.598, |
|
"eval_steps_per_second": 7.299, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 3.5526315789473684e-05, |
|
"loss": 1.0199, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 3.6842105263157895e-05, |
|
"loss": 0.9931, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 3.815789473684211e-05, |
|
"loss": 0.9694, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 3.9473684210526316e-05, |
|
"loss": 0.9708, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.017985611510791366, |
|
"eval_loss": 10.745814323425293, |
|
"eval_runtime": 6.0966, |
|
"eval_samples_per_second": 227.997, |
|
"eval_steps_per_second": 7.217, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 4.078947368421053e-05, |
|
"loss": 0.9497, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 4.210526315789474e-05, |
|
"loss": 0.8812, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 4.342105263157895e-05, |
|
"loss": 0.8991, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 4.473684210526316e-05, |
|
"loss": 0.8886, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.019424460431654675, |
|
"eval_loss": 11.007186889648438, |
|
"eval_runtime": 6.1583, |
|
"eval_samples_per_second": 225.713, |
|
"eval_steps_per_second": 7.145, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 4.605263157894737e-05, |
|
"loss": 0.8914, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 4.736842105263158e-05, |
|
"loss": 0.8727, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 4.868421052631579e-05, |
|
"loss": 0.8327, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8408, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_accuracy": 0.022302158273381296, |
|
"eval_loss": 11.317117691040039, |
|
"eval_runtime": 6.1694, |
|
"eval_samples_per_second": 225.305, |
|
"eval_steps_per_second": 7.132, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 4.985380116959065e-05, |
|
"loss": 0.8767, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 10.46, |
|
"learning_rate": 4.970760233918128e-05, |
|
"loss": 0.801, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 10.72, |
|
"learning_rate": 4.956140350877193e-05, |
|
"loss": 0.804, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 4.941520467836258e-05, |
|
"loss": 0.802, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"eval_accuracy": 0.02446043165467626, |
|
"eval_loss": 11.554548263549805, |
|
"eval_runtime": 6.1954, |
|
"eval_samples_per_second": 224.36, |
|
"eval_steps_per_second": 7.102, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 11.24, |
|
"learning_rate": 4.926900584795322e-05, |
|
"loss": 0.8308, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 4.912280701754386e-05, |
|
"loss": 0.7814, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 11.76, |
|
"learning_rate": 4.8976608187134504e-05, |
|
"loss": 0.7903, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.02877697841726619, |
|
"eval_loss": 11.77219295501709, |
|
"eval_runtime": 6.2667, |
|
"eval_samples_per_second": 221.808, |
|
"eval_steps_per_second": 7.021, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 4.883040935672515e-05, |
|
"loss": 0.8129, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 12.29, |
|
"learning_rate": 4.868421052631579e-05, |
|
"loss": 0.7376, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"learning_rate": 4.853801169590643e-05, |
|
"loss": 0.7731, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 12.81, |
|
"learning_rate": 4.839181286549708e-05, |
|
"loss": 0.7553, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.03525179856115108, |
|
"eval_loss": 11.983402252197266, |
|
"eval_runtime": 6.562, |
|
"eval_samples_per_second": 211.826, |
|
"eval_steps_per_second": 6.705, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 13.07, |
|
"learning_rate": 4.824561403508772e-05, |
|
"loss": 0.744, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 4.8099415204678366e-05, |
|
"loss": 0.7221, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 13.59, |
|
"learning_rate": 4.7953216374269006e-05, |
|
"loss": 0.728, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 13.86, |
|
"learning_rate": 4.780701754385965e-05, |
|
"loss": 0.7413, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_accuracy": 0.04460431654676259, |
|
"eval_loss": 11.981474876403809, |
|
"eval_runtime": 6.1295, |
|
"eval_samples_per_second": 226.771, |
|
"eval_steps_per_second": 7.178, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 14.12, |
|
"learning_rate": 4.7660818713450294e-05, |
|
"loss": 0.6931, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 14.38, |
|
"learning_rate": 4.751461988304094e-05, |
|
"loss": 0.6729, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 14.64, |
|
"learning_rate": 4.736842105263158e-05, |
|
"loss": 0.6931, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.6272, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"eval_accuracy": 0.04964028776978417, |
|
"eval_loss": 12.08712387084961, |
|
"eval_runtime": 6.1408, |
|
"eval_samples_per_second": 226.354, |
|
"eval_steps_per_second": 7.165, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 15.16, |
|
"learning_rate": 4.707602339181287e-05, |
|
"loss": 0.6859, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 15.42, |
|
"learning_rate": 4.6929824561403515e-05, |
|
"loss": 0.6595, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 15.69, |
|
"learning_rate": 4.678362573099415e-05, |
|
"loss": 0.6183, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 15.95, |
|
"learning_rate": 4.6637426900584796e-05, |
|
"loss": 0.6944, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.058992805755395686, |
|
"eval_loss": 12.371334075927734, |
|
"eval_runtime": 6.0154, |
|
"eval_samples_per_second": 231.073, |
|
"eval_steps_per_second": 7.315, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"learning_rate": 4.649122807017544e-05, |
|
"loss": 0.6187, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 16.47, |
|
"learning_rate": 4.634502923976608e-05, |
|
"loss": 0.6033, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 16.73, |
|
"learning_rate": 4.619883040935672e-05, |
|
"loss": 0.6347, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"learning_rate": 4.605263157894737e-05, |
|
"loss": 0.6322, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_accuracy": 0.05539568345323741, |
|
"eval_loss": 12.682552337646484, |
|
"eval_runtime": 5.924, |
|
"eval_samples_per_second": 234.639, |
|
"eval_steps_per_second": 7.427, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 17.25, |
|
"learning_rate": 4.590643274853802e-05, |
|
"loss": 0.6833, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"learning_rate": 4.576023391812866e-05, |
|
"loss": 0.6246, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 17.78, |
|
"learning_rate": 4.56140350877193e-05, |
|
"loss": 0.6131, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"eval_accuracy": 0.06115107913669065, |
|
"eval_loss": 12.481949806213379, |
|
"eval_runtime": 5.8554, |
|
"eval_samples_per_second": 237.388, |
|
"eval_steps_per_second": 7.514, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 18.04, |
|
"learning_rate": 4.5467836257309945e-05, |
|
"loss": 0.5705, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 18.3, |
|
"learning_rate": 4.5321637426900585e-05, |
|
"loss": 0.5652, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 18.56, |
|
"learning_rate": 4.517543859649123e-05, |
|
"loss": 0.5502, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 18.82, |
|
"learning_rate": 4.502923976608187e-05, |
|
"loss": 0.5916, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"eval_accuracy": 0.06474820143884892, |
|
"eval_loss": 12.624631881713867, |
|
"eval_runtime": 5.8769, |
|
"eval_samples_per_second": 236.519, |
|
"eval_steps_per_second": 7.487, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 19.08, |
|
"learning_rate": 4.488304093567251e-05, |
|
"loss": 0.6098, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"learning_rate": 4.473684210526316e-05, |
|
"loss": 0.5802, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 19.61, |
|
"learning_rate": 4.4590643274853806e-05, |
|
"loss": 0.4978, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 19.87, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.5094, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.06690647482014389, |
|
"eval_loss": 12.664143562316895, |
|
"eval_runtime": 5.9348, |
|
"eval_samples_per_second": 234.211, |
|
"eval_steps_per_second": 7.414, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 20.13, |
|
"learning_rate": 4.429824561403509e-05, |
|
"loss": 0.5806, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 20.39, |
|
"learning_rate": 4.4152046783625734e-05, |
|
"loss": 0.5324, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"learning_rate": 4.400584795321638e-05, |
|
"loss": 0.5366, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 20.92, |
|
"learning_rate": 4.3859649122807014e-05, |
|
"loss": 0.5201, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"eval_accuracy": 0.06618705035971223, |
|
"eval_loss": 12.886053085327148, |
|
"eval_runtime": 5.9484, |
|
"eval_samples_per_second": 233.676, |
|
"eval_steps_per_second": 7.397, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 21.18, |
|
"learning_rate": 4.371345029239766e-05, |
|
"loss": 0.5913, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 21.44, |
|
"learning_rate": 4.356725146198831e-05, |
|
"loss": 0.5353, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 21.7, |
|
"learning_rate": 4.342105263157895e-05, |
|
"loss": 0.542, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 21.96, |
|
"learning_rate": 4.327485380116959e-05, |
|
"loss": 0.4731, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 21.99, |
|
"eval_accuracy": 0.06546762589928058, |
|
"eval_loss": 12.743083000183105, |
|
"eval_runtime": 5.8758, |
|
"eval_samples_per_second": 236.564, |
|
"eval_steps_per_second": 7.488, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 22.22, |
|
"learning_rate": 4.3128654970760236e-05, |
|
"loss": 0.4812, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 22.48, |
|
"learning_rate": 4.298245614035088e-05, |
|
"loss": 0.5204, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 22.75, |
|
"learning_rate": 4.283625730994152e-05, |
|
"loss": 0.5132, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 22.98, |
|
"eval_accuracy": 0.07050359712230216, |
|
"eval_loss": 12.778566360473633, |
|
"eval_runtime": 5.856, |
|
"eval_samples_per_second": 237.364, |
|
"eval_steps_per_second": 7.514, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 4.269005847953216e-05, |
|
"loss": 0.5084, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 23.27, |
|
"learning_rate": 4.254385964912281e-05, |
|
"loss": 0.5183, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 23.53, |
|
"learning_rate": 4.239766081871345e-05, |
|
"loss": 0.5286, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 23.79, |
|
"learning_rate": 4.22514619883041e-05, |
|
"loss": 0.5036, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.07266187050359713, |
|
"eval_loss": 12.998967170715332, |
|
"eval_runtime": 5.8521, |
|
"eval_samples_per_second": 237.522, |
|
"eval_steps_per_second": 7.519, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 24.05, |
|
"learning_rate": 4.210526315789474e-05, |
|
"loss": 0.4826, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 24.31, |
|
"learning_rate": 4.195906432748538e-05, |
|
"loss": 0.5105, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 24.58, |
|
"learning_rate": 4.1812865497076025e-05, |
|
"loss": 0.5133, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 24.84, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.4863, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"eval_accuracy": 0.07266187050359713, |
|
"eval_loss": 13.041873931884766, |
|
"eval_runtime": 7.3809, |
|
"eval_samples_per_second": 188.323, |
|
"eval_steps_per_second": 5.961, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 25.1, |
|
"learning_rate": 4.152046783625731e-05, |
|
"loss": 0.5028, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 25.36, |
|
"learning_rate": 4.137426900584795e-05, |
|
"loss": 0.5185, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 25.62, |
|
"learning_rate": 4.12280701754386e-05, |
|
"loss": 0.484, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 25.88, |
|
"learning_rate": 4.1081871345029247e-05, |
|
"loss": 0.4852, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 25.99, |
|
"eval_accuracy": 0.07338129496402877, |
|
"eval_loss": 13.057332038879395, |
|
"eval_runtime": 6.099, |
|
"eval_samples_per_second": 227.908, |
|
"eval_steps_per_second": 7.214, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 26.14, |
|
"learning_rate": 4.093567251461988e-05, |
|
"loss": 0.5227, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 26.41, |
|
"learning_rate": 4.078947368421053e-05, |
|
"loss": 0.4827, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"learning_rate": 4.0643274853801174e-05, |
|
"loss": 0.5091, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 26.93, |
|
"learning_rate": 4.0497076023391814e-05, |
|
"loss": 0.4983, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 26.98, |
|
"eval_accuracy": 0.07194244604316546, |
|
"eval_loss": 13.131017684936523, |
|
"eval_runtime": 6.2465, |
|
"eval_samples_per_second": 222.525, |
|
"eval_steps_per_second": 7.044, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 27.19, |
|
"learning_rate": 4.0350877192982455e-05, |
|
"loss": 0.4918, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 27.45, |
|
"learning_rate": 4.02046783625731e-05, |
|
"loss": 0.4687, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 27.71, |
|
"learning_rate": 4.005847953216375e-05, |
|
"loss": 0.4687, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 27.97, |
|
"learning_rate": 3.991228070175439e-05, |
|
"loss": 0.459, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.07482014388489208, |
|
"eval_loss": 13.068760871887207, |
|
"eval_runtime": 6.2511, |
|
"eval_samples_per_second": 222.36, |
|
"eval_steps_per_second": 7.039, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 28.24, |
|
"learning_rate": 3.976608187134503e-05, |
|
"loss": 0.4987, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 28.5, |
|
"learning_rate": 3.9619883040935676e-05, |
|
"loss": 0.4874, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 28.76, |
|
"learning_rate": 3.9473684210526316e-05, |
|
"loss": 0.4556, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_accuracy": 0.07482014388489208, |
|
"eval_loss": 13.41281509399414, |
|
"eval_runtime": 6.1781, |
|
"eval_samples_per_second": 224.989, |
|
"eval_steps_per_second": 7.122, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 29.02, |
|
"learning_rate": 3.932748538011696e-05, |
|
"loss": 0.47, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 29.28, |
|
"learning_rate": 3.9181286549707604e-05, |
|
"loss": 0.4709, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 29.54, |
|
"learning_rate": 3.9035087719298244e-05, |
|
"loss": 0.4499, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 29.8, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.4729, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"eval_accuracy": 0.07410071942446043, |
|
"eval_loss": 13.353046417236328, |
|
"eval_runtime": 6.179, |
|
"eval_samples_per_second": 224.956, |
|
"eval_steps_per_second": 7.121, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 30.07, |
|
"learning_rate": 3.874269005847954e-05, |
|
"loss": 0.4329, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 30.33, |
|
"learning_rate": 3.859649122807018e-05, |
|
"loss": 0.4418, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 30.59, |
|
"learning_rate": 3.845029239766082e-05, |
|
"loss": 0.4428, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 30.85, |
|
"learning_rate": 3.8304093567251465e-05, |
|
"loss": 0.4659, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 30.98, |
|
"eval_accuracy": 0.07625899280575539, |
|
"eval_loss": 13.230795860290527, |
|
"eval_runtime": 6.1942, |
|
"eval_samples_per_second": 224.404, |
|
"eval_steps_per_second": 7.103, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 31.11, |
|
"learning_rate": 3.815789473684211e-05, |
|
"loss": 0.436, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 31.37, |
|
"learning_rate": 3.8011695906432746e-05, |
|
"loss": 0.4484, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 31.63, |
|
"learning_rate": 3.786549707602339e-05, |
|
"loss": 0.4099, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 31.9, |
|
"learning_rate": 3.771929824561404e-05, |
|
"loss": 0.4337, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.07482014388489208, |
|
"eval_loss": 13.32636833190918, |
|
"eval_runtime": 6.1367, |
|
"eval_samples_per_second": 226.505, |
|
"eval_steps_per_second": 7.17, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 32.16, |
|
"learning_rate": 3.757309941520468e-05, |
|
"loss": 0.4737, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 32.42, |
|
"learning_rate": 3.742690058479532e-05, |
|
"loss": 0.4133, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 32.68, |
|
"learning_rate": 3.728070175438597e-05, |
|
"loss": 0.4175, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 32.94, |
|
"learning_rate": 3.713450292397661e-05, |
|
"loss": 0.456, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 32.99, |
|
"eval_accuracy": 0.07410071942446043, |
|
"eval_loss": 13.350556373596191, |
|
"eval_runtime": 6.1533, |
|
"eval_samples_per_second": 225.896, |
|
"eval_steps_per_second": 7.151, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 33.2, |
|
"learning_rate": 3.6988304093567254e-05, |
|
"loss": 0.4446, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 33.46, |
|
"learning_rate": 3.6842105263157895e-05, |
|
"loss": 0.4216, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 33.73, |
|
"learning_rate": 3.669590643274854e-05, |
|
"loss": 0.4576, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 33.99, |
|
"learning_rate": 3.654970760233918e-05, |
|
"loss": 0.4423, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 33.99, |
|
"eval_accuracy": 0.07841726618705036, |
|
"eval_loss": 13.360701560974121, |
|
"eval_runtime": 5.9537, |
|
"eval_samples_per_second": 233.467, |
|
"eval_steps_per_second": 7.39, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 34.25, |
|
"learning_rate": 3.640350877192983e-05, |
|
"loss": 0.4469, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 34.51, |
|
"learning_rate": 3.625730994152047e-05, |
|
"loss": 0.4226, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 34.77, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.4037, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 34.98, |
|
"eval_accuracy": 0.07338129496402877, |
|
"eval_loss": 13.252113342285156, |
|
"eval_runtime": 6.1434, |
|
"eval_samples_per_second": 226.26, |
|
"eval_steps_per_second": 7.162, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 35.03, |
|
"learning_rate": 3.5964912280701756e-05, |
|
"loss": 0.4277, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 35.29, |
|
"learning_rate": 3.5818713450292403e-05, |
|
"loss": 0.3596, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 35.56, |
|
"learning_rate": 3.5672514619883044e-05, |
|
"loss": 0.4047, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 35.82, |
|
"learning_rate": 3.5526315789473684e-05, |
|
"loss": 0.3891, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.0776978417266187, |
|
"eval_loss": 13.370182991027832, |
|
"eval_runtime": 6.0896, |
|
"eval_samples_per_second": 228.258, |
|
"eval_steps_per_second": 7.225, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 36.08, |
|
"learning_rate": 3.538011695906433e-05, |
|
"loss": 0.4298, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 36.34, |
|
"learning_rate": 3.523391812865498e-05, |
|
"loss": 0.366, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 36.6, |
|
"learning_rate": 3.508771929824561e-05, |
|
"loss": 0.4485, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 3.494152046783626e-05, |
|
"loss": 0.3992, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 36.99, |
|
"eval_accuracy": 0.0776978417266187, |
|
"eval_loss": 13.476212501525879, |
|
"eval_runtime": 6.0276, |
|
"eval_samples_per_second": 230.606, |
|
"eval_steps_per_second": 7.3, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 37.12, |
|
"learning_rate": 3.4795321637426905e-05, |
|
"loss": 0.3786, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 37.39, |
|
"learning_rate": 3.4649122807017546e-05, |
|
"loss": 0.3805, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 37.65, |
|
"learning_rate": 3.4502923976608186e-05, |
|
"loss": 0.398, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 37.91, |
|
"learning_rate": 3.435672514619883e-05, |
|
"loss": 0.4014, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 37.99, |
|
"eval_accuracy": 0.07913669064748201, |
|
"eval_loss": 13.538166999816895, |
|
"eval_runtime": 5.9007, |
|
"eval_samples_per_second": 235.564, |
|
"eval_steps_per_second": 7.457, |
|
"step": 1453 |
|
}, |
|
{ |
|
"epoch": 38.17, |
|
"learning_rate": 3.421052631578947e-05, |
|
"loss": 0.4176, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 38.43, |
|
"learning_rate": 3.406432748538012e-05, |
|
"loss": 0.3663, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 38.69, |
|
"learning_rate": 3.391812865497076e-05, |
|
"loss": 0.3658, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 38.95, |
|
"learning_rate": 3.377192982456141e-05, |
|
"loss": 0.3549, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 38.98, |
|
"eval_accuracy": 0.07913669064748201, |
|
"eval_loss": 13.555007934570312, |
|
"eval_runtime": 5.8979, |
|
"eval_samples_per_second": 235.677, |
|
"eval_steps_per_second": 7.46, |
|
"step": 1491 |
|
}, |
|
{ |
|
"epoch": 39.22, |
|
"learning_rate": 3.362573099415205e-05, |
|
"loss": 0.4037, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 39.48, |
|
"learning_rate": 3.3479532163742695e-05, |
|
"loss": 0.3612, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 39.74, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.3879, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 3.3187134502923975e-05, |
|
"loss": 0.4048, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.07985611510791367, |
|
"eval_loss": 13.640563011169434, |
|
"eval_runtime": 5.9909, |
|
"eval_samples_per_second": 232.02, |
|
"eval_steps_per_second": 7.345, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 40.26, |
|
"learning_rate": 3.304093567251462e-05, |
|
"loss": 0.3897, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 40.52, |
|
"learning_rate": 3.289473684210527e-05, |
|
"loss": 0.3554, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 40.78, |
|
"learning_rate": 3.274853801169591e-05, |
|
"loss": 0.3711, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 40.99, |
|
"eval_accuracy": 0.0776978417266187, |
|
"eval_loss": 13.51199722290039, |
|
"eval_runtime": 5.8868, |
|
"eval_samples_per_second": 236.12, |
|
"eval_steps_per_second": 7.474, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 41.05, |
|
"learning_rate": 3.260233918128655e-05, |
|
"loss": 0.4245, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 41.31, |
|
"learning_rate": 3.24561403508772e-05, |
|
"loss": 0.4058, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 41.57, |
|
"learning_rate": 3.230994152046784e-05, |
|
"loss": 0.3726, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 41.83, |
|
"learning_rate": 3.216374269005848e-05, |
|
"loss": 0.3834, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 41.99, |
|
"eval_accuracy": 0.07985611510791367, |
|
"eval_loss": 13.923029899597168, |
|
"eval_runtime": 5.9072, |
|
"eval_samples_per_second": 235.305, |
|
"eval_steps_per_second": 7.448, |
|
"step": 1606 |
|
}, |
|
{ |
|
"epoch": 42.09, |
|
"learning_rate": 3.2017543859649124e-05, |
|
"loss": 0.4307, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 42.35, |
|
"learning_rate": 3.187134502923977e-05, |
|
"loss": 0.3423, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 42.61, |
|
"learning_rate": 3.172514619883041e-05, |
|
"loss": 0.3652, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 42.88, |
|
"learning_rate": 3.157894736842105e-05, |
|
"loss": 0.3475, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 42.98, |
|
"eval_accuracy": 0.07913669064748201, |
|
"eval_loss": 13.860240936279297, |
|
"eval_runtime": 5.9793, |
|
"eval_samples_per_second": 232.468, |
|
"eval_steps_per_second": 7.359, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 43.14, |
|
"learning_rate": 3.14327485380117e-05, |
|
"loss": 0.3636, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"learning_rate": 3.128654970760234e-05, |
|
"loss": 0.3586, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 43.66, |
|
"learning_rate": 3.1140350877192986e-05, |
|
"loss": 0.3643, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 43.92, |
|
"learning_rate": 3.0994152046783626e-05, |
|
"loss": 0.3465, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.08057553956834532, |
|
"eval_loss": 13.69305419921875, |
|
"eval_runtime": 5.9121, |
|
"eval_samples_per_second": 235.109, |
|
"eval_steps_per_second": 7.442, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 44.18, |
|
"learning_rate": 3.084795321637427e-05, |
|
"loss": 0.412, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 44.44, |
|
"learning_rate": 3.0701754385964913e-05, |
|
"loss": 0.3521, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 44.71, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.3847, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 44.97, |
|
"learning_rate": 3.0409356725146197e-05, |
|
"loss": 0.3682, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 44.99, |
|
"eval_accuracy": 0.07841726618705036, |
|
"eval_loss": 13.777403831481934, |
|
"eval_runtime": 6.1334, |
|
"eval_samples_per_second": 226.629, |
|
"eval_steps_per_second": 7.174, |
|
"step": 1721 |
|
}, |
|
{ |
|
"epoch": 45.23, |
|
"learning_rate": 3.0263157894736844e-05, |
|
"loss": 0.4067, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 45.49, |
|
"learning_rate": 3.0116959064327488e-05, |
|
"loss": 0.3724, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 45.75, |
|
"learning_rate": 2.997076023391813e-05, |
|
"loss": 0.3613, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 45.99, |
|
"eval_accuracy": 0.07913669064748201, |
|
"eval_loss": 14.02348518371582, |
|
"eval_runtime": 6.1306, |
|
"eval_samples_per_second": 226.73, |
|
"eval_steps_per_second": 7.177, |
|
"step": 1759 |
|
}, |
|
{ |
|
"epoch": 46.01, |
|
"learning_rate": 2.9824561403508772e-05, |
|
"loss": 0.3766, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 46.27, |
|
"learning_rate": 2.9678362573099415e-05, |
|
"loss": 0.4555, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 46.54, |
|
"learning_rate": 2.9532163742690062e-05, |
|
"loss": 0.3324, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 46.8, |
|
"learning_rate": 2.9385964912280706e-05, |
|
"loss": 0.368, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 46.98, |
|
"eval_accuracy": 0.08129496402877698, |
|
"eval_loss": 13.92888355255127, |
|
"eval_runtime": 6.1887, |
|
"eval_samples_per_second": 224.602, |
|
"eval_steps_per_second": 7.11, |
|
"step": 1797 |
|
}, |
|
{ |
|
"epoch": 47.06, |
|
"learning_rate": 2.9239766081871346e-05, |
|
"loss": 0.2888, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 47.32, |
|
"learning_rate": 2.909356725146199e-05, |
|
"loss": 0.3538, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 47.58, |
|
"learning_rate": 2.8947368421052634e-05, |
|
"loss": 0.3298, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 47.84, |
|
"learning_rate": 2.8801169590643277e-05, |
|
"loss": 0.3961, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.08057553956834532, |
|
"eval_loss": 14.254853248596191, |
|
"eval_runtime": 6.2277, |
|
"eval_samples_per_second": 223.197, |
|
"eval_steps_per_second": 7.065, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 48.1, |
|
"learning_rate": 2.8654970760233917e-05, |
|
"loss": 0.3433, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 48.37, |
|
"learning_rate": 2.850877192982456e-05, |
|
"loss": 0.366, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 48.63, |
|
"learning_rate": 2.8362573099415208e-05, |
|
"loss": 0.3278, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 48.89, |
|
"learning_rate": 2.821637426900585e-05, |
|
"loss": 0.365, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 48.99, |
|
"eval_accuracy": 0.08129496402877698, |
|
"eval_loss": 14.111425399780273, |
|
"eval_runtime": 6.2596, |
|
"eval_samples_per_second": 222.059, |
|
"eval_steps_per_second": 7.029, |
|
"step": 1874 |
|
}, |
|
{ |
|
"epoch": 49.15, |
|
"learning_rate": 2.8070175438596492e-05, |
|
"loss": 0.3721, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 49.41, |
|
"learning_rate": 2.7923976608187135e-05, |
|
"loss": 0.3683, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 49.67, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.3506, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 49.93, |
|
"learning_rate": 2.7631578947368426e-05, |
|
"loss": 0.3259, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 49.99, |
|
"eval_accuracy": 0.08057553956834532, |
|
"eval_loss": 13.97095012664795, |
|
"eval_runtime": 6.1784, |
|
"eval_samples_per_second": 224.977, |
|
"eval_steps_per_second": 7.122, |
|
"step": 1912 |
|
}, |
|
{ |
|
"epoch": 50.2, |
|
"learning_rate": 2.7485380116959063e-05, |
|
"loss": 0.3456, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 50.46, |
|
"learning_rate": 2.733918128654971e-05, |
|
"loss": 0.3499, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 50.72, |
|
"learning_rate": 2.7192982456140354e-05, |
|
"loss": 0.3255, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 50.98, |
|
"learning_rate": 2.7046783625730997e-05, |
|
"loss": 0.2998, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 50.98, |
|
"eval_accuracy": 0.08057553956834532, |
|
"eval_loss": 14.028827667236328, |
|
"eval_runtime": 6.2989, |
|
"eval_samples_per_second": 220.674, |
|
"eval_steps_per_second": 6.985, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 51.24, |
|
"learning_rate": 2.6900584795321637e-05, |
|
"loss": 0.3892, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 51.5, |
|
"learning_rate": 2.675438596491228e-05, |
|
"loss": 0.336, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 51.76, |
|
"learning_rate": 2.6608187134502928e-05, |
|
"loss": 0.3203, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.08129496402877698, |
|
"eval_loss": 13.939803123474121, |
|
"eval_runtime": 6.0691, |
|
"eval_samples_per_second": 229.028, |
|
"eval_steps_per_second": 7.25, |
|
"step": 1989 |
|
}, |
|
{ |
|
"epoch": 52.03, |
|
"learning_rate": 2.6461988304093572e-05, |
|
"loss": 0.3512, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 52.29, |
|
"learning_rate": 2.6315789473684212e-05, |
|
"loss": 0.3178, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 52.55, |
|
"learning_rate": 2.6169590643274856e-05, |
|
"loss": 0.3224, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 52.81, |
|
"learning_rate": 2.60233918128655e-05, |
|
"loss": 0.3104, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 52.99, |
|
"eval_accuracy": 0.08201438848920864, |
|
"eval_loss": 14.025542259216309, |
|
"eval_runtime": 5.9734, |
|
"eval_samples_per_second": 232.696, |
|
"eval_steps_per_second": 7.366, |
|
"step": 2027 |
|
}, |
|
{ |
|
"epoch": 53.07, |
|
"learning_rate": 2.5877192982456143e-05, |
|
"loss": 0.3159, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 53.33, |
|
"learning_rate": 2.5730994152046783e-05, |
|
"loss": 0.3305, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 53.59, |
|
"learning_rate": 2.5584795321637427e-05, |
|
"loss": 0.3085, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 53.86, |
|
"learning_rate": 2.5438596491228074e-05, |
|
"loss": 0.3232, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 53.99, |
|
"eval_accuracy": 0.08273381294964029, |
|
"eval_loss": 13.93545150756836, |
|
"eval_runtime": 6.1249, |
|
"eval_samples_per_second": 226.943, |
|
"eval_steps_per_second": 7.184, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 54.12, |
|
"learning_rate": 2.5292397660818717e-05, |
|
"loss": 0.3035, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 54.38, |
|
"learning_rate": 2.5146198830409358e-05, |
|
"loss": 0.3235, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 54.64, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.3359, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 54.9, |
|
"learning_rate": 2.485380116959064e-05, |
|
"loss": 0.3521, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 54.98, |
|
"eval_accuracy": 0.08057553956834532, |
|
"eval_loss": 13.862702369689941, |
|
"eval_runtime": 5.97, |
|
"eval_samples_per_second": 232.832, |
|
"eval_steps_per_second": 7.37, |
|
"step": 2103 |
|
}, |
|
{ |
|
"epoch": 55.16, |
|
"learning_rate": 2.470760233918129e-05, |
|
"loss": 0.3049, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 55.42, |
|
"learning_rate": 2.456140350877193e-05, |
|
"loss": 0.3011, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 55.69, |
|
"learning_rate": 2.4415204678362576e-05, |
|
"loss": 0.2945, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 55.95, |
|
"learning_rate": 2.4269005847953216e-05, |
|
"loss": 0.3322, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.08057553956834532, |
|
"eval_loss": 14.01791000366211, |
|
"eval_runtime": 5.9414, |
|
"eval_samples_per_second": 233.951, |
|
"eval_steps_per_second": 7.406, |
|
"step": 2142 |
|
}, |
|
{ |
|
"epoch": 56.21, |
|
"learning_rate": 2.412280701754386e-05, |
|
"loss": 0.2962, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 56.47, |
|
"learning_rate": 2.3976608187134503e-05, |
|
"loss": 0.3282, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 56.73, |
|
"learning_rate": 2.3830409356725147e-05, |
|
"loss": 0.2965, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 56.99, |
|
"learning_rate": 2.368421052631579e-05, |
|
"loss": 0.3129, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 56.99, |
|
"eval_accuracy": 0.08201438848920864, |
|
"eval_loss": 13.96402359008789, |
|
"eval_runtime": 5.8814, |
|
"eval_samples_per_second": 236.34, |
|
"eval_steps_per_second": 7.481, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 57.25, |
|
"learning_rate": 2.3538011695906434e-05, |
|
"loss": 0.3487, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 57.52, |
|
"learning_rate": 2.3391812865497074e-05, |
|
"loss": 0.3089, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 57.78, |
|
"learning_rate": 2.324561403508772e-05, |
|
"loss": 0.3159, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 57.99, |
|
"eval_accuracy": 0.07985611510791367, |
|
"eval_loss": 14.199702262878418, |
|
"eval_runtime": 6.0594, |
|
"eval_samples_per_second": 229.395, |
|
"eval_steps_per_second": 7.261, |
|
"step": 2218 |
|
}, |
|
{ |
|
"epoch": 58.04, |
|
"learning_rate": 2.309941520467836e-05, |
|
"loss": 0.34, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 58.3, |
|
"learning_rate": 2.295321637426901e-05, |
|
"loss": 0.3108, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 58.56, |
|
"learning_rate": 2.280701754385965e-05, |
|
"loss": 0.3316, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 58.82, |
|
"learning_rate": 2.2660818713450292e-05, |
|
"loss": 0.3118, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 58.98, |
|
"eval_accuracy": 0.08201438848920864, |
|
"eval_loss": 14.163899421691895, |
|
"eval_runtime": 5.9877, |
|
"eval_samples_per_second": 232.144, |
|
"eval_steps_per_second": 7.348, |
|
"step": 2256 |
|
}, |
|
{ |
|
"epoch": 59.08, |
|
"learning_rate": 2.2514619883040936e-05, |
|
"loss": 0.321, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 59.35, |
|
"learning_rate": 2.236842105263158e-05, |
|
"loss": 0.2972, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 59.61, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.2783, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 59.87, |
|
"learning_rate": 2.2076023391812867e-05, |
|
"loss": 0.3196, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.08057553956834532, |
|
"eval_loss": 14.033405303955078, |
|
"eval_runtime": 6.077, |
|
"eval_samples_per_second": 228.732, |
|
"eval_steps_per_second": 7.24, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 60.13, |
|
"learning_rate": 2.1929824561403507e-05, |
|
"loss": 0.3396, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 60.39, |
|
"learning_rate": 2.1783625730994154e-05, |
|
"loss": 0.3065, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 60.65, |
|
"learning_rate": 2.1637426900584794e-05, |
|
"loss": 0.3169, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 60.92, |
|
"learning_rate": 2.149122807017544e-05, |
|
"loss": 0.301, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 60.99, |
|
"eval_accuracy": 0.08201438848920864, |
|
"eval_loss": 13.995382308959961, |
|
"eval_runtime": 6.1902, |
|
"eval_samples_per_second": 224.549, |
|
"eval_steps_per_second": 7.108, |
|
"step": 2333 |
|
}, |
|
{ |
|
"epoch": 61.18, |
|
"learning_rate": 2.134502923976608e-05, |
|
"loss": 0.3011, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 61.44, |
|
"learning_rate": 2.1198830409356725e-05, |
|
"loss": 0.2741, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 61.7, |
|
"learning_rate": 2.105263157894737e-05, |
|
"loss": 0.2713, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 61.96, |
|
"learning_rate": 2.0906432748538013e-05, |
|
"loss": 0.3142, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 61.99, |
|
"eval_accuracy": 0.07985611510791367, |
|
"eval_loss": 14.143210411071777, |
|
"eval_runtime": 6.2246, |
|
"eval_samples_per_second": 223.307, |
|
"eval_steps_per_second": 7.069, |
|
"step": 2371 |
|
}, |
|
{ |
|
"epoch": 62.22, |
|
"learning_rate": 2.0760233918128656e-05, |
|
"loss": 0.2731, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 62.48, |
|
"learning_rate": 2.06140350877193e-05, |
|
"loss": 0.2847, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 62.75, |
|
"learning_rate": 2.046783625730994e-05, |
|
"loss": 0.3192, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 62.98, |
|
"eval_accuracy": 0.07841726618705036, |
|
"eval_loss": 14.026898384094238, |
|
"eval_runtime": 6.2695, |
|
"eval_samples_per_second": 221.708, |
|
"eval_steps_per_second": 7.018, |
|
"step": 2409 |
|
}, |
|
{ |
|
"epoch": 63.01, |
|
"learning_rate": 2.0321637426900587e-05, |
|
"loss": 0.3586, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 63.27, |
|
"learning_rate": 2.0175438596491227e-05, |
|
"loss": 0.3255, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 63.53, |
|
"learning_rate": 2.0029239766081874e-05, |
|
"loss": 0.2821, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 63.79, |
|
"learning_rate": 1.9883040935672515e-05, |
|
"loss": 0.3342, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.08057553956834532, |
|
"eval_loss": 14.044992446899414, |
|
"eval_runtime": 6.2504, |
|
"eval_samples_per_second": 222.386, |
|
"eval_steps_per_second": 7.04, |
|
"step": 2448 |
|
}, |
|
{ |
|
"epoch": 64.05, |
|
"learning_rate": 1.9736842105263158e-05, |
|
"loss": 0.3163, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 64.31, |
|
"learning_rate": 1.9590643274853802e-05, |
|
"loss": 0.3105, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 64.58, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.32, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 64.84, |
|
"learning_rate": 1.929824561403509e-05, |
|
"loss": 0.3045, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 64.99, |
|
"eval_accuracy": 0.08489208633093526, |
|
"eval_loss": 14.174620628356934, |
|
"eval_runtime": 6.2508, |
|
"eval_samples_per_second": 222.372, |
|
"eval_steps_per_second": 7.039, |
|
"step": 2486 |
|
}, |
|
{ |
|
"epoch": 65.1, |
|
"learning_rate": 1.9152046783625733e-05, |
|
"loss": 0.3044, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 65.36, |
|
"learning_rate": 1.9005847953216373e-05, |
|
"loss": 0.3328, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 65.62, |
|
"learning_rate": 1.885964912280702e-05, |
|
"loss": 0.2961, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 65.88, |
|
"learning_rate": 1.871345029239766e-05, |
|
"loss": 0.2991, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 65.99, |
|
"eval_accuracy": 0.08057553956834532, |
|
"eval_loss": 14.319219589233398, |
|
"eval_runtime": 6.2401, |
|
"eval_samples_per_second": 222.753, |
|
"eval_steps_per_second": 7.051, |
|
"step": 2524 |
|
}, |
|
{ |
|
"epoch": 66.14, |
|
"learning_rate": 1.8567251461988304e-05, |
|
"loss": 0.3191, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 66.41, |
|
"learning_rate": 1.8421052631578947e-05, |
|
"loss": 0.3111, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"learning_rate": 1.827485380116959e-05, |
|
"loss": 0.3201, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 66.93, |
|
"learning_rate": 1.8128654970760235e-05, |
|
"loss": 0.3228, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 66.98, |
|
"eval_accuracy": 0.07841726618705036, |
|
"eval_loss": 14.178192138671875, |
|
"eval_runtime": 6.2603, |
|
"eval_samples_per_second": 222.035, |
|
"eval_steps_per_second": 7.028, |
|
"step": 2562 |
|
}, |
|
{ |
|
"epoch": 67.19, |
|
"learning_rate": 1.7982456140350878e-05, |
|
"loss": 0.2959, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 67.45, |
|
"learning_rate": 1.7836257309941522e-05, |
|
"loss": 0.2927, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 67.71, |
|
"learning_rate": 1.7690058479532165e-05, |
|
"loss": 0.2991, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 67.97, |
|
"learning_rate": 1.7543859649122806e-05, |
|
"loss": 0.2711, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.08489208633093526, |
|
"eval_loss": 14.426069259643555, |
|
"eval_runtime": 6.1828, |
|
"eval_samples_per_second": 224.816, |
|
"eval_steps_per_second": 7.116, |
|
"step": 2601 |
|
}, |
|
{ |
|
"epoch": 68.24, |
|
"learning_rate": 1.7397660818713453e-05, |
|
"loss": 0.2786, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 68.5, |
|
"learning_rate": 1.7251461988304093e-05, |
|
"loss": 0.2746, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 68.76, |
|
"learning_rate": 1.7105263157894737e-05, |
|
"loss": 0.2473, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 68.99, |
|
"eval_accuracy": 0.08273381294964029, |
|
"eval_loss": 14.23031997680664, |
|
"eval_runtime": 6.0443, |
|
"eval_samples_per_second": 229.97, |
|
"eval_steps_per_second": 7.28, |
|
"step": 2639 |
|
}, |
|
{ |
|
"epoch": 69.02, |
|
"learning_rate": 1.695906432748538e-05, |
|
"loss": 0.3484, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 69.28, |
|
"learning_rate": 1.6812865497076024e-05, |
|
"loss": 0.2993, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 69.54, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.3016, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 69.8, |
|
"learning_rate": 1.652046783625731e-05, |
|
"loss": 0.3287, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 69.99, |
|
"eval_accuracy": 0.08273381294964029, |
|
"eval_loss": 14.274971961975098, |
|
"eval_runtime": 5.9713, |
|
"eval_samples_per_second": 232.779, |
|
"eval_steps_per_second": 7.369, |
|
"step": 2677 |
|
}, |
|
{ |
|
"epoch": 70.07, |
|
"learning_rate": 1.6374269005847955e-05, |
|
"loss": 0.2662, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 70.33, |
|
"learning_rate": 1.62280701754386e-05, |
|
"loss": 0.3035, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 70.59, |
|
"learning_rate": 1.608187134502924e-05, |
|
"loss": 0.2923, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 70.85, |
|
"learning_rate": 1.5935672514619886e-05, |
|
"loss": 0.2673, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 70.98, |
|
"eval_accuracy": 0.08201438848920864, |
|
"eval_loss": 14.23031234741211, |
|
"eval_runtime": 5.945, |
|
"eval_samples_per_second": 233.809, |
|
"eval_steps_per_second": 7.401, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 71.11, |
|
"learning_rate": 1.5789473684210526e-05, |
|
"loss": 0.2952, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 71.37, |
|
"learning_rate": 1.564327485380117e-05, |
|
"loss": 0.3177, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 71.63, |
|
"learning_rate": 1.5497076023391813e-05, |
|
"loss": 0.2803, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 71.9, |
|
"learning_rate": 1.5350877192982457e-05, |
|
"loss": 0.2843, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.08057553956834532, |
|
"eval_loss": 14.408571243286133, |
|
"eval_runtime": 5.9636, |
|
"eval_samples_per_second": 233.082, |
|
"eval_steps_per_second": 7.378, |
|
"step": 2754 |
|
}, |
|
{ |
|
"epoch": 72.16, |
|
"learning_rate": 1.5204678362573099e-05, |
|
"loss": 0.3007, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 72.42, |
|
"learning_rate": 1.5058479532163744e-05, |
|
"loss": 0.3098, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 72.68, |
|
"learning_rate": 1.4912280701754386e-05, |
|
"loss": 0.272, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 72.94, |
|
"learning_rate": 1.4766081871345031e-05, |
|
"loss": 0.3099, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 72.99, |
|
"eval_accuracy": 0.08273381294964029, |
|
"eval_loss": 14.518421173095703, |
|
"eval_runtime": 5.9749, |
|
"eval_samples_per_second": 232.641, |
|
"eval_steps_per_second": 7.364, |
|
"step": 2792 |
|
}, |
|
{ |
|
"epoch": 73.2, |
|
"learning_rate": 1.4619883040935673e-05, |
|
"loss": 0.2839, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 73.46, |
|
"learning_rate": 1.4473684210526317e-05, |
|
"loss": 0.3161, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 73.73, |
|
"learning_rate": 1.4327485380116959e-05, |
|
"loss": 0.3241, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 73.99, |
|
"learning_rate": 1.4181286549707604e-05, |
|
"loss": 0.3102, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 73.99, |
|
"eval_accuracy": 0.08345323741007195, |
|
"eval_loss": 14.276801109313965, |
|
"eval_runtime": 5.8868, |
|
"eval_samples_per_second": 236.121, |
|
"eval_steps_per_second": 7.474, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 74.25, |
|
"learning_rate": 1.4035087719298246e-05, |
|
"loss": 0.3081, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 74.51, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.2935, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 74.77, |
|
"learning_rate": 1.3742690058479531e-05, |
|
"loss": 0.2911, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 74.98, |
|
"eval_accuracy": 0.08345323741007195, |
|
"eval_loss": 14.100983619689941, |
|
"eval_runtime": 6.0333, |
|
"eval_samples_per_second": 230.387, |
|
"eval_steps_per_second": 7.293, |
|
"step": 2868 |
|
}, |
|
{ |
|
"epoch": 75.03, |
|
"learning_rate": 1.3596491228070177e-05, |
|
"loss": 0.3481, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 75.29, |
|
"learning_rate": 1.3450292397660819e-05, |
|
"loss": 0.298, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 75.56, |
|
"learning_rate": 1.3304093567251464e-05, |
|
"loss": 0.282, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 75.82, |
|
"learning_rate": 1.3157894736842106e-05, |
|
"loss": 0.2927, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.08129496402877698, |
|
"eval_loss": 14.461828231811523, |
|
"eval_runtime": 6.3066, |
|
"eval_samples_per_second": 220.403, |
|
"eval_steps_per_second": 6.977, |
|
"step": 2907 |
|
}, |
|
{ |
|
"epoch": 76.08, |
|
"learning_rate": 1.301169590643275e-05, |
|
"loss": 0.2929, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 76.34, |
|
"learning_rate": 1.2865497076023392e-05, |
|
"loss": 0.3012, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 76.6, |
|
"learning_rate": 1.2719298245614037e-05, |
|
"loss": 0.2694, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 76.86, |
|
"learning_rate": 1.2573099415204679e-05, |
|
"loss": 0.2967, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 76.99, |
|
"eval_accuracy": 0.08201438848920864, |
|
"eval_loss": 14.358075141906738, |
|
"eval_runtime": 6.119, |
|
"eval_samples_per_second": 227.162, |
|
"eval_steps_per_second": 7.191, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 77.12, |
|
"learning_rate": 1.242690058479532e-05, |
|
"loss": 0.2748, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 77.39, |
|
"learning_rate": 1.2280701754385964e-05, |
|
"loss": 0.281, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 77.65, |
|
"learning_rate": 1.2134502923976608e-05, |
|
"loss": 0.2897, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 77.91, |
|
"learning_rate": 1.1988304093567252e-05, |
|
"loss": 0.2446, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 77.99, |
|
"eval_accuracy": 0.08345323741007195, |
|
"eval_loss": 14.456214904785156, |
|
"eval_runtime": 6.1336, |
|
"eval_samples_per_second": 226.62, |
|
"eval_steps_per_second": 7.174, |
|
"step": 2983 |
|
}, |
|
{ |
|
"epoch": 78.17, |
|
"learning_rate": 1.1842105263157895e-05, |
|
"loss": 0.3009, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 78.43, |
|
"learning_rate": 1.1695906432748537e-05, |
|
"loss": 0.2901, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 78.69, |
|
"learning_rate": 1.154970760233918e-05, |
|
"loss": 0.2656, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 78.95, |
|
"learning_rate": 1.1403508771929824e-05, |
|
"loss": 0.3035, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 78.98, |
|
"eval_accuracy": 0.08345323741007195, |
|
"eval_loss": 14.268139839172363, |
|
"eval_runtime": 6.2555, |
|
"eval_samples_per_second": 222.203, |
|
"eval_steps_per_second": 7.034, |
|
"step": 3021 |
|
}, |
|
{ |
|
"epoch": 79.22, |
|
"learning_rate": 1.1257309941520468e-05, |
|
"loss": 0.2897, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 79.48, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.3152, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 79.74, |
|
"learning_rate": 1.0964912280701754e-05, |
|
"loss": 0.3034, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 1.0818713450292397e-05, |
|
"loss": 0.2989, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.08273381294964029, |
|
"eval_loss": 14.276779174804688, |
|
"eval_runtime": 6.208, |
|
"eval_samples_per_second": 223.906, |
|
"eval_steps_per_second": 7.088, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 80.26, |
|
"learning_rate": 1.067251461988304e-05, |
|
"loss": 0.3268, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 80.52, |
|
"learning_rate": 1.0526315789473684e-05, |
|
"loss": 0.2827, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 80.78, |
|
"learning_rate": 1.0380116959064328e-05, |
|
"loss": 0.2486, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 80.99, |
|
"eval_accuracy": 0.08201438848920864, |
|
"eval_loss": 14.424173355102539, |
|
"eval_runtime": 6.2731, |
|
"eval_samples_per_second": 221.581, |
|
"eval_steps_per_second": 7.014, |
|
"step": 3098 |
|
}, |
|
{ |
|
"epoch": 81.05, |
|
"learning_rate": 1.023391812865497e-05, |
|
"loss": 0.2711, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 81.31, |
|
"learning_rate": 1.0087719298245614e-05, |
|
"loss": 0.2918, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 81.57, |
|
"learning_rate": 9.941520467836257e-06, |
|
"loss": 0.3038, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 81.83, |
|
"learning_rate": 9.795321637426901e-06, |
|
"loss": 0.2622, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 81.99, |
|
"eval_accuracy": 0.08345323741007195, |
|
"eval_loss": 14.381049156188965, |
|
"eval_runtime": 6.1831, |
|
"eval_samples_per_second": 224.805, |
|
"eval_steps_per_second": 7.116, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 82.09, |
|
"learning_rate": 9.649122807017545e-06, |
|
"loss": 0.295, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 82.35, |
|
"learning_rate": 9.502923976608186e-06, |
|
"loss": 0.2911, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 82.61, |
|
"learning_rate": 9.35672514619883e-06, |
|
"loss": 0.2637, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 82.88, |
|
"learning_rate": 9.210526315789474e-06, |
|
"loss": 0.2892, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 82.98, |
|
"eval_accuracy": 0.08273381294964029, |
|
"eval_loss": 14.46367073059082, |
|
"eval_runtime": 5.9176, |
|
"eval_samples_per_second": 234.894, |
|
"eval_steps_per_second": 7.435, |
|
"step": 3174 |
|
}, |
|
{ |
|
"epoch": 83.14, |
|
"learning_rate": 9.064327485380117e-06, |
|
"loss": 0.3097, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 83.4, |
|
"learning_rate": 8.918128654970761e-06, |
|
"loss": 0.2677, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 83.66, |
|
"learning_rate": 8.771929824561403e-06, |
|
"loss": 0.2416, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 83.92, |
|
"learning_rate": 8.625730994152046e-06, |
|
"loss": 0.2668, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.08345323741007195, |
|
"eval_loss": 14.459735870361328, |
|
"eval_runtime": 5.9202, |
|
"eval_samples_per_second": 234.79, |
|
"eval_steps_per_second": 7.432, |
|
"step": 3213 |
|
}, |
|
{ |
|
"epoch": 84.18, |
|
"learning_rate": 8.47953216374269e-06, |
|
"loss": 0.286, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 84.44, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.2743, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 84.71, |
|
"learning_rate": 8.187134502923977e-06, |
|
"loss": 0.2709, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 84.97, |
|
"learning_rate": 8.04093567251462e-06, |
|
"loss": 0.2527, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 84.99, |
|
"eval_accuracy": 0.08201438848920864, |
|
"eval_loss": 14.30977725982666, |
|
"eval_runtime": 5.9216, |
|
"eval_samples_per_second": 234.734, |
|
"eval_steps_per_second": 7.43, |
|
"step": 3251 |
|
}, |
|
{ |
|
"epoch": 85.23, |
|
"learning_rate": 7.894736842105263e-06, |
|
"loss": 0.3011, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 85.49, |
|
"learning_rate": 7.748538011695907e-06, |
|
"loss": 0.2647, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 85.75, |
|
"learning_rate": 7.602339181286549e-06, |
|
"loss": 0.2636, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 85.99, |
|
"eval_accuracy": 0.08345323741007195, |
|
"eval_loss": 14.37409782409668, |
|
"eval_runtime": 5.948, |
|
"eval_samples_per_second": 233.693, |
|
"eval_steps_per_second": 7.397, |
|
"step": 3289 |
|
}, |
|
{ |
|
"epoch": 86.01, |
|
"learning_rate": 7.456140350877193e-06, |
|
"loss": 0.2907, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 86.27, |
|
"learning_rate": 7.3099415204678366e-06, |
|
"loss": 0.2792, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 86.54, |
|
"learning_rate": 7.163742690058479e-06, |
|
"loss": 0.3177, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 86.8, |
|
"learning_rate": 7.017543859649123e-06, |
|
"loss": 0.247, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 86.98, |
|
"eval_accuracy": 0.0841726618705036, |
|
"eval_loss": 14.536933898925781, |
|
"eval_runtime": 5.8678, |
|
"eval_samples_per_second": 236.885, |
|
"eval_steps_per_second": 7.499, |
|
"step": 3327 |
|
}, |
|
{ |
|
"epoch": 87.06, |
|
"learning_rate": 6.871345029239766e-06, |
|
"loss": 0.2813, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 87.32, |
|
"learning_rate": 6.725146198830409e-06, |
|
"loss": 0.2539, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 87.58, |
|
"learning_rate": 6.578947368421053e-06, |
|
"loss": 0.2976, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 87.84, |
|
"learning_rate": 6.432748538011696e-06, |
|
"loss": 0.2693, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.08345323741007195, |
|
"eval_loss": 14.403931617736816, |
|
"eval_runtime": 5.9739, |
|
"eval_samples_per_second": 232.68, |
|
"eval_steps_per_second": 7.365, |
|
"step": 3366 |
|
}, |
|
{ |
|
"epoch": 88.1, |
|
"learning_rate": 6.286549707602339e-06, |
|
"loss": 0.3025, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 88.37, |
|
"learning_rate": 6.140350877192982e-06, |
|
"loss": 0.2483, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 88.63, |
|
"learning_rate": 5.994152046783626e-06, |
|
"loss": 0.2826, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 88.89, |
|
"learning_rate": 5.8479532163742686e-06, |
|
"loss": 0.2692, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 88.99, |
|
"eval_accuracy": 0.08345323741007195, |
|
"eval_loss": 14.616063117980957, |
|
"eval_runtime": 5.9896, |
|
"eval_samples_per_second": 232.068, |
|
"eval_steps_per_second": 7.346, |
|
"step": 3404 |
|
}, |
|
{ |
|
"epoch": 89.15, |
|
"learning_rate": 5.701754385964912e-06, |
|
"loss": 0.294, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 89.41, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.3021, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 89.67, |
|
"learning_rate": 5.409356725146199e-06, |
|
"loss": 0.2924, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 89.93, |
|
"learning_rate": 5.263157894736842e-06, |
|
"loss": 0.28, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 89.99, |
|
"eval_accuracy": 0.08345323741007195, |
|
"eval_loss": 14.524378776550293, |
|
"eval_runtime": 5.9099, |
|
"eval_samples_per_second": 235.197, |
|
"eval_steps_per_second": 7.445, |
|
"step": 3442 |
|
}, |
|
{ |
|
"epoch": 90.2, |
|
"learning_rate": 5.116959064327485e-06, |
|
"loss": 0.3174, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 90.46, |
|
"learning_rate": 4.970760233918129e-06, |
|
"loss": 0.2556, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 90.72, |
|
"learning_rate": 4.824561403508772e-06, |
|
"loss": 0.2672, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 90.98, |
|
"learning_rate": 4.678362573099415e-06, |
|
"loss": 0.2535, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 90.98, |
|
"eval_accuracy": 0.0841726618705036, |
|
"eval_loss": 14.40617847442627, |
|
"eval_runtime": 5.9735, |
|
"eval_samples_per_second": 232.695, |
|
"eval_steps_per_second": 7.366, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 91.24, |
|
"learning_rate": 4.532163742690059e-06, |
|
"loss": 0.3035, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 91.5, |
|
"learning_rate": 4.3859649122807014e-06, |
|
"loss": 0.2757, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 91.76, |
|
"learning_rate": 4.239766081871345e-06, |
|
"loss": 0.2887, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.08057553956834532, |
|
"eval_loss": 14.41130256652832, |
|
"eval_runtime": 5.9481, |
|
"eval_samples_per_second": 233.689, |
|
"eval_steps_per_second": 7.397, |
|
"step": 3519 |
|
}, |
|
{ |
|
"epoch": 92.03, |
|
"learning_rate": 4.093567251461989e-06, |
|
"loss": 0.2733, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 92.29, |
|
"learning_rate": 3.9473684210526315e-06, |
|
"loss": 0.2809, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 92.55, |
|
"learning_rate": 3.8011695906432747e-06, |
|
"loss": 0.2788, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 92.81, |
|
"learning_rate": 3.6549707602339183e-06, |
|
"loss": 0.257, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 92.99, |
|
"eval_accuracy": 0.0841726618705036, |
|
"eval_loss": 14.344242095947266, |
|
"eval_runtime": 5.9071, |
|
"eval_samples_per_second": 235.31, |
|
"eval_steps_per_second": 7.449, |
|
"step": 3557 |
|
}, |
|
{ |
|
"epoch": 93.07, |
|
"learning_rate": 3.5087719298245615e-06, |
|
"loss": 0.3108, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 93.33, |
|
"learning_rate": 3.3625730994152047e-06, |
|
"loss": 0.2408, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 93.59, |
|
"learning_rate": 3.216374269005848e-06, |
|
"loss": 0.2597, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 93.86, |
|
"learning_rate": 3.070175438596491e-06, |
|
"loss": 0.2627, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 93.99, |
|
"eval_accuracy": 0.08345323741007195, |
|
"eval_loss": 14.469304084777832, |
|
"eval_runtime": 6.0553, |
|
"eval_samples_per_second": 229.553, |
|
"eval_steps_per_second": 7.266, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 94.12, |
|
"learning_rate": 2.9239766081871343e-06, |
|
"loss": 0.2628, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 94.38, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.283, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 94.64, |
|
"learning_rate": 2.631578947368421e-06, |
|
"loss": 0.27, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 94.9, |
|
"learning_rate": 2.4853801169590643e-06, |
|
"loss": 0.2804, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 94.98, |
|
"eval_accuracy": 0.08345323741007195, |
|
"eval_loss": 14.322315216064453, |
|
"eval_runtime": 6.0881, |
|
"eval_samples_per_second": 228.315, |
|
"eval_steps_per_second": 7.227, |
|
"step": 3633 |
|
}, |
|
{ |
|
"epoch": 95.16, |
|
"learning_rate": 2.3391812865497075e-06, |
|
"loss": 0.3117, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 95.42, |
|
"learning_rate": 2.1929824561403507e-06, |
|
"loss": 0.2948, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 95.69, |
|
"learning_rate": 2.0467836257309943e-06, |
|
"loss": 0.2926, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 95.95, |
|
"learning_rate": 1.9005847953216373e-06, |
|
"loss": 0.2529, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.08345323741007195, |
|
"eval_loss": 14.384366989135742, |
|
"eval_runtime": 6.1884, |
|
"eval_samples_per_second": 224.614, |
|
"eval_steps_per_second": 7.11, |
|
"step": 3672 |
|
}, |
|
{ |
|
"epoch": 96.21, |
|
"learning_rate": 1.7543859649122807e-06, |
|
"loss": 0.2694, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 96.47, |
|
"learning_rate": 1.608187134502924e-06, |
|
"loss": 0.2869, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 96.73, |
|
"learning_rate": 1.4619883040935671e-06, |
|
"loss": 0.2634, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 96.99, |
|
"learning_rate": 1.3157894736842106e-06, |
|
"loss": 0.2327, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 96.99, |
|
"eval_accuracy": 0.08345323741007195, |
|
"eval_loss": 14.428401947021484, |
|
"eval_runtime": 6.2696, |
|
"eval_samples_per_second": 221.706, |
|
"eval_steps_per_second": 7.018, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 97.25, |
|
"learning_rate": 1.1695906432748538e-06, |
|
"loss": 0.2786, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 97.52, |
|
"learning_rate": 1.0233918128654972e-06, |
|
"loss": 0.2893, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 97.78, |
|
"learning_rate": 8.771929824561404e-07, |
|
"loss": 0.2643, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 97.99, |
|
"eval_accuracy": 0.08345323741007195, |
|
"eval_loss": 14.556747436523438, |
|
"eval_runtime": 6.288, |
|
"eval_samples_per_second": 221.057, |
|
"eval_steps_per_second": 6.997, |
|
"step": 3748 |
|
}, |
|
{ |
|
"epoch": 98.04, |
|
"learning_rate": 7.309941520467836e-07, |
|
"loss": 0.2726, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 98.3, |
|
"learning_rate": 5.847953216374269e-07, |
|
"loss": 0.2169, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 98.56, |
|
"learning_rate": 4.385964912280702e-07, |
|
"loss": 0.2801, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 98.82, |
|
"learning_rate": 2.9239766081871344e-07, |
|
"loss": 0.284, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 98.98, |
|
"eval_accuracy": 0.08129496402877698, |
|
"eval_loss": 14.673846244812012, |
|
"eval_runtime": 6.2773, |
|
"eval_samples_per_second": 221.432, |
|
"eval_steps_per_second": 7.009, |
|
"step": 3786 |
|
}, |
|
{ |
|
"epoch": 99.08, |
|
"learning_rate": 1.4619883040935672e-07, |
|
"loss": 0.2767, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 99.35, |
|
"learning_rate": 0.0, |
|
"loss": 0.2503, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 99.35, |
|
"eval_accuracy": 0.0841726618705036, |
|
"eval_loss": 14.536262512207031, |
|
"eval_runtime": 6.1763, |
|
"eval_samples_per_second": 225.053, |
|
"eval_steps_per_second": 7.124, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 99.35, |
|
"step": 3800, |
|
"total_flos": 1.0404624773781246e+19, |
|
"train_loss": 0.44781778210087825, |
|
"train_runtime": 6473.2407, |
|
"train_samples_per_second": 75.217, |
|
"train_steps_per_second": 0.587 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3800, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 1.0404624773781246e+19, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|