|
{ |
|
"best_metric": 0.5022222222222222, |
|
"best_model_checkpoint": "ansilmbabl/cards-blt-swin-tiny-patch4-window7-224-finetuned-v2/checkpoint-337", |
|
"epoch": 99.55555555555556, |
|
"eval_steps": 500, |
|
"global_step": 5600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.928571428571428e-07, |
|
"loss": 1.415, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.7857142857142857e-06, |
|
"loss": 1.4195, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6785714285714285e-06, |
|
"loss": 1.419, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.5714285714285714e-06, |
|
"loss": 1.3573, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.464285714285715e-06, |
|
"loss": 1.4297, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.49333333333333335, |
|
"eval_loss": 1.1976468563079834, |
|
"eval_runtime": 6.9062, |
|
"eval_samples_per_second": 260.637, |
|
"eval_steps_per_second": 8.253, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5.357142857142857e-06, |
|
"loss": 1.4063, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 6.25e-06, |
|
"loss": 1.3637, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 7.142857142857143e-06, |
|
"loss": 1.402, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 8.035714285714286e-06, |
|
"loss": 1.3807, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 8.92857142857143e-06, |
|
"loss": 1.4298, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 9.821428571428573e-06, |
|
"loss": 1.4078, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.5011111111111111, |
|
"eval_loss": 1.196445345878601, |
|
"eval_runtime": 6.7224, |
|
"eval_samples_per_second": 267.76, |
|
"eval_steps_per_second": 8.479, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.0714285714285714e-05, |
|
"loss": 1.3884, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.1607142857142857e-05, |
|
"loss": 1.3689, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.25e-05, |
|
"loss": 1.4214, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.3392857142857144e-05, |
|
"loss": 1.3834, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 1.417, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.4961111111111111, |
|
"eval_loss": 1.2024986743927002, |
|
"eval_runtime": 6.8464, |
|
"eval_samples_per_second": 262.91, |
|
"eval_steps_per_second": 8.325, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.5178571428571429e-05, |
|
"loss": 1.4647, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.6071428571428572e-05, |
|
"loss": 1.4384, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.6964285714285715e-05, |
|
"loss": 1.3899, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 1.4333, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 1.3869, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 1.9642857142857145e-05, |
|
"loss": 1.4163, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.48833333333333334, |
|
"eval_loss": 1.2295453548431396, |
|
"eval_runtime": 6.7471, |
|
"eval_samples_per_second": 266.783, |
|
"eval_steps_per_second": 8.448, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 2.0535714285714285e-05, |
|
"loss": 1.4564, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 2.1428571428571428e-05, |
|
"loss": 1.4539, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 2.2321428571428575e-05, |
|
"loss": 1.4111, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 2.3214285714285715e-05, |
|
"loss": 1.4174, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 2.4107142857142858e-05, |
|
"loss": 1.386, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.4318, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.495, |
|
"eval_loss": 1.2330048084259033, |
|
"eval_runtime": 6.8802, |
|
"eval_samples_per_second": 261.621, |
|
"eval_steps_per_second": 8.285, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 2.5892857142857148e-05, |
|
"loss": 1.3915, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 2.6785714285714288e-05, |
|
"loss": 1.3905, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 2.767857142857143e-05, |
|
"loss": 1.4094, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 1.4484, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 2.9464285714285718e-05, |
|
"loss": 1.4383, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_accuracy": 0.5022222222222222, |
|
"eval_loss": 1.2162481546401978, |
|
"eval_runtime": 6.9019, |
|
"eval_samples_per_second": 260.797, |
|
"eval_steps_per_second": 8.259, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 3.0357142857142857e-05, |
|
"loss": 1.4271, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 3.125e-05, |
|
"loss": 1.3845, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 3.2142857142857144e-05, |
|
"loss": 1.4332, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 3.303571428571429e-05, |
|
"loss": 1.4743, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 3.392857142857143e-05, |
|
"loss": 1.3979, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 3.4821428571428574e-05, |
|
"loss": 1.4212, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": 0.4716666666666667, |
|
"eval_loss": 1.2634377479553223, |
|
"eval_runtime": 6.8985, |
|
"eval_samples_per_second": 260.925, |
|
"eval_steps_per_second": 8.263, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 1.4537, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 3.6607142857142853e-05, |
|
"loss": 1.4253, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 1.4421, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 3.839285714285715e-05, |
|
"loss": 1.4282, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 3.928571428571429e-05, |
|
"loss": 1.4997, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.017857142857143e-05, |
|
"loss": 1.4346, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.4688888888888889, |
|
"eval_loss": 1.3083486557006836, |
|
"eval_runtime": 6.7559, |
|
"eval_samples_per_second": 266.436, |
|
"eval_steps_per_second": 8.437, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 4.107142857142857e-05, |
|
"loss": 1.4238, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 4.196428571428572e-05, |
|
"loss": 1.4162, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 1.3962, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 4.375e-05, |
|
"loss": 1.38, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 4.464285714285715e-05, |
|
"loss": 1.419, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.48055555555555557, |
|
"eval_loss": 1.271929383277893, |
|
"eval_runtime": 6.6282, |
|
"eval_samples_per_second": 271.567, |
|
"eval_steps_per_second": 8.6, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 4.5535714285714286e-05, |
|
"loss": 1.4266, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 4.642857142857143e-05, |
|
"loss": 1.4394, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 4.732142857142857e-05, |
|
"loss": 1.4178, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 4.8214285714285716e-05, |
|
"loss": 1.4636, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 4.910714285714286e-05, |
|
"loss": 1.3616, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4252, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_accuracy": 0.4911111111111111, |
|
"eval_loss": 1.3047653436660767, |
|
"eval_runtime": 7.0266, |
|
"eval_samples_per_second": 256.17, |
|
"eval_steps_per_second": 8.112, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"learning_rate": 4.990079365079365e-05, |
|
"loss": 1.4389, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 4.9801587301587306e-05, |
|
"loss": 1.3925, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 4.9702380952380955e-05, |
|
"loss": 1.4302, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"learning_rate": 4.960317460317461e-05, |
|
"loss": 1.405, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"learning_rate": 4.950396825396826e-05, |
|
"loss": 1.4522, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_accuracy": 0.47944444444444445, |
|
"eval_loss": 1.2707706689834595, |
|
"eval_runtime": 7.1468, |
|
"eval_samples_per_second": 251.861, |
|
"eval_steps_per_second": 7.976, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 4.940476190476191e-05, |
|
"loss": 1.4201, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 4.930555555555556e-05, |
|
"loss": 1.3786, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 11.38, |
|
"learning_rate": 4.9206349206349204e-05, |
|
"loss": 1.4126, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 11.56, |
|
"learning_rate": 4.910714285714286e-05, |
|
"loss": 1.4261, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 4.900793650793651e-05, |
|
"loss": 1.3891, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 11.91, |
|
"learning_rate": 4.8908730158730156e-05, |
|
"loss": 1.3748, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.43833333333333335, |
|
"eval_loss": 1.3720223903656006, |
|
"eval_runtime": 6.9318, |
|
"eval_samples_per_second": 259.673, |
|
"eval_steps_per_second": 8.223, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 12.09, |
|
"learning_rate": 4.880952380952381e-05, |
|
"loss": 1.479, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 4.871031746031746e-05, |
|
"loss": 1.3825, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 12.44, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 1.4049, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 12.62, |
|
"learning_rate": 4.8511904761904764e-05, |
|
"loss": 1.422, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 4.841269841269841e-05, |
|
"loss": 1.3707, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"learning_rate": 4.831349206349207e-05, |
|
"loss": 1.3966, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.45944444444444443, |
|
"eval_loss": 1.3094544410705566, |
|
"eval_runtime": 7.1035, |
|
"eval_samples_per_second": 253.396, |
|
"eval_steps_per_second": 8.024, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"learning_rate": 4.8214285714285716e-05, |
|
"loss": 1.3888, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 4.811507936507937e-05, |
|
"loss": 1.4374, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 4.801587301587302e-05, |
|
"loss": 1.4004, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 13.69, |
|
"learning_rate": 4.791666666666667e-05, |
|
"loss": 1.4099, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 13.87, |
|
"learning_rate": 4.781746031746032e-05, |
|
"loss": 1.4507, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_accuracy": 0.485, |
|
"eval_loss": 1.2430291175842285, |
|
"eval_runtime": 7.2727, |
|
"eval_samples_per_second": 247.502, |
|
"eval_steps_per_second": 7.838, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"learning_rate": 4.771825396825397e-05, |
|
"loss": 1.4554, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 1.3986, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 4.751984126984127e-05, |
|
"loss": 1.4145, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 14.58, |
|
"learning_rate": 4.7420634920634924e-05, |
|
"loss": 1.4258, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 14.76, |
|
"learning_rate": 4.732142857142857e-05, |
|
"loss": 1.4066, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 1.4033, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_accuracy": 0.47944444444444445, |
|
"eval_loss": 1.272768497467041, |
|
"eval_runtime": 6.8312, |
|
"eval_samples_per_second": 263.498, |
|
"eval_steps_per_second": 8.344, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"learning_rate": 4.7123015873015876e-05, |
|
"loss": 1.4264, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 15.29, |
|
"learning_rate": 4.7023809523809525e-05, |
|
"loss": 1.3796, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 15.47, |
|
"learning_rate": 4.692460317460317e-05, |
|
"loss": 1.3972, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 15.64, |
|
"learning_rate": 4.682539682539683e-05, |
|
"loss": 1.3486, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 15.82, |
|
"learning_rate": 4.672619047619048e-05, |
|
"loss": 1.3769, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 4.662698412698413e-05, |
|
"loss": 1.3972, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.48833333333333334, |
|
"eval_loss": 1.261121392250061, |
|
"eval_runtime": 6.6409, |
|
"eval_samples_per_second": 271.049, |
|
"eval_steps_per_second": 8.583, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 16.18, |
|
"learning_rate": 4.652777777777778e-05, |
|
"loss": 1.3787, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 16.36, |
|
"learning_rate": 4.642857142857143e-05, |
|
"loss": 1.3228, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 16.53, |
|
"learning_rate": 4.6329365079365085e-05, |
|
"loss": 1.3655, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 16.71, |
|
"learning_rate": 4.623015873015873e-05, |
|
"loss": 1.3589, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 16.89, |
|
"learning_rate": 4.613095238095239e-05, |
|
"loss": 1.4136, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.45, |
|
"eval_loss": 1.3166221380233765, |
|
"eval_runtime": 6.9227, |
|
"eval_samples_per_second": 260.013, |
|
"eval_steps_per_second": 8.234, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"learning_rate": 4.603174603174603e-05, |
|
"loss": 1.4259, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 17.24, |
|
"learning_rate": 4.5932539682539685e-05, |
|
"loss": 1.3792, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 17.42, |
|
"learning_rate": 4.5833333333333334e-05, |
|
"loss": 1.3724, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 4.573412698412698e-05, |
|
"loss": 1.4, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 17.78, |
|
"learning_rate": 4.563492063492064e-05, |
|
"loss": 1.3087, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.96, |
|
"learning_rate": 4.5535714285714286e-05, |
|
"loss": 1.3992, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"eval_accuracy": 0.4855555555555556, |
|
"eval_loss": 1.3103010654449463, |
|
"eval_runtime": 6.5821, |
|
"eval_samples_per_second": 273.469, |
|
"eval_steps_per_second": 8.66, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"learning_rate": 4.543650793650794e-05, |
|
"loss": 1.3577, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 18.31, |
|
"learning_rate": 4.533730158730159e-05, |
|
"loss": 1.3907, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 18.49, |
|
"learning_rate": 4.523809523809524e-05, |
|
"loss": 1.3839, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 18.67, |
|
"learning_rate": 4.5138888888888894e-05, |
|
"loss": 1.3949, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 18.84, |
|
"learning_rate": 4.503968253968254e-05, |
|
"loss": 1.3614, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_accuracy": 0.44222222222222224, |
|
"eval_loss": 1.3302438259124756, |
|
"eval_runtime": 6.9237, |
|
"eval_samples_per_second": 259.976, |
|
"eval_steps_per_second": 8.233, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"learning_rate": 4.494047619047619e-05, |
|
"loss": 1.3348, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 4.4841269841269846e-05, |
|
"loss": 1.3525, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 19.38, |
|
"learning_rate": 4.4742063492063494e-05, |
|
"loss": 1.3349, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 19.56, |
|
"learning_rate": 4.464285714285715e-05, |
|
"loss": 1.3511, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 19.73, |
|
"learning_rate": 4.45436507936508e-05, |
|
"loss": 1.3608, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 1.3747, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.4855555555555556, |
|
"eval_loss": 1.2918972969055176, |
|
"eval_runtime": 6.9094, |
|
"eval_samples_per_second": 260.515, |
|
"eval_steps_per_second": 8.25, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 20.09, |
|
"learning_rate": 4.4345238095238095e-05, |
|
"loss": 1.372, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 20.27, |
|
"learning_rate": 4.4246031746031744e-05, |
|
"loss": 1.3213, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 20.44, |
|
"learning_rate": 4.41468253968254e-05, |
|
"loss": 1.3659, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 20.62, |
|
"learning_rate": 4.404761904761905e-05, |
|
"loss": 1.374, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 4.39484126984127e-05, |
|
"loss": 1.3621, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 20.98, |
|
"learning_rate": 4.384920634920635e-05, |
|
"loss": 1.3868, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.4727777777777778, |
|
"eval_loss": 1.3165982961654663, |
|
"eval_runtime": 6.6725, |
|
"eval_samples_per_second": 269.765, |
|
"eval_steps_per_second": 8.543, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 21.16, |
|
"learning_rate": 4.375e-05, |
|
"loss": 1.3344, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 21.33, |
|
"learning_rate": 4.3650793650793655e-05, |
|
"loss": 1.3649, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 21.51, |
|
"learning_rate": 4.35515873015873e-05, |
|
"loss": 1.2993, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 21.69, |
|
"learning_rate": 4.345238095238096e-05, |
|
"loss": 1.3745, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 21.87, |
|
"learning_rate": 4.335317460317461e-05, |
|
"loss": 1.3399, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 21.99, |
|
"eval_accuracy": 0.4672222222222222, |
|
"eval_loss": 1.3200290203094482, |
|
"eval_runtime": 6.7301, |
|
"eval_samples_per_second": 267.457, |
|
"eval_steps_per_second": 8.469, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 22.04, |
|
"learning_rate": 4.3253968253968256e-05, |
|
"loss": 1.3777, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 22.22, |
|
"learning_rate": 4.315476190476191e-05, |
|
"loss": 1.3122, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"learning_rate": 4.305555555555556e-05, |
|
"loss": 1.3405, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 22.58, |
|
"learning_rate": 4.295634920634921e-05, |
|
"loss": 1.299, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 22.76, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 1.3672, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 22.93, |
|
"learning_rate": 4.2757936507936505e-05, |
|
"loss": 1.3943, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 22.99, |
|
"eval_accuracy": 0.4811111111111111, |
|
"eval_loss": 1.2920361757278442, |
|
"eval_runtime": 6.9015, |
|
"eval_samples_per_second": 260.815, |
|
"eval_steps_per_second": 8.259, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 23.11, |
|
"learning_rate": 4.265873015873016e-05, |
|
"loss": 1.3459, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 23.29, |
|
"learning_rate": 4.255952380952381e-05, |
|
"loss": 1.3264, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 23.47, |
|
"learning_rate": 4.2460317460317464e-05, |
|
"loss": 1.3297, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 23.64, |
|
"learning_rate": 4.236111111111111e-05, |
|
"loss": 1.3092, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 23.82, |
|
"learning_rate": 4.226190476190476e-05, |
|
"loss": 1.3551, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 4.2162698412698416e-05, |
|
"loss": 1.3635, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.48333333333333334, |
|
"eval_loss": 1.3109022378921509, |
|
"eval_runtime": 6.7144, |
|
"eval_samples_per_second": 268.079, |
|
"eval_steps_per_second": 8.489, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 24.18, |
|
"learning_rate": 4.2063492063492065e-05, |
|
"loss": 1.3229, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 24.36, |
|
"learning_rate": 4.196428571428572e-05, |
|
"loss": 1.3578, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"learning_rate": 4.186507936507937e-05, |
|
"loss": 1.3052, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 24.71, |
|
"learning_rate": 4.176587301587302e-05, |
|
"loss": 1.3843, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 24.89, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 1.3724, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.46444444444444444, |
|
"eval_loss": 1.3099535703659058, |
|
"eval_runtime": 6.6043, |
|
"eval_samples_per_second": 272.552, |
|
"eval_steps_per_second": 8.631, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 25.07, |
|
"learning_rate": 4.156746031746032e-05, |
|
"loss": 1.3264, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 25.24, |
|
"learning_rate": 4.1468253968253976e-05, |
|
"loss": 1.2923, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 25.42, |
|
"learning_rate": 4.136904761904762e-05, |
|
"loss": 1.34, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 4.126984126984127e-05, |
|
"loss": 1.2817, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 25.78, |
|
"learning_rate": 4.117063492063492e-05, |
|
"loss": 1.3498, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 25.96, |
|
"learning_rate": 4.107142857142857e-05, |
|
"loss": 1.3141, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 25.99, |
|
"eval_accuracy": 0.49777777777777776, |
|
"eval_loss": 1.3263020515441895, |
|
"eval_runtime": 9.6902, |
|
"eval_samples_per_second": 185.755, |
|
"eval_steps_per_second": 5.882, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 26.13, |
|
"learning_rate": 4.0972222222222225e-05, |
|
"loss": 1.3192, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 26.31, |
|
"learning_rate": 4.0873015873015874e-05, |
|
"loss": 1.3228, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 26.49, |
|
"learning_rate": 4.077380952380952e-05, |
|
"loss": 1.2749, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"learning_rate": 4.067460317460318e-05, |
|
"loss": 1.289, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 26.84, |
|
"learning_rate": 4.0575396825396826e-05, |
|
"loss": 1.3576, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"eval_accuracy": 0.4772222222222222, |
|
"eval_loss": 1.330748438835144, |
|
"eval_runtime": 6.6957, |
|
"eval_samples_per_second": 268.828, |
|
"eval_steps_per_second": 8.513, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 27.02, |
|
"learning_rate": 4.047619047619048e-05, |
|
"loss": 1.3147, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"learning_rate": 4.037698412698413e-05, |
|
"loss": 1.2995, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 27.38, |
|
"learning_rate": 4.027777777777778e-05, |
|
"loss": 1.2871, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 27.56, |
|
"learning_rate": 4.017857142857143e-05, |
|
"loss": 1.313, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 27.73, |
|
"learning_rate": 4.007936507936508e-05, |
|
"loss": 1.361, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 27.91, |
|
"learning_rate": 3.998015873015874e-05, |
|
"loss": 1.3022, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.49777777777777776, |
|
"eval_loss": 1.3408504724502563, |
|
"eval_runtime": 6.9866, |
|
"eval_samples_per_second": 257.635, |
|
"eval_steps_per_second": 8.158, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 28.09, |
|
"learning_rate": 3.9880952380952386e-05, |
|
"loss": 1.2554, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 28.27, |
|
"learning_rate": 3.9781746031746034e-05, |
|
"loss": 1.3175, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 28.44, |
|
"learning_rate": 3.968253968253968e-05, |
|
"loss": 1.2905, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 28.62, |
|
"learning_rate": 3.958333333333333e-05, |
|
"loss": 1.2806, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"learning_rate": 3.9484126984126986e-05, |
|
"loss": 1.3055, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 28.98, |
|
"learning_rate": 3.9384920634920635e-05, |
|
"loss": 1.2982, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.4583333333333333, |
|
"eval_loss": 1.3961677551269531, |
|
"eval_runtime": 6.8118, |
|
"eval_samples_per_second": 264.246, |
|
"eval_steps_per_second": 8.368, |
|
"step": 1631 |
|
}, |
|
{ |
|
"epoch": 29.16, |
|
"learning_rate": 3.928571428571429e-05, |
|
"loss": 1.2994, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 29.33, |
|
"learning_rate": 3.918650793650794e-05, |
|
"loss": 1.2676, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 29.51, |
|
"learning_rate": 3.908730158730159e-05, |
|
"loss": 1.2888, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 29.69, |
|
"learning_rate": 3.898809523809524e-05, |
|
"loss": 1.2839, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 29.87, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 1.2657, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"eval_accuracy": 0.4816666666666667, |
|
"eval_loss": 1.3329031467437744, |
|
"eval_runtime": 6.7377, |
|
"eval_samples_per_second": 267.155, |
|
"eval_steps_per_second": 8.46, |
|
"step": 1687 |
|
}, |
|
{ |
|
"epoch": 30.04, |
|
"learning_rate": 3.878968253968254e-05, |
|
"loss": 1.2745, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 30.22, |
|
"learning_rate": 3.8690476190476195e-05, |
|
"loss": 1.3186, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"learning_rate": 3.859126984126984e-05, |
|
"loss": 1.2745, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 30.58, |
|
"learning_rate": 3.84920634920635e-05, |
|
"loss": 1.3076, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 30.76, |
|
"learning_rate": 3.839285714285715e-05, |
|
"loss": 1.2578, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 30.93, |
|
"learning_rate": 3.8293650793650795e-05, |
|
"loss": 1.3152, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 30.99, |
|
"eval_accuracy": 0.49, |
|
"eval_loss": 1.2973191738128662, |
|
"eval_runtime": 7.0246, |
|
"eval_samples_per_second": 256.242, |
|
"eval_steps_per_second": 8.114, |
|
"step": 1743 |
|
}, |
|
{ |
|
"epoch": 31.11, |
|
"learning_rate": 3.8194444444444444e-05, |
|
"loss": 1.248, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 31.29, |
|
"learning_rate": 3.809523809523809e-05, |
|
"loss": 1.2537, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 31.47, |
|
"learning_rate": 3.799603174603175e-05, |
|
"loss": 1.247, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 31.64, |
|
"learning_rate": 3.7896825396825396e-05, |
|
"loss": 1.2904, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 31.82, |
|
"learning_rate": 3.779761904761905e-05, |
|
"loss": 1.3096, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 3.76984126984127e-05, |
|
"loss": 1.2924, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.48333333333333334, |
|
"eval_loss": 1.3158966302871704, |
|
"eval_runtime": 6.8489, |
|
"eval_samples_per_second": 262.815, |
|
"eval_steps_per_second": 8.322, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 32.18, |
|
"learning_rate": 3.759920634920635e-05, |
|
"loss": 1.2709, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 32.36, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 1.2427, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 32.53, |
|
"learning_rate": 3.740079365079365e-05, |
|
"loss": 1.2435, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 32.71, |
|
"learning_rate": 3.730158730158731e-05, |
|
"loss": 1.2843, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 32.89, |
|
"learning_rate": 3.7202380952380956e-05, |
|
"loss": 1.214, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.48333333333333334, |
|
"eval_loss": 1.395532488822937, |
|
"eval_runtime": 6.7585, |
|
"eval_samples_per_second": 266.33, |
|
"eval_steps_per_second": 8.434, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 33.07, |
|
"learning_rate": 3.7103174603174604e-05, |
|
"loss": 1.3143, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 33.24, |
|
"learning_rate": 3.700396825396826e-05, |
|
"loss": 1.2143, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 33.42, |
|
"learning_rate": 3.690476190476191e-05, |
|
"loss": 1.2721, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"learning_rate": 3.6805555555555556e-05, |
|
"loss": 1.2267, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 33.78, |
|
"learning_rate": 3.6706349206349205e-05, |
|
"loss": 1.2616, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 33.96, |
|
"learning_rate": 3.6607142857142853e-05, |
|
"loss": 1.2717, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 33.99, |
|
"eval_accuracy": 0.46, |
|
"eval_loss": 1.4583297967910767, |
|
"eval_runtime": 6.7797, |
|
"eval_samples_per_second": 265.497, |
|
"eval_steps_per_second": 8.407, |
|
"step": 1912 |
|
}, |
|
{ |
|
"epoch": 34.13, |
|
"learning_rate": 3.650793650793651e-05, |
|
"loss": 1.2594, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 34.31, |
|
"learning_rate": 3.640873015873016e-05, |
|
"loss": 1.3058, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 34.49, |
|
"learning_rate": 3.630952380952381e-05, |
|
"loss": 1.285, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 34.67, |
|
"learning_rate": 3.621031746031746e-05, |
|
"loss": 1.2276, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 34.84, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 1.2692, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 34.99, |
|
"eval_accuracy": 0.4938888888888889, |
|
"eval_loss": 1.3503786325454712, |
|
"eval_runtime": 6.4105, |
|
"eval_samples_per_second": 280.79, |
|
"eval_steps_per_second": 8.892, |
|
"step": 1968 |
|
}, |
|
{ |
|
"epoch": 35.02, |
|
"learning_rate": 3.6011904761904765e-05, |
|
"loss": 1.2435, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 35.2, |
|
"learning_rate": 3.591269841269841e-05, |
|
"loss": 1.2224, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 35.38, |
|
"learning_rate": 3.581349206349207e-05, |
|
"loss": 1.2219, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 35.56, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 1.2391, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 35.73, |
|
"learning_rate": 3.5615079365079365e-05, |
|
"loss": 1.2594, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 35.91, |
|
"learning_rate": 3.551587301587302e-05, |
|
"loss": 1.2127, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.48333333333333334, |
|
"eval_loss": 1.378441333770752, |
|
"eval_runtime": 6.7487, |
|
"eval_samples_per_second": 266.719, |
|
"eval_steps_per_second": 8.446, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 36.09, |
|
"learning_rate": 3.541666666666667e-05, |
|
"loss": 1.2251, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 36.27, |
|
"learning_rate": 3.5317460317460324e-05, |
|
"loss": 1.2232, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 36.44, |
|
"learning_rate": 3.521825396825397e-05, |
|
"loss": 1.2207, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 36.62, |
|
"learning_rate": 3.511904761904762e-05, |
|
"loss": 1.2139, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"learning_rate": 3.501984126984127e-05, |
|
"loss": 1.2464, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 36.98, |
|
"learning_rate": 3.492063492063492e-05, |
|
"loss": 1.1956, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.4816666666666667, |
|
"eval_loss": 1.4183870553970337, |
|
"eval_runtime": 6.7564, |
|
"eval_samples_per_second": 266.414, |
|
"eval_steps_per_second": 8.436, |
|
"step": 2081 |
|
}, |
|
{ |
|
"epoch": 37.16, |
|
"learning_rate": 3.4821428571428574e-05, |
|
"loss": 1.199, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 37.33, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 1.2005, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 37.51, |
|
"learning_rate": 3.462301587301587e-05, |
|
"loss": 1.193, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 37.69, |
|
"learning_rate": 3.4523809523809526e-05, |
|
"loss": 1.2632, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 37.87, |
|
"learning_rate": 3.4424603174603174e-05, |
|
"loss": 1.2408, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 37.99, |
|
"eval_accuracy": 0.49444444444444446, |
|
"eval_loss": 1.3849085569381714, |
|
"eval_runtime": 7.0969, |
|
"eval_samples_per_second": 253.633, |
|
"eval_steps_per_second": 8.032, |
|
"step": 2137 |
|
}, |
|
{ |
|
"epoch": 38.04, |
|
"learning_rate": 3.432539682539683e-05, |
|
"loss": 1.1826, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 38.22, |
|
"learning_rate": 3.422619047619048e-05, |
|
"loss": 1.2048, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"learning_rate": 3.412698412698413e-05, |
|
"loss": 1.2153, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 38.58, |
|
"learning_rate": 3.402777777777778e-05, |
|
"loss": 1.2151, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 38.76, |
|
"learning_rate": 3.392857142857143e-05, |
|
"loss": 1.23, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 38.93, |
|
"learning_rate": 3.3829365079365086e-05, |
|
"loss": 1.1699, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 38.99, |
|
"eval_accuracy": 0.48444444444444446, |
|
"eval_loss": 1.429752230644226, |
|
"eval_runtime": 6.9781, |
|
"eval_samples_per_second": 257.951, |
|
"eval_steps_per_second": 8.168, |
|
"step": 2193 |
|
}, |
|
{ |
|
"epoch": 39.11, |
|
"learning_rate": 3.3730158730158734e-05, |
|
"loss": 1.1914, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 39.29, |
|
"learning_rate": 3.363095238095238e-05, |
|
"loss": 1.218, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 39.47, |
|
"learning_rate": 3.353174603174603e-05, |
|
"loss": 1.2134, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 39.64, |
|
"learning_rate": 3.343253968253968e-05, |
|
"loss": 1.1926, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 39.82, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.1607, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 3.3234126984126983e-05, |
|
"loss": 1.1727, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.4772222222222222, |
|
"eval_loss": 1.4331458806991577, |
|
"eval_runtime": 6.6454, |
|
"eval_samples_per_second": 270.865, |
|
"eval_steps_per_second": 8.577, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 40.18, |
|
"learning_rate": 3.313492063492064e-05, |
|
"loss": 1.1827, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 40.36, |
|
"learning_rate": 3.303571428571429e-05, |
|
"loss": 1.1691, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 40.53, |
|
"learning_rate": 3.2936507936507936e-05, |
|
"loss": 1.2009, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 40.71, |
|
"learning_rate": 3.283730158730159e-05, |
|
"loss": 1.1939, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 40.89, |
|
"learning_rate": 3.273809523809524e-05, |
|
"loss": 1.1485, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.4672222222222222, |
|
"eval_loss": 1.4596869945526123, |
|
"eval_runtime": 6.7934, |
|
"eval_samples_per_second": 264.961, |
|
"eval_steps_per_second": 8.39, |
|
"step": 2306 |
|
}, |
|
{ |
|
"epoch": 41.07, |
|
"learning_rate": 3.263888888888889e-05, |
|
"loss": 1.185, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 41.24, |
|
"learning_rate": 3.253968253968254e-05, |
|
"loss": 1.1438, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 41.42, |
|
"learning_rate": 3.244047619047619e-05, |
|
"loss": 1.2154, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 41.6, |
|
"learning_rate": 3.234126984126985e-05, |
|
"loss": 1.1789, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 41.78, |
|
"learning_rate": 3.2242063492063495e-05, |
|
"loss": 1.1464, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 41.96, |
|
"learning_rate": 3.2142857142857144e-05, |
|
"loss": 1.1668, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 41.99, |
|
"eval_accuracy": 0.47833333333333333, |
|
"eval_loss": 1.4428596496582031, |
|
"eval_runtime": 6.9455, |
|
"eval_samples_per_second": 259.161, |
|
"eval_steps_per_second": 8.207, |
|
"step": 2362 |
|
}, |
|
{ |
|
"epoch": 42.13, |
|
"learning_rate": 3.20436507936508e-05, |
|
"loss": 1.1201, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 42.31, |
|
"learning_rate": 3.194444444444444e-05, |
|
"loss": 1.1545, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 42.49, |
|
"learning_rate": 3.1845238095238096e-05, |
|
"loss": 1.1587, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 42.67, |
|
"learning_rate": 3.1746031746031745e-05, |
|
"loss": 1.1606, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 42.84, |
|
"learning_rate": 3.16468253968254e-05, |
|
"loss": 1.1881, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 42.99, |
|
"eval_accuracy": 0.48388888888888887, |
|
"eval_loss": 1.455505132675171, |
|
"eval_runtime": 6.7551, |
|
"eval_samples_per_second": 266.464, |
|
"eval_steps_per_second": 8.438, |
|
"step": 2418 |
|
}, |
|
{ |
|
"epoch": 43.02, |
|
"learning_rate": 3.154761904761905e-05, |
|
"loss": 1.1859, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 43.2, |
|
"learning_rate": 3.14484126984127e-05, |
|
"loss": 1.1149, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 43.38, |
|
"learning_rate": 3.134920634920635e-05, |
|
"loss": 1.1699, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 43.56, |
|
"learning_rate": 3.125e-05, |
|
"loss": 1.1132, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 43.73, |
|
"learning_rate": 3.1150793650793656e-05, |
|
"loss": 1.129, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 43.91, |
|
"learning_rate": 3.1051587301587304e-05, |
|
"loss": 1.1204, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.47833333333333333, |
|
"eval_loss": 1.464751958847046, |
|
"eval_runtime": 6.9852, |
|
"eval_samples_per_second": 257.688, |
|
"eval_steps_per_second": 8.16, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 44.09, |
|
"learning_rate": 3.095238095238095e-05, |
|
"loss": 1.1405, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 44.27, |
|
"learning_rate": 3.085317460317461e-05, |
|
"loss": 1.1173, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 44.44, |
|
"learning_rate": 3.075396825396826e-05, |
|
"loss": 1.1739, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 44.62, |
|
"learning_rate": 3.0654761904761905e-05, |
|
"loss": 1.1311, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 1.1483, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 44.98, |
|
"learning_rate": 3.0456349206349206e-05, |
|
"loss": 1.1523, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.47333333333333333, |
|
"eval_loss": 1.474352240562439, |
|
"eval_runtime": 6.7424, |
|
"eval_samples_per_second": 266.966, |
|
"eval_steps_per_second": 8.454, |
|
"step": 2531 |
|
}, |
|
{ |
|
"epoch": 45.16, |
|
"learning_rate": 3.0357142857142857e-05, |
|
"loss": 1.1291, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 45.33, |
|
"learning_rate": 3.0257936507936506e-05, |
|
"loss": 1.1552, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 45.51, |
|
"learning_rate": 3.0158730158730158e-05, |
|
"loss": 1.1652, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 45.69, |
|
"learning_rate": 3.005952380952381e-05, |
|
"loss": 1.1732, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 45.87, |
|
"learning_rate": 2.996031746031746e-05, |
|
"loss": 1.1206, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 45.99, |
|
"eval_accuracy": 0.4905555555555556, |
|
"eval_loss": 1.4791755676269531, |
|
"eval_runtime": 6.7618, |
|
"eval_samples_per_second": 266.202, |
|
"eval_steps_per_second": 8.43, |
|
"step": 2587 |
|
}, |
|
{ |
|
"epoch": 46.04, |
|
"learning_rate": 2.9861111111111113e-05, |
|
"loss": 1.1128, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 46.22, |
|
"learning_rate": 2.9761904761904762e-05, |
|
"loss": 1.1148, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 46.4, |
|
"learning_rate": 2.9662698412698414e-05, |
|
"loss": 1.1391, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 46.58, |
|
"learning_rate": 2.9563492063492066e-05, |
|
"loss": 1.1445, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 46.76, |
|
"learning_rate": 2.9464285714285718e-05, |
|
"loss": 1.1596, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 46.93, |
|
"learning_rate": 2.9365079365079366e-05, |
|
"loss": 1.1135, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 46.99, |
|
"eval_accuracy": 0.4677777777777778, |
|
"eval_loss": 1.5008630752563477, |
|
"eval_runtime": 7.3069, |
|
"eval_samples_per_second": 246.342, |
|
"eval_steps_per_second": 7.801, |
|
"step": 2643 |
|
}, |
|
{ |
|
"epoch": 47.11, |
|
"learning_rate": 2.9265873015873018e-05, |
|
"loss": 1.1324, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 47.29, |
|
"learning_rate": 2.916666666666667e-05, |
|
"loss": 1.0933, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 47.47, |
|
"learning_rate": 2.906746031746032e-05, |
|
"loss": 1.1701, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 47.64, |
|
"learning_rate": 2.8968253968253974e-05, |
|
"loss": 1.1505, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 47.82, |
|
"learning_rate": 2.886904761904762e-05, |
|
"loss": 1.1117, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 2.876984126984127e-05, |
|
"loss": 1.1227, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.47333333333333333, |
|
"eval_loss": 1.5479964017868042, |
|
"eval_runtime": 6.7183, |
|
"eval_samples_per_second": 267.925, |
|
"eval_steps_per_second": 8.484, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 48.18, |
|
"learning_rate": 2.867063492063492e-05, |
|
"loss": 1.1419, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 48.36, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 1.0893, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 48.53, |
|
"learning_rate": 2.8472222222222223e-05, |
|
"loss": 1.0633, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 48.71, |
|
"learning_rate": 2.8373015873015875e-05, |
|
"loss": 1.0889, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 48.89, |
|
"learning_rate": 2.8273809523809523e-05, |
|
"loss": 1.1017, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.46444444444444444, |
|
"eval_loss": 1.590686559677124, |
|
"eval_runtime": 9.0291, |
|
"eval_samples_per_second": 199.355, |
|
"eval_steps_per_second": 6.313, |
|
"step": 2756 |
|
}, |
|
{ |
|
"epoch": 49.07, |
|
"learning_rate": 2.8174603174603175e-05, |
|
"loss": 1.0731, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 49.24, |
|
"learning_rate": 2.8075396825396827e-05, |
|
"loss": 1.0992, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 49.42, |
|
"learning_rate": 2.797619047619048e-05, |
|
"loss": 1.0779, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 49.6, |
|
"learning_rate": 2.787698412698413e-05, |
|
"loss": 1.0771, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 49.78, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 1.1053, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 49.96, |
|
"learning_rate": 2.767857142857143e-05, |
|
"loss": 1.1601, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 49.99, |
|
"eval_accuracy": 0.47, |
|
"eval_loss": 1.5136301517486572, |
|
"eval_runtime": 6.8103, |
|
"eval_samples_per_second": 264.305, |
|
"eval_steps_per_second": 8.37, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 50.13, |
|
"learning_rate": 2.7579365079365083e-05, |
|
"loss": 1.1253, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 50.31, |
|
"learning_rate": 2.7480158730158735e-05, |
|
"loss": 1.0357, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 50.49, |
|
"learning_rate": 2.7380952380952383e-05, |
|
"loss": 1.0808, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 50.67, |
|
"learning_rate": 2.7281746031746032e-05, |
|
"loss": 1.0321, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 50.84, |
|
"learning_rate": 2.718253968253968e-05, |
|
"loss": 1.1239, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 50.99, |
|
"eval_accuracy": 0.47888888888888886, |
|
"eval_loss": 1.5383570194244385, |
|
"eval_runtime": 6.9446, |
|
"eval_samples_per_second": 259.194, |
|
"eval_steps_per_second": 8.208, |
|
"step": 2868 |
|
}, |
|
{ |
|
"epoch": 51.02, |
|
"learning_rate": 2.7083333333333332e-05, |
|
"loss": 1.1178, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 51.2, |
|
"learning_rate": 2.6984126984126984e-05, |
|
"loss": 1.0706, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 51.38, |
|
"learning_rate": 2.6884920634920636e-05, |
|
"loss": 1.077, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 51.56, |
|
"learning_rate": 2.6785714285714288e-05, |
|
"loss": 1.0307, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 51.73, |
|
"learning_rate": 2.6686507936507936e-05, |
|
"loss": 1.0621, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 51.91, |
|
"learning_rate": 2.6587301587301588e-05, |
|
"loss": 1.09, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.4711111111111111, |
|
"eval_loss": 1.5716180801391602, |
|
"eval_runtime": 6.726, |
|
"eval_samples_per_second": 267.618, |
|
"eval_steps_per_second": 8.475, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 52.09, |
|
"learning_rate": 2.648809523809524e-05, |
|
"loss": 1.1091, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 52.27, |
|
"learning_rate": 2.6388888888888892e-05, |
|
"loss": 1.0716, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 52.44, |
|
"learning_rate": 2.628968253968254e-05, |
|
"loss": 1.0558, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 52.62, |
|
"learning_rate": 2.6190476190476192e-05, |
|
"loss": 1.0986, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 52.8, |
|
"learning_rate": 2.6091269841269844e-05, |
|
"loss": 1.0729, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 52.98, |
|
"learning_rate": 2.5992063492063496e-05, |
|
"loss": 1.1023, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.4727777777777778, |
|
"eval_loss": 1.5735939741134644, |
|
"eval_runtime": 6.8239, |
|
"eval_samples_per_second": 263.777, |
|
"eval_steps_per_second": 8.353, |
|
"step": 2981 |
|
}, |
|
{ |
|
"epoch": 53.16, |
|
"learning_rate": 2.5892857142857148e-05, |
|
"loss": 1.017, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 53.33, |
|
"learning_rate": 2.5793650793650796e-05, |
|
"loss": 1.103, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 53.51, |
|
"learning_rate": 2.5694444444444445e-05, |
|
"loss": 1.0374, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 53.69, |
|
"learning_rate": 2.5595238095238093e-05, |
|
"loss": 1.0361, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 53.87, |
|
"learning_rate": 2.5496031746031745e-05, |
|
"loss": 1.1038, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 53.99, |
|
"eval_accuracy": 0.45555555555555555, |
|
"eval_loss": 1.5918738842010498, |
|
"eval_runtime": 6.817, |
|
"eval_samples_per_second": 264.046, |
|
"eval_steps_per_second": 8.361, |
|
"step": 3037 |
|
}, |
|
{ |
|
"epoch": 54.04, |
|
"learning_rate": 2.5396825396825397e-05, |
|
"loss": 1.0261, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 54.22, |
|
"learning_rate": 2.529761904761905e-05, |
|
"loss": 1.0808, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 54.4, |
|
"learning_rate": 2.5198412698412697e-05, |
|
"loss": 1.0486, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 54.58, |
|
"learning_rate": 2.509920634920635e-05, |
|
"loss": 1.0311, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 54.76, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.0527, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 54.93, |
|
"learning_rate": 2.4900793650793653e-05, |
|
"loss": 1.058, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 54.99, |
|
"eval_accuracy": 0.4772222222222222, |
|
"eval_loss": 1.5533833503723145, |
|
"eval_runtime": 6.859, |
|
"eval_samples_per_second": 262.427, |
|
"eval_steps_per_second": 8.31, |
|
"step": 3093 |
|
}, |
|
{ |
|
"epoch": 55.11, |
|
"learning_rate": 2.4801587301587305e-05, |
|
"loss": 1.0369, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 55.29, |
|
"learning_rate": 2.4702380952380953e-05, |
|
"loss": 1.0456, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 55.47, |
|
"learning_rate": 2.4603174603174602e-05, |
|
"loss": 1.0249, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 55.64, |
|
"learning_rate": 2.4503968253968254e-05, |
|
"loss": 1.04, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 55.82, |
|
"learning_rate": 2.4404761904761906e-05, |
|
"loss": 1.0552, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 2.4305555555555558e-05, |
|
"loss": 1.0405, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.4716666666666667, |
|
"eval_loss": 1.5788267850875854, |
|
"eval_runtime": 6.7843, |
|
"eval_samples_per_second": 265.32, |
|
"eval_steps_per_second": 8.402, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 56.18, |
|
"learning_rate": 2.4206349206349206e-05, |
|
"loss": 1.0667, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 56.36, |
|
"learning_rate": 2.4107142857142858e-05, |
|
"loss": 1.046, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 56.53, |
|
"learning_rate": 2.400793650793651e-05, |
|
"loss": 1.0146, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 56.71, |
|
"learning_rate": 2.390873015873016e-05, |
|
"loss": 1.0656, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 56.89, |
|
"learning_rate": 2.380952380952381e-05, |
|
"loss": 1.0172, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.4766666666666667, |
|
"eval_loss": 1.585532546043396, |
|
"eval_runtime": 6.9697, |
|
"eval_samples_per_second": 258.26, |
|
"eval_steps_per_second": 8.178, |
|
"step": 3206 |
|
}, |
|
{ |
|
"epoch": 57.07, |
|
"learning_rate": 2.3710317460317462e-05, |
|
"loss": 1.0719, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 57.24, |
|
"learning_rate": 2.361111111111111e-05, |
|
"loss": 1.0542, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 57.42, |
|
"learning_rate": 2.3511904761904762e-05, |
|
"loss": 1.0316, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 57.6, |
|
"learning_rate": 2.3412698412698414e-05, |
|
"loss": 0.993, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 57.78, |
|
"learning_rate": 2.3313492063492066e-05, |
|
"loss": 1.0508, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 57.96, |
|
"learning_rate": 2.3214285714285715e-05, |
|
"loss": 1.0036, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 57.99, |
|
"eval_accuracy": 0.455, |
|
"eval_loss": 1.6425151824951172, |
|
"eval_runtime": 7.0778, |
|
"eval_samples_per_second": 254.315, |
|
"eval_steps_per_second": 8.053, |
|
"step": 3262 |
|
}, |
|
{ |
|
"epoch": 58.13, |
|
"learning_rate": 2.3115079365079367e-05, |
|
"loss": 1.0261, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 58.31, |
|
"learning_rate": 2.3015873015873015e-05, |
|
"loss": 1.0191, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 58.49, |
|
"learning_rate": 2.2916666666666667e-05, |
|
"loss": 1.0035, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 58.67, |
|
"learning_rate": 2.281746031746032e-05, |
|
"loss": 0.9924, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 58.84, |
|
"learning_rate": 2.271825396825397e-05, |
|
"loss": 1.0124, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 58.99, |
|
"eval_accuracy": 0.4677777777777778, |
|
"eval_loss": 1.6039385795593262, |
|
"eval_runtime": 6.8431, |
|
"eval_samples_per_second": 263.038, |
|
"eval_steps_per_second": 8.33, |
|
"step": 3318 |
|
}, |
|
{ |
|
"epoch": 59.02, |
|
"learning_rate": 2.261904761904762e-05, |
|
"loss": 1.0292, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 59.2, |
|
"learning_rate": 2.251984126984127e-05, |
|
"loss": 0.9945, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 59.38, |
|
"learning_rate": 2.2420634920634923e-05, |
|
"loss": 1.0021, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 59.56, |
|
"learning_rate": 2.2321428571428575e-05, |
|
"loss": 1.0187, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 59.73, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 1.0092, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 59.91, |
|
"learning_rate": 2.2123015873015872e-05, |
|
"loss": 1.0647, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.4572222222222222, |
|
"eval_loss": 1.5890642404556274, |
|
"eval_runtime": 11.7484, |
|
"eval_samples_per_second": 153.212, |
|
"eval_steps_per_second": 4.852, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 60.09, |
|
"learning_rate": 2.2023809523809524e-05, |
|
"loss": 1.009, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 60.27, |
|
"learning_rate": 2.1924603174603176e-05, |
|
"loss": 0.9819, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 60.44, |
|
"learning_rate": 2.1825396825396827e-05, |
|
"loss": 1.0114, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 60.62, |
|
"learning_rate": 2.172619047619048e-05, |
|
"loss": 1.0253, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 60.8, |
|
"learning_rate": 2.1626984126984128e-05, |
|
"loss": 0.9988, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 60.98, |
|
"learning_rate": 2.152777777777778e-05, |
|
"loss": 1.0143, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.4483333333333333, |
|
"eval_loss": 1.6265422105789185, |
|
"eval_runtime": 6.6939, |
|
"eval_samples_per_second": 268.9, |
|
"eval_steps_per_second": 8.515, |
|
"step": 3431 |
|
}, |
|
{ |
|
"epoch": 61.16, |
|
"learning_rate": 2.1428571428571428e-05, |
|
"loss": 0.9913, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 61.33, |
|
"learning_rate": 2.132936507936508e-05, |
|
"loss": 0.978, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 61.51, |
|
"learning_rate": 2.1230158730158732e-05, |
|
"loss": 0.9833, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 61.69, |
|
"learning_rate": 2.113095238095238e-05, |
|
"loss": 1.049, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 61.87, |
|
"learning_rate": 2.1031746031746032e-05, |
|
"loss": 1.0051, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 61.99, |
|
"eval_accuracy": 0.4633333333333333, |
|
"eval_loss": 1.6208295822143555, |
|
"eval_runtime": 6.6968, |
|
"eval_samples_per_second": 268.784, |
|
"eval_steps_per_second": 8.511, |
|
"step": 3487 |
|
}, |
|
{ |
|
"epoch": 62.04, |
|
"learning_rate": 2.0932539682539684e-05, |
|
"loss": 1.0076, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 62.22, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 1.0126, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 62.4, |
|
"learning_rate": 2.0734126984126988e-05, |
|
"loss": 0.9634, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 62.58, |
|
"learning_rate": 2.0634920634920636e-05, |
|
"loss": 0.9616, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 62.76, |
|
"learning_rate": 2.0535714285714285e-05, |
|
"loss": 1.0025, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 62.93, |
|
"learning_rate": 2.0436507936507937e-05, |
|
"loss": 0.9571, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 62.99, |
|
"eval_accuracy": 0.4483333333333333, |
|
"eval_loss": 1.6873899698257446, |
|
"eval_runtime": 6.8101, |
|
"eval_samples_per_second": 264.314, |
|
"eval_steps_per_second": 8.37, |
|
"step": 3543 |
|
}, |
|
{ |
|
"epoch": 63.11, |
|
"learning_rate": 2.033730158730159e-05, |
|
"loss": 0.9859, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 63.29, |
|
"learning_rate": 2.023809523809524e-05, |
|
"loss": 0.9828, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 63.47, |
|
"learning_rate": 2.013888888888889e-05, |
|
"loss": 0.9969, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 63.64, |
|
"learning_rate": 2.003968253968254e-05, |
|
"loss": 1.0127, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 63.82, |
|
"learning_rate": 1.9940476190476193e-05, |
|
"loss": 0.9949, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 1.984126984126984e-05, |
|
"loss": 0.9838, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.45166666666666666, |
|
"eval_loss": 1.6777788400650024, |
|
"eval_runtime": 6.4627, |
|
"eval_samples_per_second": 278.521, |
|
"eval_steps_per_second": 8.82, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 64.18, |
|
"learning_rate": 1.9742063492063493e-05, |
|
"loss": 0.9745, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 64.36, |
|
"learning_rate": 1.9642857142857145e-05, |
|
"loss": 0.9895, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 64.53, |
|
"learning_rate": 1.9543650793650793e-05, |
|
"loss": 0.9219, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 64.71, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.9853, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 64.89, |
|
"learning_rate": 1.9345238095238097e-05, |
|
"loss": 0.9995, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.4722222222222222, |
|
"eval_loss": 1.6247550249099731, |
|
"eval_runtime": 6.6147, |
|
"eval_samples_per_second": 272.12, |
|
"eval_steps_per_second": 8.617, |
|
"step": 3656 |
|
}, |
|
{ |
|
"epoch": 65.07, |
|
"learning_rate": 1.924603174603175e-05, |
|
"loss": 0.9911, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 65.24, |
|
"learning_rate": 1.9146825396825398e-05, |
|
"loss": 1.0129, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 65.42, |
|
"learning_rate": 1.9047619047619046e-05, |
|
"loss": 0.9734, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 65.6, |
|
"learning_rate": 1.8948412698412698e-05, |
|
"loss": 0.9699, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 65.78, |
|
"learning_rate": 1.884920634920635e-05, |
|
"loss": 0.9707, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 65.96, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 1.0374, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 65.99, |
|
"eval_accuracy": 0.4666666666666667, |
|
"eval_loss": 1.6644623279571533, |
|
"eval_runtime": 6.7375, |
|
"eval_samples_per_second": 267.163, |
|
"eval_steps_per_second": 8.46, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 66.13, |
|
"learning_rate": 1.8650793650793654e-05, |
|
"loss": 0.9862, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 66.31, |
|
"learning_rate": 1.8551587301587302e-05, |
|
"loss": 0.9879, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 66.49, |
|
"learning_rate": 1.8452380952380954e-05, |
|
"loss": 0.951, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"learning_rate": 1.8353174603174602e-05, |
|
"loss": 0.991, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 66.84, |
|
"learning_rate": 1.8253968253968254e-05, |
|
"loss": 0.9483, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 66.99, |
|
"eval_accuracy": 0.46111111111111114, |
|
"eval_loss": 1.6306700706481934, |
|
"eval_runtime": 6.7855, |
|
"eval_samples_per_second": 265.273, |
|
"eval_steps_per_second": 8.4, |
|
"step": 3768 |
|
}, |
|
{ |
|
"epoch": 67.02, |
|
"learning_rate": 1.8154761904761906e-05, |
|
"loss": 0.9483, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 67.2, |
|
"learning_rate": 1.8055555555555555e-05, |
|
"loss": 0.9179, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 67.38, |
|
"learning_rate": 1.7956349206349207e-05, |
|
"loss": 0.9575, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 67.56, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 0.9647, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 67.73, |
|
"learning_rate": 1.775793650793651e-05, |
|
"loss": 0.9526, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 67.91, |
|
"learning_rate": 1.7658730158730162e-05, |
|
"loss": 0.9825, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.4661111111111111, |
|
"eval_loss": 1.6661802530288696, |
|
"eval_runtime": 6.7756, |
|
"eval_samples_per_second": 265.661, |
|
"eval_steps_per_second": 8.413, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 68.09, |
|
"learning_rate": 1.755952380952381e-05, |
|
"loss": 0.9248, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 68.27, |
|
"learning_rate": 1.746031746031746e-05, |
|
"loss": 0.925, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 68.44, |
|
"learning_rate": 1.736111111111111e-05, |
|
"loss": 0.9444, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 68.62, |
|
"learning_rate": 1.7261904761904763e-05, |
|
"loss": 0.9719, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 68.8, |
|
"learning_rate": 1.7162698412698415e-05, |
|
"loss": 0.9858, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 68.98, |
|
"learning_rate": 1.7063492063492063e-05, |
|
"loss": 1.0023, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.46, |
|
"eval_loss": 1.6650328636169434, |
|
"eval_runtime": 6.9757, |
|
"eval_samples_per_second": 258.039, |
|
"eval_steps_per_second": 8.171, |
|
"step": 3881 |
|
}, |
|
{ |
|
"epoch": 69.16, |
|
"learning_rate": 1.6964285714285715e-05, |
|
"loss": 0.9709, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 69.33, |
|
"learning_rate": 1.6865079365079367e-05, |
|
"loss": 0.9281, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 69.51, |
|
"learning_rate": 1.6765873015873016e-05, |
|
"loss": 0.9137, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 69.69, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.9541, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 69.87, |
|
"learning_rate": 1.656746031746032e-05, |
|
"loss": 0.9642, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 69.99, |
|
"eval_accuracy": 0.4494444444444444, |
|
"eval_loss": 1.6953352689743042, |
|
"eval_runtime": 6.89, |
|
"eval_samples_per_second": 261.249, |
|
"eval_steps_per_second": 8.273, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 70.04, |
|
"learning_rate": 1.6468253968253968e-05, |
|
"loss": 0.978, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 70.22, |
|
"learning_rate": 1.636904761904762e-05, |
|
"loss": 0.9182, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 70.4, |
|
"learning_rate": 1.626984126984127e-05, |
|
"loss": 0.9306, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 70.58, |
|
"learning_rate": 1.6170634920634923e-05, |
|
"loss": 0.9004, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 70.76, |
|
"learning_rate": 1.6071428571428572e-05, |
|
"loss": 0.9347, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 70.93, |
|
"learning_rate": 1.597222222222222e-05, |
|
"loss": 0.9687, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 70.99, |
|
"eval_accuracy": 0.4661111111111111, |
|
"eval_loss": 1.707597017288208, |
|
"eval_runtime": 6.4216, |
|
"eval_samples_per_second": 280.303, |
|
"eval_steps_per_second": 8.876, |
|
"step": 3993 |
|
}, |
|
{ |
|
"epoch": 71.11, |
|
"learning_rate": 1.5873015873015872e-05, |
|
"loss": 0.9244, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 71.29, |
|
"learning_rate": 1.5773809523809524e-05, |
|
"loss": 0.921, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 71.47, |
|
"learning_rate": 1.5674603174603176e-05, |
|
"loss": 0.9421, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 71.64, |
|
"learning_rate": 1.5575396825396828e-05, |
|
"loss": 0.9144, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 71.82, |
|
"learning_rate": 1.5476190476190476e-05, |
|
"loss": 1.0043, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"learning_rate": 1.537698412698413e-05, |
|
"loss": 0.9542, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.46555555555555556, |
|
"eval_loss": 1.7011748552322388, |
|
"eval_runtime": 6.7739, |
|
"eval_samples_per_second": 265.725, |
|
"eval_steps_per_second": 8.415, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 72.18, |
|
"learning_rate": 1.527777777777778e-05, |
|
"loss": 0.9239, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 72.36, |
|
"learning_rate": 1.5178571428571429e-05, |
|
"loss": 0.9682, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 72.53, |
|
"learning_rate": 1.5079365079365079e-05, |
|
"loss": 0.942, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 72.71, |
|
"learning_rate": 1.498015873015873e-05, |
|
"loss": 0.9036, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 72.89, |
|
"learning_rate": 1.4880952380952381e-05, |
|
"loss": 0.9378, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.4533333333333333, |
|
"eval_loss": 1.7056083679199219, |
|
"eval_runtime": 6.5401, |
|
"eval_samples_per_second": 275.223, |
|
"eval_steps_per_second": 8.715, |
|
"step": 4106 |
|
}, |
|
{ |
|
"epoch": 73.07, |
|
"learning_rate": 1.4781746031746033e-05, |
|
"loss": 0.9286, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 73.24, |
|
"learning_rate": 1.4682539682539683e-05, |
|
"loss": 0.9517, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 73.42, |
|
"learning_rate": 1.4583333333333335e-05, |
|
"loss": 0.9557, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 73.6, |
|
"learning_rate": 1.4484126984126987e-05, |
|
"loss": 0.9168, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 73.78, |
|
"learning_rate": 1.4384920634920635e-05, |
|
"loss": 0.9524, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 73.96, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 0.9542, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 73.99, |
|
"eval_accuracy": 0.4572222222222222, |
|
"eval_loss": 1.7331255674362183, |
|
"eval_runtime": 6.6227, |
|
"eval_samples_per_second": 271.793, |
|
"eval_steps_per_second": 8.607, |
|
"step": 4162 |
|
}, |
|
{ |
|
"epoch": 74.13, |
|
"learning_rate": 1.4186507936507937e-05, |
|
"loss": 0.9507, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 74.31, |
|
"learning_rate": 1.4087301587301587e-05, |
|
"loss": 0.8558, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 74.49, |
|
"learning_rate": 1.398809523809524e-05, |
|
"loss": 0.8986, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 74.67, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.9561, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 74.84, |
|
"learning_rate": 1.3789682539682541e-05, |
|
"loss": 0.9035, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 74.99, |
|
"eval_accuracy": 0.44166666666666665, |
|
"eval_loss": 1.7459461688995361, |
|
"eval_runtime": 6.917, |
|
"eval_samples_per_second": 260.229, |
|
"eval_steps_per_second": 8.241, |
|
"step": 4218 |
|
}, |
|
{ |
|
"epoch": 75.02, |
|
"learning_rate": 1.3690476190476192e-05, |
|
"loss": 0.9349, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 75.2, |
|
"learning_rate": 1.359126984126984e-05, |
|
"loss": 0.9391, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 75.38, |
|
"learning_rate": 1.3492063492063492e-05, |
|
"loss": 0.9477, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 75.56, |
|
"learning_rate": 1.3392857142857144e-05, |
|
"loss": 0.8942, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 75.73, |
|
"learning_rate": 1.3293650793650794e-05, |
|
"loss": 0.9168, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 75.91, |
|
"learning_rate": 1.3194444444444446e-05, |
|
"loss": 0.9631, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.465, |
|
"eval_loss": 1.723626732826233, |
|
"eval_runtime": 6.7505, |
|
"eval_samples_per_second": 266.648, |
|
"eval_steps_per_second": 8.444, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 76.09, |
|
"learning_rate": 1.3095238095238096e-05, |
|
"loss": 0.9378, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 76.27, |
|
"learning_rate": 1.2996031746031748e-05, |
|
"loss": 0.9081, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 76.44, |
|
"learning_rate": 1.2896825396825398e-05, |
|
"loss": 0.9126, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 76.62, |
|
"learning_rate": 1.2797619047619047e-05, |
|
"loss": 0.8934, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 76.8, |
|
"learning_rate": 1.2698412698412699e-05, |
|
"loss": 0.9844, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 76.98, |
|
"learning_rate": 1.2599206349206349e-05, |
|
"loss": 0.8759, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.455, |
|
"eval_loss": 1.7293672561645508, |
|
"eval_runtime": 6.715, |
|
"eval_samples_per_second": 268.058, |
|
"eval_steps_per_second": 8.489, |
|
"step": 4331 |
|
}, |
|
{ |
|
"epoch": 77.16, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.9142, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 77.33, |
|
"learning_rate": 1.2400793650793652e-05, |
|
"loss": 0.9153, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 77.51, |
|
"learning_rate": 1.2301587301587301e-05, |
|
"loss": 0.9225, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 77.69, |
|
"learning_rate": 1.2202380952380953e-05, |
|
"loss": 0.9304, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 77.87, |
|
"learning_rate": 1.2103174603174603e-05, |
|
"loss": 0.9218, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 77.99, |
|
"eval_accuracy": 0.4577777777777778, |
|
"eval_loss": 1.7653708457946777, |
|
"eval_runtime": 7.1048, |
|
"eval_samples_per_second": 253.348, |
|
"eval_steps_per_second": 8.023, |
|
"step": 4387 |
|
}, |
|
{ |
|
"epoch": 78.04, |
|
"learning_rate": 1.2003968253968255e-05, |
|
"loss": 0.8532, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 78.22, |
|
"learning_rate": 1.1904761904761905e-05, |
|
"loss": 0.8794, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 78.4, |
|
"learning_rate": 1.1805555555555555e-05, |
|
"loss": 0.8853, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 78.58, |
|
"learning_rate": 1.1706349206349207e-05, |
|
"loss": 0.8914, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 78.76, |
|
"learning_rate": 1.1607142857142857e-05, |
|
"loss": 0.9161, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 78.93, |
|
"learning_rate": 1.1507936507936508e-05, |
|
"loss": 0.9077, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 78.99, |
|
"eval_accuracy": 0.45944444444444443, |
|
"eval_loss": 1.7234431505203247, |
|
"eval_runtime": 6.668, |
|
"eval_samples_per_second": 269.947, |
|
"eval_steps_per_second": 8.548, |
|
"step": 4443 |
|
}, |
|
{ |
|
"epoch": 79.11, |
|
"learning_rate": 1.140873015873016e-05, |
|
"loss": 0.897, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 79.29, |
|
"learning_rate": 1.130952380952381e-05, |
|
"loss": 0.8687, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 79.47, |
|
"learning_rate": 1.1210317460317461e-05, |
|
"loss": 0.8983, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 79.64, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.9047, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 79.82, |
|
"learning_rate": 1.1011904761904762e-05, |
|
"loss": 0.8923, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 1.0912698412698414e-05, |
|
"loss": 0.8924, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.4683333333333333, |
|
"eval_loss": 1.7255866527557373, |
|
"eval_runtime": 6.948, |
|
"eval_samples_per_second": 259.067, |
|
"eval_steps_per_second": 8.204, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 80.18, |
|
"learning_rate": 1.0813492063492064e-05, |
|
"loss": 0.8976, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 80.36, |
|
"learning_rate": 1.0714285714285714e-05, |
|
"loss": 0.8618, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 80.53, |
|
"learning_rate": 1.0615079365079366e-05, |
|
"loss": 0.8743, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 80.71, |
|
"learning_rate": 1.0515873015873016e-05, |
|
"loss": 0.9115, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 80.89, |
|
"learning_rate": 1.0416666666666668e-05, |
|
"loss": 0.9156, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.4677777777777778, |
|
"eval_loss": 1.73197603225708, |
|
"eval_runtime": 6.9362, |
|
"eval_samples_per_second": 259.509, |
|
"eval_steps_per_second": 8.218, |
|
"step": 4556 |
|
}, |
|
{ |
|
"epoch": 81.07, |
|
"learning_rate": 1.0317460317460318e-05, |
|
"loss": 0.8605, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 81.24, |
|
"learning_rate": 1.0218253968253968e-05, |
|
"loss": 0.8848, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 81.42, |
|
"learning_rate": 1.011904761904762e-05, |
|
"loss": 0.875, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 81.6, |
|
"learning_rate": 1.001984126984127e-05, |
|
"loss": 0.896, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 81.78, |
|
"learning_rate": 9.92063492063492e-06, |
|
"loss": 0.8848, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 81.96, |
|
"learning_rate": 9.821428571428573e-06, |
|
"loss": 0.806, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 81.99, |
|
"eval_accuracy": 0.4661111111111111, |
|
"eval_loss": 1.734808087348938, |
|
"eval_runtime": 6.8749, |
|
"eval_samples_per_second": 261.821, |
|
"eval_steps_per_second": 8.291, |
|
"step": 4612 |
|
}, |
|
{ |
|
"epoch": 82.13, |
|
"learning_rate": 9.722222222222223e-06, |
|
"loss": 0.8712, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 82.31, |
|
"learning_rate": 9.623015873015875e-06, |
|
"loss": 0.8963, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 82.49, |
|
"learning_rate": 9.523809523809523e-06, |
|
"loss": 0.871, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 82.67, |
|
"learning_rate": 9.424603174603175e-06, |
|
"loss": 0.854, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 82.84, |
|
"learning_rate": 9.325396825396827e-06, |
|
"loss": 0.8863, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 82.99, |
|
"eval_accuracy": 0.46055555555555555, |
|
"eval_loss": 1.7513699531555176, |
|
"eval_runtime": 6.8287, |
|
"eval_samples_per_second": 263.592, |
|
"eval_steps_per_second": 8.347, |
|
"step": 4668 |
|
}, |
|
{ |
|
"epoch": 83.02, |
|
"learning_rate": 9.226190476190477e-06, |
|
"loss": 0.9394, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 83.2, |
|
"learning_rate": 9.126984126984127e-06, |
|
"loss": 0.887, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 83.38, |
|
"learning_rate": 9.027777777777777e-06, |
|
"loss": 0.8937, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 83.56, |
|
"learning_rate": 8.92857142857143e-06, |
|
"loss": 0.8514, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 83.73, |
|
"learning_rate": 8.829365079365081e-06, |
|
"loss": 0.8772, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 83.91, |
|
"learning_rate": 8.73015873015873e-06, |
|
"loss": 0.8698, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.4661111111111111, |
|
"eval_loss": 1.7483917474746704, |
|
"eval_runtime": 6.4282, |
|
"eval_samples_per_second": 280.014, |
|
"eval_steps_per_second": 8.867, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 84.09, |
|
"learning_rate": 8.630952380952381e-06, |
|
"loss": 0.8657, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 84.27, |
|
"learning_rate": 8.531746031746032e-06, |
|
"loss": 0.8555, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 84.44, |
|
"learning_rate": 8.432539682539684e-06, |
|
"loss": 0.8727, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 84.62, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.8733, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 84.8, |
|
"learning_rate": 8.234126984126984e-06, |
|
"loss": 0.9119, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 84.98, |
|
"learning_rate": 8.134920634920636e-06, |
|
"loss": 0.8623, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.4777777777777778, |
|
"eval_loss": 1.7420405149459839, |
|
"eval_runtime": 7.1131, |
|
"eval_samples_per_second": 253.054, |
|
"eval_steps_per_second": 8.013, |
|
"step": 4781 |
|
}, |
|
{ |
|
"epoch": 85.16, |
|
"learning_rate": 8.035714285714286e-06, |
|
"loss": 0.9058, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 85.33, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 0.9022, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 85.51, |
|
"learning_rate": 7.837301587301588e-06, |
|
"loss": 0.8344, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 85.69, |
|
"learning_rate": 7.738095238095238e-06, |
|
"loss": 0.8355, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 85.87, |
|
"learning_rate": 7.63888888888889e-06, |
|
"loss": 0.8643, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 85.99, |
|
"eval_accuracy": 0.46166666666666667, |
|
"eval_loss": 1.7636218070983887, |
|
"eval_runtime": 6.5419, |
|
"eval_samples_per_second": 275.149, |
|
"eval_steps_per_second": 8.713, |
|
"step": 4837 |
|
}, |
|
{ |
|
"epoch": 86.04, |
|
"learning_rate": 7.5396825396825394e-06, |
|
"loss": 0.8704, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 86.22, |
|
"learning_rate": 7.4404761904761905e-06, |
|
"loss": 0.8238, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 86.4, |
|
"learning_rate": 7.3412698412698415e-06, |
|
"loss": 0.8606, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 86.58, |
|
"learning_rate": 7.242063492063493e-06, |
|
"loss": 0.8792, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 86.76, |
|
"learning_rate": 7.142857142857143e-06, |
|
"loss": 0.8643, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 86.93, |
|
"learning_rate": 7.043650793650794e-06, |
|
"loss": 0.8914, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 86.99, |
|
"eval_accuracy": 0.465, |
|
"eval_loss": 1.7551671266555786, |
|
"eval_runtime": 6.4893, |
|
"eval_samples_per_second": 277.379, |
|
"eval_steps_per_second": 8.784, |
|
"step": 4893 |
|
}, |
|
{ |
|
"epoch": 87.11, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.8279, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 87.29, |
|
"learning_rate": 6.845238095238096e-06, |
|
"loss": 0.8537, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 87.47, |
|
"learning_rate": 6.746031746031746e-06, |
|
"loss": 0.8736, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 87.64, |
|
"learning_rate": 6.646825396825397e-06, |
|
"loss": 0.8449, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 87.82, |
|
"learning_rate": 6.547619047619048e-06, |
|
"loss": 0.8282, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"learning_rate": 6.448412698412699e-06, |
|
"loss": 0.837, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.46444444444444444, |
|
"eval_loss": 1.755152702331543, |
|
"eval_runtime": 6.8002, |
|
"eval_samples_per_second": 264.7, |
|
"eval_steps_per_second": 8.382, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 88.18, |
|
"learning_rate": 6.349206349206349e-06, |
|
"loss": 0.8562, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 88.36, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.8646, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 88.53, |
|
"learning_rate": 6.1507936507936505e-06, |
|
"loss": 0.8499, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 88.71, |
|
"learning_rate": 6.0515873015873015e-06, |
|
"loss": 0.8845, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 88.89, |
|
"learning_rate": 5.9523809523809525e-06, |
|
"loss": 0.8217, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.4638888888888889, |
|
"eval_loss": 1.7532140016555786, |
|
"eval_runtime": 7.0476, |
|
"eval_samples_per_second": 255.408, |
|
"eval_steps_per_second": 8.088, |
|
"step": 5006 |
|
}, |
|
{ |
|
"epoch": 89.07, |
|
"learning_rate": 5.8531746031746036e-06, |
|
"loss": 0.8373, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 89.24, |
|
"learning_rate": 5.753968253968254e-06, |
|
"loss": 0.7907, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 89.42, |
|
"learning_rate": 5.654761904761905e-06, |
|
"loss": 0.8347, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 89.6, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.867, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 89.78, |
|
"learning_rate": 5.456349206349207e-06, |
|
"loss": 0.8816, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 89.96, |
|
"learning_rate": 5.357142857142857e-06, |
|
"loss": 0.8601, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 89.99, |
|
"eval_accuracy": 0.4683333333333333, |
|
"eval_loss": 1.7447186708450317, |
|
"eval_runtime": 6.3932, |
|
"eval_samples_per_second": 281.551, |
|
"eval_steps_per_second": 8.916, |
|
"step": 5062 |
|
}, |
|
{ |
|
"epoch": 90.13, |
|
"learning_rate": 5.257936507936508e-06, |
|
"loss": 0.8323, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 90.31, |
|
"learning_rate": 5.158730158730159e-06, |
|
"loss": 0.8112, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 90.49, |
|
"learning_rate": 5.05952380952381e-06, |
|
"loss": 0.8216, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 90.67, |
|
"learning_rate": 4.96031746031746e-06, |
|
"loss": 0.8778, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 90.84, |
|
"learning_rate": 4.861111111111111e-06, |
|
"loss": 0.8293, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 90.99, |
|
"eval_accuracy": 0.46111111111111114, |
|
"eval_loss": 1.7622219324111938, |
|
"eval_runtime": 6.9088, |
|
"eval_samples_per_second": 260.536, |
|
"eval_steps_per_second": 8.25, |
|
"step": 5118 |
|
}, |
|
{ |
|
"epoch": 91.02, |
|
"learning_rate": 4.7619047619047615e-06, |
|
"loss": 0.8845, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 91.2, |
|
"learning_rate": 4.662698412698413e-06, |
|
"loss": 0.8444, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 91.38, |
|
"learning_rate": 4.563492063492064e-06, |
|
"loss": 0.8317, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 91.56, |
|
"learning_rate": 4.464285714285715e-06, |
|
"loss": 0.8954, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 91.73, |
|
"learning_rate": 4.365079365079365e-06, |
|
"loss": 0.8456, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 91.91, |
|
"learning_rate": 4.265873015873016e-06, |
|
"loss": 0.8301, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.4633333333333333, |
|
"eval_loss": 1.7615541219711304, |
|
"eval_runtime": 6.8433, |
|
"eval_samples_per_second": 263.03, |
|
"eval_steps_per_second": 8.329, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 92.09, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 0.8093, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 92.27, |
|
"learning_rate": 4.067460317460318e-06, |
|
"loss": 0.777, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 92.44, |
|
"learning_rate": 3.968253968253968e-06, |
|
"loss": 0.8227, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 92.62, |
|
"learning_rate": 3.869047619047619e-06, |
|
"loss": 0.8856, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 92.8, |
|
"learning_rate": 3.7698412698412697e-06, |
|
"loss": 0.8843, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 92.98, |
|
"learning_rate": 3.6706349206349208e-06, |
|
"loss": 0.7752, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.4722222222222222, |
|
"eval_loss": 1.758492112159729, |
|
"eval_runtime": 6.7386, |
|
"eval_samples_per_second": 267.116, |
|
"eval_steps_per_second": 8.459, |
|
"step": 5231 |
|
}, |
|
{ |
|
"epoch": 93.16, |
|
"learning_rate": 3.5714285714285714e-06, |
|
"loss": 0.8809, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 93.33, |
|
"learning_rate": 3.4722222222222224e-06, |
|
"loss": 0.8723, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 93.51, |
|
"learning_rate": 3.373015873015873e-06, |
|
"loss": 0.8171, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 93.69, |
|
"learning_rate": 3.273809523809524e-06, |
|
"loss": 0.8263, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 93.87, |
|
"learning_rate": 3.1746031746031746e-06, |
|
"loss": 0.8533, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 93.99, |
|
"eval_accuracy": 0.46166666666666667, |
|
"eval_loss": 1.7842094898223877, |
|
"eval_runtime": 6.567, |
|
"eval_samples_per_second": 274.097, |
|
"eval_steps_per_second": 8.68, |
|
"step": 5287 |
|
}, |
|
{ |
|
"epoch": 94.04, |
|
"learning_rate": 3.0753968253968252e-06, |
|
"loss": 0.8544, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 94.22, |
|
"learning_rate": 2.9761904761904763e-06, |
|
"loss": 0.8108, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 94.4, |
|
"learning_rate": 2.876984126984127e-06, |
|
"loss": 0.8582, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 94.58, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.796, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 94.76, |
|
"learning_rate": 2.6785714285714285e-06, |
|
"loss": 0.8624, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 94.93, |
|
"learning_rate": 2.5793650793650795e-06, |
|
"loss": 0.8156, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 94.99, |
|
"eval_accuracy": 0.4622222222222222, |
|
"eval_loss": 1.7836720943450928, |
|
"eval_runtime": 7.173, |
|
"eval_samples_per_second": 250.943, |
|
"eval_steps_per_second": 7.947, |
|
"step": 5343 |
|
}, |
|
{ |
|
"epoch": 95.11, |
|
"learning_rate": 2.48015873015873e-06, |
|
"loss": 0.8156, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 95.29, |
|
"learning_rate": 2.3809523809523808e-06, |
|
"loss": 0.8371, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 95.47, |
|
"learning_rate": 2.281746031746032e-06, |
|
"loss": 0.8235, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 95.64, |
|
"learning_rate": 2.1825396825396824e-06, |
|
"loss": 0.8466, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 95.82, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"loss": 0.8283, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"learning_rate": 1.984126984126984e-06, |
|
"loss": 0.8094, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.4583333333333333, |
|
"eval_loss": 1.7896106243133545, |
|
"eval_runtime": 6.5048, |
|
"eval_samples_per_second": 276.719, |
|
"eval_steps_per_second": 8.763, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 96.18, |
|
"learning_rate": 1.8849206349206349e-06, |
|
"loss": 0.7934, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 96.36, |
|
"learning_rate": 1.7857142857142857e-06, |
|
"loss": 0.8529, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 96.53, |
|
"learning_rate": 1.6865079365079365e-06, |
|
"loss": 0.8691, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 96.71, |
|
"learning_rate": 1.5873015873015873e-06, |
|
"loss": 0.8508, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 96.89, |
|
"learning_rate": 1.4880952380952381e-06, |
|
"loss": 0.839, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.465, |
|
"eval_loss": 1.7834640741348267, |
|
"eval_runtime": 6.5614, |
|
"eval_samples_per_second": 274.333, |
|
"eval_steps_per_second": 8.687, |
|
"step": 5456 |
|
}, |
|
{ |
|
"epoch": 97.07, |
|
"learning_rate": 1.388888888888889e-06, |
|
"loss": 0.8844, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 97.24, |
|
"learning_rate": 1.2896825396825398e-06, |
|
"loss": 0.8183, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 97.42, |
|
"learning_rate": 1.1904761904761904e-06, |
|
"loss": 0.8597, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 97.6, |
|
"learning_rate": 1.0912698412698412e-06, |
|
"loss": 0.8369, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 97.78, |
|
"learning_rate": 9.92063492063492e-07, |
|
"loss": 0.8169, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 97.96, |
|
"learning_rate": 8.928571428571428e-07, |
|
"loss": 0.839, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 97.99, |
|
"eval_accuracy": 0.46, |
|
"eval_loss": 1.7882862091064453, |
|
"eval_runtime": 6.3877, |
|
"eval_samples_per_second": 281.793, |
|
"eval_steps_per_second": 8.923, |
|
"step": 5512 |
|
}, |
|
{ |
|
"epoch": 98.13, |
|
"learning_rate": 7.936507936507937e-07, |
|
"loss": 0.8395, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 98.31, |
|
"learning_rate": 6.944444444444445e-07, |
|
"loss": 0.8356, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 98.49, |
|
"learning_rate": 5.952380952380952e-07, |
|
"loss": 0.8396, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 98.67, |
|
"learning_rate": 4.96031746031746e-07, |
|
"loss": 0.8194, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 98.84, |
|
"learning_rate": 3.9682539682539683e-07, |
|
"loss": 0.7763, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 98.99, |
|
"eval_accuracy": 0.45944444444444443, |
|
"eval_loss": 1.7838345766067505, |
|
"eval_runtime": 6.912, |
|
"eval_samples_per_second": 260.415, |
|
"eval_steps_per_second": 8.246, |
|
"step": 5568 |
|
}, |
|
{ |
|
"epoch": 99.02, |
|
"learning_rate": 2.976190476190476e-07, |
|
"loss": 0.809, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 99.2, |
|
"learning_rate": 1.9841269841269841e-07, |
|
"loss": 0.8239, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 99.38, |
|
"learning_rate": 9.920634920634921e-08, |
|
"loss": 0.7982, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 99.56, |
|
"learning_rate": 0.0, |
|
"loss": 0.8186, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 99.56, |
|
"eval_accuracy": 0.46055555555555555, |
|
"eval_loss": 1.7836859226226807, |
|
"eval_runtime": 6.4969, |
|
"eval_samples_per_second": 277.055, |
|
"eval_steps_per_second": 8.773, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 99.56, |
|
"step": 5600, |
|
"total_flos": 1.782025780985856e+19, |
|
"train_loss": 1.117841152037893, |
|
"train_runtime": 7254.7208, |
|
"train_samples_per_second": 99.246, |
|
"train_steps_per_second": 0.772 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 5600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 1.782025780985856e+19, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|