|
{ |
|
"best_metric": 0.974025974025974, |
|
"best_model_checkpoint": "resnet-50-resnet50_fashion/checkpoint-2077", |
|
"epoch": 49.66996699669967, |
|
"global_step": 4300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.1627906976744187e-05, |
|
"loss": 0.6908, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.3255813953488374e-05, |
|
"loss": 0.6904, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.488372093023256e-05, |
|
"loss": 0.6864, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.651162790697675e-05, |
|
"loss": 0.6821, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.991778247592201e-05, |
|
"loss": 0.6771, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.9800328870096314e-05, |
|
"loss": 0.6719, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.968287526427062e-05, |
|
"loss": 0.6696, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.956542165844491e-05, |
|
"loss": 0.6532, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.634508348794063, |
|
"eval_loss": 0.6780841946601868, |
|
"eval_runtime": 8.8727, |
|
"eval_samples_per_second": 60.748, |
|
"eval_steps_per_second": 7.664, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.944796805261922e-05, |
|
"loss": 0.6442, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.933051444679352e-05, |
|
"loss": 0.6432, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.9213060840967814e-05, |
|
"loss": 0.64, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.9095607235142123e-05, |
|
"loss": 0.6133, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.897815362931642e-05, |
|
"loss": 0.5955, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.886070002349072e-05, |
|
"loss": 0.5998, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.8743246417665025e-05, |
|
"loss": 0.5724, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 4.862579281183933e-05, |
|
"loss": 0.573, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 4.850833920601362e-05, |
|
"loss": 0.5407, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8589981447124304, |
|
"eval_loss": 0.5222358107566833, |
|
"eval_runtime": 8.0811, |
|
"eval_samples_per_second": 66.699, |
|
"eval_steps_per_second": 8.415, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 4.839088560018793e-05, |
|
"loss": 0.5141, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 4.827343199436223e-05, |
|
"loss": 0.5092, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 4.815597838853653e-05, |
|
"loss": 0.5005, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.8038524782710834e-05, |
|
"loss": 0.4718, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.7921071176885137e-05, |
|
"loss": 0.4495, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 4.780361757105943e-05, |
|
"loss": 0.4403, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.7686163965233735e-05, |
|
"loss": 0.438, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 4.756871035940804e-05, |
|
"loss": 0.4086, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.8923933209647495, |
|
"eval_loss": 0.3594764173030853, |
|
"eval_runtime": 8.0206, |
|
"eval_samples_per_second": 67.202, |
|
"eval_steps_per_second": 8.478, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7451256753582334e-05, |
|
"loss": 0.4199, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 4.7333803147756636e-05, |
|
"loss": 0.3936, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 4.721634954193094e-05, |
|
"loss": 0.3951, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 4.709889593610524e-05, |
|
"loss": 0.365, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 4.698144233027954e-05, |
|
"loss": 0.4232, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 4.686398872445385e-05, |
|
"loss": 0.3703, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 4.674653511862814e-05, |
|
"loss": 0.3714, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 4.6629081512802445e-05, |
|
"loss": 0.3859, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 4.651162790697675e-05, |
|
"loss": 0.3449, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9183673469387755, |
|
"eval_loss": 0.2615828514099121, |
|
"eval_runtime": 7.9757, |
|
"eval_samples_per_second": 67.58, |
|
"eval_steps_per_second": 8.526, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 4.639417430115105e-05, |
|
"loss": 0.356, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 4.627672069532535e-05, |
|
"loss": 0.3595, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 4.615926708949965e-05, |
|
"loss": 0.3519, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 4.604181348367395e-05, |
|
"loss": 0.3428, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 4.592435987784825e-05, |
|
"loss": 0.332, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 4.580690627202255e-05, |
|
"loss": 0.3621, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 4.568945266619685e-05, |
|
"loss": 0.3565, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 4.5571999060371156e-05, |
|
"loss": 0.3075, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 4.545454545454546e-05, |
|
"loss": 0.3518, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.9443413729128015, |
|
"eval_loss": 0.22880208492279053, |
|
"eval_runtime": 7.8304, |
|
"eval_samples_per_second": 68.834, |
|
"eval_steps_per_second": 8.684, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 4.533709184871976e-05, |
|
"loss": 0.3537, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 4.521963824289406e-05, |
|
"loss": 0.2907, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 4.510218463706836e-05, |
|
"loss": 0.3318, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 4.498473103124266e-05, |
|
"loss": 0.3502, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 4.4867277425416965e-05, |
|
"loss": 0.3319, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 4.474982381959126e-05, |
|
"loss": 0.3241, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 4.463237021376557e-05, |
|
"loss": 0.2762, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 4.4514916607939866e-05, |
|
"loss": 0.308, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9424860853432282, |
|
"eval_loss": 0.2758006155490875, |
|
"eval_runtime": 7.9236, |
|
"eval_samples_per_second": 68.024, |
|
"eval_steps_per_second": 8.582, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.439746300211416e-05, |
|
"loss": 0.3129, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 4.428000939628847e-05, |
|
"loss": 0.2942, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 4.416255579046277e-05, |
|
"loss": 0.3346, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 4.404510218463707e-05, |
|
"loss": 0.3333, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 4.392764857881137e-05, |
|
"loss": 0.2897, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 4.3810194972985676e-05, |
|
"loss": 0.3428, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 4.369274136715997e-05, |
|
"loss": 0.2877, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 4.3575287761334274e-05, |
|
"loss": 0.2512, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 4.345783415550858e-05, |
|
"loss": 0.3209, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9369202226345084, |
|
"eval_loss": 0.3777436316013336, |
|
"eval_runtime": 7.9087, |
|
"eval_samples_per_second": 68.153, |
|
"eval_steps_per_second": 8.598, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 4.334038054968288e-05, |
|
"loss": 0.2756, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 4.3222926943857175e-05, |
|
"loss": 0.2645, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 4.3105473338031485e-05, |
|
"loss": 0.2753, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 4.298801973220578e-05, |
|
"loss": 0.3309, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 4.2870566126380077e-05, |
|
"loss": 0.2937, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 4.2753112520554386e-05, |
|
"loss": 0.246, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 4.263565891472868e-05, |
|
"loss": 0.3071, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 4.2518205308902985e-05, |
|
"loss": 0.2831, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 4.240075170307729e-05, |
|
"loss": 0.284, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"eval_accuracy": 0.9554730983302412, |
|
"eval_loss": 0.1704244613647461, |
|
"eval_runtime": 7.9276, |
|
"eval_samples_per_second": 67.99, |
|
"eval_steps_per_second": 8.578, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 4.228329809725159e-05, |
|
"loss": 0.3047, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 4.2165844491425886e-05, |
|
"loss": 0.2757, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 4.2048390885600195e-05, |
|
"loss": 0.2666, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 4.193093727977449e-05, |
|
"loss": 0.2711, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 4.1813483673948794e-05, |
|
"loss": 0.2843, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 4.1696030068123096e-05, |
|
"loss": 0.2664, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 4.15785764622974e-05, |
|
"loss": 0.2963, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 4.1461122856471695e-05, |
|
"loss": 0.2466, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9461966604823747, |
|
"eval_loss": 0.15713872015476227, |
|
"eval_runtime": 7.9343, |
|
"eval_samples_per_second": 67.933, |
|
"eval_steps_per_second": 8.57, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.1343669250646e-05, |
|
"loss": 0.2721, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 4.12262156448203e-05, |
|
"loss": 0.2391, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 4.1108762038994596e-05, |
|
"loss": 0.2502, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 4.09913084331689e-05, |
|
"loss": 0.2653, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 4.08738548273432e-05, |
|
"loss": 0.2557, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 4.0756401221517504e-05, |
|
"loss": 0.2148, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 4.06389476156918e-05, |
|
"loss": 0.2495, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 4.052149400986611e-05, |
|
"loss": 0.2678, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 4.0404040404040405e-05, |
|
"loss": 0.3123, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_accuracy": 0.9406307977736549, |
|
"eval_loss": 0.6491873860359192, |
|
"eval_runtime": 8.122, |
|
"eval_samples_per_second": 66.363, |
|
"eval_steps_per_second": 8.372, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 4.028658679821471e-05, |
|
"loss": 0.256, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 4.016913319238901e-05, |
|
"loss": 0.2439, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"learning_rate": 4.005167958656331e-05, |
|
"loss": 0.2592, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 3.993422598073761e-05, |
|
"loss": 0.2539, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"learning_rate": 3.981677237491191e-05, |
|
"loss": 0.2771, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 10.63, |
|
"learning_rate": 3.9699318769086215e-05, |
|
"loss": 0.3149, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 10.74, |
|
"learning_rate": 3.958186516326051e-05, |
|
"loss": 0.2281, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 10.86, |
|
"learning_rate": 3.946441155743481e-05, |
|
"loss": 0.2264, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 3.9346957951609116e-05, |
|
"loss": 0.2827, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9406307977736549, |
|
"eval_loss": 0.49678388237953186, |
|
"eval_runtime": 8.0733, |
|
"eval_samples_per_second": 66.763, |
|
"eval_steps_per_second": 8.423, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"learning_rate": 3.922950434578342e-05, |
|
"loss": 0.2954, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 3.9112050739957714e-05, |
|
"loss": 0.2484, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 3.8994597134132024e-05, |
|
"loss": 0.2382, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 11.44, |
|
"learning_rate": 3.887714352830632e-05, |
|
"loss": 0.2572, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"learning_rate": 3.875968992248062e-05, |
|
"loss": 0.243, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 11.67, |
|
"learning_rate": 3.8642236316654925e-05, |
|
"loss": 0.2569, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"learning_rate": 3.852478271082923e-05, |
|
"loss": 0.2302, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 3.8407329105003524e-05, |
|
"loss": 0.2736, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"eval_accuracy": 0.9591836734693877, |
|
"eval_loss": 0.13702794909477234, |
|
"eval_runtime": 8.0983, |
|
"eval_samples_per_second": 66.557, |
|
"eval_steps_per_second": 8.397, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 3.8289875499177826e-05, |
|
"loss": 0.2202, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 3.817242189335213e-05, |
|
"loss": 0.2247, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 3.8054968287526425e-05, |
|
"loss": 0.2449, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 12.36, |
|
"learning_rate": 3.7937514681700734e-05, |
|
"loss": 0.249, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 3.782006107587503e-05, |
|
"loss": 0.2809, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 3.770260747004933e-05, |
|
"loss": 0.2439, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 3.7585153864223635e-05, |
|
"loss": 0.2326, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 3.746770025839794e-05, |
|
"loss": 0.2725, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 12.94, |
|
"learning_rate": 3.7350246652572234e-05, |
|
"loss": 0.2476, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9499072356215214, |
|
"eval_loss": 0.16156192123889923, |
|
"eval_runtime": 8.2723, |
|
"eval_samples_per_second": 65.157, |
|
"eval_steps_per_second": 8.22, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 3.723279304674654e-05, |
|
"loss": 0.2495, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 13.17, |
|
"learning_rate": 3.711533944092084e-05, |
|
"loss": 0.2815, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"learning_rate": 3.699788583509514e-05, |
|
"loss": 0.27, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 3.688043222926944e-05, |
|
"loss": 0.2174, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 3.676297862344374e-05, |
|
"loss": 0.2503, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 13.63, |
|
"learning_rate": 3.664552501761804e-05, |
|
"loss": 0.2209, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"learning_rate": 3.652807141179234e-05, |
|
"loss": 0.1989, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 13.86, |
|
"learning_rate": 3.641061780596665e-05, |
|
"loss": 0.2473, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 13.98, |
|
"learning_rate": 3.6293164200140944e-05, |
|
"loss": 0.195, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.961038961038961, |
|
"eval_loss": 0.1361682415008545, |
|
"eval_runtime": 8.4028, |
|
"eval_samples_per_second": 64.145, |
|
"eval_steps_per_second": 8.093, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 14.09, |
|
"learning_rate": 3.617571059431525e-05, |
|
"loss": 0.2768, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"learning_rate": 3.605825698848955e-05, |
|
"loss": 0.2251, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 14.32, |
|
"learning_rate": 3.594080338266385e-05, |
|
"loss": 0.1665, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 14.44, |
|
"learning_rate": 3.582334977683815e-05, |
|
"loss": 0.2384, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 3.570589617101245e-05, |
|
"loss": 0.2133, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 14.67, |
|
"learning_rate": 3.5588442565186754e-05, |
|
"loss": 0.2234, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 14.79, |
|
"learning_rate": 3.5470988959361056e-05, |
|
"loss": 0.2373, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 3.535353535353535e-05, |
|
"loss": 0.2536, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_accuracy": 0.9536178107606679, |
|
"eval_loss": 0.12982788681983948, |
|
"eval_runtime": 8.2944, |
|
"eval_samples_per_second": 64.984, |
|
"eval_steps_per_second": 8.198, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"learning_rate": 3.5236081747709655e-05, |
|
"loss": 0.2483, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 15.13, |
|
"learning_rate": 3.511862814188396e-05, |
|
"loss": 0.2263, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"learning_rate": 3.500117453605825e-05, |
|
"loss": 0.2542, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"learning_rate": 3.488372093023256e-05, |
|
"loss": 0.2009, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 3.476626732440686e-05, |
|
"loss": 0.2383, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 15.59, |
|
"learning_rate": 3.464881371858116e-05, |
|
"loss": 0.2116, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 15.71, |
|
"learning_rate": 3.4531360112755464e-05, |
|
"loss": 0.2653, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"learning_rate": 3.441390650692977e-05, |
|
"loss": 0.2447, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 15.94, |
|
"learning_rate": 3.429645290110406e-05, |
|
"loss": 0.2022, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9517625231910947, |
|
"eval_loss": 0.7470229268074036, |
|
"eval_runtime": 8.0594, |
|
"eval_samples_per_second": 66.878, |
|
"eval_steps_per_second": 8.437, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"learning_rate": 3.417899929527837e-05, |
|
"loss": 0.2249, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"learning_rate": 3.406154568945267e-05, |
|
"loss": 0.2498, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 16.29, |
|
"learning_rate": 3.394409208362697e-05, |
|
"loss": 0.215, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"learning_rate": 3.382663847780127e-05, |
|
"loss": 0.2512, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 16.52, |
|
"learning_rate": 3.370918487197557e-05, |
|
"loss": 0.2096, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 16.63, |
|
"learning_rate": 3.359173126614987e-05, |
|
"loss": 0.2097, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 16.75, |
|
"learning_rate": 3.3474277660324174e-05, |
|
"loss": 0.2158, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 16.86, |
|
"learning_rate": 3.335682405449848e-05, |
|
"loss": 0.2303, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 3.323937044867277e-05, |
|
"loss": 0.2406, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_accuracy": 0.9647495361781077, |
|
"eval_loss": 0.12411854416131973, |
|
"eval_runtime": 7.9838, |
|
"eval_samples_per_second": 67.512, |
|
"eval_steps_per_second": 8.517, |
|
"step": 1471 |
|
}, |
|
{ |
|
"epoch": 17.1, |
|
"learning_rate": 3.3121916842847076e-05, |
|
"loss": 0.2697, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 17.21, |
|
"learning_rate": 3.300446323702138e-05, |
|
"loss": 0.2567, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 17.33, |
|
"learning_rate": 3.288700963119568e-05, |
|
"loss": 0.2745, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 17.44, |
|
"learning_rate": 3.276955602536998e-05, |
|
"loss": 0.2269, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 17.56, |
|
"learning_rate": 3.2652102419544286e-05, |
|
"loss": 0.1893, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 17.67, |
|
"learning_rate": 3.253464881371858e-05, |
|
"loss": 0.1827, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 17.79, |
|
"learning_rate": 3.2417195207892885e-05, |
|
"loss": 0.2579, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 17.9, |
|
"learning_rate": 3.229974160206719e-05, |
|
"loss": 0.2019, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9536178107606679, |
|
"eval_loss": 0.12778125703334808, |
|
"eval_runtime": 8.0655, |
|
"eval_samples_per_second": 66.828, |
|
"eval_steps_per_second": 8.431, |
|
"step": 1558 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 3.2182287996241483e-05, |
|
"loss": 0.184, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 18.14, |
|
"learning_rate": 3.2064834390415786e-05, |
|
"loss": 0.2261, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 18.25, |
|
"learning_rate": 3.194738078459009e-05, |
|
"loss": 0.2155, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 18.37, |
|
"learning_rate": 3.182992717876439e-05, |
|
"loss": 0.1857, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 18.48, |
|
"learning_rate": 3.171247357293869e-05, |
|
"loss": 0.2009, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"learning_rate": 3.159501996711299e-05, |
|
"loss": 0.213, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 18.71, |
|
"learning_rate": 3.147756636128729e-05, |
|
"loss": 0.2299, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 18.83, |
|
"learning_rate": 3.1360112755461595e-05, |
|
"loss": 0.1956, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 18.94, |
|
"learning_rate": 3.124265914963589e-05, |
|
"loss": 0.2073, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_accuracy": 0.9684601113172542, |
|
"eval_loss": 0.11341895163059235, |
|
"eval_runtime": 8.1079, |
|
"eval_samples_per_second": 66.478, |
|
"eval_steps_per_second": 8.387, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 19.06, |
|
"learning_rate": 3.11252055438102e-05, |
|
"loss": 0.1869, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 19.17, |
|
"learning_rate": 3.1007751937984497e-05, |
|
"loss": 0.1941, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 19.29, |
|
"learning_rate": 3.08902983321588e-05, |
|
"loss": 0.2218, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"learning_rate": 3.07728447263331e-05, |
|
"loss": 0.196, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 19.52, |
|
"learning_rate": 3.06553911205074e-05, |
|
"loss": 0.2339, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 19.64, |
|
"learning_rate": 3.05379375146817e-05, |
|
"loss": 0.2045, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 19.75, |
|
"learning_rate": 3.0420483908856e-05, |
|
"loss": 0.2057, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 19.87, |
|
"learning_rate": 3.0303030303030306e-05, |
|
"loss": 0.2136, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 19.98, |
|
"learning_rate": 3.0185576697204605e-05, |
|
"loss": 0.1873, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9628942486085343, |
|
"eval_loss": 0.6738272309303284, |
|
"eval_runtime": 8.4568, |
|
"eval_samples_per_second": 63.736, |
|
"eval_steps_per_second": 8.041, |
|
"step": 1731 |
|
}, |
|
{ |
|
"epoch": 20.1, |
|
"learning_rate": 3.0068123091378908e-05, |
|
"loss": 0.2643, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"learning_rate": 2.9950669485553207e-05, |
|
"loss": 0.1854, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 20.33, |
|
"learning_rate": 2.983321587972751e-05, |
|
"loss": 0.2382, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 20.45, |
|
"learning_rate": 2.971576227390181e-05, |
|
"loss": 0.18, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 20.56, |
|
"learning_rate": 2.9598308668076115e-05, |
|
"loss": 0.1763, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 20.68, |
|
"learning_rate": 2.948085506225041e-05, |
|
"loss": 0.2399, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 20.79, |
|
"learning_rate": 2.9363401456424717e-05, |
|
"loss": 0.2275, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 20.91, |
|
"learning_rate": 2.9245947850599016e-05, |
|
"loss": 0.2446, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.9684601113172542, |
|
"eval_loss": 0.1033068299293518, |
|
"eval_runtime": 8.3041, |
|
"eval_samples_per_second": 64.908, |
|
"eval_steps_per_second": 8.189, |
|
"step": 1818 |
|
}, |
|
{ |
|
"epoch": 21.02, |
|
"learning_rate": 2.9128494244773312e-05, |
|
"loss": 0.2423, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 21.14, |
|
"learning_rate": 2.9011040638947618e-05, |
|
"loss": 0.2239, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 21.25, |
|
"learning_rate": 2.8893587033121917e-05, |
|
"loss": 0.2212, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 21.37, |
|
"learning_rate": 2.877613342729622e-05, |
|
"loss": 0.1594, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 21.49, |
|
"learning_rate": 2.865867982147052e-05, |
|
"loss": 0.1878, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 21.6, |
|
"learning_rate": 2.8541226215644822e-05, |
|
"loss": 0.1965, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 21.72, |
|
"learning_rate": 2.842377260981912e-05, |
|
"loss": 0.2186, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 21.83, |
|
"learning_rate": 2.8306319003993427e-05, |
|
"loss": 0.1904, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 21.95, |
|
"learning_rate": 2.8188865398167723e-05, |
|
"loss": 0.1999, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 21.99, |
|
"eval_accuracy": 0.9647495361781077, |
|
"eval_loss": 0.11812406778335571, |
|
"eval_runtime": 8.2822, |
|
"eval_samples_per_second": 65.079, |
|
"eval_steps_per_second": 8.21, |
|
"step": 1904 |
|
}, |
|
{ |
|
"epoch": 22.06, |
|
"learning_rate": 2.807141179234203e-05, |
|
"loss": 0.2147, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 22.18, |
|
"learning_rate": 2.795395818651633e-05, |
|
"loss": 0.2158, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 22.29, |
|
"learning_rate": 2.783650458069063e-05, |
|
"loss": 0.1937, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 22.41, |
|
"learning_rate": 2.771905097486493e-05, |
|
"loss": 0.2208, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 22.52, |
|
"learning_rate": 2.760159736903923e-05, |
|
"loss": 0.155, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 22.64, |
|
"learning_rate": 2.7484143763213532e-05, |
|
"loss": 0.1793, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 22.76, |
|
"learning_rate": 2.736669015738783e-05, |
|
"loss": 0.1794, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 22.87, |
|
"learning_rate": 2.7249236551562134e-05, |
|
"loss": 0.22, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 22.99, |
|
"learning_rate": 2.7131782945736434e-05, |
|
"loss": 0.1716, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.961038961038961, |
|
"eval_loss": 0.10991629213094711, |
|
"eval_runtime": 8.221, |
|
"eval_samples_per_second": 65.564, |
|
"eval_steps_per_second": 8.272, |
|
"step": 1991 |
|
}, |
|
{ |
|
"epoch": 23.1, |
|
"learning_rate": 2.7014329339910736e-05, |
|
"loss": 0.1692, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 23.22, |
|
"learning_rate": 2.6896875734085036e-05, |
|
"loss": 0.1931, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 23.33, |
|
"learning_rate": 2.677942212825934e-05, |
|
"loss": 0.1663, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 23.45, |
|
"learning_rate": 2.6661968522433637e-05, |
|
"loss": 0.2102, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 23.56, |
|
"learning_rate": 2.6544514916607944e-05, |
|
"loss": 0.1822, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 23.68, |
|
"learning_rate": 2.6427061310782243e-05, |
|
"loss": 0.2216, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 23.8, |
|
"learning_rate": 2.6309607704956545e-05, |
|
"loss": 0.1731, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 23.91, |
|
"learning_rate": 2.6192154099130845e-05, |
|
"loss": 0.175, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 23.99, |
|
"eval_accuracy": 0.974025974025974, |
|
"eval_loss": 0.10644800215959549, |
|
"eval_runtime": 8.1046, |
|
"eval_samples_per_second": 66.505, |
|
"eval_steps_per_second": 8.39, |
|
"step": 2077 |
|
}, |
|
{ |
|
"epoch": 24.03, |
|
"learning_rate": 2.6074700493305144e-05, |
|
"loss": 0.1845, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 24.14, |
|
"learning_rate": 2.5957246887479447e-05, |
|
"loss": 0.2091, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 24.26, |
|
"learning_rate": 2.5839793281653746e-05, |
|
"loss": 0.1783, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 24.37, |
|
"learning_rate": 2.572233967582805e-05, |
|
"loss": 0.1822, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 24.49, |
|
"learning_rate": 2.5604886070002348e-05, |
|
"loss": 0.2239, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 24.6, |
|
"learning_rate": 2.5487432464176654e-05, |
|
"loss": 0.1639, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 24.72, |
|
"learning_rate": 2.536997885835095e-05, |
|
"loss": 0.1839, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 24.83, |
|
"learning_rate": 2.5252525252525256e-05, |
|
"loss": 0.2114, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 24.95, |
|
"learning_rate": 2.5135071646699555e-05, |
|
"loss": 0.1962, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.9721706864564007, |
|
"eval_loss": 0.11735469102859497, |
|
"eval_runtime": 8.2081, |
|
"eval_samples_per_second": 65.667, |
|
"eval_steps_per_second": 8.284, |
|
"step": 2164 |
|
}, |
|
{ |
|
"epoch": 25.07, |
|
"learning_rate": 2.5017618040873858e-05, |
|
"loss": 0.1658, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 25.18, |
|
"learning_rate": 2.4900164435048157e-05, |
|
"loss": 0.1979, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 25.3, |
|
"learning_rate": 2.4782710829222456e-05, |
|
"loss": 0.1707, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 25.41, |
|
"learning_rate": 2.466525722339676e-05, |
|
"loss": 0.1932, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 25.53, |
|
"learning_rate": 2.4547803617571062e-05, |
|
"loss": 0.2355, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 2.443035001174536e-05, |
|
"loss": 0.2246, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 25.76, |
|
"learning_rate": 2.4312896405919664e-05, |
|
"loss": 0.2031, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 25.87, |
|
"learning_rate": 2.4195442800093966e-05, |
|
"loss": 0.2062, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 25.99, |
|
"learning_rate": 2.4077989194268266e-05, |
|
"loss": 0.1943, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 25.99, |
|
"eval_accuracy": 0.9517625231910947, |
|
"eval_loss": 1.0624566078186035, |
|
"eval_runtime": 8.1518, |
|
"eval_samples_per_second": 66.12, |
|
"eval_steps_per_second": 8.342, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 26.11, |
|
"learning_rate": 2.3960535588442568e-05, |
|
"loss": 0.2071, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 26.22, |
|
"learning_rate": 2.3843081982616868e-05, |
|
"loss": 0.1872, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 26.34, |
|
"learning_rate": 2.3725628376791167e-05, |
|
"loss": 0.1835, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 26.45, |
|
"learning_rate": 2.360817477096547e-05, |
|
"loss": 0.171, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 26.57, |
|
"learning_rate": 2.349072116513977e-05, |
|
"loss": 0.2028, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 26.68, |
|
"learning_rate": 2.337326755931407e-05, |
|
"loss": 0.2108, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 26.8, |
|
"learning_rate": 2.3255813953488374e-05, |
|
"loss": 0.2046, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 26.91, |
|
"learning_rate": 2.3138360347662673e-05, |
|
"loss": 0.2044, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.9573283858998145, |
|
"eval_loss": 0.8419390916824341, |
|
"eval_runtime": 7.9375, |
|
"eval_samples_per_second": 67.906, |
|
"eval_steps_per_second": 8.567, |
|
"step": 2337 |
|
}, |
|
{ |
|
"epoch": 27.03, |
|
"learning_rate": 2.3020906741836976e-05, |
|
"loss": 0.2017, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 27.15, |
|
"learning_rate": 2.2903453136011275e-05, |
|
"loss": 0.1923, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 27.26, |
|
"learning_rate": 2.2785999530185578e-05, |
|
"loss": 0.171, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 27.38, |
|
"learning_rate": 2.266854592435988e-05, |
|
"loss": 0.1912, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 27.49, |
|
"learning_rate": 2.255109231853418e-05, |
|
"loss": 0.2001, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 27.61, |
|
"learning_rate": 2.2433638712708483e-05, |
|
"loss": 0.1668, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 27.72, |
|
"learning_rate": 2.2316185106882785e-05, |
|
"loss": 0.1699, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 27.84, |
|
"learning_rate": 2.219873150105708e-05, |
|
"loss": 0.1944, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 27.95, |
|
"learning_rate": 2.2081277895231384e-05, |
|
"loss": 0.1835, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.9703153988868275, |
|
"eval_loss": 0.11119159311056137, |
|
"eval_runtime": 7.8965, |
|
"eval_samples_per_second": 68.258, |
|
"eval_steps_per_second": 8.611, |
|
"step": 2424 |
|
}, |
|
{ |
|
"epoch": 28.07, |
|
"learning_rate": 2.1963824289405686e-05, |
|
"loss": 0.1724, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 28.18, |
|
"learning_rate": 2.1846370683579986e-05, |
|
"loss": 0.2228, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 28.3, |
|
"learning_rate": 2.172891707775429e-05, |
|
"loss": 0.1672, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 28.42, |
|
"learning_rate": 2.1611463471928588e-05, |
|
"loss": 0.2021, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 28.53, |
|
"learning_rate": 2.149400986610289e-05, |
|
"loss": 0.1893, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 28.65, |
|
"learning_rate": 2.1376556260277193e-05, |
|
"loss": 0.2024, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 28.76, |
|
"learning_rate": 2.1259102654451492e-05, |
|
"loss": 0.1508, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 28.88, |
|
"learning_rate": 2.1141649048625795e-05, |
|
"loss": 0.1562, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"learning_rate": 2.1024195442800098e-05, |
|
"loss": 0.191, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_accuracy": 0.9684601113172542, |
|
"eval_loss": 0.11420014500617981, |
|
"eval_runtime": 7.9348, |
|
"eval_samples_per_second": 67.929, |
|
"eval_steps_per_second": 8.57, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 29.11, |
|
"learning_rate": 2.0906741836974397e-05, |
|
"loss": 0.1925, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 29.22, |
|
"learning_rate": 2.07892882311487e-05, |
|
"loss": 0.1513, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 29.34, |
|
"learning_rate": 2.0671834625323e-05, |
|
"loss": 0.2406, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 29.46, |
|
"learning_rate": 2.0554381019497298e-05, |
|
"loss": 0.1809, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 29.57, |
|
"learning_rate": 2.04369274136716e-05, |
|
"loss": 0.1641, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 29.69, |
|
"learning_rate": 2.03194738078459e-05, |
|
"loss": 0.1805, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 29.8, |
|
"learning_rate": 2.0202020202020203e-05, |
|
"loss": 0.1702, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 29.92, |
|
"learning_rate": 2.0084566596194505e-05, |
|
"loss": 0.1676, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.9647495361781077, |
|
"eval_loss": 0.10803297162055969, |
|
"eval_runtime": 7.8199, |
|
"eval_samples_per_second": 68.927, |
|
"eval_steps_per_second": 8.696, |
|
"step": 2597 |
|
}, |
|
{ |
|
"epoch": 30.03, |
|
"learning_rate": 1.9967112990368805e-05, |
|
"loss": 0.1554, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 30.15, |
|
"learning_rate": 1.9849659384543107e-05, |
|
"loss": 0.2092, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 30.26, |
|
"learning_rate": 1.9732205778717407e-05, |
|
"loss": 0.16, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 30.38, |
|
"learning_rate": 1.961475217289171e-05, |
|
"loss": 0.1932, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 30.5, |
|
"learning_rate": 1.9497298567066012e-05, |
|
"loss": 0.1742, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 30.61, |
|
"learning_rate": 1.937984496124031e-05, |
|
"loss": 0.1714, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 30.73, |
|
"learning_rate": 1.9262391355414614e-05, |
|
"loss": 0.1668, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 30.84, |
|
"learning_rate": 1.9144937749588913e-05, |
|
"loss": 0.2284, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 30.96, |
|
"learning_rate": 1.9027484143763212e-05, |
|
"loss": 0.1533, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 30.99, |
|
"eval_accuracy": 0.9647495361781077, |
|
"eval_loss": 0.14941494166851044, |
|
"eval_runtime": 7.9882, |
|
"eval_samples_per_second": 67.474, |
|
"eval_steps_per_second": 8.513, |
|
"step": 2683 |
|
}, |
|
{ |
|
"epoch": 31.07, |
|
"learning_rate": 1.8910030537937515e-05, |
|
"loss": 0.179, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 31.19, |
|
"learning_rate": 1.8792576932111818e-05, |
|
"loss": 0.1589, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 31.3, |
|
"learning_rate": 1.8675123326286117e-05, |
|
"loss": 0.2133, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 31.42, |
|
"learning_rate": 1.855766972046042e-05, |
|
"loss": 0.173, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 31.53, |
|
"learning_rate": 1.844021611463472e-05, |
|
"loss": 0.1998, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 31.65, |
|
"learning_rate": 1.832276250880902e-05, |
|
"loss": 0.2054, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 31.77, |
|
"learning_rate": 1.8205308902983324e-05, |
|
"loss": 0.1739, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 31.88, |
|
"learning_rate": 1.8087855297157624e-05, |
|
"loss": 0.1581, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 1.7970401691331926e-05, |
|
"loss": 0.1991, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.9703153988868275, |
|
"eval_loss": 0.10002347081899643, |
|
"eval_runtime": 8.066, |
|
"eval_samples_per_second": 66.823, |
|
"eval_steps_per_second": 8.43, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 32.11, |
|
"learning_rate": 1.7852948085506225e-05, |
|
"loss": 0.1573, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 32.23, |
|
"learning_rate": 1.7735494479680528e-05, |
|
"loss": 0.1641, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 32.34, |
|
"learning_rate": 1.7618040873854827e-05, |
|
"loss": 0.1656, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 32.46, |
|
"learning_rate": 1.7500587268029127e-05, |
|
"loss": 0.2127, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 32.57, |
|
"learning_rate": 1.738313366220343e-05, |
|
"loss": 0.1756, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 32.69, |
|
"learning_rate": 1.7265680056377732e-05, |
|
"loss": 0.1754, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 32.81, |
|
"learning_rate": 1.714822645055203e-05, |
|
"loss": 0.1605, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 32.92, |
|
"learning_rate": 1.7030772844726334e-05, |
|
"loss": 0.1845, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 32.99, |
|
"eval_accuracy": 0.974025974025974, |
|
"eval_loss": 0.09888846427202225, |
|
"eval_runtime": 7.9989, |
|
"eval_samples_per_second": 67.385, |
|
"eval_steps_per_second": 8.501, |
|
"step": 2856 |
|
}, |
|
{ |
|
"epoch": 33.04, |
|
"learning_rate": 1.6913319238900637e-05, |
|
"loss": 0.1855, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 33.15, |
|
"learning_rate": 1.6795865633074936e-05, |
|
"loss": 0.211, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 33.27, |
|
"learning_rate": 1.667841202724924e-05, |
|
"loss": 0.2067, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 33.38, |
|
"learning_rate": 1.6560958421423538e-05, |
|
"loss": 0.1738, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 33.5, |
|
"learning_rate": 1.644350481559784e-05, |
|
"loss": 0.1725, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 33.61, |
|
"learning_rate": 1.6326051209772143e-05, |
|
"loss": 0.1686, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 33.73, |
|
"learning_rate": 1.6208597603946442e-05, |
|
"loss": 0.1642, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 33.84, |
|
"learning_rate": 1.6091143998120742e-05, |
|
"loss": 0.1527, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 33.96, |
|
"learning_rate": 1.5973690392295044e-05, |
|
"loss": 0.1605, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.9684601113172542, |
|
"eval_loss": 0.09749138355255127, |
|
"eval_runtime": 8.1236, |
|
"eval_samples_per_second": 66.35, |
|
"eval_steps_per_second": 8.371, |
|
"step": 2943 |
|
}, |
|
{ |
|
"epoch": 34.08, |
|
"learning_rate": 1.5856236786469344e-05, |
|
"loss": 0.1699, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 34.19, |
|
"learning_rate": 1.5738783180643646e-05, |
|
"loss": 0.1646, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 34.31, |
|
"learning_rate": 1.5621329574817946e-05, |
|
"loss": 0.1779, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 34.42, |
|
"learning_rate": 1.5503875968992248e-05, |
|
"loss": 0.1908, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 34.54, |
|
"learning_rate": 1.538642236316655e-05, |
|
"loss": 0.182, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 34.65, |
|
"learning_rate": 1.526896875734085e-05, |
|
"loss": 0.2004, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 34.77, |
|
"learning_rate": 1.5151515151515153e-05, |
|
"loss": 0.1426, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 34.88, |
|
"learning_rate": 1.5034061545689454e-05, |
|
"loss": 0.1614, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 1.4916607939863755e-05, |
|
"loss": 0.1928, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.9628942486085343, |
|
"eval_loss": 0.4555383026599884, |
|
"eval_runtime": 7.8835, |
|
"eval_samples_per_second": 68.371, |
|
"eval_steps_per_second": 8.626, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 35.12, |
|
"learning_rate": 1.4799154334038057e-05, |
|
"loss": 0.1884, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 35.23, |
|
"learning_rate": 1.4681700728212358e-05, |
|
"loss": 0.1651, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 35.35, |
|
"learning_rate": 1.4564247122386656e-05, |
|
"loss": 0.1602, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 35.46, |
|
"learning_rate": 1.4446793516560959e-05, |
|
"loss": 0.1688, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 35.58, |
|
"learning_rate": 1.432933991073526e-05, |
|
"loss": 0.1719, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 35.69, |
|
"learning_rate": 1.421188630490956e-05, |
|
"loss": 0.1608, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 35.81, |
|
"learning_rate": 1.4094432699083862e-05, |
|
"loss": 0.163, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 35.92, |
|
"learning_rate": 1.3976979093258164e-05, |
|
"loss": 0.1506, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 35.99, |
|
"eval_accuracy": 0.9703153988868275, |
|
"eval_loss": 0.1059107631444931, |
|
"eval_runtime": 8.0108, |
|
"eval_samples_per_second": 67.284, |
|
"eval_steps_per_second": 8.489, |
|
"step": 3116 |
|
}, |
|
{ |
|
"epoch": 36.04, |
|
"learning_rate": 1.3859525487432465e-05, |
|
"loss": 0.1802, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 36.16, |
|
"learning_rate": 1.3742071881606766e-05, |
|
"loss": 0.1332, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 36.27, |
|
"learning_rate": 1.3624618275781067e-05, |
|
"loss": 0.1298, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 36.39, |
|
"learning_rate": 1.3507164669955368e-05, |
|
"loss": 0.1705, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 36.5, |
|
"learning_rate": 1.338971106412967e-05, |
|
"loss": 0.1431, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 36.62, |
|
"learning_rate": 1.3272257458303972e-05, |
|
"loss": 0.1582, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 36.73, |
|
"learning_rate": 1.3154803852478273e-05, |
|
"loss": 0.1544, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 36.85, |
|
"learning_rate": 1.3037350246652572e-05, |
|
"loss": 0.1966, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 36.96, |
|
"learning_rate": 1.2919896640826873e-05, |
|
"loss": 0.1912, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.9647495361781077, |
|
"eval_loss": 0.10163893550634384, |
|
"eval_runtime": 8.0326, |
|
"eval_samples_per_second": 67.102, |
|
"eval_steps_per_second": 8.466, |
|
"step": 3203 |
|
}, |
|
{ |
|
"epoch": 37.08, |
|
"learning_rate": 1.2802443035001174e-05, |
|
"loss": 0.1956, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 37.19, |
|
"learning_rate": 1.2684989429175475e-05, |
|
"loss": 0.1705, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 37.31, |
|
"learning_rate": 1.2567535823349778e-05, |
|
"loss": 0.1559, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 37.43, |
|
"learning_rate": 1.2450082217524079e-05, |
|
"loss": 0.1684, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 37.54, |
|
"learning_rate": 1.233262861169838e-05, |
|
"loss": 0.1962, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 37.66, |
|
"learning_rate": 1.221517500587268e-05, |
|
"loss": 0.1528, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 37.77, |
|
"learning_rate": 1.2097721400046983e-05, |
|
"loss": 0.1788, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 37.89, |
|
"learning_rate": 1.1980267794221284e-05, |
|
"loss": 0.1689, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 37.99, |
|
"eval_accuracy": 0.9666048237476809, |
|
"eval_loss": 0.5420700907707214, |
|
"eval_runtime": 8.2102, |
|
"eval_samples_per_second": 65.65, |
|
"eval_steps_per_second": 8.282, |
|
"step": 3289 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 1.1862814188395583e-05, |
|
"loss": 0.1739, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 38.12, |
|
"learning_rate": 1.1745360582569884e-05, |
|
"loss": 0.1396, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 38.23, |
|
"learning_rate": 1.1627906976744187e-05, |
|
"loss": 0.1871, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 38.35, |
|
"learning_rate": 1.1510453370918488e-05, |
|
"loss": 0.1947, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 38.47, |
|
"learning_rate": 1.1392999765092789e-05, |
|
"loss": 0.1823, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 38.58, |
|
"learning_rate": 1.127554615926709e-05, |
|
"loss": 0.1816, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 38.7, |
|
"learning_rate": 1.1158092553441393e-05, |
|
"loss": 0.2031, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 38.81, |
|
"learning_rate": 1.1040638947615692e-05, |
|
"loss": 0.1764, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 38.93, |
|
"learning_rate": 1.0923185341789993e-05, |
|
"loss": 0.1467, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.9647495361781077, |
|
"eval_loss": 0.10951773822307587, |
|
"eval_runtime": 8.0526, |
|
"eval_samples_per_second": 66.935, |
|
"eval_steps_per_second": 8.444, |
|
"step": 3376 |
|
}, |
|
{ |
|
"epoch": 39.04, |
|
"learning_rate": 1.0805731735964294e-05, |
|
"loss": 0.1615, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 39.16, |
|
"learning_rate": 1.0688278130138596e-05, |
|
"loss": 0.1797, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 39.27, |
|
"learning_rate": 1.0570824524312897e-05, |
|
"loss": 0.1314, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 39.39, |
|
"learning_rate": 1.0453370918487198e-05, |
|
"loss": 0.19, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 39.5, |
|
"learning_rate": 1.03359173126615e-05, |
|
"loss": 0.1955, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 39.62, |
|
"learning_rate": 1.02184637068358e-05, |
|
"loss": 0.1635, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 39.74, |
|
"learning_rate": 1.0101010101010101e-05, |
|
"loss": 0.1544, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 39.85, |
|
"learning_rate": 9.983556495184402e-06, |
|
"loss": 0.1604, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 39.97, |
|
"learning_rate": 9.866102889358703e-06, |
|
"loss": 0.1513, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 39.99, |
|
"eval_accuracy": 0.9703153988868275, |
|
"eval_loss": 0.3827688992023468, |
|
"eval_runtime": 7.9622, |
|
"eval_samples_per_second": 67.695, |
|
"eval_steps_per_second": 8.54, |
|
"step": 3462 |
|
}, |
|
{ |
|
"epoch": 40.08, |
|
"learning_rate": 9.748649283533006e-06, |
|
"loss": 0.1578, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 40.2, |
|
"learning_rate": 9.631195677707307e-06, |
|
"loss": 0.1633, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 40.31, |
|
"learning_rate": 9.513742071881606e-06, |
|
"loss": 0.1273, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 40.43, |
|
"learning_rate": 9.396288466055909e-06, |
|
"loss": 0.1535, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 40.54, |
|
"learning_rate": 9.27883486023021e-06, |
|
"loss": 0.2011, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 40.66, |
|
"learning_rate": 9.16138125440451e-06, |
|
"loss": 0.1801, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 40.78, |
|
"learning_rate": 9.043927648578812e-06, |
|
"loss": 0.1341, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 40.89, |
|
"learning_rate": 8.926474042753113e-06, |
|
"loss": 0.1768, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.9703153988868275, |
|
"eval_loss": 0.09445924311876297, |
|
"eval_runtime": 8.0713, |
|
"eval_samples_per_second": 66.78, |
|
"eval_steps_per_second": 8.425, |
|
"step": 3549 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"learning_rate": 8.809020436927414e-06, |
|
"loss": 0.1797, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 41.12, |
|
"learning_rate": 8.691566831101715e-06, |
|
"loss": 0.1782, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 41.24, |
|
"learning_rate": 8.574113225276016e-06, |
|
"loss": 0.164, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 41.35, |
|
"learning_rate": 8.456659619450318e-06, |
|
"loss": 0.156, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 41.47, |
|
"learning_rate": 8.33920601362462e-06, |
|
"loss": 0.1603, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 41.58, |
|
"learning_rate": 8.22175240779892e-06, |
|
"loss": 0.1663, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 41.7, |
|
"learning_rate": 8.104298801973221e-06, |
|
"loss": 0.1433, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 41.82, |
|
"learning_rate": 7.986845196147522e-06, |
|
"loss": 0.1769, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 41.93, |
|
"learning_rate": 7.869391590321823e-06, |
|
"loss": 0.1633, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.9591836734693877, |
|
"eval_loss": 0.22497127950191498, |
|
"eval_runtime": 8.2319, |
|
"eval_samples_per_second": 65.477, |
|
"eval_steps_per_second": 8.261, |
|
"step": 3636 |
|
}, |
|
{ |
|
"epoch": 42.05, |
|
"learning_rate": 7.751937984496124e-06, |
|
"loss": 0.1682, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 42.16, |
|
"learning_rate": 7.634484378670425e-06, |
|
"loss": 0.1527, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 42.28, |
|
"learning_rate": 7.517030772844727e-06, |
|
"loss": 0.1615, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 42.39, |
|
"learning_rate": 7.399577167019029e-06, |
|
"loss": 0.1867, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 42.51, |
|
"learning_rate": 7.282123561193328e-06, |
|
"loss": 0.1696, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 42.62, |
|
"learning_rate": 7.16466995536763e-06, |
|
"loss": 0.1257, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 42.74, |
|
"learning_rate": 7.047216349541931e-06, |
|
"loss": 0.1549, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 42.85, |
|
"learning_rate": 6.929762743716233e-06, |
|
"loss": 0.1604, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 42.97, |
|
"learning_rate": 6.812309137890534e-06, |
|
"loss": 0.1945, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 42.99, |
|
"eval_accuracy": 0.9684601113172542, |
|
"eval_loss": 0.2014760673046112, |
|
"eval_runtime": 8.2434, |
|
"eval_samples_per_second": 65.386, |
|
"eval_steps_per_second": 8.249, |
|
"step": 3722 |
|
}, |
|
{ |
|
"epoch": 43.09, |
|
"learning_rate": 6.694855532064835e-06, |
|
"loss": 0.1757, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 43.2, |
|
"learning_rate": 6.577401926239136e-06, |
|
"loss": 0.1401, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 43.32, |
|
"learning_rate": 6.4599483204134365e-06, |
|
"loss": 0.1655, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 43.43, |
|
"learning_rate": 6.3424947145877375e-06, |
|
"loss": 0.178, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 43.55, |
|
"learning_rate": 6.225041108762039e-06, |
|
"loss": 0.1277, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 43.66, |
|
"learning_rate": 6.10758750293634e-06, |
|
"loss": 0.1861, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 43.78, |
|
"learning_rate": 5.990133897110642e-06, |
|
"loss": 0.1634, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 43.89, |
|
"learning_rate": 5.872680291284942e-06, |
|
"loss": 0.1896, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.9666048237476809, |
|
"eval_loss": 0.11137495934963226, |
|
"eval_runtime": 8.1155, |
|
"eval_samples_per_second": 66.416, |
|
"eval_steps_per_second": 8.379, |
|
"step": 3809 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"learning_rate": 5.755226685459244e-06, |
|
"loss": 0.1602, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 44.13, |
|
"learning_rate": 5.637773079633545e-06, |
|
"loss": 0.1184, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 44.24, |
|
"learning_rate": 5.520319473807846e-06, |
|
"loss": 0.144, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 44.36, |
|
"learning_rate": 5.402865867982147e-06, |
|
"loss": 0.1956, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 44.47, |
|
"learning_rate": 5.285412262156449e-06, |
|
"loss": 0.1654, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 44.59, |
|
"learning_rate": 5.16795865633075e-06, |
|
"loss": 0.1889, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 44.7, |
|
"learning_rate": 5.050505050505051e-06, |
|
"loss": 0.108, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 44.82, |
|
"learning_rate": 4.933051444679352e-06, |
|
"loss": 0.1702, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 44.93, |
|
"learning_rate": 4.8155978388536535e-06, |
|
"loss": 0.1629, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 44.99, |
|
"eval_accuracy": 0.9666048237476809, |
|
"eval_loss": 0.0954408049583435, |
|
"eval_runtime": 8.1596, |
|
"eval_samples_per_second": 66.057, |
|
"eval_steps_per_second": 8.334, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 45.05, |
|
"learning_rate": 4.6981442330279544e-06, |
|
"loss": 0.1729, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 45.17, |
|
"learning_rate": 4.580690627202255e-06, |
|
"loss": 0.1781, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 45.28, |
|
"learning_rate": 4.463237021376556e-06, |
|
"loss": 0.1565, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 45.4, |
|
"learning_rate": 4.345783415550857e-06, |
|
"loss": 0.1445, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 45.51, |
|
"learning_rate": 4.228329809725159e-06, |
|
"loss": 0.1315, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 45.63, |
|
"learning_rate": 4.11087620389946e-06, |
|
"loss": 0.1578, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 45.74, |
|
"learning_rate": 3.993422598073761e-06, |
|
"loss": 0.1875, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 45.86, |
|
"learning_rate": 3.875968992248062e-06, |
|
"loss": 0.2048, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 45.97, |
|
"learning_rate": 3.7585153864223635e-06, |
|
"loss": 0.1825, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.974025974025974, |
|
"eval_loss": 0.09737637639045715, |
|
"eval_runtime": 8.2567, |
|
"eval_samples_per_second": 65.28, |
|
"eval_steps_per_second": 8.236, |
|
"step": 3982 |
|
}, |
|
{ |
|
"epoch": 46.09, |
|
"learning_rate": 3.641061780596664e-06, |
|
"loss": 0.1715, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 46.2, |
|
"learning_rate": 3.5236081747709654e-06, |
|
"loss": 0.1679, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 46.32, |
|
"learning_rate": 3.406154568945267e-06, |
|
"loss": 0.1809, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 46.44, |
|
"learning_rate": 3.288700963119568e-06, |
|
"loss": 0.1582, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 46.55, |
|
"learning_rate": 3.1712473572938687e-06, |
|
"loss": 0.1497, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 46.67, |
|
"learning_rate": 3.05379375146817e-06, |
|
"loss": 0.1748, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 46.78, |
|
"learning_rate": 2.936340145642471e-06, |
|
"loss": 0.1893, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 46.9, |
|
"learning_rate": 2.8188865398167725e-06, |
|
"loss": 0.1664, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 46.99, |
|
"eval_accuracy": 0.9703153988868275, |
|
"eval_loss": 0.09385673701763153, |
|
"eval_runtime": 8.1532, |
|
"eval_samples_per_second": 66.109, |
|
"eval_steps_per_second": 8.34, |
|
"step": 4068 |
|
}, |
|
{ |
|
"epoch": 47.01, |
|
"learning_rate": 2.7014329339910735e-06, |
|
"loss": 0.1701, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 47.13, |
|
"learning_rate": 2.583979328165375e-06, |
|
"loss": 0.1427, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 47.24, |
|
"learning_rate": 2.466525722339676e-06, |
|
"loss": 0.1303, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 47.36, |
|
"learning_rate": 2.3490721165139772e-06, |
|
"loss": 0.1459, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 47.48, |
|
"learning_rate": 2.231618510688278e-06, |
|
"loss": 0.1548, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 47.59, |
|
"learning_rate": 2.1141649048625796e-06, |
|
"loss": 0.1562, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 47.71, |
|
"learning_rate": 1.9967112990368805e-06, |
|
"loss": 0.1668, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 47.82, |
|
"learning_rate": 1.8792576932111817e-06, |
|
"loss": 0.1512, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 47.94, |
|
"learning_rate": 1.7618040873854827e-06, |
|
"loss": 0.1535, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.9721706864564007, |
|
"eval_loss": 0.09351829439401627, |
|
"eval_runtime": 8.1415, |
|
"eval_samples_per_second": 66.204, |
|
"eval_steps_per_second": 8.352, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 48.05, |
|
"learning_rate": 1.644350481559784e-06, |
|
"loss": 0.1624, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 48.17, |
|
"learning_rate": 1.526896875734085e-06, |
|
"loss": 0.1332, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 48.28, |
|
"learning_rate": 1.4094432699083862e-06, |
|
"loss": 0.1957, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 48.4, |
|
"learning_rate": 1.2919896640826874e-06, |
|
"loss": 0.139, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 48.51, |
|
"learning_rate": 1.1745360582569886e-06, |
|
"loss": 0.1589, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 48.63, |
|
"learning_rate": 1.0570824524312898e-06, |
|
"loss": 0.1696, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 48.75, |
|
"learning_rate": 9.396288466055909e-07, |
|
"loss": 0.1712, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 48.86, |
|
"learning_rate": 8.22175240779892e-07, |
|
"loss": 0.1562, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 48.98, |
|
"learning_rate": 7.047216349541931e-07, |
|
"loss": 0.1801, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.9703153988868275, |
|
"eval_loss": 0.09990726411342621, |
|
"eval_runtime": 8.1674, |
|
"eval_samples_per_second": 65.994, |
|
"eval_steps_per_second": 8.326, |
|
"step": 4242 |
|
}, |
|
{ |
|
"epoch": 49.09, |
|
"learning_rate": 5.872680291284943e-07, |
|
"loss": 0.173, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 49.21, |
|
"learning_rate": 4.6981442330279543e-07, |
|
"loss": 0.1455, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 49.32, |
|
"learning_rate": 3.5236081747709656e-07, |
|
"loss": 0.1765, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 49.44, |
|
"learning_rate": 2.3490721165139772e-07, |
|
"loss": 0.1866, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 49.55, |
|
"learning_rate": 1.1745360582569886e-07, |
|
"loss": 0.1522, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 49.67, |
|
"learning_rate": 0.0, |
|
"loss": 0.1502, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 49.67, |
|
"eval_accuracy": 0.9703153988868275, |
|
"eval_loss": 0.19585400819778442, |
|
"eval_runtime": 8.3207, |
|
"eval_samples_per_second": 64.779, |
|
"eval_steps_per_second": 8.172, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 49.67, |
|
"step": 4300, |
|
"total_flos": 5.11036354111998e+18, |
|
"train_loss": 0.23105980243793753, |
|
"train_runtime": 6101.6743, |
|
"train_samples_per_second": 39.702, |
|
"train_steps_per_second": 0.705 |
|
} |
|
], |
|
"max_steps": 4300, |
|
"num_train_epochs": 50, |
|
"total_flos": 5.11036354111998e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|