|
{ |
|
"best_metric": 0.9837157660991858, |
|
"best_model_checkpoint": "videomae-base-finetuned-ucf101\\checkpoint-8320", |
|
"epoch": 9.098918269230769, |
|
"eval_steps": 500, |
|
"global_step": 8320, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.009615384615385e-07, |
|
"loss": 4.0127, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.201923076923077e-06, |
|
"loss": 4.0116, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.8028846153846153e-06, |
|
"loss": 3.9416, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.403846153846154e-06, |
|
"loss": 3.954, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.0048076923076927e-06, |
|
"loss": 4.0102, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.6057692307692307e-06, |
|
"loss": 3.9791, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.20673076923077e-06, |
|
"loss": 3.9077, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.807692307692308e-06, |
|
"loss": 3.9278, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.408653846153847e-06, |
|
"loss": 3.9414, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.0096153846153855e-06, |
|
"loss": 3.992, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.610576923076923e-06, |
|
"loss": 3.9291, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.211538461538461e-06, |
|
"loss": 3.9836, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.8125e-06, |
|
"loss": 3.914, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.41346153846154e-06, |
|
"loss": 3.8372, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.014423076923078e-06, |
|
"loss": 3.8477, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.615384615384616e-06, |
|
"loss": 3.7954, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0216346153846154e-05, |
|
"loss": 3.8203, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0817307692307693e-05, |
|
"loss": 3.8033, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1418269230769231e-05, |
|
"loss": 3.7756, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.2019230769230771e-05, |
|
"loss": 3.8827, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.2620192307692307e-05, |
|
"loss": 3.7937, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3221153846153847e-05, |
|
"loss": 3.7683, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3822115384615386e-05, |
|
"loss": 3.6827, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4423076923076923e-05, |
|
"loss": 3.6884, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5024038461538462e-05, |
|
"loss": 3.6489, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5625e-05, |
|
"loss": 3.6757, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6225961538461538e-05, |
|
"loss": 3.5549, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.682692307692308e-05, |
|
"loss": 3.509, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.7427884615384614e-05, |
|
"loss": 3.4363, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8028846153846156e-05, |
|
"loss": 3.4794, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8629807692307693e-05, |
|
"loss": 3.4726, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 3.3436, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.983173076923077e-05, |
|
"loss": 3.3382, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0432692307692307e-05, |
|
"loss": 3.1034, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.103365384615385e-05, |
|
"loss": 3.151, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.1634615384615387e-05, |
|
"loss": 3.1399, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.223557692307692e-05, |
|
"loss": 3.1325, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.2836538461538463e-05, |
|
"loss": 3.214, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.34375e-05, |
|
"loss": 2.9566, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4038461538461542e-05, |
|
"loss": 2.8413, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.463942307692308e-05, |
|
"loss": 2.769, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5240384615384614e-05, |
|
"loss": 2.8527, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.584134615384616e-05, |
|
"loss": 2.7911, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.6442307692307694e-05, |
|
"loss": 2.4166, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.704326923076923e-05, |
|
"loss": 2.7125, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.7644230769230773e-05, |
|
"loss": 2.4932, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8245192307692307e-05, |
|
"loss": 2.454, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8846153846153845e-05, |
|
"loss": 2.2737, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9447115384615387e-05, |
|
"loss": 2.316, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.0048076923076925e-05, |
|
"loss": 2.538, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.064903846153846e-05, |
|
"loss": 2.1644, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.125e-05, |
|
"loss": 2.1932, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.185096153846154e-05, |
|
"loss": 2.1082, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.2451923076923077e-05, |
|
"loss": 2.2554, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3052884615384615e-05, |
|
"loss": 2.1601, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.365384615384616e-05, |
|
"loss": 2.0342, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.42548076923077e-05, |
|
"loss": 1.9706, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.485576923076923e-05, |
|
"loss": 2.0727, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.545673076923077e-05, |
|
"loss": 1.8431, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.605769230769231e-05, |
|
"loss": 1.9603, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.665865384615384e-05, |
|
"loss": 1.9677, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.725961538461539e-05, |
|
"loss": 1.7238, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.7860576923076925e-05, |
|
"loss": 1.8137, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 1.5439, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.90625e-05, |
|
"loss": 1.7736, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.966346153846154e-05, |
|
"loss": 1.7694, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.0264423076923083e-05, |
|
"loss": 1.5685, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.0865384615384615e-05, |
|
"loss": 1.3912, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.146634615384616e-05, |
|
"loss": 1.3916, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.20673076923077e-05, |
|
"loss": 1.5716, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.266826923076923e-05, |
|
"loss": 1.7127, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.326923076923077e-05, |
|
"loss": 1.6191, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.387019230769231e-05, |
|
"loss": 1.53, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.447115384615384e-05, |
|
"loss": 1.3601, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.507211538461539e-05, |
|
"loss": 1.5335, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.5673076923076925e-05, |
|
"loss": 1.5374, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.627403846153846e-05, |
|
"loss": 1.4242, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.6875e-05, |
|
"loss": 1.3306, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.747596153846154e-05, |
|
"loss": 1.1707, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8076923076923084e-05, |
|
"loss": 1.3212, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8677884615384615e-05, |
|
"loss": 1.0776, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.927884615384616e-05, |
|
"loss": 1.1776, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.98798076923077e-05, |
|
"loss": 1.1009, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.6558105107327905, |
|
"eval_loss": 1.2521276473999023, |
|
"eval_runtime": 89.0403, |
|
"eval_samples_per_second": 15.173, |
|
"eval_steps_per_second": 2.538, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.99465811965812e-05, |
|
"loss": 1.2513, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.98798076923077e-05, |
|
"loss": 1.2784, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.981303418803419e-05, |
|
"loss": 1.0356, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9746260683760685e-05, |
|
"loss": 1.2674, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.9679487179487185e-05, |
|
"loss": 1.2378, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.961271367521368e-05, |
|
"loss": 1.0986, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.954594017094017e-05, |
|
"loss": 0.9062, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.947916666666667e-05, |
|
"loss": 1.0728, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.9412393162393166e-05, |
|
"loss": 1.12, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.9345619658119666e-05, |
|
"loss": 1.2067, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.927884615384616e-05, |
|
"loss": 1.1527, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.9212072649572646e-05, |
|
"loss": 1.1009, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.9145299145299147e-05, |
|
"loss": 0.9438, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.907852564102564e-05, |
|
"loss": 0.8438, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.901175213675214e-05, |
|
"loss": 0.7278, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.8944978632478634e-05, |
|
"loss": 0.8685, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.887820512820513e-05, |
|
"loss": 1.0214, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.881143162393163e-05, |
|
"loss": 0.9312, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.874465811965812e-05, |
|
"loss": 0.8984, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.8677884615384615e-05, |
|
"loss": 0.8442, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 0.6382, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.854433760683761e-05, |
|
"loss": 0.6792, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.84775641025641e-05, |
|
"loss": 0.8806, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.84107905982906e-05, |
|
"loss": 0.9612, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.8344017094017096e-05, |
|
"loss": 0.8101, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.827724358974359e-05, |
|
"loss": 0.7791, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.821047008547009e-05, |
|
"loss": 0.8231, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.814369658119658e-05, |
|
"loss": 0.6457, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.8076923076923084e-05, |
|
"loss": 0.5397, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.801014957264958e-05, |
|
"loss": 0.7663, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.794337606837607e-05, |
|
"loss": 1.0645, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.787660256410257e-05, |
|
"loss": 1.0223, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.7809829059829065e-05, |
|
"loss": 1.0806, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.774305555555556e-05, |
|
"loss": 0.9156, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.767628205128206e-05, |
|
"loss": 0.7737, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.7609508547008545e-05, |
|
"loss": 0.6834, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.7542735042735045e-05, |
|
"loss": 0.808, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.747596153846154e-05, |
|
"loss": 0.9556, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.740918803418803e-05, |
|
"loss": 0.5992, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.734241452991453e-05, |
|
"loss": 0.6876, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.7275641025641026e-05, |
|
"loss": 0.7753, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.720886752136752e-05, |
|
"loss": 0.6666, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.714209401709402e-05, |
|
"loss": 0.4952, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.7075320512820514e-05, |
|
"loss": 0.9254, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.700854700854701e-05, |
|
"loss": 0.8664, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.694177350427351e-05, |
|
"loss": 0.6691, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.6875e-05, |
|
"loss": 0.7406, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.68082264957265e-05, |
|
"loss": 0.6539, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.6741452991452995e-05, |
|
"loss": 0.9185, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.667467948717949e-05, |
|
"loss": 0.7776, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.660790598290599e-05, |
|
"loss": 0.8129, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.654113247863248e-05, |
|
"loss": 0.4696, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.6474358974358976e-05, |
|
"loss": 0.4895, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.6407585470085476e-05, |
|
"loss": 0.6573, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.634081196581197e-05, |
|
"loss": 0.6352, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.627403846153846e-05, |
|
"loss": 0.4662, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.620726495726496e-05, |
|
"loss": 0.6592, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.614049145299146e-05, |
|
"loss": 0.6501, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.607371794871795e-05, |
|
"loss": 0.5648, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.6006944444444444e-05, |
|
"loss": 0.7981, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.594017094017094e-05, |
|
"loss": 0.573, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.587339743589744e-05, |
|
"loss": 0.6546, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.580662393162393e-05, |
|
"loss": 0.5825, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.5739850427350425e-05, |
|
"loss": 0.5756, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.5673076923076925e-05, |
|
"loss": 0.5927, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.560630341880342e-05, |
|
"loss": 0.577, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.553952991452992e-05, |
|
"loss": 0.726, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.547275641025641e-05, |
|
"loss": 0.5516, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.5405982905982906e-05, |
|
"loss": 0.5479, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.5339209401709406e-05, |
|
"loss": 0.4325, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.52724358974359e-05, |
|
"loss": 0.4567, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.520566239316239e-05, |
|
"loss": 0.5283, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.5138888888888894e-05, |
|
"loss": 0.3399, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.507211538461539e-05, |
|
"loss": 0.5445, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.500534188034188e-05, |
|
"loss": 0.3424, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.493856837606838e-05, |
|
"loss": 0.624, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.4871794871794874e-05, |
|
"loss": 0.529, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.4805021367521375e-05, |
|
"loss": 0.5173, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.473824786324787e-05, |
|
"loss": 0.5501, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.467147435897436e-05, |
|
"loss": 0.3545, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.460470085470086e-05, |
|
"loss": 0.5297, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.4537927350427356e-05, |
|
"loss": 0.3494, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.447115384615384e-05, |
|
"loss": 0.2821, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.8623242042931162, |
|
"eval_loss": 0.43101879954338074, |
|
"eval_runtime": 85.5894, |
|
"eval_samples_per_second": 15.785, |
|
"eval_steps_per_second": 2.641, |
|
"step": 1666 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.440438034188034e-05, |
|
"loss": 0.4212, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.4337606837606836e-05, |
|
"loss": 0.4583, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.4270833333333337e-05, |
|
"loss": 0.2378, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.420405982905983e-05, |
|
"loss": 0.4887, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.4137286324786324e-05, |
|
"loss": 0.312, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.4070512820512824e-05, |
|
"loss": 0.3402, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.400373931623932e-05, |
|
"loss": 0.2723, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.393696581196581e-05, |
|
"loss": 0.2785, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.387019230769231e-05, |
|
"loss": 0.187, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.3803418803418805e-05, |
|
"loss": 0.2831, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.37366452991453e-05, |
|
"loss": 0.6487, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.36698717948718e-05, |
|
"loss": 0.3937, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.360309829059829e-05, |
|
"loss": 0.2464, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.353632478632479e-05, |
|
"loss": 0.1763, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.3469551282051286e-05, |
|
"loss": 0.5681, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.340277777777778e-05, |
|
"loss": 0.3058, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.333600427350428e-05, |
|
"loss": 0.4737, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.326923076923077e-05, |
|
"loss": 0.2714, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.320245726495727e-05, |
|
"loss": 0.2996, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.313568376068377e-05, |
|
"loss": 0.1767, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.306891025641026e-05, |
|
"loss": 0.3166, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.3002136752136754e-05, |
|
"loss": 0.6681, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.293536324786325e-05, |
|
"loss": 0.271, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.286858974358974e-05, |
|
"loss": 0.2432, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.280181623931624e-05, |
|
"loss": 0.3976, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.2735042735042735e-05, |
|
"loss": 0.4972, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.266826923076923e-05, |
|
"loss": 0.4663, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.260149572649573e-05, |
|
"loss": 0.456, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.253472222222222e-05, |
|
"loss": 0.5133, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.2467948717948716e-05, |
|
"loss": 0.2464, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.2401175213675216e-05, |
|
"loss": 0.2097, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.233440170940171e-05, |
|
"loss": 0.479, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.226762820512821e-05, |
|
"loss": 0.2995, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.2200854700854704e-05, |
|
"loss": 0.5557, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.21340811965812e-05, |
|
"loss": 0.572, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.20673076923077e-05, |
|
"loss": 0.5636, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.200053418803419e-05, |
|
"loss": 0.2179, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.1933760683760684e-05, |
|
"loss": 0.3565, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.1866987179487185e-05, |
|
"loss": 0.2154, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.180021367521368e-05, |
|
"loss": 0.2826, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.173344017094017e-05, |
|
"loss": 0.3187, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.356, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.1599893162393166e-05, |
|
"loss": 0.1521, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.153311965811966e-05, |
|
"loss": 0.2919, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.146634615384616e-05, |
|
"loss": 0.2014, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.1399572649572646e-05, |
|
"loss": 0.535, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.1332799145299146e-05, |
|
"loss": 0.2376, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.126602564102564e-05, |
|
"loss": 0.3963, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.1199252136752133e-05, |
|
"loss": 0.3987, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.1132478632478634e-05, |
|
"loss": 0.3923, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.106570512820513e-05, |
|
"loss": 0.5621, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.099893162393163e-05, |
|
"loss": 0.3855, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.093215811965812e-05, |
|
"loss": 0.369, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.0865384615384615e-05, |
|
"loss": 0.1231, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.0798611111111115e-05, |
|
"loss": 0.1671, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.073183760683761e-05, |
|
"loss": 0.1788, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.06650641025641e-05, |
|
"loss": 0.5884, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.05982905982906e-05, |
|
"loss": 0.2911, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.0531517094017096e-05, |
|
"loss": 0.1407, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.046474358974359e-05, |
|
"loss": 0.4743, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.039797008547009e-05, |
|
"loss": 0.2106, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.033119658119658e-05, |
|
"loss": 0.4062, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 4.0264423076923083e-05, |
|
"loss": 0.195, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 4.019764957264958e-05, |
|
"loss": 0.1114, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 4.013087606837607e-05, |
|
"loss": 0.368, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 4.006410256410257e-05, |
|
"loss": 0.2268, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.9997329059829064e-05, |
|
"loss": 0.4106, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.993055555555556e-05, |
|
"loss": 0.2388, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.986378205128206e-05, |
|
"loss": 0.085, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.9797008547008545e-05, |
|
"loss": 0.1083, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.9730235042735045e-05, |
|
"loss": 0.1683, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.966346153846154e-05, |
|
"loss": 0.0891, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.959668803418803e-05, |
|
"loss": 0.0728, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.952991452991453e-05, |
|
"loss": 0.1584, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.9463141025641026e-05, |
|
"loss": 0.2689, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.939636752136752e-05, |
|
"loss": 0.1891, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.932959401709402e-05, |
|
"loss": 0.3679, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.9262820512820513e-05, |
|
"loss": 0.1882, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.919604700854701e-05, |
|
"loss": 0.2205, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.912927350427351e-05, |
|
"loss": 0.1591, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.90625e-05, |
|
"loss": 0.1836, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.89957264957265e-05, |
|
"loss": 0.0979, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.8928952991452995e-05, |
|
"loss": 0.4519, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_accuracy": 0.9222797927461139, |
|
"eval_loss": 0.26454582810401917, |
|
"eval_runtime": 85.593, |
|
"eval_samples_per_second": 15.784, |
|
"eval_steps_per_second": 2.64, |
|
"step": 2499 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.886217948717949e-05, |
|
"loss": 0.2009, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.879540598290599e-05, |
|
"loss": 0.1853, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.872863247863248e-05, |
|
"loss": 0.0564, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.8661858974358976e-05, |
|
"loss": 0.2384, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.8595085470085476e-05, |
|
"loss": 0.2116, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.852831196581197e-05, |
|
"loss": 0.2025, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 0.453, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.839476495726496e-05, |
|
"loss": 0.1854, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.832799145299146e-05, |
|
"loss": 0.1525, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.826121794871795e-05, |
|
"loss": 0.1236, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.8194444444444444e-05, |
|
"loss": 0.1181, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.812767094017094e-05, |
|
"loss": 0.1242, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.806089743589744e-05, |
|
"loss": 0.1651, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 3.799412393162393e-05, |
|
"loss": 0.1342, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 3.7927350427350425e-05, |
|
"loss": 0.1438, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 3.7860576923076925e-05, |
|
"loss": 0.0986, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 3.779380341880342e-05, |
|
"loss": 0.1217, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 3.772702991452992e-05, |
|
"loss": 0.1621, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 3.766025641025641e-05, |
|
"loss": 0.105, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 3.7593482905982906e-05, |
|
"loss": 0.234, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 3.7526709401709406e-05, |
|
"loss": 0.2106, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.74599358974359e-05, |
|
"loss": 0.1273, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.739316239316239e-05, |
|
"loss": 0.0124, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.7326388888888893e-05, |
|
"loss": 0.045, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.725961538461539e-05, |
|
"loss": 0.1901, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.719284188034188e-05, |
|
"loss": 0.1012, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.712606837606838e-05, |
|
"loss": 0.2435, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.7059294871794874e-05, |
|
"loss": 0.0302, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.699252136752137e-05, |
|
"loss": 0.1433, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.692574786324787e-05, |
|
"loss": 0.1693, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.685897435897436e-05, |
|
"loss": 0.2665, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.679220085470086e-05, |
|
"loss": 0.2381, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.6725427350427355e-05, |
|
"loss": 0.1251, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.665865384615384e-05, |
|
"loss": 0.1623, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.659188034188034e-05, |
|
"loss": 0.139, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.6525106837606836e-05, |
|
"loss": 0.0755, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.6458333333333336e-05, |
|
"loss": 0.265, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.639155982905983e-05, |
|
"loss": 0.3092, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.6324786324786323e-05, |
|
"loss": 0.134, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.6258012820512824e-05, |
|
"loss": 0.0111, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.619123931623932e-05, |
|
"loss": 0.154, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.612446581196581e-05, |
|
"loss": 0.0385, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.605769230769231e-05, |
|
"loss": 0.1979, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.5990918803418805e-05, |
|
"loss": 0.1618, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.59241452991453e-05, |
|
"loss": 0.1044, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.58573717948718e-05, |
|
"loss": 0.1899, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 3.579059829059829e-05, |
|
"loss": 0.1188, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 3.5723824786324785e-05, |
|
"loss": 0.3025, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 3.5657051282051286e-05, |
|
"loss": 0.1551, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 3.559027777777778e-05, |
|
"loss": 0.272, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 3.552350427350428e-05, |
|
"loss": 0.2863, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 3.545673076923077e-05, |
|
"loss": 0.1508, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 3.538995726495727e-05, |
|
"loss": 0.3007, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 3.532318376068377e-05, |
|
"loss": 0.256, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.525641025641026e-05, |
|
"loss": 0.0253, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.5189636752136754e-05, |
|
"loss": 0.0563, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.512286324786325e-05, |
|
"loss": 0.2547, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.505608974358974e-05, |
|
"loss": 0.1, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.498931623931624e-05, |
|
"loss": 0.1087, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.4922542735042735e-05, |
|
"loss": 0.0982, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.485576923076923e-05, |
|
"loss": 0.0756, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.478899572649573e-05, |
|
"loss": 0.1787, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.1296, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.4655448717948716e-05, |
|
"loss": 0.1278, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.4588675213675216e-05, |
|
"loss": 0.2712, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.452190170940171e-05, |
|
"loss": 0.1053, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.445512820512821e-05, |
|
"loss": 0.0271, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.43883547008547e-05, |
|
"loss": 0.1143, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.43215811965812e-05, |
|
"loss": 0.2631, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.42548076923077e-05, |
|
"loss": 0.3132, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.418803418803419e-05, |
|
"loss": 0.3753, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 3.4121260683760684e-05, |
|
"loss": 0.1488, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 3.4054487179487185e-05, |
|
"loss": 0.1837, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 3.398771367521368e-05, |
|
"loss": 0.0834, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 3.392094017094017e-05, |
|
"loss": 0.1418, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 3.385416666666667e-05, |
|
"loss": 0.2449, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 3.3787393162393165e-05, |
|
"loss": 0.3223, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 3.372061965811966e-05, |
|
"loss": 0.0967, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 3.365384615384616e-05, |
|
"loss": 0.0662, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.3587072649572646e-05, |
|
"loss": 0.1879, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.3520299145299146e-05, |
|
"loss": 0.1226, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.345352564102564e-05, |
|
"loss": 0.1606, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.338675213675213e-05, |
|
"loss": 0.0107, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.3319978632478634e-05, |
|
"loss": 0.137, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_accuracy": 0.9370836417468542, |
|
"eval_loss": 0.22310341894626617, |
|
"eval_runtime": 85.6004, |
|
"eval_samples_per_second": 15.783, |
|
"eval_steps_per_second": 2.64, |
|
"step": 3332 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.325320512820513e-05, |
|
"loss": 0.247, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.318643162393163e-05, |
|
"loss": 0.0202, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.311965811965812e-05, |
|
"loss": 0.1689, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.3052884615384615e-05, |
|
"loss": 0.334, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.2986111111111115e-05, |
|
"loss": 0.1227, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.291933760683761e-05, |
|
"loss": 0.05, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.28525641025641e-05, |
|
"loss": 0.0551, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.27857905982906e-05, |
|
"loss": 0.0999, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.2719017094017096e-05, |
|
"loss": 0.1259, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.265224358974359e-05, |
|
"loss": 0.0748, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.258547008547009e-05, |
|
"loss": 0.0967, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.251869658119658e-05, |
|
"loss": 0.0372, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 3.2451923076923077e-05, |
|
"loss": 0.1477, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 3.238514957264958e-05, |
|
"loss": 0.0598, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 3.231837606837607e-05, |
|
"loss": 0.0092, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 3.225160256410257e-05, |
|
"loss": 0.0115, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 3.2184829059829064e-05, |
|
"loss": 0.0084, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 3.211805555555556e-05, |
|
"loss": 0.1052, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 3.205128205128206e-05, |
|
"loss": 0.0335, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 3.1984508547008545e-05, |
|
"loss": 0.1715, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 3.1917735042735045e-05, |
|
"loss": 0.0244, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 3.185096153846154e-05, |
|
"loss": 0.0322, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 3.178418803418803e-05, |
|
"loss": 0.0308, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 3.171741452991453e-05, |
|
"loss": 0.0982, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 3.1650641025641026e-05, |
|
"loss": 0.0419, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 3.158386752136752e-05, |
|
"loss": 0.0448, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 3.151709401709402e-05, |
|
"loss": 0.082, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 3.145032051282051e-05, |
|
"loss": 0.1413, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 3.138354700854701e-05, |
|
"loss": 0.0035, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.131677350427351e-05, |
|
"loss": 0.0325, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.125e-05, |
|
"loss": 0.1028, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.1183226495726494e-05, |
|
"loss": 0.0111, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.1116452991452994e-05, |
|
"loss": 0.0263, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.104967948717949e-05, |
|
"loss": 0.0666, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.098290598290599e-05, |
|
"loss": 0.0059, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.091613247863248e-05, |
|
"loss": 0.0704, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.0849358974358975e-05, |
|
"loss": 0.1522, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 3.0782585470085476e-05, |
|
"loss": 0.0377, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 3.071581196581197e-05, |
|
"loss": 0.1819, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 3.064903846153846e-05, |
|
"loss": 0.0209, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 3.058226495726496e-05, |
|
"loss": 0.0196, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 3.0515491452991457e-05, |
|
"loss": 0.1089, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 3.0448717948717947e-05, |
|
"loss": 0.3502, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 3.0381944444444444e-05, |
|
"loss": 0.0047, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 3.031517094017094e-05, |
|
"loss": 0.0927, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 3.0248397435897434e-05, |
|
"loss": 0.0115, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 3.018162393162393e-05, |
|
"loss": 0.0098, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 3.0114850427350428e-05, |
|
"loss": 0.2128, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 3.0048076923076925e-05, |
|
"loss": 0.1717, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 2.9981303418803418e-05, |
|
"loss": 0.0731, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 2.9914529914529915e-05, |
|
"loss": 0.0716, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 2.9847756410256412e-05, |
|
"loss": 0.0211, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 2.9780982905982906e-05, |
|
"loss": 0.0054, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 2.9714209401709403e-05, |
|
"loss": 0.2639, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 2.96474358974359e-05, |
|
"loss": 0.0827, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 2.9580662393162396e-05, |
|
"loss": 0.2148, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 2.951388888888889e-05, |
|
"loss": 0.007, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 2.9447115384615387e-05, |
|
"loss": 0.143, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 2.9380341880341884e-05, |
|
"loss": 0.0107, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 2.9313568376068377e-05, |
|
"loss": 0.0062, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 2.9246794871794874e-05, |
|
"loss": 0.1301, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 2.918002136752137e-05, |
|
"loss": 0.0108, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 2.9113247863247868e-05, |
|
"loss": 0.0731, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 2.904647435897436e-05, |
|
"loss": 0.0286, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 2.897970085470086e-05, |
|
"loss": 0.008, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 2.8912927350427355e-05, |
|
"loss": 0.0894, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 2.8846153846153845e-05, |
|
"loss": 0.0484, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 2.8779380341880342e-05, |
|
"loss": 0.1517, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 2.8712606837606836e-05, |
|
"loss": 0.0369, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 2.8645833333333333e-05, |
|
"loss": 0.1208, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 2.857905982905983e-05, |
|
"loss": 0.1447, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 2.8512286324786323e-05, |
|
"loss": 0.0959, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 2.844551282051282e-05, |
|
"loss": 0.021, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 2.8378739316239317e-05, |
|
"loss": 0.2013, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 2.8311965811965814e-05, |
|
"loss": 0.0673, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 2.8245192307692307e-05, |
|
"loss": 0.0502, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 2.8178418803418804e-05, |
|
"loss": 0.0796, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 2.81116452991453e-05, |
|
"loss": 0.3334, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 2.8044871794871795e-05, |
|
"loss": 0.1535, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 2.7978098290598292e-05, |
|
"loss": 0.1823, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 2.791132478632479e-05, |
|
"loss": 0.0254, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 2.7844551282051286e-05, |
|
"loss": 0.125, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.1014, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_accuracy": 0.9592894152479645, |
|
"eval_loss": 0.15163935720920563, |
|
"eval_runtime": 85.5792, |
|
"eval_samples_per_second": 15.787, |
|
"eval_steps_per_second": 2.641, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2.7711004273504276e-05, |
|
"loss": 0.2326, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2.7644230769230773e-05, |
|
"loss": 0.07, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2.757745726495727e-05, |
|
"loss": 0.0832, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2.7510683760683763e-05, |
|
"loss": 0.1114, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 2.744391025641026e-05, |
|
"loss": 0.0198, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 2.7377136752136757e-05, |
|
"loss": 0.0039, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 2.7310363247863247e-05, |
|
"loss": 0.2082, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 2.724358974358974e-05, |
|
"loss": 0.1341, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 2.7176816239316238e-05, |
|
"loss": 0.1808, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 2.7110042735042735e-05, |
|
"loss": 0.1818, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 2.704326923076923e-05, |
|
"loss": 0.019, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 2.6976495726495725e-05, |
|
"loss": 0.005, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 2.6909722222222222e-05, |
|
"loss": 0.0053, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 2.684294871794872e-05, |
|
"loss": 0.0751, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 2.6776175213675216e-05, |
|
"loss": 0.2481, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 2.670940170940171e-05, |
|
"loss": 0.0346, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 2.6642628205128206e-05, |
|
"loss": 0.1085, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 2.6575854700854703e-05, |
|
"loss": 0.2827, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 2.6509081196581197e-05, |
|
"loss": 0.2162, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 2.6442307692307694e-05, |
|
"loss": 0.1607, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 2.637553418803419e-05, |
|
"loss": 0.0169, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 2.6308760683760687e-05, |
|
"loss": 0.0122, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 2.624198717948718e-05, |
|
"loss": 0.1715, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 2.6175213675213678e-05, |
|
"loss": 0.0114, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 2.6108440170940175e-05, |
|
"loss": 0.0156, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 2.604166666666667e-05, |
|
"loss": 0.0948, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 2.5974893162393165e-05, |
|
"loss": 0.1142, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 2.5908119658119662e-05, |
|
"loss": 0.0091, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 2.584134615384616e-05, |
|
"loss": 0.0021, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 2.577457264957265e-05, |
|
"loss": 0.0513, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 2.5707799145299143e-05, |
|
"loss": 0.002, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 2.564102564102564e-05, |
|
"loss": 0.0016, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 2.5574252136752137e-05, |
|
"loss": 0.062, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 2.5507478632478633e-05, |
|
"loss": 0.0168, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 2.5440705128205127e-05, |
|
"loss": 0.1189, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 2.5373931623931624e-05, |
|
"loss": 0.0289, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 2.530715811965812e-05, |
|
"loss": 0.0263, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.5240384615384614e-05, |
|
"loss": 0.022, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.517361111111111e-05, |
|
"loss": 0.1551, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.5106837606837608e-05, |
|
"loss": 0.1103, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.5040064102564105e-05, |
|
"loss": 0.1434, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.49732905982906e-05, |
|
"loss": 0.2635, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.4906517094017096e-05, |
|
"loss": 0.0956, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.4839743589743592e-05, |
|
"loss": 0.0518, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.4772970085470086e-05, |
|
"loss": 0.1728, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.4706196581196583e-05, |
|
"loss": 0.0058, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 2.463942307692308e-05, |
|
"loss": 0.1515, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 2.4572649572649573e-05, |
|
"loss": 0.0366, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 2.450587606837607e-05, |
|
"loss": 0.003, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 2.4439102564102564e-05, |
|
"loss": 0.0561, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 2.437232905982906e-05, |
|
"loss": 0.0039, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 2.4305555555555558e-05, |
|
"loss": 0.0052, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 2.423878205128205e-05, |
|
"loss": 0.0821, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 2.4172008547008548e-05, |
|
"loss": 0.0126, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 2.4105235042735045e-05, |
|
"loss": 0.0093, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 2.4038461538461542e-05, |
|
"loss": 0.2059, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 2.3971688034188035e-05, |
|
"loss": 0.0528, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 2.3904914529914532e-05, |
|
"loss": 0.0022, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 2.383814102564103e-05, |
|
"loss": 0.0969, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 2.3771367521367523e-05, |
|
"loss": 0.0024, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 2.3704594017094016e-05, |
|
"loss": 0.0674, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 2.3637820512820513e-05, |
|
"loss": 0.007, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 2.357104700854701e-05, |
|
"loss": 0.0121, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 2.3504273504273504e-05, |
|
"loss": 0.1585, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 2.34375e-05, |
|
"loss": 0.1726, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 2.3370726495726497e-05, |
|
"loss": 0.0053, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 2.3303952991452994e-05, |
|
"loss": 0.0597, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 2.3237179487179488e-05, |
|
"loss": 0.0105, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 2.3170405982905985e-05, |
|
"loss": 0.002, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 2.310363247863248e-05, |
|
"loss": 0.0019, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 2.3036858974358975e-05, |
|
"loss": 0.2447, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 2.297008547008547e-05, |
|
"loss": 0.0584, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 2.2903311965811966e-05, |
|
"loss": 0.0955, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 2.2836538461538463e-05, |
|
"loss": 0.0293, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 2.276976495726496e-05, |
|
"loss": 0.0053, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 2.2702991452991453e-05, |
|
"loss": 0.0666, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 2.263621794871795e-05, |
|
"loss": 0.0213, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 2.2569444444444447e-05, |
|
"loss": 0.0668, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 2.250267094017094e-05, |
|
"loss": 0.0146, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 2.2435897435897437e-05, |
|
"loss": 0.0046, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 2.2369123931623934e-05, |
|
"loss": 0.0884, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 2.230235042735043e-05, |
|
"loss": 0.0072, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 2.223557692307692e-05, |
|
"loss": 0.0084, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"eval_accuracy": 0.9681717246484086, |
|
"eval_loss": 0.147811621427536, |
|
"eval_runtime": 85.8137, |
|
"eval_samples_per_second": 15.743, |
|
"eval_steps_per_second": 2.634, |
|
"step": 4998 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 2.2168803418803418e-05, |
|
"loss": 0.1845, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 2.2102029914529915e-05, |
|
"loss": 0.0636, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 2.2035256410256412e-05, |
|
"loss": 0.0051, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 2.1968482905982905e-05, |
|
"loss": 0.0027, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 2.1901709401709402e-05, |
|
"loss": 0.0974, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 2.18349358974359e-05, |
|
"loss": 0.0281, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 2.1768162393162396e-05, |
|
"loss": 0.1491, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 2.170138888888889e-05, |
|
"loss": 0.0247, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 2.1634615384615387e-05, |
|
"loss": 0.0898, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 2.1567841880341884e-05, |
|
"loss": 0.0053, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 2.1501068376068377e-05, |
|
"loss": 0.0295, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 2.143429487179487e-05, |
|
"loss": 0.0022, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 2.1367521367521368e-05, |
|
"loss": 0.0016, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 2.1300747863247864e-05, |
|
"loss": 0.0012, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 2.1233974358974358e-05, |
|
"loss": 0.003, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 2.1167200854700855e-05, |
|
"loss": 0.0022, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 2.1100427350427352e-05, |
|
"loss": 0.0743, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 2.103365384615385e-05, |
|
"loss": 0.0254, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 2.0966880341880342e-05, |
|
"loss": 0.1902, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 2.090010683760684e-05, |
|
"loss": 0.0027, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.0041, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 2.076655982905983e-05, |
|
"loss": 0.1141, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 2.0699786324786323e-05, |
|
"loss": 0.0124, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 2.063301282051282e-05, |
|
"loss": 0.0542, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 2.0566239316239317e-05, |
|
"loss": 0.0032, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 2.0499465811965814e-05, |
|
"loss": 0.1405, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 2.0432692307692307e-05, |
|
"loss": 0.1003, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 2.0365918803418804e-05, |
|
"loss": 0.0025, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 2.02991452991453e-05, |
|
"loss": 0.2127, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 2.0232371794871795e-05, |
|
"loss": 0.0047, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 2.016559829059829e-05, |
|
"loss": 0.0746, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 2.009882478632479e-05, |
|
"loss": 0.0381, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 2.0032051282051285e-05, |
|
"loss": 0.0017, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 1.996527777777778e-05, |
|
"loss": 0.0261, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 1.9898504273504272e-05, |
|
"loss": 0.0052, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 1.983173076923077e-05, |
|
"loss": 0.0011, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 1.9764957264957266e-05, |
|
"loss": 0.1598, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 1.969818376068376e-05, |
|
"loss": 0.0089, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 1.9631410256410257e-05, |
|
"loss": 0.0011, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 1.9564636752136754e-05, |
|
"loss": 0.0036, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 1.949786324786325e-05, |
|
"loss": 0.0014, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 1.9431089743589744e-05, |
|
"loss": 0.0031, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 1.936431623931624e-05, |
|
"loss": 0.1976, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 1.9297542735042738e-05, |
|
"loss": 0.0014, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 0.0037, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 1.916399572649573e-05, |
|
"loss": 0.0055, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 1.9097222222222222e-05, |
|
"loss": 0.1296, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 1.903044871794872e-05, |
|
"loss": 0.003, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 1.8963675213675212e-05, |
|
"loss": 0.0843, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 1.889690170940171e-05, |
|
"loss": 0.2126, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 1.8830128205128206e-05, |
|
"loss": 0.0305, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 1.8763354700854703e-05, |
|
"loss": 0.0941, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 1.8696581196581197e-05, |
|
"loss": 0.0868, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 1.8629807692307693e-05, |
|
"loss": 0.0875, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 1.856303418803419e-05, |
|
"loss": 0.0053, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 1.8496260683760684e-05, |
|
"loss": 0.0012, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 1.842948717948718e-05, |
|
"loss": 0.0017, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 1.8362713675213678e-05, |
|
"loss": 0.0043, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 1.829594017094017e-05, |
|
"loss": 0.1032, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 1.8229166666666668e-05, |
|
"loss": 0.0109, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 1.8162393162393162e-05, |
|
"loss": 0.0018, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 1.809561965811966e-05, |
|
"loss": 0.0017, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 1.8028846153846156e-05, |
|
"loss": 0.0011, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 1.796207264957265e-05, |
|
"loss": 0.1335, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 1.7895299145299146e-05, |
|
"loss": 0.0011, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 1.7828525641025643e-05, |
|
"loss": 0.0019, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 1.776175213675214e-05, |
|
"loss": 0.0012, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 1.7694978632478633e-05, |
|
"loss": 0.0063, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 1.762820512820513e-05, |
|
"loss": 0.0036, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 1.7561431623931624e-05, |
|
"loss": 0.0293, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 1.749465811965812e-05, |
|
"loss": 0.1403, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 1.7427884615384614e-05, |
|
"loss": 0.0012, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 1.736111111111111e-05, |
|
"loss": 0.1275, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 1.7294337606837608e-05, |
|
"loss": 0.0219, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 1.7227564102564105e-05, |
|
"loss": 0.0883, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 1.71607905982906e-05, |
|
"loss": 0.003, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 1.7094017094017095e-05, |
|
"loss": 0.1378, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 1.7027243589743592e-05, |
|
"loss": 0.344, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 1.6960470085470086e-05, |
|
"loss": 0.0294, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 1.6893696581196583e-05, |
|
"loss": 0.0099, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 1.682692307692308e-05, |
|
"loss": 0.3121, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 1.6760149572649573e-05, |
|
"loss": 0.0649, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 1.6693376068376067e-05, |
|
"loss": 0.1054, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 1.6626602564102564e-05, |
|
"loss": 0.0041, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"eval_accuracy": 0.9652109548482606, |
|
"eval_loss": 0.12842880189418793, |
|
"eval_runtime": 86.1673, |
|
"eval_samples_per_second": 15.679, |
|
"eval_steps_per_second": 2.623, |
|
"step": 5831 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.655982905982906e-05, |
|
"loss": 0.0289, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.6493055555555557e-05, |
|
"loss": 0.0019, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.642628205128205e-05, |
|
"loss": 0.013, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.6359508547008548e-05, |
|
"loss": 0.0072, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 1.6292735042735045e-05, |
|
"loss": 0.0014, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 1.6225961538461538e-05, |
|
"loss": 0.0454, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 1.6159188034188035e-05, |
|
"loss": 0.007, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 1.6092414529914532e-05, |
|
"loss": 0.087, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 1.602564102564103e-05, |
|
"loss": 0.0044, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 1.5958867521367523e-05, |
|
"loss": 0.0058, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 1.5892094017094016e-05, |
|
"loss": 0.0405, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 1.5825320512820513e-05, |
|
"loss": 0.0724, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 1.575854700854701e-05, |
|
"loss": 0.0014, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 1.5691773504273503e-05, |
|
"loss": 0.0546, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 1.5625e-05, |
|
"loss": 0.0016, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 1.5558226495726497e-05, |
|
"loss": 0.0296, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 1.5491452991452994e-05, |
|
"loss": 0.0905, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 1.5424679487179488e-05, |
|
"loss": 0.0015, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 1.5357905982905985e-05, |
|
"loss": 0.057, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 1.529113247863248e-05, |
|
"loss": 0.1189, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 1.5224358974358973e-05, |
|
"loss": 0.0133, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 1.515758547008547e-05, |
|
"loss": 0.001, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 1.5090811965811965e-05, |
|
"loss": 0.0947, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 1.5024038461538462e-05, |
|
"loss": 0.0056, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 1.4957264957264958e-05, |
|
"loss": 0.0015, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 1.4890491452991453e-05, |
|
"loss": 0.11, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 1.482371794871795e-05, |
|
"loss": 0.1273, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 1.4756944444444445e-05, |
|
"loss": 0.1319, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 1.4690170940170942e-05, |
|
"loss": 0.0055, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 1.4623397435897437e-05, |
|
"loss": 0.001, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 1.4556623931623934e-05, |
|
"loss": 0.002, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 1.448985042735043e-05, |
|
"loss": 0.0008, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 1.4423076923076923e-05, |
|
"loss": 0.0841, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 1.4356303418803418e-05, |
|
"loss": 0.001, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 1.4289529914529915e-05, |
|
"loss": 0.0016, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 1.422275641025641e-05, |
|
"loss": 0.0305, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 1.4155982905982907e-05, |
|
"loss": 0.0007, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 1.4089209401709402e-05, |
|
"loss": 0.0857, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 1.4022435897435897e-05, |
|
"loss": 0.004, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 1.3955662393162394e-05, |
|
"loss": 0.001, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.0008, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 1.3822115384615386e-05, |
|
"loss": 0.0025, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 1.3755341880341882e-05, |
|
"loss": 0.003, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 1.3688568376068379e-05, |
|
"loss": 0.0075, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 1.362179487179487e-05, |
|
"loss": 0.0183, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 1.3555021367521367e-05, |
|
"loss": 0.066, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 1.3488247863247863e-05, |
|
"loss": 0.0021, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 1.342147435897436e-05, |
|
"loss": 0.0009, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 1.3354700854700855e-05, |
|
"loss": 0.0016, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 1.3287927350427352e-05, |
|
"loss": 0.0017, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 1.3221153846153847e-05, |
|
"loss": 0.0045, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 1.3154380341880344e-05, |
|
"loss": 0.0012, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 1.3087606837606839e-05, |
|
"loss": 0.0057, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 1.3020833333333334e-05, |
|
"loss": 0.0008, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 1.2954059829059831e-05, |
|
"loss": 0.0183, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 1.2887286324786325e-05, |
|
"loss": 0.0011, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 1.282051282051282e-05, |
|
"loss": 0.0027, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 1.2753739316239317e-05, |
|
"loss": 0.0384, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 1.2686965811965812e-05, |
|
"loss": 0.0027, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 1.2620192307692307e-05, |
|
"loss": 0.001, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 1.2553418803418804e-05, |
|
"loss": 0.0296, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 1.24866452991453e-05, |
|
"loss": 0.0008, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 1.2419871794871796e-05, |
|
"loss": 0.1594, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 1.2353098290598291e-05, |
|
"loss": 0.0579, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 1.2286324786324787e-05, |
|
"loss": 0.001, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 1.2219551282051282e-05, |
|
"loss": 0.0251, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 1.2152777777777779e-05, |
|
"loss": 0.1325, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 1.2086004273504274e-05, |
|
"loss": 0.0598, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 1.2019230769230771e-05, |
|
"loss": 0.0009, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 1.1952457264957266e-05, |
|
"loss": 0.0392, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 1.1885683760683761e-05, |
|
"loss": 0.0009, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 1.1818910256410257e-05, |
|
"loss": 0.1047, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 1.1752136752136752e-05, |
|
"loss": 0.0196, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 1.1685363247863249e-05, |
|
"loss": 0.0022, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 1.1618589743589744e-05, |
|
"loss": 0.0411, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 1.155181623931624e-05, |
|
"loss": 0.0023, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 1.1485042735042734e-05, |
|
"loss": 0.0009, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 1.1418269230769231e-05, |
|
"loss": 0.0007, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 1.1351495726495726e-05, |
|
"loss": 0.0008, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 1.1284722222222223e-05, |
|
"loss": 0.0008, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 1.1217948717948719e-05, |
|
"loss": 0.0012, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 1.1151175213675216e-05, |
|
"loss": 0.1342, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 1.1084401709401709e-05, |
|
"loss": 0.006, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"eval_accuracy": 0.9726128793486306, |
|
"eval_loss": 0.12144625186920166, |
|
"eval_runtime": 86.0938, |
|
"eval_samples_per_second": 15.692, |
|
"eval_steps_per_second": 2.625, |
|
"step": 6664 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 1.1017628205128206e-05, |
|
"loss": 0.002, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 1.0950854700854701e-05, |
|
"loss": 0.0011, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 1.0884081196581198e-05, |
|
"loss": 0.0076, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 1.0817307692307693e-05, |
|
"loss": 0.0339, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 1.0750534188034189e-05, |
|
"loss": 0.0542, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 1.0683760683760684e-05, |
|
"loss": 0.0536, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 1.0616987179487179e-05, |
|
"loss": 0.0007, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 1.0550213675213676e-05, |
|
"loss": 0.001, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 1.0483440170940171e-05, |
|
"loss": 0.0227, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 1.0416666666666668e-05, |
|
"loss": 0.0667, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 1.0349893162393162e-05, |
|
"loss": 0.0143, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 1.0283119658119658e-05, |
|
"loss": 0.0007, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 1.0216346153846154e-05, |
|
"loss": 0.0007, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 1.014957264957265e-05, |
|
"loss": 0.0008, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 1.0082799145299146e-05, |
|
"loss": 0.0013, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 1.0016025641025643e-05, |
|
"loss": 0.1765, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 9.949252136752136e-06, |
|
"loss": 0.0007, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 9.882478632478633e-06, |
|
"loss": 0.0031, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 9.815705128205128e-06, |
|
"loss": 0.0006, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 9.748931623931625e-06, |
|
"loss": 0.0021, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 9.68215811965812e-06, |
|
"loss": 0.0503, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 9.615384615384616e-06, |
|
"loss": 0.0007, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 9.548611111111111e-06, |
|
"loss": 0.0009, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 9.481837606837606e-06, |
|
"loss": 0.1196, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 9.415064102564103e-06, |
|
"loss": 0.0008, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 9.348290598290598e-06, |
|
"loss": 0.0041, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 9.281517094017095e-06, |
|
"loss": 0.0012, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 9.21474358974359e-06, |
|
"loss": 0.0011, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 9.147970085470086e-06, |
|
"loss": 0.1124, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 9.081196581196581e-06, |
|
"loss": 0.1116, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 9.014423076923078e-06, |
|
"loss": 0.0171, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 8.947649572649573e-06, |
|
"loss": 0.002, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 8.88087606837607e-06, |
|
"loss": 0.0688, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 8.814102564102565e-06, |
|
"loss": 0.0007, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 8.74732905982906e-06, |
|
"loss": 0.0006, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 8.680555555555556e-06, |
|
"loss": 0.1653, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 8.613782051282052e-06, |
|
"loss": 0.0007, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 8.547008547008548e-06, |
|
"loss": 0.0101, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 8.480235042735043e-06, |
|
"loss": 0.0005, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 8.41346153846154e-06, |
|
"loss": 0.001, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 8.346688034188033e-06, |
|
"loss": 0.0007, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 8.27991452991453e-06, |
|
"loss": 0.0314, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 8.213141025641025e-06, |
|
"loss": 0.0194, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 8.146367521367522e-06, |
|
"loss": 0.0008, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 8.079594017094018e-06, |
|
"loss": 0.0185, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 8.012820512820515e-06, |
|
"loss": 0.0006, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 7.946047008547008e-06, |
|
"loss": 0.0777, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 7.879273504273505e-06, |
|
"loss": 0.0006, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 7.8125e-06, |
|
"loss": 0.0008, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 7.745726495726497e-06, |
|
"loss": 0.0284, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 7.678952991452992e-06, |
|
"loss": 0.0006, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 7.612179487179487e-06, |
|
"loss": 0.0011, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 7.545405982905983e-06, |
|
"loss": 0.0007, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 7.478632478632479e-06, |
|
"loss": 0.0008, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 7.411858974358975e-06, |
|
"loss": 0.0011, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 7.345085470085471e-06, |
|
"loss": 0.0011, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 7.278311965811967e-06, |
|
"loss": 0.0012, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 7.211538461538461e-06, |
|
"loss": 0.001, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 7.1447649572649574e-06, |
|
"loss": 0.0006, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 7.0779914529914535e-06, |
|
"loss": 0.0026, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 7.011217948717949e-06, |
|
"loss": 0.001, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.0005, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 6.877670940170941e-06, |
|
"loss": 0.0011, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 6.810897435897435e-06, |
|
"loss": 0.0005, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 6.744123931623931e-06, |
|
"loss": 0.0026, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 6.677350427350427e-06, |
|
"loss": 0.0016, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 6.610576923076923e-06, |
|
"loss": 0.0029, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 6.5438034188034195e-06, |
|
"loss": 0.0005, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 6.4770299145299155e-06, |
|
"loss": 0.0099, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 6.41025641025641e-06, |
|
"loss": 0.0007, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 6.343482905982906e-06, |
|
"loss": 0.0018, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 6.276709401709402e-06, |
|
"loss": 0.001, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 6.209935897435898e-06, |
|
"loss": 0.0008, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 6.143162393162393e-06, |
|
"loss": 0.0011, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 6.076388888888889e-06, |
|
"loss": 0.0098, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 6.0096153846153855e-06, |
|
"loss": 0.0005, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 5.942841880341881e-06, |
|
"loss": 0.0006, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 5.876068376068376e-06, |
|
"loss": 0.0009, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 5.809294871794872e-06, |
|
"loss": 0.0008, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 5.742521367521367e-06, |
|
"loss": 0.0007, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 5.675747863247863e-06, |
|
"loss": 0.0006, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 5.608974358974359e-06, |
|
"loss": 0.0007, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 5.5422008547008545e-06, |
|
"loss": 0.001, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"eval_accuracy": 0.9822353811991118, |
|
"eval_loss": 0.07449387013912201, |
|
"eval_runtime": 85.6508, |
|
"eval_samples_per_second": 15.773, |
|
"eval_steps_per_second": 2.639, |
|
"step": 7497 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 5.475427350427351e-06, |
|
"loss": 0.0022, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 5.408653846153847e-06, |
|
"loss": 0.0181, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 5.341880341880342e-06, |
|
"loss": 0.0153, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 5.275106837606838e-06, |
|
"loss": 0.0246, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 5.208333333333334e-06, |
|
"loss": 0.0006, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 5.141559829059829e-06, |
|
"loss": 0.0717, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 5.074786324786325e-06, |
|
"loss": 0.0007, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 5.008012820512821e-06, |
|
"loss": 0.0005, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.9412393162393166e-06, |
|
"loss": 0.0009, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.874465811965813e-06, |
|
"loss": 0.0005, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.807692307692308e-06, |
|
"loss": 0.0005, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.740918803418803e-06, |
|
"loss": 0.0005, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.674145299145299e-06, |
|
"loss": 0.1065, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 4.607371794871795e-06, |
|
"loss": 0.0561, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 4.5405982905982904e-06, |
|
"loss": 0.0006, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 4.4738247863247865e-06, |
|
"loss": 0.0006, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 4.4070512820512826e-06, |
|
"loss": 0.0009, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 4.340277777777778e-06, |
|
"loss": 0.0006, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 4.273504273504274e-06, |
|
"loss": 0.0006, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 4.20673076923077e-06, |
|
"loss": 0.0005, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 4.139957264957265e-06, |
|
"loss": 0.0006, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 4.073183760683761e-06, |
|
"loss": 0.0009, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 4.006410256410257e-06, |
|
"loss": 0.1264, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 3.9396367521367525e-06, |
|
"loss": 0.0008, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 3.8728632478632485e-06, |
|
"loss": 0.0005, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 3.8060897435897433e-06, |
|
"loss": 0.0009, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 3.7393162393162394e-06, |
|
"loss": 0.0009, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 3.6725427350427355e-06, |
|
"loss": 0.0005, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 3.6057692307692307e-06, |
|
"loss": 0.0427, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 3.5389957264957267e-06, |
|
"loss": 0.0005, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 3.4722222222222224e-06, |
|
"loss": 0.0696, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 3.4054487179487176e-06, |
|
"loss": 0.0006, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 3.3386752136752137e-06, |
|
"loss": 0.0006, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 3.2719017094017097e-06, |
|
"loss": 0.005, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 3.205128205128205e-06, |
|
"loss": 0.0022, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 3.138354700854701e-06, |
|
"loss": 0.0006, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 3.0715811965811967e-06, |
|
"loss": 0.0093, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 3.0048076923076927e-06, |
|
"loss": 0.0007, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 2.938034188034188e-06, |
|
"loss": 0.0006, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 2.8712606837606836e-06, |
|
"loss": 0.0152, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 2.8044871794871797e-06, |
|
"loss": 0.0005, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 2.7377136752136753e-06, |
|
"loss": 0.0071, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 2.670940170940171e-06, |
|
"loss": 0.1172, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 2.604166666666667e-06, |
|
"loss": 0.0031, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 2.5373931623931626e-06, |
|
"loss": 0.0005, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 2.4706196581196583e-06, |
|
"loss": 0.0006, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 2.403846153846154e-06, |
|
"loss": 0.0108, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 2.3370726495726496e-06, |
|
"loss": 0.0005, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 2.2702991452991452e-06, |
|
"loss": 0.0005, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 2.2035256410256413e-06, |
|
"loss": 0.0005, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 2.136752136752137e-06, |
|
"loss": 0.0005, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 2.0699786324786326e-06, |
|
"loss": 0.0009, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 2.0032051282051286e-06, |
|
"loss": 0.0006, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 1.9364316239316243e-06, |
|
"loss": 0.0005, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 1.8696581196581197e-06, |
|
"loss": 0.001, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 1.8028846153846153e-06, |
|
"loss": 0.0005, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 1.7361111111111112e-06, |
|
"loss": 0.0005, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 1.6693376068376068e-06, |
|
"loss": 0.0005, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 1.6025641025641025e-06, |
|
"loss": 0.0008, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 1.5357905982905983e-06, |
|
"loss": 0.0005, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 1.469017094017094e-06, |
|
"loss": 0.0005, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 1.4022435897435898e-06, |
|
"loss": 0.0089, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 1.3354700854700855e-06, |
|
"loss": 0.0009, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 1.2686965811965813e-06, |
|
"loss": 0.001, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 1.201923076923077e-06, |
|
"loss": 0.0005, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 1.1351495726495726e-06, |
|
"loss": 0.0006, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 1.0683760683760685e-06, |
|
"loss": 0.0005, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 1.0016025641025643e-06, |
|
"loss": 0.0005, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 9.348290598290598e-07, |
|
"loss": 0.0005, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 8.680555555555556e-07, |
|
"loss": 0.0005, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 8.012820512820512e-07, |
|
"loss": 0.0006, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 7.34508547008547e-07, |
|
"loss": 0.0005, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 6.677350427350427e-07, |
|
"loss": 0.0005, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 6.009615384615385e-07, |
|
"loss": 0.0004, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 5.341880341880342e-07, |
|
"loss": 0.0049, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 4.674145299145299e-07, |
|
"loss": 0.0011, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 4.006410256410256e-07, |
|
"loss": 0.0007, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 3.3386752136752137e-07, |
|
"loss": 0.0005, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 2.670940170940171e-07, |
|
"loss": 0.0006, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 2.003205128205128e-07, |
|
"loss": 0.0008, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 1.3354700854700856e-07, |
|
"loss": 0.0676, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 6.677350427350428e-08, |
|
"loss": 0.0005, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 0.0, |
|
"loss": 0.0005, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"eval_accuracy": 0.9837157660991858, |
|
"eval_loss": 0.0756925716996193, |
|
"eval_runtime": 86.0742, |
|
"eval_samples_per_second": 15.696, |
|
"eval_steps_per_second": 2.626, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"step": 8320, |
|
"total_flos": 6.218584863603543e+19, |
|
"train_loss": 0.42279570934445543, |
|
"train_runtime": 6828.4751, |
|
"train_samples_per_second": 7.311, |
|
"train_steps_per_second": 1.218 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"eval_accuracy": 0.9837157660991858, |
|
"eval_loss": 0.0756925716996193, |
|
"eval_runtime": 86.0371, |
|
"eval_samples_per_second": 15.703, |
|
"eval_steps_per_second": 2.627, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"eval_accuracy": 0.0, |
|
"eval_loss": 9.850842475891113, |
|
"eval_runtime": 334.1168, |
|
"eval_samples_per_second": 14.612, |
|
"eval_steps_per_second": 2.436, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"eval_accuracy": 0.9837157660991858, |
|
"eval_loss": 0.0756925716996193, |
|
"eval_runtime": 85.3939, |
|
"eval_samples_per_second": 15.821, |
|
"eval_steps_per_second": 2.647, |
|
"step": 8320 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 8320, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"total_flos": 6.218584863603543e+19, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|