{
  "best_global_step": 2793,
  "best_metric": 0.2159090909090909,
  "best_model_checkpoint": "./ocr_model_output/checkpoint-2793",
  "epoch": 25.0,
  "eval_steps": 500,
  "global_step": 3675,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.06802721088435375, "grad_norm": 65.23433685302734, "learning_rate": 4.9877551020408165e-05, "loss": 8.6208, "step": 10 },
    { "epoch": 0.1360544217687075, "grad_norm": 70.44102478027344, "learning_rate": 4.974149659863946e-05, "loss": 4.5903, "step": 20 },
    { "epoch": 0.20408163265306123, "grad_norm": 35.348358154296875, "learning_rate": 4.960544217687075e-05, "loss": 1.9034, "step": 30 },
    { "epoch": 0.272108843537415, "grad_norm": 22.223546981811523, "learning_rate": 4.9469387755102045e-05, "loss": 1.3092, "step": 40 },
    { "epoch": 0.3401360544217687, "grad_norm": 35.46092987060547, "learning_rate": 4.933333333333334e-05, "loss": 1.0594, "step": 50 },
    { "epoch": 0.40816326530612246, "grad_norm": 22.021345138549805, "learning_rate": 4.9197278911564624e-05, "loss": 1.0721, "step": 60 },
    { "epoch": 0.47619047619047616, "grad_norm": 16.65888214111328, "learning_rate": 4.9061224489795924e-05, "loss": 0.825, "step": 70 },
    { "epoch": 0.54421768707483, "grad_norm": 13.323294639587402, "learning_rate": 4.892517006802722e-05, "loss": 0.8588, "step": 80 },
    { "epoch": 0.6122448979591837, "grad_norm": 16.98369598388672, "learning_rate": 4.87891156462585e-05, "loss": 0.6796, "step": 90 },
    { "epoch": 0.6802721088435374, "grad_norm": 14.879609107971191, "learning_rate": 4.8653061224489796e-05, "loss": 0.715, "step": 100 },
    { "epoch": 0.7482993197278912, "grad_norm": 13.820905685424805, "learning_rate": 4.8517006802721096e-05, "loss": 0.6773, "step": 110 },
    { "epoch": 0.8163265306122449, "grad_norm": 14.802565574645996, "learning_rate": 4.838095238095238e-05, "loss": 0.6992, "step": 120 },
    { "epoch": 0.8843537414965986, "grad_norm": 15.70506763458252, "learning_rate": 4.8244897959183675e-05, "loss": 0.7254, "step": 130 },
    { "epoch": 0.9523809523809523, "grad_norm": 16.910625457763672, "learning_rate": 4.810884353741497e-05, "loss": 0.8015, "step": 140 },
    { "epoch": 1.0, "eval_cer": 0.8118686868686869, "eval_loss": 0.8795642256736755, "eval_runtime": 3.4405, "eval_samples_per_second": 85.161, "eval_steps_per_second": 42.726, "step": 147 },
    { "epoch": 1.0204081632653061, "grad_norm": 12.733670234680176, "learning_rate": 4.797278911564626e-05, "loss": 0.6511, "step": 150 },
    { "epoch": 1.08843537414966, "grad_norm": 13.70749568939209, "learning_rate": 4.7836734693877554e-05, "loss": 0.7377, "step": 160 },
    { "epoch": 1.1564625850340136, "grad_norm": 15.44306468963623, "learning_rate": 4.770068027210885e-05, "loss": 0.6797, "step": 170 },
    { "epoch": 1.2244897959183674, "grad_norm": 19.31881332397461, "learning_rate": 4.756462585034014e-05, "loss": 0.6719, "step": 180 },
    { "epoch": 1.2925170068027212, "grad_norm": 12.049680709838867, "learning_rate": 4.742857142857143e-05, "loss": 0.6491, "step": 190 },
    { "epoch": 1.3605442176870748, "grad_norm": 14.28982925415039, "learning_rate": 4.729251700680272e-05, "loss": 0.7678, "step": 200 },
    { "epoch": 1.4285714285714286, "grad_norm": 25.51521110534668, "learning_rate": 4.715646258503402e-05, "loss": 0.7534, "step": 210 },
    { "epoch": 1.4965986394557822, "grad_norm": 14.158224105834961, "learning_rate": 4.7020408163265306e-05, "loss": 0.5653, "step": 220 },
    { "epoch": 1.564625850340136, "grad_norm": 10.563309669494629, "learning_rate": 4.68843537414966e-05, "loss": 0.6038, "step": 230 },
    { "epoch": 1.6326530612244898, "grad_norm": 14.436025619506836, "learning_rate": 4.67482993197279e-05, "loss": 0.5794, "step": 240 },
    { "epoch": 1.7006802721088436, "grad_norm": 14.293322563171387, "learning_rate": 4.6612244897959185e-05, "loss": 0.6102, "step": 250 },
    { "epoch": 1.7687074829931972, "grad_norm": 6.588962078094482, "learning_rate": 4.647619047619048e-05, "loss": 0.6853, "step": 260 },
    { "epoch": 1.836734693877551, "grad_norm": 16.505081176757812, "learning_rate": 4.634013605442177e-05, "loss": 0.554, "step": 270 },
    { "epoch": 1.9047619047619047, "grad_norm": 16.433975219726562, "learning_rate": 4.6204081632653064e-05, "loss": 0.5481, "step": 280 },
    { "epoch": 1.9727891156462585, "grad_norm": 14.68355655670166, "learning_rate": 4.606802721088436e-05, "loss": 0.4728, "step": 290 },
    { "epoch": 2.0, "eval_cer": 0.571969696969697, "eval_loss": 0.6962071061134338, "eval_runtime": 3.8738, "eval_samples_per_second": 75.637, "eval_steps_per_second": 37.948, "step": 294 },
    { "epoch": 2.0408163265306123, "grad_norm": 9.795095443725586, "learning_rate": 4.593197278911564e-05, "loss": 0.4635, "step": 300 },
    { "epoch": 2.108843537414966, "grad_norm": 17.107149124145508, "learning_rate": 4.579591836734694e-05, "loss": 0.4794, "step": 310 },
    { "epoch": 2.17687074829932, "grad_norm": 11.940792083740234, "learning_rate": 4.5659863945578236e-05, "loss": 0.677, "step": 320 },
    { "epoch": 2.2448979591836733, "grad_norm": 8.351872444152832, "learning_rate": 4.552380952380952e-05, "loss": 0.6732, "step": 330 },
    { "epoch": 2.312925170068027, "grad_norm": 2.6622140407562256, "learning_rate": 4.538775510204082e-05, "loss": 0.3907, "step": 340 },
    { "epoch": 2.380952380952381, "grad_norm": 10.555298805236816, "learning_rate": 4.5251700680272115e-05, "loss": 0.5443, "step": 350 },
    { "epoch": 2.4489795918367347, "grad_norm": 7.857567310333252, "learning_rate": 4.51156462585034e-05, "loss": 0.5688, "step": 360 },
    { "epoch": 2.5170068027210886, "grad_norm": 15.215005874633789, "learning_rate": 4.4979591836734694e-05, "loss": 0.5548, "step": 370 },
    { "epoch": 2.5850340136054424, "grad_norm": 8.438752174377441, "learning_rate": 4.484353741496599e-05, "loss": 0.3828, "step": 380 },
    { "epoch": 2.6530612244897958, "grad_norm": 9.824787139892578, "learning_rate": 4.470748299319728e-05, "loss": 0.3945, "step": 390 },
    { "epoch": 2.7210884353741496, "grad_norm": 13.708135604858398, "learning_rate": 4.4571428571428574e-05, "loss": 0.3751, "step": 400 },
    { "epoch": 2.7891156462585034, "grad_norm": 10.32359790802002, "learning_rate": 4.4435374149659867e-05, "loss": 0.4604, "step": 410 },
    { "epoch": 2.857142857142857, "grad_norm": 4.256906509399414, "learning_rate": 4.429931972789116e-05, "loss": 0.4558, "step": 420 },
    { "epoch": 2.925170068027211, "grad_norm": 12.131647109985352, "learning_rate": 4.416326530612245e-05, "loss": 0.4292, "step": 430 },
    { "epoch": 2.9931972789115644, "grad_norm": 14.156941413879395, "learning_rate": 4.4027210884353746e-05, "loss": 0.3713, "step": 440 },
    { "epoch": 3.0, "eval_cer": 0.5454545454545454, "eval_loss": 0.5351251363754272, "eval_runtime": 3.9015, "eval_samples_per_second": 75.099, "eval_steps_per_second": 37.678, "step": 441 },
    { "epoch": 3.061224489795918, "grad_norm": 5.429965496063232, "learning_rate": 4.389115646258504e-05, "loss": 0.6159, "step": 450 },
    { "epoch": 3.129251700680272, "grad_norm": 11.695433616638184, "learning_rate": 4.3755102040816325e-05, "loss": 0.4156, "step": 460 },
    { "epoch": 3.197278911564626, "grad_norm": 14.333072662353516, "learning_rate": 4.361904761904762e-05, "loss": 0.4372, "step": 470 },
    { "epoch": 3.2653061224489797, "grad_norm": 14.760481834411621, "learning_rate": 4.348299319727892e-05, "loss": 0.4279, "step": 480 },
    { "epoch": 3.3333333333333335, "grad_norm": 12.052332878112793, "learning_rate": 4.3346938775510204e-05, "loss": 0.3854, "step": 490 },
    { "epoch": 3.4013605442176873, "grad_norm": 14.331747055053711, "learning_rate": 4.32108843537415e-05, "loss": 0.3839, "step": 500 },
    { "epoch": 3.4693877551020407, "grad_norm": 21.911863327026367, "learning_rate": 4.307482993197279e-05, "loss": 0.4815, "step": 510 },
    { "epoch": 3.5374149659863945, "grad_norm": 6.438183307647705, "learning_rate": 4.293877551020408e-05, "loss": 0.2597, "step": 520 },
    { "epoch": 3.6054421768707483, "grad_norm": 13.568741798400879, "learning_rate": 4.2802721088435376e-05, "loss": 0.3735, "step": 530 },
    { "epoch": 3.673469387755102, "grad_norm": 0.18484297394752502, "learning_rate": 4.266666666666667e-05, "loss": 0.3343, "step": 540 },
    { "epoch": 3.741496598639456, "grad_norm": 0.7180817127227783, "learning_rate": 4.253061224489796e-05, "loss": 0.1867, "step": 550 },
    { "epoch": 3.8095238095238093, "grad_norm": 0.15185348689556122, "learning_rate": 4.2394557823129255e-05, "loss": 0.4185, "step": 560 },
    { "epoch": 3.877551020408163, "grad_norm": 10.095551490783691, "learning_rate": 4.225850340136054e-05, "loss": 0.1698, "step": 570 },
    { "epoch": 3.945578231292517, "grad_norm": 15.650616645812988, "learning_rate": 4.212244897959184e-05, "loss": 0.4307, "step": 580 },
    { "epoch": 4.0, "eval_cer": 0.4911616161616162, "eval_loss": 0.4221580922603607, "eval_runtime": 3.8592, "eval_samples_per_second": 75.922, "eval_steps_per_second": 38.091, "step": 588 },
    { "epoch": 4.01360544217687, "grad_norm": 8.345415115356445, "learning_rate": 4.1986394557823134e-05, "loss": 0.2111, "step": 590 },
    { "epoch": 4.081632653061225, "grad_norm": 4.688040256500244, "learning_rate": 4.185034013605442e-05, "loss": 0.3211, "step": 600 },
    { "epoch": 4.149659863945578, "grad_norm": 19.075292587280273, "learning_rate": 4.1714285714285714e-05, "loss": 0.3103, "step": 610 },
    { "epoch": 4.217687074829932, "grad_norm": 7.607833385467529, "learning_rate": 4.1578231292517014e-05, "loss": 0.2308, "step": 620 },
    { "epoch": 4.285714285714286, "grad_norm": 8.69675350189209, "learning_rate": 4.14421768707483e-05, "loss": 0.2981, "step": 630 },
    { "epoch": 4.35374149659864, "grad_norm": 2.994023323059082, "learning_rate": 4.130612244897959e-05, "loss": 0.2922, "step": 640 },
    { "epoch": 4.421768707482993, "grad_norm": 6.535342693328857, "learning_rate": 4.1170068027210886e-05, "loss": 0.3441, "step": 650 },
    { "epoch": 4.489795918367347, "grad_norm": 0.3404290974140167, "learning_rate": 4.103401360544218e-05, "loss": 0.3523, "step": 660 },
    { "epoch": 4.557823129251701, "grad_norm": 10.70188045501709, "learning_rate": 4.089795918367347e-05, "loss": 0.4148, "step": 670 },
    { "epoch": 4.625850340136054, "grad_norm": 14.900300979614258, "learning_rate": 4.0761904761904765e-05, "loss": 0.4041, "step": 680 },
    { "epoch": 4.6938775510204085, "grad_norm": 14.758822441101074, "learning_rate": 4.062585034013606e-05, "loss": 0.2453, "step": 690 },
    { "epoch": 4.761904761904762, "grad_norm": 0.7187572717666626, "learning_rate": 4.048979591836735e-05, "loss": 0.3703, "step": 700 },
    { "epoch": 4.829931972789115, "grad_norm": 9.603391647338867, "learning_rate": 4.035374149659864e-05, "loss": 0.3354, "step": 710 },
    { "epoch": 4.8979591836734695, "grad_norm": 1.358739972114563, "learning_rate": 4.021768707482994e-05, "loss": 0.3297, "step": 720 },
    { "epoch": 4.965986394557823, "grad_norm": 0.37522122263908386, "learning_rate": 4.008163265306122e-05, "loss": 0.2072, "step": 730 },
    { "epoch": 5.0, "eval_cer": 0.5921717171717171, "eval_loss": 0.358783096075058, "eval_runtime": 3.5411, "eval_samples_per_second": 82.742, "eval_steps_per_second": 41.512, "step": 735 },
    { "epoch": 5.034013605442177, "grad_norm": 3.8917222023010254, "learning_rate": 3.9945578231292516e-05, "loss": 0.2488, "step": 740 },
    { "epoch": 5.1020408163265305, "grad_norm": 16.159465789794922, "learning_rate": 3.9809523809523816e-05, "loss": 0.3769, "step": 750 },
    { "epoch": 5.170068027210885, "grad_norm": 17.226072311401367, "learning_rate": 3.96734693877551e-05, "loss": 0.2559, "step": 760 },
    { "epoch": 5.238095238095238, "grad_norm": 0.8567410707473755, "learning_rate": 3.9537414965986396e-05, "loss": 0.248, "step": 770 },
    { "epoch": 5.3061224489795915, "grad_norm": 1.5087652206420898, "learning_rate": 3.940136054421769e-05, "loss": 0.2737, "step": 780 },
    { "epoch": 5.374149659863946, "grad_norm": 3.331455945968628, "learning_rate": 3.926530612244898e-05, "loss": 0.1665, "step": 790 },
    { "epoch": 5.442176870748299, "grad_norm": 11.933197021484375, "learning_rate": 3.9129251700680275e-05, "loss": 0.322, "step": 800 },
    { "epoch": 5.510204081632653, "grad_norm": 13.032449722290039, "learning_rate": 3.899319727891156e-05, "loss": 0.239, "step": 810 },
    { "epoch": 5.578231292517007, "grad_norm": 5.0442047119140625, "learning_rate": 3.885714285714286e-05, "loss": 0.157, "step": 820 },
    { "epoch": 5.646258503401361, "grad_norm": 1.6076925992965698, "learning_rate": 3.8721088435374154e-05, "loss": 0.1926, "step": 830 },
    { "epoch": 5.714285714285714, "grad_norm": 22.085569381713867, "learning_rate": 3.858503401360544e-05, "loss": 0.3699, "step": 840 },
    { "epoch": 5.782312925170068, "grad_norm": 10.831768989562988, "learning_rate": 3.844897959183674e-05, "loss": 0.291, "step": 850 },
    { "epoch": 5.850340136054422, "grad_norm": 0.5946142077445984, "learning_rate": 3.831292517006803e-05, "loss": 0.2154, "step": 860 },
    { "epoch": 5.918367346938775, "grad_norm": 1.5171136856079102, "learning_rate": 3.817687074829932e-05, "loss": 0.212, "step": 870 },
    { "epoch": 5.986394557823129, "grad_norm": 18.592945098876953, "learning_rate": 3.804081632653061e-05, "loss": 0.2963, "step": 880 },
    { "epoch": 6.0, "eval_cer": 0.5151515151515151, "eval_loss": 0.2578863501548767, "eval_runtime": 3.7328, "eval_samples_per_second": 78.493, "eval_steps_per_second": 39.38, "step": 882 },
    { "epoch": 6.054421768707483, "grad_norm": 0.10681638866662979, "learning_rate": 3.7904761904761905e-05, "loss": 0.1761, "step": 890 },
    { "epoch": 6.122448979591836, "grad_norm": 7.661993503570557, "learning_rate": 3.77687074829932e-05, "loss": 0.2027, "step": 900 },
    { "epoch": 6.190476190476191, "grad_norm": 12.70997428894043, "learning_rate": 3.763265306122449e-05, "loss": 0.1828, "step": 910 },
    { "epoch": 6.258503401360544, "grad_norm": 0.06931126117706299, "learning_rate": 3.7496598639455784e-05, "loss": 0.1242, "step": 920 },
    { "epoch": 6.326530612244898, "grad_norm": 17.21261215209961, "learning_rate": 3.736054421768708e-05, "loss": 0.3785, "step": 930 },
    { "epoch": 6.394557823129252, "grad_norm": 0.10957188904285431, "learning_rate": 3.722448979591837e-05, "loss": 0.1184, "step": 940 },
    { "epoch": 6.462585034013605, "grad_norm": 14.104228019714355, "learning_rate": 3.7088435374149663e-05, "loss": 0.2253, "step": 950 },
    { "epoch": 6.530612244897959, "grad_norm": 0.2823491394519806, "learning_rate": 3.6952380952380956e-05, "loss": 0.1337, "step": 960 },
    { "epoch": 6.598639455782313, "grad_norm": 0.17527176439762115, "learning_rate": 3.681632653061224e-05, "loss": 0.1745, "step": 970 },
    { "epoch": 6.666666666666667, "grad_norm": 12.178832054138184, "learning_rate": 3.6680272108843536e-05, "loss": 0.3322, "step": 980 },
    { "epoch": 6.73469387755102, "grad_norm": 18.27240753173828, "learning_rate": 3.6544217687074836e-05, "loss": 0.245, "step": 990 },
    { "epoch": 6.802721088435375, "grad_norm": 8.510261535644531, "learning_rate": 3.640816326530612e-05, "loss": 0.2928, "step": 1000 },
    { "epoch": 6.870748299319728, "grad_norm": 8.47603988647461, "learning_rate": 3.6272108843537415e-05, "loss": 0.2048, "step": 1010 },
    { "epoch": 6.938775510204081, "grad_norm": 9.21681022644043, "learning_rate": 3.6136054421768715e-05, "loss": 0.1416, "step": 1020 },
    { "epoch": 7.0, "eval_cer": 0.4166666666666667, "eval_loss": 0.1848345547914505, "eval_runtime": 4.0307, "eval_samples_per_second": 72.692, "eval_steps_per_second": 36.47, "step": 1029 },
    { "epoch": 7.006802721088436, "grad_norm": 8.593953132629395, "learning_rate": 3.6e-05, "loss": 0.1307, "step": 1030 },
    { "epoch": 7.074829931972789, "grad_norm": 6.022789001464844, "learning_rate": 3.5863945578231294e-05, "loss": 0.1292, "step": 1040 },
    { "epoch": 7.142857142857143, "grad_norm": 10.953206062316895, "learning_rate": 3.572789115646259e-05, "loss": 0.0853, "step": 1050 },
    { "epoch": 7.210884353741497, "grad_norm": 5.332366943359375, "learning_rate": 3.559183673469388e-05, "loss": 0.112, "step": 1060 },
    { "epoch": 7.27891156462585, "grad_norm": 12.159287452697754, "learning_rate": 3.545578231292517e-05, "loss": 0.1007, "step": 1070 },
    { "epoch": 7.346938775510204, "grad_norm": 0.3854842483997345, "learning_rate": 3.531972789115646e-05, "loss": 0.2318, "step": 1080 },
    { "epoch": 7.414965986394558, "grad_norm": 11.151751518249512, "learning_rate": 3.518367346938776e-05, "loss": 0.1768, "step": 1090 },
    { "epoch": 7.482993197278912, "grad_norm": 20.415531158447266, "learning_rate": 3.504761904761905e-05, "loss": 0.1839, "step": 1100 },
    { "epoch": 7.551020408163265, "grad_norm": 0.7234401106834412, "learning_rate": 3.491156462585034e-05, "loss": 0.2944, "step": 1110 },
    { "epoch": 7.619047619047619, "grad_norm": 0.522950291633606, "learning_rate": 3.477551020408164e-05, "loss": 0.087, "step": 1120 },
    { "epoch": 7.687074829931973, "grad_norm": 8.606940269470215, "learning_rate": 3.463945578231293e-05, "loss": 0.1253, "step": 1130 },
    { "epoch": 7.755102040816326, "grad_norm": 22.922000885009766, "learning_rate": 3.450340136054422e-05, "loss": 0.2799, "step": 1140 },
    { "epoch": 7.8231292517006805, "grad_norm": 21.035017013549805, "learning_rate": 3.436734693877551e-05, "loss": 0.1441, "step": 1150 },
    { "epoch": 7.891156462585034, "grad_norm": 5.825491905212402, "learning_rate": 3.4231292517006804e-05, "loss": 0.0218, "step": 1160 },
    { "epoch": 7.959183673469388, "grad_norm": 12.801454544067383, "learning_rate": 3.40952380952381e-05, "loss": 0.2319, "step": 1170 },
    { "epoch": 8.0, "eval_cer": 0.42424242424242425, "eval_loss": 0.13813678920269012, "eval_runtime": 4.039, "eval_samples_per_second": 72.542, "eval_steps_per_second": 36.395, "step": 1176 },
    { "epoch": 8.02721088435374, "grad_norm": 1.3456509113311768, "learning_rate": 3.395918367346939e-05, "loss": 0.1622, "step": 1180 },
    { "epoch": 8.095238095238095, "grad_norm": 0.21156376600265503, "learning_rate": 3.382312925170068e-05, "loss": 0.0447, "step": 1190 },
    { "epoch": 8.16326530612245, "grad_norm": 0.08529641479253769, "learning_rate": 3.3687074829931976e-05, "loss": 0.3241, "step": 1200 },
    { "epoch": 8.231292517006803, "grad_norm": 8.225408554077148, "learning_rate": 3.355102040816327e-05, "loss": 0.1136, "step": 1210 },
    { "epoch": 8.299319727891156, "grad_norm": 0.0486108660697937, "learning_rate": 3.341496598639456e-05, "loss": 0.2109, "step": 1220 },
    { "epoch": 8.36734693877551, "grad_norm": 0.3150612413883209, "learning_rate": 3.3278911564625855e-05, "loss": 0.0678, "step": 1230 },
    { "epoch": 8.435374149659864, "grad_norm": 2.7837414741516113, "learning_rate": 3.314285714285714e-05, "loss": 0.0582, "step": 1240 },
    { "epoch": 8.503401360544217, "grad_norm": 0.08137867599725723, "learning_rate": 3.3006802721088434e-05, "loss": 0.2023, "step": 1250 },
    { "epoch": 8.571428571428571, "grad_norm": 0.12031784653663635, "learning_rate": 3.2870748299319734e-05, "loss": 0.2334, "step": 1260 },
    { "epoch": 8.639455782312925, "grad_norm": 0.7248769998550415, "learning_rate": 3.273469387755102e-05, "loss": 0.0562, "step": 1270 },
    { "epoch": 8.70748299319728, "grad_norm": 9.878806114196777, "learning_rate": 3.259863945578231e-05, "loss": 0.1711, "step": 1280 },
    { "epoch": 8.775510204081632, "grad_norm": 11.397530555725098, "learning_rate": 3.2462585034013606e-05, "loss": 0.2418, "step": 1290 },
    { "epoch": 8.843537414965986, "grad_norm": 13.183833122253418, "learning_rate": 3.23265306122449e-05, "loss": 0.056, "step": 1300 },
    { "epoch": 8.91156462585034, "grad_norm": 0.8917256593704224, "learning_rate": 3.219047619047619e-05, "loss": 0.0436, "step": 1310 },
    { "epoch": 8.979591836734693, "grad_norm": 7.448833465576172, "learning_rate": 3.2054421768707485e-05, "loss": 0.0476, "step": 1320 },
    { "epoch": 9.0, "eval_cer": 0.37752525252525254, "eval_loss": 0.11764977127313614, "eval_runtime": 3.9588, "eval_samples_per_second": 74.013, "eval_steps_per_second": 37.133, "step": 1323 },
    { "epoch": 9.047619047619047, "grad_norm": 7.476083278656006, "learning_rate": 3.191836734693878e-05, "loss": 0.098, "step": 1330 },
    { "epoch": 9.115646258503402, "grad_norm": 0.2827729880809784, "learning_rate": 3.178231292517007e-05, "loss": 0.1332, "step": 1340 },
    { "epoch": 9.183673469387756, "grad_norm": 1.02791166305542, "learning_rate": 3.164625850340136e-05, "loss": 0.0211, "step": 1350 },
    { "epoch": 9.251700680272108, "grad_norm": 14.376386642456055, "learning_rate": 3.151020408163266e-05, "loss": 0.1141, "step": 1360 },
    { "epoch": 9.319727891156463, "grad_norm": 0.9921436309814453, "learning_rate": 3.137414965986395e-05, "loss": 0.0931, "step": 1370 },
    { "epoch": 9.387755102040817, "grad_norm": 0.1799956113100052, "learning_rate": 3.123809523809524e-05, "loss": 0.0095, "step": 1380 },
    { "epoch": 9.45578231292517, "grad_norm": 0.05473727360367775, "learning_rate": 3.110204081632653e-05, "loss": 0.1565, "step": 1390 },
    { "epoch": 9.523809523809524, "grad_norm": 0.5269390344619751, "learning_rate": 3.096598639455782e-05, "loss": 0.0674, "step": 1400 },
    { "epoch": 9.591836734693878, "grad_norm": 2.706407070159912, "learning_rate": 3.0829931972789116e-05, "loss": 0.0966, "step": 1410 },
    { "epoch": 9.65986394557823, "grad_norm": 0.16416242718696594, "learning_rate": 3.069387755102041e-05, "loss": 0.0667, "step": 1420 },
    { "epoch": 9.727891156462585, "grad_norm": 15.035467147827148, "learning_rate": 3.05578231292517e-05, "loss": 0.1272, "step": 1430 },
    { "epoch": 9.795918367346939, "grad_norm": 0.19053949415683746, "learning_rate": 3.0421768707482995e-05, "loss": 0.2847, "step": 1440 },
    { "epoch": 9.863945578231293, "grad_norm": 0.12774477899074554, "learning_rate": 3.0285714285714288e-05, "loss": 0.1103, "step": 1450 },
    { "epoch": 9.931972789115646, "grad_norm": 0.474401593208313, "learning_rate": 3.0149659863945578e-05, "loss": 0.0972, "step": 1460 },
    { "epoch": 10.0, "grad_norm": 0.0236662644892931, "learning_rate": 3.0013605442176874e-05, "loss": 0.0626, "step": 1470 },
    { "epoch": 10.0, "eval_cer": 0.4305555555555556, "eval_loss": 0.09693024307489395, "eval_runtime": 4.1253, "eval_samples_per_second": 71.025, "eval_steps_per_second": 35.633, "step": 1470 },
    { "epoch": 10.068027210884354, "grad_norm": 0.03369349241256714, "learning_rate": 2.987755102040816e-05, "loss": 0.0542, "step": 1480 },
    { "epoch": 10.136054421768707, "grad_norm": 0.2802339792251587, "learning_rate": 2.9741496598639457e-05, "loss": 0.1356, "step": 1490 },
    { "epoch": 10.204081632653061, "grad_norm": 0.48600488901138306, "learning_rate": 2.960544217687075e-05, "loss": 0.1529, "step": 1500 },
    { "epoch": 10.272108843537415, "grad_norm": 2.2791683673858643, "learning_rate": 2.946938775510204e-05, "loss": 0.1232, "step": 1510 },
    { "epoch": 10.34013605442177, "grad_norm": 6.162140369415283, "learning_rate": 2.9333333333333336e-05, "loss": 0.1948, "step": 1520 },
    { "epoch": 10.408163265306122, "grad_norm": 1.365488886833191, "learning_rate": 2.919727891156463e-05, "loss": 0.0417, "step": 1530 },
    { "epoch": 10.476190476190476, "grad_norm": 14.345423698425293, "learning_rate": 2.906122448979592e-05, "loss": 0.0859, "step": 1540 },
    { "epoch": 10.54421768707483, "grad_norm": 0.1597384810447693, "learning_rate": 2.892517006802721e-05, "loss": 0.0376, "step": 1550 },
    { "epoch": 10.612244897959183, "grad_norm": 0.24928878247737885, "learning_rate": 2.87891156462585e-05, "loss": 0.5477, "step": 1560 },
    { "epoch": 10.680272108843537, "grad_norm": 0.3629794418811798, "learning_rate": 2.8653061224489798e-05, "loss": 0.0102, "step": 1570 },
    { "epoch": 10.748299319727892, "grad_norm": 17.98668098449707, "learning_rate": 2.851700680272109e-05, "loss": 0.0821, "step": 1580 },
    { "epoch": 10.816326530612244, "grad_norm": 5.073668479919434, "learning_rate": 2.838095238095238e-05, "loss": 0.053, "step": 1590 },
    { "epoch": 10.884353741496598, "grad_norm": 12.528962135314941, "learning_rate": 2.8244897959183673e-05, "loss": 0.0463, "step": 1600 },
    { "epoch": 10.952380952380953, "grad_norm": 1.0358648300170898, "learning_rate": 2.810884353741497e-05, "loss": 0.0124, "step": 1610 },
    { "epoch": 11.0, "eval_cer": 0.39646464646464646, "eval_loss": 0.08722148090600967, "eval_runtime": 3.8388, "eval_samples_per_second": 76.326, "eval_steps_per_second": 38.293, "step": 1617 },
    { "epoch": 11.020408163265307, "grad_norm": 0.19600285589694977, "learning_rate": 2.797278911564626e-05, "loss": 0.118, "step": 1620 },
    { "epoch": 11.08843537414966, "grad_norm": 26.618637084960938, "learning_rate": 2.7836734693877553e-05, "loss": 0.3147, "step": 1630 },
    { "epoch": 11.156462585034014, "grad_norm": 0.05680645629763603, "learning_rate": 2.7700680272108842e-05, "loss": 0.2198, "step": 1640 },
    { "epoch": 11.224489795918368, "grad_norm": 13.787897109985352, "learning_rate": 2.7564625850340135e-05, "loss": 0.2091, "step": 1650 },
    { "epoch": 11.29251700680272, "grad_norm": 1.339880108833313, "learning_rate": 2.742857142857143e-05, "loss": 0.0221, "step": 1660 },
    { "epoch": 11.360544217687075, "grad_norm": 0.18231110274791718, "learning_rate": 2.729251700680272e-05, "loss": 0.0567, "step": 1670 },
    { "epoch": 11.428571428571429, "grad_norm": 0.15762682259082794, "learning_rate": 2.7156462585034014e-05, "loss": 0.0118, "step": 1680 },
    { "epoch": 11.496598639455783, "grad_norm": 0.19339053332805634, "learning_rate": 2.702040816326531e-05, "loss": 0.0082, "step": 1690 },
    { "epoch": 11.564625850340136, "grad_norm": 11.242050170898438, "learning_rate": 2.6884353741496597e-05, "loss": 0.0398, "step": 1700 },
    { "epoch": 11.63265306122449, "grad_norm": 0.104960598051548, "learning_rate": 2.6748299319727893e-05, "loss": 0.0745, "step": 1710 },
    { "epoch": 11.700680272108844, "grad_norm": 0.4059283137321472, "learning_rate": 2.6612244897959187e-05, "loss": 0.0854, "step": 1720 },
    { "epoch": 11.768707482993197, "grad_norm": 0.11370517313480377, "learning_rate": 2.6476190476190476e-05, "loss": 0.084, "step": 1730 },
    { "epoch": 11.83673469387755, "grad_norm": 6.6930365562438965, "learning_rate": 2.6340136054421773e-05, "loss": 0.082, "step": 1740 },
    { "epoch": 11.904761904761905, "grad_norm": 0.035495854914188385, "learning_rate": 2.620408163265306e-05, "loss": 0.0704, "step": 1750 },
    { "epoch": 11.972789115646258, "grad_norm": 0.19433455169200897, "learning_rate": 2.6068027210884355e-05, "loss": 0.0755, "step": 1760 },
    { "epoch": 12.0, "eval_cer": 0.4090909090909091, "eval_loss": 0.06354419887065887, "eval_runtime": 3.8826, "eval_samples_per_second": 75.466, "eval_steps_per_second": 37.862, "step": 1764 },
    { "epoch": 12.040816326530612, "grad_norm": 0.22618860006332397, "learning_rate": 2.593197278911565e-05, "loss": 0.0842, "step": 1770 },
    { "epoch": 12.108843537414966, "grad_norm": 0.2526053786277771, "learning_rate": 2.5795918367346938e-05, "loss": 0.0257, "step": 1780 },
    { "epoch": 12.17687074829932, "grad_norm": 5.3073649406433105, "learning_rate": 2.5659863945578234e-05, "loss": 0.0723, "step": 1790 },
    { "epoch": 12.244897959183673, "grad_norm": 0.07300838083028793, "learning_rate": 2.5523809523809527e-05, "loss": 0.0447, "step": 1800 },
    { "epoch": 12.312925170068027, "grad_norm": 1.9569120407104492, "learning_rate": 2.5387755102040817e-05, "loss": 0.0437, "step": 1810 },
    { "epoch": 12.380952380952381, "grad_norm": 4.616933822631836, "learning_rate": 2.525170068027211e-05, "loss": 0.0802, "step": 1820 },
    { "epoch": 12.448979591836734, "grad_norm": 0.09218256920576096, "learning_rate": 2.51156462585034e-05, "loss": 0.0345, "step": 1830 },
    { "epoch": 12.517006802721088, "grad_norm": 0.10980120301246643, "learning_rate": 2.4979591836734696e-05, "loss": 0.0167, "step": 1840 },
    { "epoch": 12.585034013605442, "grad_norm": 1.9402213096618652, "learning_rate": 2.4843537414965986e-05, "loss": 0.0189, "step": 1850 },
    { "epoch": 12.653061224489797, "grad_norm": 0.28110960125923157, "learning_rate": 2.4707482993197282e-05, "loss": 0.0939, "step": 1860 },
    { "epoch": 12.72108843537415, "grad_norm": 14.07165813446045, "learning_rate": 2.4571428571428572e-05, "loss": 0.1164, "step": 1870 },
    { "epoch": 12.789115646258503, "grad_norm": 0.09300262480974197, "learning_rate": 2.4435374149659865e-05, "loss": 0.0511, "step": 1880 },
    { "epoch": 12.857142857142858, "grad_norm": 0.08766383677721024, "learning_rate": 2.4299319727891158e-05, "loss": 0.0821, "step": 1890 },
    { "epoch": 12.92517006802721, "grad_norm": 4.915824890136719, "learning_rate": 2.416326530612245e-05, "loss": 0.0425, "step": 1900 },
    { "epoch": 12.993197278911564, "grad_norm": 0.25317126512527466, "learning_rate": 2.4027210884353744e-05, "loss": 0.048, "step": 1910 },
    { "epoch": 13.0, "eval_cer": 0.43813131313131315, "eval_loss": 0.044823333621025085, "eval_runtime": 3.9907, "eval_samples_per_second": 73.42, "eval_steps_per_second": 36.835, "step": 1911 },
    { "epoch": 13.061224489795919, "grad_norm": 11.498345375061035, "learning_rate": 2.3891156462585034e-05, "loss": 0.0573, "step": 1920 },
    { "epoch": 13.129251700680273, "grad_norm": 9.521465301513672, "learning_rate": 2.3755102040816327e-05, "loss": 0.0186, "step": 1930 },
    { "epoch": 13.197278911564625, "grad_norm": 5.737509250640869, "learning_rate": 2.361904761904762e-05, "loss": 0.0189, "step": 1940 },
    { "epoch": 13.26530612244898, "grad_norm": 0.3337570130825043, "learning_rate": 2.3482993197278913e-05, "loss": 0.0249, "step": 1950 },
    { "epoch": 13.333333333333334, "grad_norm": 0.21208225190639496, "learning_rate": 2.3346938775510206e-05, "loss": 0.0266, "step": 1960 },
    { "epoch": 13.401360544217686, "grad_norm": 0.034205980598926544, "learning_rate": 2.3210884353741495e-05, "loss": 0.0062, "step": 1970 },
    { "epoch": 13.46938775510204, "grad_norm": 6.113767623901367, "learning_rate": 2.3074829931972792e-05, "loss": 0.1134, "step": 1980 },
    { "epoch": 13.537414965986395, "grad_norm": 1.1116629838943481, "learning_rate": 2.293877551020408e-05, "loss": 0.0873, "step": 1990 },
    { "epoch": 13.60544217687075, "grad_norm": 8.336064338684082, "learning_rate": 2.2802721088435375e-05, "loss": 0.0285, "step": 2000 },
    { "epoch": 13.673469387755102, "grad_norm": 0.6493708491325378, "learning_rate": 2.2666666666666668e-05, "loss": 0.0107, "step": 2010 },
    { "epoch": 13.741496598639456, "grad_norm": 4.806843280792236, "learning_rate": 2.253061224489796e-05, "loss": 0.0163, "step": 2020 },
    { "epoch": 13.80952380952381, "grad_norm": 0.049966610968112946, "learning_rate": 2.2394557823129254e-05, "loss": 0.01, "step": 2030 },
    { "epoch": 13.877551020408163, "grad_norm": 0.1961314082145691, "learning_rate": 2.2258503401360543e-05, "loss": 0.107, "step": 2040 },
    { "epoch": 13.945578231292517, "grad_norm": 0.04836405813694, "learning_rate": 2.2122448979591836e-05, "loss": 0.0518, "step": 2050 },
    { "epoch": 14.0, "eval_cer": 0.37752525252525254, "eval_loss": 0.04542345553636551, "eval_runtime": 3.7709, "eval_samples_per_second": 77.701, "eval_steps_per_second": 38.983, "step": 2058 },
    { "epoch": 14.013605442176871, "grad_norm": 0.12112589925527573, "learning_rate": 2.1986394557823133e-05, "loss": 0.007, "step": 2060 },
    { "epoch": 14.081632653061224, "grad_norm": 0.18053178489208221, "learning_rate": 2.1850340136054422e-05, "loss": 0.1291, "step": 2070 },
    { "epoch": 14.149659863945578, "grad_norm": 0.2066822201013565, "learning_rate": 2.1714285714285715e-05, "loss": 0.0257, "step": 2080 },
    { "epoch": 14.217687074829932, "grad_norm": 0.11014904081821442, "learning_rate": 2.1578231292517005e-05, "loss": 0.0535, "step": 2090 },
    { "epoch": 14.285714285714286, "grad_norm": 12.59344482421875, "learning_rate": 2.14421768707483e-05, "loss": 0.1287, "step": 2100 },
    { "epoch": 14.353741496598639, "grad_norm": 0.045586470514535904, "learning_rate": 2.1306122448979595e-05, "loss": 0.0348, "step": 2110 },
    { "epoch": 14.421768707482993, "grad_norm": 0.16031715273857117, "learning_rate": 2.1170068027210884e-05, "loss": 0.0664, "step": 2120 },
    { "epoch": 14.489795918367347, "grad_norm": 2.561408281326294, "learning_rate": 2.1034013605442177e-05, "loss": 0.04, "step": 2130 },
    { "epoch": 14.5578231292517, "grad_norm": 0.2670551538467407, "learning_rate": 2.089795918367347e-05, "loss": 0.0092, "step": 2140 },
    { "epoch": 14.625850340136054, "grad_norm": 0.12540097534656525, "learning_rate": 2.0761904761904763e-05, "loss": 0.0111, "step": 2150 },
    { "epoch": 14.693877551020408, "grad_norm": 1.2808445692062378, "learning_rate": 2.0625850340136056e-05, "loss": 0.0595, "step": 2160 },
    { "epoch": 14.761904761904763, "grad_norm": 0.03977341949939728, "learning_rate": 2.0489795918367346e-05, "loss": 0.0437, "step": 2170 },
    { "epoch": 14.829931972789115, "grad_norm": 0.40472060441970825, "learning_rate": 2.0353741496598642e-05, "loss": 0.0268, "step": 2180 },
    { "epoch": 14.89795918367347, "grad_norm": 0.08766451478004456, "learning_rate": 2.0217687074829932e-05, "loss": 0.0072, "step": 2190 },
    { "epoch": 14.965986394557824, "grad_norm": 0.34755828976631165, "learning_rate": 2.0081632653061225e-05, "loss": 0.0482, "step": 2200 },
    { "epoch": 15.0, "eval_cer": 0.38257575757575757, "eval_loss": 0.03393391892313957, "eval_runtime": 3.9624, "eval_samples_per_second": 73.945, "eval_steps_per_second": 37.098, "step": 2205 },
    { "epoch": 15.034013605442176, "grad_norm": 0.07853918522596359, "learning_rate": 1.9945578231292518e-05, "loss": 0.0218, "step": 2210 },
    { "epoch": 15.10204081632653, "grad_norm": 0.08187614381313324, "learning_rate": 1.980952380952381e-05, "loss": 0.0059, "step": 2220 },
    { "epoch": 15.170068027210885, "grad_norm": 0.5369409918785095, "learning_rate": 1.9673469387755104e-05, "loss": 0.0105, "step": 2230 },
    { "epoch": 15.238095238095237, "grad_norm": 0.46228042244911194, "learning_rate": 1.9537414965986394e-05, "loss": 0.0028, "step": 2240 },
    { "epoch": 15.306122448979592, "grad_norm": 0.1829945147037506, "learning_rate": 1.940136054421769e-05, "loss": 0.0355, "step": 2250 },
    { "epoch": 15.374149659863946, "grad_norm": 0.08586379885673523, "learning_rate": 1.926530612244898e-05, "loss": 0.0096, "step": 2260 },
    { "epoch": 15.4421768707483, "grad_norm": 0.13552436232566833, "learning_rate": 1.9129251700680273e-05, "loss": 0.1455, "step": 2270 },
    { "epoch": 15.510204081632653, "grad_norm": 0.3371906876564026, "learning_rate": 1.8993197278911566e-05, "loss": 0.0229, "step": 2280 },
    { "epoch": 15.578231292517007, "grad_norm": 0.03920818492770195, "learning_rate": 1.885714285714286e-05, "loss": 0.0224, "step": 2290 },
    { "epoch": 15.646258503401361, "grad_norm": 0.8000497817993164, "learning_rate": 1.8721088435374152e-05, "loss": 0.0681, "step": 2300 },
    { "epoch": 15.714285714285714, "grad_norm": 0.15928244590759277, "learning_rate": 1.8585034013605442e-05, "loss": 0.0338, "step": 2310 },
    { "epoch": 15.782312925170068, "grad_norm": 2.3006467819213867, "learning_rate": 1.8448979591836735e-05, "loss": 0.0085, "step": 2320 },
    { "epoch": 15.850340136054422, "grad_norm": 0.3312649726867676, "learning_rate": 1.8312925170068028e-05, "loss": 0.0015, "step": 2330 },
    { "epoch": 15.918367346938776, "grad_norm": 11.605361938476562, "learning_rate": 1.817687074829932e-05, "loss": 0.0616, "step": 2340 },
    { "epoch": 15.986394557823129, "grad_norm": 0.06783591210842133, "learning_rate": 1.8040816326530614e-05, "loss": 0.0061, "step": 2350 },
    { "epoch": 16.0, "eval_cer": 0.36742424242424243, "eval_loss": 0.01737603358924389, "eval_runtime": 3.7608, "eval_samples_per_second": 77.91, "eval_steps_per_second": 39.088, "step": 2352 },
    { "epoch": 16.05442176870748, "grad_norm": 0.03763847053050995, "learning_rate": 1.7904761904761904e-05, "loss": 0.018, "step": 2360 },
    { "epoch": 16.122448979591837, "grad_norm": 1.681272268295288, "learning_rate": 1.77687074829932e-05, "loss": 0.0158, "step": 2370 },
    { "epoch": 16.19047619047619, "grad_norm": 0.31114622950553894, "learning_rate": 1.763265306122449e-05, "loss": 0.0045, "step": 2380 },
    { "epoch": 16.258503401360546, "grad_norm": 3.3073906898498535, "learning_rate": 1.7496598639455783e-05, "loss": 0.0266, "step": 2390 },
    { "epoch": 16.3265306122449, "grad_norm": 0.05872774124145508, "learning_rate": 1.7360544217687076e-05, "loss": 0.0094, "step": 2400 },
    { "epoch": 16.39455782312925, "grad_norm": 0.36872556805610657, "learning_rate": 1.722448979591837e-05, "loss": 0.0031, "step": 2410 },
    { "epoch": 16.462585034013607, "grad_norm": 0.18953841924667358, "learning_rate": 1.7088435374149662e-05, "loss": 0.0072, "step": 2420 },
    { "epoch": 16.53061224489796, "grad_norm": 0.053531669080257416, "learning_rate": 1.695238095238095e-05, "loss": 0.0214, "step": 2430 },
    { "epoch": 16.598639455782312, "grad_norm": 0.9419485926628113, "learning_rate": 1.6816326530612244e-05, "loss": 0.0311, "step": 2440 },
    { "epoch": 16.666666666666668, "grad_norm": 0.10363417118787766, "learning_rate": 1.668027210884354e-05, "loss": 0.0145, "step": 2450 },
    { "epoch": 16.73469387755102, "grad_norm": 11.27441692352295, "learning_rate": 1.654421768707483e-05, "loss": 0.0174, "step": 2460 },
    { "epoch": 16.802721088435373, "grad_norm": 0.07533001154661179, "learning_rate": 1.6408163265306124e-05, "loss": 0.0043, "step": 2470 },
    { "epoch": 16.87074829931973, "grad_norm": 0.09791432321071625, "learning_rate": 1.6272108843537413e-05, "loss": 0.0192, "step": 2480 },
    { "epoch": 16.93877551020408, "grad_norm": 1.773054838180542, "learning_rate": 1.613605442176871e-05, "loss": 0.0846, "step": 2490 },
    { "epoch": 17.0, "eval_cer": 0.22853535353535354, "eval_loss": 0.016882039606571198, "eval_runtime": 3.9198, "eval_samples_per_second": 74.749, "eval_steps_per_second": 37.502, "step": 2499 },
    { "epoch": 17.006802721088434, "grad_norm": 0.08635395020246506, "learning_rate": 1.6000000000000003e-05, "loss": 0.0117, "step": 2500 },
    { "epoch": 17.07482993197279, "grad_norm": 0.04216604679822922, "learning_rate": 1.5863945578231292e-05, "loss": 0.0075, "step": 2510 },
    { "epoch": 17.142857142857142, "grad_norm": 0.3129735589027405, "learning_rate": 1.5727891156462585e-05, "loss": 0.0024, "step": 2520 },
    { "epoch": 17.2108843537415, "grad_norm": 0.0337909497320652, "learning_rate": 1.559183673469388e-05, "loss": 0.0032, "step": 2530 },
    { "epoch": 17.27891156462585, "grad_norm": 0.3642733097076416, "learning_rate": 1.545578231292517e-05, "loss": 0.1344, "step": 2540 },
    { "epoch": 17.346938775510203, "grad_norm": 0.06059624254703522, "learning_rate": 1.5319727891156464e-05, "loss": 0.0039, "step": 2550 },
    { "epoch": 17.41496598639456, "grad_norm": 0.5465549826622009, "learning_rate": 1.5183673469387754e-05, "loss": 0.0395, "step": 2560 },
    { "epoch": 17.482993197278912, "grad_norm": 0.048258326947689056, "learning_rate": 1.5047619047619049e-05, "loss": 0.0226, "step": 2570 },
    { "epoch": 17.551020408163264, "grad_norm": 0.5764261484146118, "learning_rate": 1.4911564625850342e-05, "loss": 0.0348, "step": 2580 },
    { "epoch": 17.61904761904762, "grad_norm": 1.802079439163208, "learning_rate": 1.4775510204081633e-05, "loss": 0.004, "step": 2590 },
    { "epoch": 17.687074829931973, "grad_norm": 0.03979931399226189, "learning_rate": 1.4639455782312925e-05, "loss": 0.004, "step": 2600 },
    { "epoch": 17.755102040816325, "grad_norm": 0.25388839840888977, "learning_rate": 1.450340136054422e-05, "loss": 0.0039, "step": 2610 },
    { "epoch": 17.82312925170068, "grad_norm": 0.44963565468788147, "learning_rate": 1.436734693877551e-05, "loss": 0.006, "step": 2620 },
    { "epoch": 17.891156462585034, "grad_norm": 0.0887552797794342, "learning_rate": 1.4231292517006804e-05, "loss": 0.0042, "step": 2630 },
    { "epoch": 17.959183673469386, "grad_norm": 0.11289983987808228, "learning_rate": 1.4095238095238095e-05, "loss": 0.0034, "step": 2640 },
    { "epoch": 18.0, "eval_cer": 0.24242424242424243, "eval_loss": 0.013762996532022953, "eval_runtime": 3.9761, "eval_samples_per_second": 73.691, "eval_steps_per_second": 36.971, "step": 2646 },
    { "epoch": 18.027210884353742, "grad_norm": 0.06268062442541122, "learning_rate": 1.395918367346939e-05, "loss": 0.0031, "step": 2650 },
    { "epoch": 18.095238095238095, "grad_norm": 0.03095332533121109, "learning_rate": 1.3823129251700681e-05, "loss": 0.0356, "step": 2660 },
    { "epoch": 18.163265306122447, "grad_norm": 0.6670628786087036, "learning_rate": 1.3687074829931972e-05, "loss": 0.004, "step": 2670 },
    { "epoch": 18.231292517006803, "grad_norm": 0.09079564362764359, "learning_rate": 1.3551020408163265e-05, "loss": 0.0036, "step": 2680 },
    { "epoch": 18.299319727891156, "grad_norm": 0.17814789712429047, "learning_rate": 1.3414965986394558e-05, "loss": 0.0016, "step": 2690 },
    { "epoch": 18.367346938775512, "grad_norm": 0.053088486194610596, "learning_rate": 1.3278911564625852e-05, "loss": 0.0037, "step": 2700 },
    { "epoch": 18.435374149659864, "grad_norm": 0.05287722125649452, "learning_rate": 1.3142857142857143e-05, "loss": 0.0031, "step": 2710 },
    { "epoch": 18.503401360544217, "grad_norm": 0.22168047726154327, "learning_rate": 1.3006802721088434e-05, "loss": 0.003, "step": 2720 },
    { "epoch": 18.571428571428573, "grad_norm": 0.2615916430950165, "learning_rate": 1.2870748299319729e-05, "loss": 0.0022, "step": 2730 },
    { "epoch": 18.639455782312925, "grad_norm": 0.04484458267688751, "learning_rate": 1.273469387755102e-05, "loss": 0.083, "step": 2740 },
    { "epoch": 18.707482993197278, "grad_norm": 0.4530847370624542, "learning_rate": 1.2598639455782313e-05, "loss": 0.0034, "step": 2750 },
    { "epoch": 18.775510204081634, "grad_norm": 0.11792109161615372, "learning_rate": 1.2462585034013606e-05, "loss": 0.0295, "step": 2760 },
    { "epoch": 18.843537414965986, "grad_norm": 0.049426767975091934, "learning_rate": 1.2326530612244898e-05, "loss": 0.0019, "step": 2770 },
    { "epoch": 18.91156462585034, "grad_norm": 0.04500193893909454, "learning_rate": 1.219047619047619e-05, "loss": 0.0134, "step": 2780 },
    { "epoch": 18.979591836734695, "grad_norm": 0.14980462193489075, "learning_rate": 1.2054421768707484e-05, "loss": 0.0032, "step": 2790 },
    { "epoch": 19.0, "eval_cer": 0.2159090909090909, "eval_loss": 0.012968610972166061, "eval_runtime": 3.6907, "eval_samples_per_second": 79.389, "eval_steps_per_second": 39.83, "step": 2793 },
    { "epoch": 19.047619047619047, "grad_norm": 0.5596031546592712, "learning_rate": 1.1918367346938777e-05, "loss": 0.004, "step": 2800 },
    { "epoch": 19.1156462585034, "grad_norm": 0.09450047463178635, "learning_rate": 1.178231292517007e-05, "loss": 0.0176, "step": 2810 },
    { "epoch": 19.183673469387756, "grad_norm": 0.030270878225564957, "learning_rate": 1.1646258503401361e-05, "loss": 0.0031, "step": 2820 },
    { "epoch": 19.25170068027211, "grad_norm": 0.5096073746681213, "learning_rate": 1.1510204081632654e-05, "loss": 0.0231, "step": 2830 },
    { "epoch": 19.31972789115646, "grad_norm": 0.2736698389053345, "learning_rate": 1.1374149659863946e-05, "loss": 0.0016, "step": 2840 },
    { "epoch": 19.387755102040817, "grad_norm": 0.06535348296165466, "learning_rate": 1.1238095238095239e-05, "loss": 0.0013, "step": 2850 },
    { "epoch": 19.45578231292517, "grad_norm": 0.0859360322356224, "learning_rate": 1.1102040816326532e-05, "loss": 0.003, "step": 2860 },
    { "epoch": 19.523809523809526, "grad_norm": 0.037795525044202805, "learning_rate": 1.0965986394557825e-05, "loss": 0.0026, "step": 2870 },
    { "epoch": 19.591836734693878, "grad_norm": 1.3488638401031494, "learning_rate": 1.0829931972789116e-05, "loss": 0.004, "step": 2880 },
    { "epoch": 19.65986394557823, "grad_norm": 0.05746370553970337, "learning_rate": 1.0693877551020409e-05, "loss": 0.0192, "step": 2890 },
    { "epoch": 19.727891156462587, "grad_norm": 0.025979384779930115, "learning_rate": 1.05578231292517e-05, "loss": 0.0091, "step": 2900 },
    { "epoch": 19.79591836734694, "grad_norm": 4.972421646118164, "learning_rate": 1.0421768707482993e-05, "loss": 0.0082, "step": 2910 },
    { "epoch": 19.86394557823129, "grad_norm": 0.49525704979896545, "learning_rate": 1.0285714285714286e-05, "loss": 0.0034, "step": 2920 },
    { "epoch": 19.931972789115648, "grad_norm": 0.02950323186814785, "learning_rate": 1.014965986394558e-05, "loss": 0.07, "step": 2930 },
    { "epoch": 20.0, "grad_norm": 0.047243040055036545, "learning_rate": 1.001360544217687e-05, "loss": 0.0019, "step": 2940 },
    { "epoch": 20.0, "eval_cer": 0.30176767676767674, "eval_loss": 0.011288419365882874, "eval_runtime": 4.0348, "eval_samples_per_second": 72.618, "eval_steps_per_second": 36.433, "step": 2940 },
    { "epoch": 20.068027210884352, "grad_norm": 8.58004093170166, "learning_rate": 9.877551020408164e-06, "loss": 0.02, "step": 2950 },
    { "epoch": 20.13605442176871, "grad_norm": 0.2544482946395874, "learning_rate": 9.741496598639455e-06, "loss": 0.0156, "step": 2960 },
    { "epoch": 20.20408163265306, "grad_norm": 0.5715163350105286, "learning_rate": 9.60544217687075e-06, "loss": 0.0027, "step": 2970 },
    { "epoch": 20.272108843537413, "grad_norm": 0.134610116481781, "learning_rate": 9.469387755102041e-06, "loss": 0.0394, "step": 2980 },
    { "epoch": 20.34013605442177, "grad_norm": 0.23469507694244385, "learning_rate": 9.333333333333334e-06, "loss": 0.0142, "step": 2990 },
    { "epoch": 20.408163265306122, "grad_norm": 0.19277207553386688, "learning_rate": 9.197278911564626e-06, "loss": 0.0214, "step": 3000 },
    { "epoch": 20.476190476190474, "grad_norm": 0.04216855764389038, "learning_rate": 9.061224489795919e-06, "loss": 0.0014, "step": 3010 },
    { "epoch": 20.54421768707483, "grad_norm": 0.02860959619283676, "learning_rate": 8.925170068027212e-06, "loss": 0.0049, "step": 3020 },
    { "epoch": 20.612244897959183, "grad_norm": 0.38055145740509033, "learning_rate": 8.789115646258505e-06, "loss": 0.0193, "step": 3030 },
    { "epoch": 20.68027210884354, "grad_norm": 0.034134916961193085, "learning_rate": 8.653061224489796e-06, "loss": 0.0015, "step": 3040 },
    { "epoch": 20.74829931972789, "grad_norm": 0.6501132845878601, "learning_rate": 8.517006802721089e-06, "loss": 0.003, "step": 3050 },
    { "epoch": 20.816326530612244, "grad_norm": 0.26927316188812256, "learning_rate": 8.38095238095238e-06, "loss": 0.0064, "step": 3060 },
    { "epoch": 20.8843537414966, "grad_norm": 0.308063805103302, "learning_rate": 8.244897959183674e-06, "loss": 0.0171, "step": 3070 },
    { "epoch": 20.952380952380953, "grad_norm": 0.0912749320268631, "learning_rate": 8.108843537414967e-06, "loss": 0.0034, "step": 3080 },
    { "epoch": 21.0, "eval_cer": 0.23863636363636365, "eval_loss": 0.009314554743468761, "eval_runtime": 3.7562, "eval_samples_per_second": 78.005, "eval_steps_per_second": 39.135, "step": 3087 },
    { "epoch": 21.020408163265305, "grad_norm": 0.031063944101333618, "learning_rate": 7.97278911564626e-06, "loss": 0.0025, "step": 3090 },
    { "epoch": 21.08843537414966, "grad_norm": 0.45678919553756714, "learning_rate": 7.836734693877551e-06, "loss": 0.0017,
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 21.156462585034014, | |
| "grad_norm": 0.06373850256204605, | |
| "learning_rate": 7.700680272108844e-06, | |
| "loss": 0.0203, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 21.224489795918366, | |
| "grad_norm": 0.04051206260919571, | |
| "learning_rate": 7.564625850340136e-06, | |
| "loss": 0.0028, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 21.292517006802722, | |
| "grad_norm": 0.20778831839561462, | |
| "learning_rate": 7.428571428571429e-06, | |
| "loss": 0.0032, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 21.360544217687075, | |
| "grad_norm": 0.23982657492160797, | |
| "learning_rate": 7.292517006802721e-06, | |
| "loss": 0.0097, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 21.428571428571427, | |
| "grad_norm": 0.30359897017478943, | |
| "learning_rate": 7.1564625850340144e-06, | |
| "loss": 0.002, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 21.496598639455783, | |
| "grad_norm": 0.844930112361908, | |
| "learning_rate": 7.020408163265306e-06, | |
| "loss": 0.064, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 21.564625850340136, | |
| "grad_norm": 0.2660425305366516, | |
| "learning_rate": 6.884353741496599e-06, | |
| "loss": 0.011, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 21.632653061224488, | |
| "grad_norm": 0.1279953122138977, | |
| "learning_rate": 6.748299319727891e-06, | |
| "loss": 0.0027, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 21.700680272108844, | |
| "grad_norm": 0.05603710934519768, | |
| "learning_rate": 6.612244897959184e-06, | |
| "loss": 0.0012, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 21.768707482993197, | |
| "grad_norm": 0.05168928578495979, | |
| "learning_rate": 6.476190476190476e-06, | |
| "loss": 0.0176, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 21.836734693877553, | |
| "grad_norm": 0.046198636293411255, | |
| "learning_rate": 6.340136054421769e-06, | |
| "loss": 0.0013, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 21.904761904761905, | |
| "grad_norm": 0.042502377182245255, | |
| "learning_rate": 6.2040816326530614e-06, | |
| "loss": 0.0015, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 21.972789115646258, | |
| "grad_norm": 0.24547749757766724, | |
| "learning_rate": 6.0680272108843545e-06, | |
| "loss": 0.0023, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_cer": 0.23106060606060605, | |
| "eval_loss": 0.009045995771884918, | |
| "eval_runtime": 3.8812, | |
| "eval_samples_per_second": 75.492, | |
| "eval_steps_per_second": 37.875, | |
| "step": 3234 | |
| }, | |
| { | |
| "epoch": 22.040816326530614, | |
| "grad_norm": 1.8699299097061157, | |
| "learning_rate": 5.931972789115647e-06, | |
| "loss": 0.0095, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 22.108843537414966, | |
| "grad_norm": 0.028658084571361542, | |
| "learning_rate": 5.795918367346939e-06, | |
| "loss": 0.002, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 22.17687074829932, | |
| "grad_norm": 0.11185970157384872, | |
| "learning_rate": 5.659863945578232e-06, | |
| "loss": 0.0139, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 22.244897959183675, | |
| "grad_norm": 0.08078885078430176, | |
| "learning_rate": 5.523809523809524e-06, | |
| "loss": 0.0093, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 22.312925170068027, | |
| "grad_norm": 0.033784542232751846, | |
| "learning_rate": 5.387755102040816e-06, | |
| "loss": 0.0035, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 22.38095238095238, | |
| "grad_norm": 0.04999591037631035, | |
| "learning_rate": 5.251700680272109e-06, | |
| "loss": 0.0158, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 22.448979591836736, | |
| "grad_norm": 0.47869572043418884, | |
| "learning_rate": 5.1156462585034015e-06, | |
| "loss": 0.002, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 22.517006802721088, | |
| "grad_norm": 0.5939333438873291, | |
| "learning_rate": 4.9795918367346945e-06, | |
| "loss": 0.0025, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 22.58503401360544, | |
| "grad_norm": 0.02597820572555065, | |
| "learning_rate": 4.843537414965987e-06, | |
| "loss": 0.017, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 22.653061224489797, | |
| "grad_norm": 0.06343343108892441, | |
| "learning_rate": 4.707482993197279e-06, | |
| "loss": 0.0414, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 22.72108843537415, | |
| "grad_norm": 0.40962278842926025, | |
| "learning_rate": 4.571428571428572e-06, | |
| "loss": 0.0114, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 22.7891156462585, | |
| "grad_norm": 0.29765334725379944, | |
| "learning_rate": 4.435374149659864e-06, | |
| "loss": 0.0023, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 22.857142857142858, | |
| "grad_norm": 0.17841386795043945, | |
| "learning_rate": 4.299319727891156e-06, | |
| "loss": 0.0016, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 22.92517006802721, | |
| "grad_norm": 0.44667163491249084, | |
| "learning_rate": 4.163265306122449e-06, | |
| "loss": 0.0021, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 22.993197278911566, | |
| "grad_norm": 0.19756975769996643, | |
| "learning_rate": 4.0272108843537416e-06, | |
| "loss": 0.0073, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_cer": 0.23737373737373738, | |
| "eval_loss": 0.008361349813640118, | |
| "eval_runtime": 3.9088, | |
| "eval_samples_per_second": 74.958, | |
| "eval_steps_per_second": 37.607, | |
| "step": 3381 | |
| }, | |
| { | |
| "epoch": 23.06122448979592, | |
| "grad_norm": 4.8710503578186035, | |
| "learning_rate": 3.891156462585034e-06, | |
| "loss": 0.0383, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 23.12925170068027, | |
| "grad_norm": 0.05327881500124931, | |
| "learning_rate": 3.7551020408163268e-06, | |
| "loss": 0.0026, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 23.197278911564627, | |
| "grad_norm": 0.4828534722328186, | |
| "learning_rate": 3.619047619047619e-06, | |
| "loss": 0.0024, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 23.26530612244898, | |
| "grad_norm": 0.03481818363070488, | |
| "learning_rate": 3.4829931972789116e-06, | |
| "loss": 0.0068, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 23.333333333333332, | |
| "grad_norm": 0.19071730971336365, | |
| "learning_rate": 3.346938775510204e-06, | |
| "loss": 0.0171, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 23.401360544217688, | |
| "grad_norm": 0.08115135878324509, | |
| "learning_rate": 3.210884353741497e-06, | |
| "loss": 0.0015, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 23.46938775510204, | |
| "grad_norm": 0.13966763019561768, | |
| "learning_rate": 3.074829931972789e-06, | |
| "loss": 0.0309, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 23.537414965986393, | |
| "grad_norm": 0.08014482259750366, | |
| "learning_rate": 2.9387755102040816e-06, | |
| "loss": 0.0028, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 23.60544217687075, | |
| "grad_norm": 0.7266091108322144, | |
| "learning_rate": 2.8027210884353742e-06, | |
| "loss": 0.0019, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 23.6734693877551, | |
| "grad_norm": 0.763943076133728, | |
| "learning_rate": 2.666666666666667e-06, | |
| "loss": 0.0029, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 23.741496598639454, | |
| "grad_norm": 0.20366428792476654, | |
| "learning_rate": 2.5306122448979594e-06, | |
| "loss": 0.0077, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 23.80952380952381, | |
| "grad_norm": 0.06424231082201004, | |
| "learning_rate": 2.394557823129252e-06, | |
| "loss": 0.0048, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 23.877551020408163, | |
| "grad_norm": 0.0471784844994545, | |
| "learning_rate": 2.2585034013605447e-06, | |
| "loss": 0.0027, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 23.94557823129252, | |
| "grad_norm": 0.6746675968170166, | |
| "learning_rate": 2.122448979591837e-06, | |
| "loss": 0.0022, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_cer": 0.2702020202020202, | |
| "eval_loss": 0.008024842478334904, | |
| "eval_runtime": 3.9491, | |
| "eval_samples_per_second": 74.195, | |
| "eval_steps_per_second": 37.224, | |
| "step": 3528 | |
| }, | |
| { | |
| "epoch": 24.01360544217687, | |
| "grad_norm": 0.248748779296875, | |
| "learning_rate": 1.9863945578231295e-06, | |
| "loss": 0.0013, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 24.081632653061224, | |
| "grad_norm": 5.284445762634277, | |
| "learning_rate": 1.8503401360544219e-06, | |
| "loss": 0.0097, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 24.14965986394558, | |
| "grad_norm": 0.07442311942577362, | |
| "learning_rate": 1.7142857142857145e-06, | |
| "loss": 0.011, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 24.217687074829932, | |
| "grad_norm": 0.1644800305366516, | |
| "learning_rate": 1.5782312925170069e-06, | |
| "loss": 0.0014, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 24.285714285714285, | |
| "grad_norm": 0.05962975695729256, | |
| "learning_rate": 1.4421768707482995e-06, | |
| "loss": 0.0018, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 24.35374149659864, | |
| "grad_norm": 0.039682451635599136, | |
| "learning_rate": 1.306122448979592e-06, | |
| "loss": 0.0025, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 24.421768707482993, | |
| "grad_norm": 0.0817071795463562, | |
| "learning_rate": 1.1700680272108845e-06, | |
| "loss": 0.0032, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 24.489795918367346, | |
| "grad_norm": 0.07747264206409454, | |
| "learning_rate": 1.034013605442177e-06, | |
| "loss": 0.015, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 24.5578231292517, | |
| "grad_norm": 0.04547140747308731, | |
| "learning_rate": 8.979591836734694e-07, | |
| "loss": 0.0015, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 24.625850340136054, | |
| "grad_norm": 0.04727374389767647, | |
| "learning_rate": 7.619047619047619e-07, | |
| "loss": 0.0023, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 24.693877551020407, | |
| "grad_norm": 0.14271779358386993, | |
| "learning_rate": 6.258503401360544e-07, | |
| "loss": 0.0025, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 24.761904761904763, | |
| "grad_norm": 0.4749351441860199, | |
| "learning_rate": 4.897959183673469e-07, | |
| "loss": 0.0026, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 24.829931972789115, | |
| "grad_norm": 10.92783260345459, | |
| "learning_rate": 3.537414965986395e-07, | |
| "loss": 0.0217, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 24.897959183673468, | |
| "grad_norm": 0.14225248992443085, | |
| "learning_rate": 2.1768707482993197e-07, | |
| "loss": 0.0016, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 24.965986394557824, | |
| "grad_norm": 0.06080883741378784, | |
| "learning_rate": 8.16326530612245e-08, | |
| "loss": 0.0391, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_cer": 0.26515151515151514, | |
| "eval_loss": 0.007971594110131264, | |
| "eval_runtime": 3.9963, | |
| "eval_samples_per_second": 73.317, | |
| "eval_steps_per_second": 36.784, | |
| "step": 3675 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3675, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 25, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.759922930951168e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |