{
"best_global_step": 2793,
"best_metric": 0.2159090909090909,
"best_model_checkpoint": "./ocr_model_output/checkpoint-2793",
"epoch": 25.0,
"eval_steps": 500,
"global_step": 3675,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06802721088435375,
"grad_norm": 65.23433685302734,
"learning_rate": 4.9877551020408165e-05,
"loss": 8.6208,
"step": 10
},
{
"epoch": 0.1360544217687075,
"grad_norm": 70.44102478027344,
"learning_rate": 4.974149659863946e-05,
"loss": 4.5903,
"step": 20
},
{
"epoch": 0.20408163265306123,
"grad_norm": 35.348358154296875,
"learning_rate": 4.960544217687075e-05,
"loss": 1.9034,
"step": 30
},
{
"epoch": 0.272108843537415,
"grad_norm": 22.223546981811523,
"learning_rate": 4.9469387755102045e-05,
"loss": 1.3092,
"step": 40
},
{
"epoch": 0.3401360544217687,
"grad_norm": 35.46092987060547,
"learning_rate": 4.933333333333334e-05,
"loss": 1.0594,
"step": 50
},
{
"epoch": 0.40816326530612246,
"grad_norm": 22.021345138549805,
"learning_rate": 4.9197278911564624e-05,
"loss": 1.0721,
"step": 60
},
{
"epoch": 0.47619047619047616,
"grad_norm": 16.65888214111328,
"learning_rate": 4.9061224489795924e-05,
"loss": 0.825,
"step": 70
},
{
"epoch": 0.54421768707483,
"grad_norm": 13.323294639587402,
"learning_rate": 4.892517006802722e-05,
"loss": 0.8588,
"step": 80
},
{
"epoch": 0.6122448979591837,
"grad_norm": 16.98369598388672,
"learning_rate": 4.87891156462585e-05,
"loss": 0.6796,
"step": 90
},
{
"epoch": 0.6802721088435374,
"grad_norm": 14.879609107971191,
"learning_rate": 4.8653061224489796e-05,
"loss": 0.715,
"step": 100
},
{
"epoch": 0.7482993197278912,
"grad_norm": 13.820905685424805,
"learning_rate": 4.8517006802721096e-05,
"loss": 0.6773,
"step": 110
},
{
"epoch": 0.8163265306122449,
"grad_norm": 14.802565574645996,
"learning_rate": 4.838095238095238e-05,
"loss": 0.6992,
"step": 120
},
{
"epoch": 0.8843537414965986,
"grad_norm": 15.70506763458252,
"learning_rate": 4.8244897959183675e-05,
"loss": 0.7254,
"step": 130
},
{
"epoch": 0.9523809523809523,
"grad_norm": 16.910625457763672,
"learning_rate": 4.810884353741497e-05,
"loss": 0.8015,
"step": 140
},
{
"epoch": 1.0,
"eval_cer": 0.8118686868686869,
"eval_loss": 0.8795642256736755,
"eval_runtime": 3.4405,
"eval_samples_per_second": 85.161,
"eval_steps_per_second": 42.726,
"step": 147
},
{
"epoch": 1.0204081632653061,
"grad_norm": 12.733670234680176,
"learning_rate": 4.797278911564626e-05,
"loss": 0.6511,
"step": 150
},
{
"epoch": 1.08843537414966,
"grad_norm": 13.70749568939209,
"learning_rate": 4.7836734693877554e-05,
"loss": 0.7377,
"step": 160
},
{
"epoch": 1.1564625850340136,
"grad_norm": 15.44306468963623,
"learning_rate": 4.770068027210885e-05,
"loss": 0.6797,
"step": 170
},
{
"epoch": 1.2244897959183674,
"grad_norm": 19.31881332397461,
"learning_rate": 4.756462585034014e-05,
"loss": 0.6719,
"step": 180
},
{
"epoch": 1.2925170068027212,
"grad_norm": 12.049680709838867,
"learning_rate": 4.742857142857143e-05,
"loss": 0.6491,
"step": 190
},
{
"epoch": 1.3605442176870748,
"grad_norm": 14.28982925415039,
"learning_rate": 4.729251700680272e-05,
"loss": 0.7678,
"step": 200
},
{
"epoch": 1.4285714285714286,
"grad_norm": 25.51521110534668,
"learning_rate": 4.715646258503402e-05,
"loss": 0.7534,
"step": 210
},
{
"epoch": 1.4965986394557822,
"grad_norm": 14.158224105834961,
"learning_rate": 4.7020408163265306e-05,
"loss": 0.5653,
"step": 220
},
{
"epoch": 1.564625850340136,
"grad_norm": 10.563309669494629,
"learning_rate": 4.68843537414966e-05,
"loss": 0.6038,
"step": 230
},
{
"epoch": 1.6326530612244898,
"grad_norm": 14.436025619506836,
"learning_rate": 4.67482993197279e-05,
"loss": 0.5794,
"step": 240
},
{
"epoch": 1.7006802721088436,
"grad_norm": 14.293322563171387,
"learning_rate": 4.6612244897959185e-05,
"loss": 0.6102,
"step": 250
},
{
"epoch": 1.7687074829931972,
"grad_norm": 6.588962078094482,
"learning_rate": 4.647619047619048e-05,
"loss": 0.6853,
"step": 260
},
{
"epoch": 1.836734693877551,
"grad_norm": 16.505081176757812,
"learning_rate": 4.634013605442177e-05,
"loss": 0.554,
"step": 270
},
{
"epoch": 1.9047619047619047,
"grad_norm": 16.433975219726562,
"learning_rate": 4.6204081632653064e-05,
"loss": 0.5481,
"step": 280
},
{
"epoch": 1.9727891156462585,
"grad_norm": 14.68355655670166,
"learning_rate": 4.606802721088436e-05,
"loss": 0.4728,
"step": 290
},
{
"epoch": 2.0,
"eval_cer": 0.571969696969697,
"eval_loss": 0.6962071061134338,
"eval_runtime": 3.8738,
"eval_samples_per_second": 75.637,
"eval_steps_per_second": 37.948,
"step": 294
},
{
"epoch": 2.0408163265306123,
"grad_norm": 9.795095443725586,
"learning_rate": 4.593197278911564e-05,
"loss": 0.4635,
"step": 300
},
{
"epoch": 2.108843537414966,
"grad_norm": 17.107149124145508,
"learning_rate": 4.579591836734694e-05,
"loss": 0.4794,
"step": 310
},
{
"epoch": 2.17687074829932,
"grad_norm": 11.940792083740234,
"learning_rate": 4.5659863945578236e-05,
"loss": 0.677,
"step": 320
},
{
"epoch": 2.2448979591836733,
"grad_norm": 8.351872444152832,
"learning_rate": 4.552380952380952e-05,
"loss": 0.6732,
"step": 330
},
{
"epoch": 2.312925170068027,
"grad_norm": 2.6622140407562256,
"learning_rate": 4.538775510204082e-05,
"loss": 0.3907,
"step": 340
},
{
"epoch": 2.380952380952381,
"grad_norm": 10.555298805236816,
"learning_rate": 4.5251700680272115e-05,
"loss": 0.5443,
"step": 350
},
{
"epoch": 2.4489795918367347,
"grad_norm": 7.857567310333252,
"learning_rate": 4.51156462585034e-05,
"loss": 0.5688,
"step": 360
},
{
"epoch": 2.5170068027210886,
"grad_norm": 15.215005874633789,
"learning_rate": 4.4979591836734694e-05,
"loss": 0.5548,
"step": 370
},
{
"epoch": 2.5850340136054424,
"grad_norm": 8.438752174377441,
"learning_rate": 4.484353741496599e-05,
"loss": 0.3828,
"step": 380
},
{
"epoch": 2.6530612244897958,
"grad_norm": 9.824787139892578,
"learning_rate": 4.470748299319728e-05,
"loss": 0.3945,
"step": 390
},
{
"epoch": 2.7210884353741496,
"grad_norm": 13.708135604858398,
"learning_rate": 4.4571428571428574e-05,
"loss": 0.3751,
"step": 400
},
{
"epoch": 2.7891156462585034,
"grad_norm": 10.32359790802002,
"learning_rate": 4.4435374149659867e-05,
"loss": 0.4604,
"step": 410
},
{
"epoch": 2.857142857142857,
"grad_norm": 4.256906509399414,
"learning_rate": 4.429931972789116e-05,
"loss": 0.4558,
"step": 420
},
{
"epoch": 2.925170068027211,
"grad_norm": 12.131647109985352,
"learning_rate": 4.416326530612245e-05,
"loss": 0.4292,
"step": 430
},
{
"epoch": 2.9931972789115644,
"grad_norm": 14.156941413879395,
"learning_rate": 4.4027210884353746e-05,
"loss": 0.3713,
"step": 440
},
{
"epoch": 3.0,
"eval_cer": 0.5454545454545454,
"eval_loss": 0.5351251363754272,
"eval_runtime": 3.9015,
"eval_samples_per_second": 75.099,
"eval_steps_per_second": 37.678,
"step": 441
},
{
"epoch": 3.061224489795918,
"grad_norm": 5.429965496063232,
"learning_rate": 4.389115646258504e-05,
"loss": 0.6159,
"step": 450
},
{
"epoch": 3.129251700680272,
"grad_norm": 11.695433616638184,
"learning_rate": 4.3755102040816325e-05,
"loss": 0.4156,
"step": 460
},
{
"epoch": 3.197278911564626,
"grad_norm": 14.333072662353516,
"learning_rate": 4.361904761904762e-05,
"loss": 0.4372,
"step": 470
},
{
"epoch": 3.2653061224489797,
"grad_norm": 14.760481834411621,
"learning_rate": 4.348299319727892e-05,
"loss": 0.4279,
"step": 480
},
{
"epoch": 3.3333333333333335,
"grad_norm": 12.052332878112793,
"learning_rate": 4.3346938775510204e-05,
"loss": 0.3854,
"step": 490
},
{
"epoch": 3.4013605442176873,
"grad_norm": 14.331747055053711,
"learning_rate": 4.32108843537415e-05,
"loss": 0.3839,
"step": 500
},
{
"epoch": 3.4693877551020407,
"grad_norm": 21.911863327026367,
"learning_rate": 4.307482993197279e-05,
"loss": 0.4815,
"step": 510
},
{
"epoch": 3.5374149659863945,
"grad_norm": 6.438183307647705,
"learning_rate": 4.293877551020408e-05,
"loss": 0.2597,
"step": 520
},
{
"epoch": 3.6054421768707483,
"grad_norm": 13.568741798400879,
"learning_rate": 4.2802721088435376e-05,
"loss": 0.3735,
"step": 530
},
{
"epoch": 3.673469387755102,
"grad_norm": 0.18484297394752502,
"learning_rate": 4.266666666666667e-05,
"loss": 0.3343,
"step": 540
},
{
"epoch": 3.741496598639456,
"grad_norm": 0.7180817127227783,
"learning_rate": 4.253061224489796e-05,
"loss": 0.1867,
"step": 550
},
{
"epoch": 3.8095238095238093,
"grad_norm": 0.15185348689556122,
"learning_rate": 4.2394557823129255e-05,
"loss": 0.4185,
"step": 560
},
{
"epoch": 3.877551020408163,
"grad_norm": 10.095551490783691,
"learning_rate": 4.225850340136054e-05,
"loss": 0.1698,
"step": 570
},
{
"epoch": 3.945578231292517,
"grad_norm": 15.650616645812988,
"learning_rate": 4.212244897959184e-05,
"loss": 0.4307,
"step": 580
},
{
"epoch": 4.0,
"eval_cer": 0.4911616161616162,
"eval_loss": 0.4221580922603607,
"eval_runtime": 3.8592,
"eval_samples_per_second": 75.922,
"eval_steps_per_second": 38.091,
"step": 588
},
{
"epoch": 4.01360544217687,
"grad_norm": 8.345415115356445,
"learning_rate": 4.1986394557823134e-05,
"loss": 0.2111,
"step": 590
},
{
"epoch": 4.081632653061225,
"grad_norm": 4.688040256500244,
"learning_rate": 4.185034013605442e-05,
"loss": 0.3211,
"step": 600
},
{
"epoch": 4.149659863945578,
"grad_norm": 19.075292587280273,
"learning_rate": 4.1714285714285714e-05,
"loss": 0.3103,
"step": 610
},
{
"epoch": 4.217687074829932,
"grad_norm": 7.607833385467529,
"learning_rate": 4.1578231292517014e-05,
"loss": 0.2308,
"step": 620
},
{
"epoch": 4.285714285714286,
"grad_norm": 8.69675350189209,
"learning_rate": 4.14421768707483e-05,
"loss": 0.2981,
"step": 630
},
{
"epoch": 4.35374149659864,
"grad_norm": 2.994023323059082,
"learning_rate": 4.130612244897959e-05,
"loss": 0.2922,
"step": 640
},
{
"epoch": 4.421768707482993,
"grad_norm": 6.535342693328857,
"learning_rate": 4.1170068027210886e-05,
"loss": 0.3441,
"step": 650
},
{
"epoch": 4.489795918367347,
"grad_norm": 0.3404290974140167,
"learning_rate": 4.103401360544218e-05,
"loss": 0.3523,
"step": 660
},
{
"epoch": 4.557823129251701,
"grad_norm": 10.70188045501709,
"learning_rate": 4.089795918367347e-05,
"loss": 0.4148,
"step": 670
},
{
"epoch": 4.625850340136054,
"grad_norm": 14.900300979614258,
"learning_rate": 4.0761904761904765e-05,
"loss": 0.4041,
"step": 680
},
{
"epoch": 4.6938775510204085,
"grad_norm": 14.758822441101074,
"learning_rate": 4.062585034013606e-05,
"loss": 0.2453,
"step": 690
},
{
"epoch": 4.761904761904762,
"grad_norm": 0.7187572717666626,
"learning_rate": 4.048979591836735e-05,
"loss": 0.3703,
"step": 700
},
{
"epoch": 4.829931972789115,
"grad_norm": 9.603391647338867,
"learning_rate": 4.035374149659864e-05,
"loss": 0.3354,
"step": 710
},
{
"epoch": 4.8979591836734695,
"grad_norm": 1.358739972114563,
"learning_rate": 4.021768707482994e-05,
"loss": 0.3297,
"step": 720
},
{
"epoch": 4.965986394557823,
"grad_norm": 0.37522122263908386,
"learning_rate": 4.008163265306122e-05,
"loss": 0.2072,
"step": 730
},
{
"epoch": 5.0,
"eval_cer": 0.5921717171717171,
"eval_loss": 0.358783096075058,
"eval_runtime": 3.5411,
"eval_samples_per_second": 82.742,
"eval_steps_per_second": 41.512,
"step": 735
},
{
"epoch": 5.034013605442177,
"grad_norm": 3.8917222023010254,
"learning_rate": 3.9945578231292516e-05,
"loss": 0.2488,
"step": 740
},
{
"epoch": 5.1020408163265305,
"grad_norm": 16.159465789794922,
"learning_rate": 3.9809523809523816e-05,
"loss": 0.3769,
"step": 750
},
{
"epoch": 5.170068027210885,
"grad_norm": 17.226072311401367,
"learning_rate": 3.96734693877551e-05,
"loss": 0.2559,
"step": 760
},
{
"epoch": 5.238095238095238,
"grad_norm": 0.8567410707473755,
"learning_rate": 3.9537414965986396e-05,
"loss": 0.248,
"step": 770
},
{
"epoch": 5.3061224489795915,
"grad_norm": 1.5087652206420898,
"learning_rate": 3.940136054421769e-05,
"loss": 0.2737,
"step": 780
},
{
"epoch": 5.374149659863946,
"grad_norm": 3.331455945968628,
"learning_rate": 3.926530612244898e-05,
"loss": 0.1665,
"step": 790
},
{
"epoch": 5.442176870748299,
"grad_norm": 11.933197021484375,
"learning_rate": 3.9129251700680275e-05,
"loss": 0.322,
"step": 800
},
{
"epoch": 5.510204081632653,
"grad_norm": 13.032449722290039,
"learning_rate": 3.899319727891156e-05,
"loss": 0.239,
"step": 810
},
{
"epoch": 5.578231292517007,
"grad_norm": 5.0442047119140625,
"learning_rate": 3.885714285714286e-05,
"loss": 0.157,
"step": 820
},
{
"epoch": 5.646258503401361,
"grad_norm": 1.6076925992965698,
"learning_rate": 3.8721088435374154e-05,
"loss": 0.1926,
"step": 830
},
{
"epoch": 5.714285714285714,
"grad_norm": 22.085569381713867,
"learning_rate": 3.858503401360544e-05,
"loss": 0.3699,
"step": 840
},
{
"epoch": 5.782312925170068,
"grad_norm": 10.831768989562988,
"learning_rate": 3.844897959183674e-05,
"loss": 0.291,
"step": 850
},
{
"epoch": 5.850340136054422,
"grad_norm": 0.5946142077445984,
"learning_rate": 3.831292517006803e-05,
"loss": 0.2154,
"step": 860
},
{
"epoch": 5.918367346938775,
"grad_norm": 1.5171136856079102,
"learning_rate": 3.817687074829932e-05,
"loss": 0.212,
"step": 870
},
{
"epoch": 5.986394557823129,
"grad_norm": 18.592945098876953,
"learning_rate": 3.804081632653061e-05,
"loss": 0.2963,
"step": 880
},
{
"epoch": 6.0,
"eval_cer": 0.5151515151515151,
"eval_loss": 0.2578863501548767,
"eval_runtime": 3.7328,
"eval_samples_per_second": 78.493,
"eval_steps_per_second": 39.38,
"step": 882
},
{
"epoch": 6.054421768707483,
"grad_norm": 0.10681638866662979,
"learning_rate": 3.7904761904761905e-05,
"loss": 0.1761,
"step": 890
},
{
"epoch": 6.122448979591836,
"grad_norm": 7.661993503570557,
"learning_rate": 3.77687074829932e-05,
"loss": 0.2027,
"step": 900
},
{
"epoch": 6.190476190476191,
"grad_norm": 12.70997428894043,
"learning_rate": 3.763265306122449e-05,
"loss": 0.1828,
"step": 910
},
{
"epoch": 6.258503401360544,
"grad_norm": 0.06931126117706299,
"learning_rate": 3.7496598639455784e-05,
"loss": 0.1242,
"step": 920
},
{
"epoch": 6.326530612244898,
"grad_norm": 17.21261215209961,
"learning_rate": 3.736054421768708e-05,
"loss": 0.3785,
"step": 930
},
{
"epoch": 6.394557823129252,
"grad_norm": 0.10957188904285431,
"learning_rate": 3.722448979591837e-05,
"loss": 0.1184,
"step": 940
},
{
"epoch": 6.462585034013605,
"grad_norm": 14.104228019714355,
"learning_rate": 3.7088435374149663e-05,
"loss": 0.2253,
"step": 950
},
{
"epoch": 6.530612244897959,
"grad_norm": 0.2823491394519806,
"learning_rate": 3.6952380952380956e-05,
"loss": 0.1337,
"step": 960
},
{
"epoch": 6.598639455782313,
"grad_norm": 0.17527176439762115,
"learning_rate": 3.681632653061224e-05,
"loss": 0.1745,
"step": 970
},
{
"epoch": 6.666666666666667,
"grad_norm": 12.178832054138184,
"learning_rate": 3.6680272108843536e-05,
"loss": 0.3322,
"step": 980
},
{
"epoch": 6.73469387755102,
"grad_norm": 18.27240753173828,
"learning_rate": 3.6544217687074836e-05,
"loss": 0.245,
"step": 990
},
{
"epoch": 6.802721088435375,
"grad_norm": 8.510261535644531,
"learning_rate": 3.640816326530612e-05,
"loss": 0.2928,
"step": 1000
},
{
"epoch": 6.870748299319728,
"grad_norm": 8.47603988647461,
"learning_rate": 3.6272108843537415e-05,
"loss": 0.2048,
"step": 1010
},
{
"epoch": 6.938775510204081,
"grad_norm": 9.21681022644043,
"learning_rate": 3.6136054421768715e-05,
"loss": 0.1416,
"step": 1020
},
{
"epoch": 7.0,
"eval_cer": 0.4166666666666667,
"eval_loss": 0.1848345547914505,
"eval_runtime": 4.0307,
"eval_samples_per_second": 72.692,
"eval_steps_per_second": 36.47,
"step": 1029
},
{
"epoch": 7.006802721088436,
"grad_norm": 8.593953132629395,
"learning_rate": 3.6e-05,
"loss": 0.1307,
"step": 1030
},
{
"epoch": 7.074829931972789,
"grad_norm": 6.022789001464844,
"learning_rate": 3.5863945578231294e-05,
"loss": 0.1292,
"step": 1040
},
{
"epoch": 7.142857142857143,
"grad_norm": 10.953206062316895,
"learning_rate": 3.572789115646259e-05,
"loss": 0.0853,
"step": 1050
},
{
"epoch": 7.210884353741497,
"grad_norm": 5.332366943359375,
"learning_rate": 3.559183673469388e-05,
"loss": 0.112,
"step": 1060
},
{
"epoch": 7.27891156462585,
"grad_norm": 12.159287452697754,
"learning_rate": 3.545578231292517e-05,
"loss": 0.1007,
"step": 1070
},
{
"epoch": 7.346938775510204,
"grad_norm": 0.3854842483997345,
"learning_rate": 3.531972789115646e-05,
"loss": 0.2318,
"step": 1080
},
{
"epoch": 7.414965986394558,
"grad_norm": 11.151751518249512,
"learning_rate": 3.518367346938776e-05,
"loss": 0.1768,
"step": 1090
},
{
"epoch": 7.482993197278912,
"grad_norm": 20.415531158447266,
"learning_rate": 3.504761904761905e-05,
"loss": 0.1839,
"step": 1100
},
{
"epoch": 7.551020408163265,
"grad_norm": 0.7234401106834412,
"learning_rate": 3.491156462585034e-05,
"loss": 0.2944,
"step": 1110
},
{
"epoch": 7.619047619047619,
"grad_norm": 0.522950291633606,
"learning_rate": 3.477551020408164e-05,
"loss": 0.087,
"step": 1120
},
{
"epoch": 7.687074829931973,
"grad_norm": 8.606940269470215,
"learning_rate": 3.463945578231293e-05,
"loss": 0.1253,
"step": 1130
},
{
"epoch": 7.755102040816326,
"grad_norm": 22.922000885009766,
"learning_rate": 3.450340136054422e-05,
"loss": 0.2799,
"step": 1140
},
{
"epoch": 7.8231292517006805,
"grad_norm": 21.035017013549805,
"learning_rate": 3.436734693877551e-05,
"loss": 0.1441,
"step": 1150
},
{
"epoch": 7.891156462585034,
"grad_norm": 5.825491905212402,
"learning_rate": 3.4231292517006804e-05,
"loss": 0.0218,
"step": 1160
},
{
"epoch": 7.959183673469388,
"grad_norm": 12.801454544067383,
"learning_rate": 3.40952380952381e-05,
"loss": 0.2319,
"step": 1170
},
{
"epoch": 8.0,
"eval_cer": 0.42424242424242425,
"eval_loss": 0.13813678920269012,
"eval_runtime": 4.039,
"eval_samples_per_second": 72.542,
"eval_steps_per_second": 36.395,
"step": 1176
},
{
"epoch": 8.02721088435374,
"grad_norm": 1.3456509113311768,
"learning_rate": 3.395918367346939e-05,
"loss": 0.1622,
"step": 1180
},
{
"epoch": 8.095238095238095,
"grad_norm": 0.21156376600265503,
"learning_rate": 3.382312925170068e-05,
"loss": 0.0447,
"step": 1190
},
{
"epoch": 8.16326530612245,
"grad_norm": 0.08529641479253769,
"learning_rate": 3.3687074829931976e-05,
"loss": 0.3241,
"step": 1200
},
{
"epoch": 8.231292517006803,
"grad_norm": 8.225408554077148,
"learning_rate": 3.355102040816327e-05,
"loss": 0.1136,
"step": 1210
},
{
"epoch": 8.299319727891156,
"grad_norm": 0.0486108660697937,
"learning_rate": 3.341496598639456e-05,
"loss": 0.2109,
"step": 1220
},
{
"epoch": 8.36734693877551,
"grad_norm": 0.3150612413883209,
"learning_rate": 3.3278911564625855e-05,
"loss": 0.0678,
"step": 1230
},
{
"epoch": 8.435374149659864,
"grad_norm": 2.7837414741516113,
"learning_rate": 3.314285714285714e-05,
"loss": 0.0582,
"step": 1240
},
{
"epoch": 8.503401360544217,
"grad_norm": 0.08137867599725723,
"learning_rate": 3.3006802721088434e-05,
"loss": 0.2023,
"step": 1250
},
{
"epoch": 8.571428571428571,
"grad_norm": 0.12031784653663635,
"learning_rate": 3.2870748299319734e-05,
"loss": 0.2334,
"step": 1260
},
{
"epoch": 8.639455782312925,
"grad_norm": 0.7248769998550415,
"learning_rate": 3.273469387755102e-05,
"loss": 0.0562,
"step": 1270
},
{
"epoch": 8.70748299319728,
"grad_norm": 9.878806114196777,
"learning_rate": 3.259863945578231e-05,
"loss": 0.1711,
"step": 1280
},
{
"epoch": 8.775510204081632,
"grad_norm": 11.397530555725098,
"learning_rate": 3.2462585034013606e-05,
"loss": 0.2418,
"step": 1290
},
{
"epoch": 8.843537414965986,
"grad_norm": 13.183833122253418,
"learning_rate": 3.23265306122449e-05,
"loss": 0.056,
"step": 1300
},
{
"epoch": 8.91156462585034,
"grad_norm": 0.8917256593704224,
"learning_rate": 3.219047619047619e-05,
"loss": 0.0436,
"step": 1310
},
{
"epoch": 8.979591836734693,
"grad_norm": 7.448833465576172,
"learning_rate": 3.2054421768707485e-05,
"loss": 0.0476,
"step": 1320
},
{
"epoch": 9.0,
"eval_cer": 0.37752525252525254,
"eval_loss": 0.11764977127313614,
"eval_runtime": 3.9588,
"eval_samples_per_second": 74.013,
"eval_steps_per_second": 37.133,
"step": 1323
},
{
"epoch": 9.047619047619047,
"grad_norm": 7.476083278656006,
"learning_rate": 3.191836734693878e-05,
"loss": 0.098,
"step": 1330
},
{
"epoch": 9.115646258503402,
"grad_norm": 0.2827729880809784,
"learning_rate": 3.178231292517007e-05,
"loss": 0.1332,
"step": 1340
},
{
"epoch": 9.183673469387756,
"grad_norm": 1.02791166305542,
"learning_rate": 3.164625850340136e-05,
"loss": 0.0211,
"step": 1350
},
{
"epoch": 9.251700680272108,
"grad_norm": 14.376386642456055,
"learning_rate": 3.151020408163266e-05,
"loss": 0.1141,
"step": 1360
},
{
"epoch": 9.319727891156463,
"grad_norm": 0.9921436309814453,
"learning_rate": 3.137414965986395e-05,
"loss": 0.0931,
"step": 1370
},
{
"epoch": 9.387755102040817,
"grad_norm": 0.1799956113100052,
"learning_rate": 3.123809523809524e-05,
"loss": 0.0095,
"step": 1380
},
{
"epoch": 9.45578231292517,
"grad_norm": 0.05473727360367775,
"learning_rate": 3.110204081632653e-05,
"loss": 0.1565,
"step": 1390
},
{
"epoch": 9.523809523809524,
"grad_norm": 0.5269390344619751,
"learning_rate": 3.096598639455782e-05,
"loss": 0.0674,
"step": 1400
},
{
"epoch": 9.591836734693878,
"grad_norm": 2.706407070159912,
"learning_rate": 3.0829931972789116e-05,
"loss": 0.0966,
"step": 1410
},
{
"epoch": 9.65986394557823,
"grad_norm": 0.16416242718696594,
"learning_rate": 3.069387755102041e-05,
"loss": 0.0667,
"step": 1420
},
{
"epoch": 9.727891156462585,
"grad_norm": 15.035467147827148,
"learning_rate": 3.05578231292517e-05,
"loss": 0.1272,
"step": 1430
},
{
"epoch": 9.795918367346939,
"grad_norm": 0.19053949415683746,
"learning_rate": 3.0421768707482995e-05,
"loss": 0.2847,
"step": 1440
},
{
"epoch": 9.863945578231293,
"grad_norm": 0.12774477899074554,
"learning_rate": 3.0285714285714288e-05,
"loss": 0.1103,
"step": 1450
},
{
"epoch": 9.931972789115646,
"grad_norm": 0.474401593208313,
"learning_rate": 3.0149659863945578e-05,
"loss": 0.0972,
"step": 1460
},
{
"epoch": 10.0,
"grad_norm": 0.0236662644892931,
"learning_rate": 3.0013605442176874e-05,
"loss": 0.0626,
"step": 1470
},
{
"epoch": 10.0,
"eval_cer": 0.4305555555555556,
"eval_loss": 0.09693024307489395,
"eval_runtime": 4.1253,
"eval_samples_per_second": 71.025,
"eval_steps_per_second": 35.633,
"step": 1470
},
{
"epoch": 10.068027210884354,
"grad_norm": 0.03369349241256714,
"learning_rate": 2.987755102040816e-05,
"loss": 0.0542,
"step": 1480
},
{
"epoch": 10.136054421768707,
"grad_norm": 0.2802339792251587,
"learning_rate": 2.9741496598639457e-05,
"loss": 0.1356,
"step": 1490
},
{
"epoch": 10.204081632653061,
"grad_norm": 0.48600488901138306,
"learning_rate": 2.960544217687075e-05,
"loss": 0.1529,
"step": 1500
},
{
"epoch": 10.272108843537415,
"grad_norm": 2.2791683673858643,
"learning_rate": 2.946938775510204e-05,
"loss": 0.1232,
"step": 1510
},
{
"epoch": 10.34013605442177,
"grad_norm": 6.162140369415283,
"learning_rate": 2.9333333333333336e-05,
"loss": 0.1948,
"step": 1520
},
{
"epoch": 10.408163265306122,
"grad_norm": 1.365488886833191,
"learning_rate": 2.919727891156463e-05,
"loss": 0.0417,
"step": 1530
},
{
"epoch": 10.476190476190476,
"grad_norm": 14.345423698425293,
"learning_rate": 2.906122448979592e-05,
"loss": 0.0859,
"step": 1540
},
{
"epoch": 10.54421768707483,
"grad_norm": 0.1597384810447693,
"learning_rate": 2.892517006802721e-05,
"loss": 0.0376,
"step": 1550
},
{
"epoch": 10.612244897959183,
"grad_norm": 0.24928878247737885,
"learning_rate": 2.87891156462585e-05,
"loss": 0.5477,
"step": 1560
},
{
"epoch": 10.680272108843537,
"grad_norm": 0.3629794418811798,
"learning_rate": 2.8653061224489798e-05,
"loss": 0.0102,
"step": 1570
},
{
"epoch": 10.748299319727892,
"grad_norm": 17.98668098449707,
"learning_rate": 2.851700680272109e-05,
"loss": 0.0821,
"step": 1580
},
{
"epoch": 10.816326530612244,
"grad_norm": 5.073668479919434,
"learning_rate": 2.838095238095238e-05,
"loss": 0.053,
"step": 1590
},
{
"epoch": 10.884353741496598,
"grad_norm": 12.528962135314941,
"learning_rate": 2.8244897959183673e-05,
"loss": 0.0463,
"step": 1600
},
{
"epoch": 10.952380952380953,
"grad_norm": 1.0358648300170898,
"learning_rate": 2.810884353741497e-05,
"loss": 0.0124,
"step": 1610
},
{
"epoch": 11.0,
"eval_cer": 0.39646464646464646,
"eval_loss": 0.08722148090600967,
"eval_runtime": 3.8388,
"eval_samples_per_second": 76.326,
"eval_steps_per_second": 38.293,
"step": 1617
},
{
"epoch": 11.020408163265307,
"grad_norm": 0.19600285589694977,
"learning_rate": 2.797278911564626e-05,
"loss": 0.118,
"step": 1620
},
{
"epoch": 11.08843537414966,
"grad_norm": 26.618637084960938,
"learning_rate": 2.7836734693877553e-05,
"loss": 0.3147,
"step": 1630
},
{
"epoch": 11.156462585034014,
"grad_norm": 0.05680645629763603,
"learning_rate": 2.7700680272108842e-05,
"loss": 0.2198,
"step": 1640
},
{
"epoch": 11.224489795918368,
"grad_norm": 13.787897109985352,
"learning_rate": 2.7564625850340135e-05,
"loss": 0.2091,
"step": 1650
},
{
"epoch": 11.29251700680272,
"grad_norm": 1.339880108833313,
"learning_rate": 2.742857142857143e-05,
"loss": 0.0221,
"step": 1660
},
{
"epoch": 11.360544217687075,
"grad_norm": 0.18231110274791718,
"learning_rate": 2.729251700680272e-05,
"loss": 0.0567,
"step": 1670
},
{
"epoch": 11.428571428571429,
"grad_norm": 0.15762682259082794,
"learning_rate": 2.7156462585034014e-05,
"loss": 0.0118,
"step": 1680
},
{
"epoch": 11.496598639455783,
"grad_norm": 0.19339053332805634,
"learning_rate": 2.702040816326531e-05,
"loss": 0.0082,
"step": 1690
},
{
"epoch": 11.564625850340136,
"grad_norm": 11.242050170898438,
"learning_rate": 2.6884353741496597e-05,
"loss": 0.0398,
"step": 1700
},
{
"epoch": 11.63265306122449,
"grad_norm": 0.104960598051548,
"learning_rate": 2.6748299319727893e-05,
"loss": 0.0745,
"step": 1710
},
{
"epoch": 11.700680272108844,
"grad_norm": 0.4059283137321472,
"learning_rate": 2.6612244897959187e-05,
"loss": 0.0854,
"step": 1720
},
{
"epoch": 11.768707482993197,
"grad_norm": 0.11370517313480377,
"learning_rate": 2.6476190476190476e-05,
"loss": 0.084,
"step": 1730
},
{
"epoch": 11.83673469387755,
"grad_norm": 6.6930365562438965,
"learning_rate": 2.6340136054421773e-05,
"loss": 0.082,
"step": 1740
},
{
"epoch": 11.904761904761905,
"grad_norm": 0.035495854914188385,
"learning_rate": 2.620408163265306e-05,
"loss": 0.0704,
"step": 1750
},
{
"epoch": 11.972789115646258,
"grad_norm": 0.19433455169200897,
"learning_rate": 2.6068027210884355e-05,
"loss": 0.0755,
"step": 1760
},
{
"epoch": 12.0,
"eval_cer": 0.4090909090909091,
"eval_loss": 0.06354419887065887,
"eval_runtime": 3.8826,
"eval_samples_per_second": 75.466,
"eval_steps_per_second": 37.862,
"step": 1764
},
{
"epoch": 12.040816326530612,
"grad_norm": 0.22618860006332397,
"learning_rate": 2.593197278911565e-05,
"loss": 0.0842,
"step": 1770
},
{
"epoch": 12.108843537414966,
"grad_norm": 0.2526053786277771,
"learning_rate": 2.5795918367346938e-05,
"loss": 0.0257,
"step": 1780
},
{
"epoch": 12.17687074829932,
"grad_norm": 5.3073649406433105,
"learning_rate": 2.5659863945578234e-05,
"loss": 0.0723,
"step": 1790
},
{
"epoch": 12.244897959183673,
"grad_norm": 0.07300838083028793,
"learning_rate": 2.5523809523809527e-05,
"loss": 0.0447,
"step": 1800
},
{
"epoch": 12.312925170068027,
"grad_norm": 1.9569120407104492,
"learning_rate": 2.5387755102040817e-05,
"loss": 0.0437,
"step": 1810
},
{
"epoch": 12.380952380952381,
"grad_norm": 4.616933822631836,
"learning_rate": 2.525170068027211e-05,
"loss": 0.0802,
"step": 1820
},
{
"epoch": 12.448979591836734,
"grad_norm": 0.09218256920576096,
"learning_rate": 2.51156462585034e-05,
"loss": 0.0345,
"step": 1830
},
{
"epoch": 12.517006802721088,
"grad_norm": 0.10980120301246643,
"learning_rate": 2.4979591836734696e-05,
"loss": 0.0167,
"step": 1840
},
{
"epoch": 12.585034013605442,
"grad_norm": 1.9402213096618652,
"learning_rate": 2.4843537414965986e-05,
"loss": 0.0189,
"step": 1850
},
{
"epoch": 12.653061224489797,
"grad_norm": 0.28110960125923157,
"learning_rate": 2.4707482993197282e-05,
"loss": 0.0939,
"step": 1860
},
{
"epoch": 12.72108843537415,
"grad_norm": 14.07165813446045,
"learning_rate": 2.4571428571428572e-05,
"loss": 0.1164,
"step": 1870
},
{
"epoch": 12.789115646258503,
"grad_norm": 0.09300262480974197,
"learning_rate": 2.4435374149659865e-05,
"loss": 0.0511,
"step": 1880
},
{
"epoch": 12.857142857142858,
"grad_norm": 0.08766383677721024,
"learning_rate": 2.4299319727891158e-05,
"loss": 0.0821,
"step": 1890
},
{
"epoch": 12.92517006802721,
"grad_norm": 4.915824890136719,
"learning_rate": 2.416326530612245e-05,
"loss": 0.0425,
"step": 1900
},
{
"epoch": 12.993197278911564,
"grad_norm": 0.25317126512527466,
"learning_rate": 2.4027210884353744e-05,
"loss": 0.048,
"step": 1910
},
{
"epoch": 13.0,
"eval_cer": 0.43813131313131315,
"eval_loss": 0.044823333621025085,
"eval_runtime": 3.9907,
"eval_samples_per_second": 73.42,
"eval_steps_per_second": 36.835,
"step": 1911
},
{
"epoch": 13.061224489795919,
"grad_norm": 11.498345375061035,
"learning_rate": 2.3891156462585034e-05,
"loss": 0.0573,
"step": 1920
},
{
"epoch": 13.129251700680273,
"grad_norm": 9.521465301513672,
"learning_rate": 2.3755102040816327e-05,
"loss": 0.0186,
"step": 1930
},
{
"epoch": 13.197278911564625,
"grad_norm": 5.737509250640869,
"learning_rate": 2.361904761904762e-05,
"loss": 0.0189,
"step": 1940
},
{
"epoch": 13.26530612244898,
"grad_norm": 0.3337570130825043,
"learning_rate": 2.3482993197278913e-05,
"loss": 0.0249,
"step": 1950
},
{
"epoch": 13.333333333333334,
"grad_norm": 0.21208225190639496,
"learning_rate": 2.3346938775510206e-05,
"loss": 0.0266,
"step": 1960
},
{
"epoch": 13.401360544217686,
"grad_norm": 0.034205980598926544,
"learning_rate": 2.3210884353741495e-05,
"loss": 0.0062,
"step": 1970
},
{
"epoch": 13.46938775510204,
"grad_norm": 6.113767623901367,
"learning_rate": 2.3074829931972792e-05,
"loss": 0.1134,
"step": 1980
},
{
"epoch": 13.537414965986395,
"grad_norm": 1.1116629838943481,
"learning_rate": 2.293877551020408e-05,
"loss": 0.0873,
"step": 1990
},
{
"epoch": 13.60544217687075,
"grad_norm": 8.336064338684082,
"learning_rate": 2.2802721088435375e-05,
"loss": 0.0285,
"step": 2000
},
{
"epoch": 13.673469387755102,
"grad_norm": 0.6493708491325378,
"learning_rate": 2.2666666666666668e-05,
"loss": 0.0107,
"step": 2010
},
{
"epoch": 13.741496598639456,
"grad_norm": 4.806843280792236,
"learning_rate": 2.253061224489796e-05,
"loss": 0.0163,
"step": 2020
},
{
"epoch": 13.80952380952381,
"grad_norm": 0.049966610968112946,
"learning_rate": 2.2394557823129254e-05,
"loss": 0.01,
"step": 2030
},
{
"epoch": 13.877551020408163,
"grad_norm": 0.1961314082145691,
"learning_rate": 2.2258503401360543e-05,
"loss": 0.107,
"step": 2040
},
{
"epoch": 13.945578231292517,
"grad_norm": 0.04836405813694,
"learning_rate": 2.2122448979591836e-05,
"loss": 0.0518,
"step": 2050
},
{
"epoch": 14.0,
"eval_cer": 0.37752525252525254,
"eval_loss": 0.04542345553636551,
"eval_runtime": 3.7709,
"eval_samples_per_second": 77.701,
"eval_steps_per_second": 38.983,
"step": 2058
},
{
"epoch": 14.013605442176871,
"grad_norm": 0.12112589925527573,
"learning_rate": 2.1986394557823133e-05,
"loss": 0.007,
"step": 2060
},
{
"epoch": 14.081632653061224,
"grad_norm": 0.18053178489208221,
"learning_rate": 2.1850340136054422e-05,
"loss": 0.1291,
"step": 2070
},
{
"epoch": 14.149659863945578,
"grad_norm": 0.2066822201013565,
"learning_rate": 2.1714285714285715e-05,
"loss": 0.0257,
"step": 2080
},
{
"epoch": 14.217687074829932,
"grad_norm": 0.11014904081821442,
"learning_rate": 2.1578231292517005e-05,
"loss": 0.0535,
"step": 2090
},
{
"epoch": 14.285714285714286,
"grad_norm": 12.59344482421875,
"learning_rate": 2.14421768707483e-05,
"loss": 0.1287,
"step": 2100
},
{
"epoch": 14.353741496598639,
"grad_norm": 0.045586470514535904,
"learning_rate": 2.1306122448979595e-05,
"loss": 0.0348,
"step": 2110
},
{
"epoch": 14.421768707482993,
"grad_norm": 0.16031715273857117,
"learning_rate": 2.1170068027210884e-05,
"loss": 0.0664,
"step": 2120
},
{
"epoch": 14.489795918367347,
"grad_norm": 2.561408281326294,
"learning_rate": 2.1034013605442177e-05,
"loss": 0.04,
"step": 2130
},
{
"epoch": 14.5578231292517,
"grad_norm": 0.2670551538467407,
"learning_rate": 2.089795918367347e-05,
"loss": 0.0092,
"step": 2140
},
{
"epoch": 14.625850340136054,
"grad_norm": 0.12540097534656525,
"learning_rate": 2.0761904761904763e-05,
"loss": 0.0111,
"step": 2150
},
{
"epoch": 14.693877551020408,
"grad_norm": 1.2808445692062378,
"learning_rate": 2.0625850340136056e-05,
"loss": 0.0595,
"step": 2160
},
{
"epoch": 14.761904761904763,
"grad_norm": 0.03977341949939728,
"learning_rate": 2.0489795918367346e-05,
"loss": 0.0437,
"step": 2170
},
{
"epoch": 14.829931972789115,
"grad_norm": 0.40472060441970825,
"learning_rate": 2.0353741496598642e-05,
"loss": 0.0268,
"step": 2180
},
{
"epoch": 14.89795918367347,
"grad_norm": 0.08766451478004456,
"learning_rate": 2.0217687074829932e-05,
"loss": 0.0072,
"step": 2190
},
{
"epoch": 14.965986394557824,
"grad_norm": 0.34755828976631165,
"learning_rate": 2.0081632653061225e-05,
"loss": 0.0482,
"step": 2200
},
{
"epoch": 15.0,
"eval_cer": 0.38257575757575757,
"eval_loss": 0.03393391892313957,
"eval_runtime": 3.9624,
"eval_samples_per_second": 73.945,
"eval_steps_per_second": 37.098,
"step": 2205
},
{
"epoch": 15.034013605442176,
"grad_norm": 0.07853918522596359,
"learning_rate": 1.9945578231292518e-05,
"loss": 0.0218,
"step": 2210
},
{
"epoch": 15.10204081632653,
"grad_norm": 0.08187614381313324,
"learning_rate": 1.980952380952381e-05,
"loss": 0.0059,
"step": 2220
},
{
"epoch": 15.170068027210885,
"grad_norm": 0.5369409918785095,
"learning_rate": 1.9673469387755104e-05,
"loss": 0.0105,
"step": 2230
},
{
"epoch": 15.238095238095237,
"grad_norm": 0.46228042244911194,
"learning_rate": 1.9537414965986394e-05,
"loss": 0.0028,
"step": 2240
},
{
"epoch": 15.306122448979592,
"grad_norm": 0.1829945147037506,
"learning_rate": 1.940136054421769e-05,
"loss": 0.0355,
"step": 2250
},
{
"epoch": 15.374149659863946,
"grad_norm": 0.08586379885673523,
"learning_rate": 1.926530612244898e-05,
"loss": 0.0096,
"step": 2260
},
{
"epoch": 15.4421768707483,
"grad_norm": 0.13552436232566833,
"learning_rate": 1.9129251700680273e-05,
"loss": 0.1455,
"step": 2270
},
{
"epoch": 15.510204081632653,
"grad_norm": 0.3371906876564026,
"learning_rate": 1.8993197278911566e-05,
"loss": 0.0229,
"step": 2280
},
{
"epoch": 15.578231292517007,
"grad_norm": 0.03920818492770195,
"learning_rate": 1.885714285714286e-05,
"loss": 0.0224,
"step": 2290
},
{
"epoch": 15.646258503401361,
"grad_norm": 0.8000497817993164,
"learning_rate": 1.8721088435374152e-05,
"loss": 0.0681,
"step": 2300
},
{
"epoch": 15.714285714285714,
"grad_norm": 0.15928244590759277,
"learning_rate": 1.8585034013605442e-05,
"loss": 0.0338,
"step": 2310
},
{
"epoch": 15.782312925170068,
"grad_norm": 2.3006467819213867,
"learning_rate": 1.8448979591836735e-05,
"loss": 0.0085,
"step": 2320
},
{
"epoch": 15.850340136054422,
"grad_norm": 0.3312649726867676,
"learning_rate": 1.8312925170068028e-05,
"loss": 0.0015,
"step": 2330
},
{
"epoch": 15.918367346938776,
"grad_norm": 11.605361938476562,
"learning_rate": 1.817687074829932e-05,
"loss": 0.0616,
"step": 2340
},
{
"epoch": 15.986394557823129,
"grad_norm": 0.06783591210842133,
"learning_rate": 1.8040816326530614e-05,
"loss": 0.0061,
"step": 2350
},
{
"epoch": 16.0,
"eval_cer": 0.36742424242424243,
"eval_loss": 0.01737603358924389,
"eval_runtime": 3.7608,
"eval_samples_per_second": 77.91,
"eval_steps_per_second": 39.088,
"step": 2352
},
{
"epoch": 16.05442176870748,
"grad_norm": 0.03763847053050995,
"learning_rate": 1.7904761904761904e-05,
"loss": 0.018,
"step": 2360
},
{
"epoch": 16.122448979591837,
"grad_norm": 1.681272268295288,
"learning_rate": 1.77687074829932e-05,
"loss": 0.0158,
"step": 2370
},
{
"epoch": 16.19047619047619,
"grad_norm": 0.31114622950553894,
"learning_rate": 1.763265306122449e-05,
"loss": 0.0045,
"step": 2380
},
{
"epoch": 16.258503401360546,
"grad_norm": 3.3073906898498535,
"learning_rate": 1.7496598639455783e-05,
"loss": 0.0266,
"step": 2390
},
{
"epoch": 16.3265306122449,
"grad_norm": 0.05872774124145508,
"learning_rate": 1.7360544217687076e-05,
"loss": 0.0094,
"step": 2400
},
{
"epoch": 16.39455782312925,
"grad_norm": 0.36872556805610657,
"learning_rate": 1.722448979591837e-05,
"loss": 0.0031,
"step": 2410
},
{
"epoch": 16.462585034013607,
"grad_norm": 0.18953841924667358,
"learning_rate": 1.7088435374149662e-05,
"loss": 0.0072,
"step": 2420
},
{
"epoch": 16.53061224489796,
"grad_norm": 0.053531669080257416,
"learning_rate": 1.695238095238095e-05,
"loss": 0.0214,
"step": 2430
},
{
"epoch": 16.598639455782312,
"grad_norm": 0.9419485926628113,
"learning_rate": 1.6816326530612244e-05,
"loss": 0.0311,
"step": 2440
},
{
"epoch": 16.666666666666668,
"grad_norm": 0.10363417118787766,
"learning_rate": 1.668027210884354e-05,
"loss": 0.0145,
"step": 2450
},
{
"epoch": 16.73469387755102,
"grad_norm": 11.27441692352295,
"learning_rate": 1.654421768707483e-05,
"loss": 0.0174,
"step": 2460
},
{
"epoch": 16.802721088435373,
"grad_norm": 0.07533001154661179,
"learning_rate": 1.6408163265306124e-05,
"loss": 0.0043,
"step": 2470
},
{
"epoch": 16.87074829931973,
"grad_norm": 0.09791432321071625,
"learning_rate": 1.6272108843537413e-05,
"loss": 0.0192,
"step": 2480
},
{
"epoch": 16.93877551020408,
"grad_norm": 1.773054838180542,
"learning_rate": 1.613605442176871e-05,
"loss": 0.0846,
"step": 2490
},
{
"epoch": 17.0,
"eval_cer": 0.22853535353535354,
"eval_loss": 0.016882039606571198,
"eval_runtime": 3.9198,
"eval_samples_per_second": 74.749,
"eval_steps_per_second": 37.502,
"step": 2499
},
{
"epoch": 17.006802721088434,
"grad_norm": 0.08635395020246506,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.0117,
"step": 2500
},
{
"epoch": 17.07482993197279,
"grad_norm": 0.04216604679822922,
"learning_rate": 1.5863945578231292e-05,
"loss": 0.0075,
"step": 2510
},
{
"epoch": 17.142857142857142,
"grad_norm": 0.3129735589027405,
"learning_rate": 1.5727891156462585e-05,
"loss": 0.0024,
"step": 2520
},
{
"epoch": 17.2108843537415,
"grad_norm": 0.0337909497320652,
"learning_rate": 1.559183673469388e-05,
"loss": 0.0032,
"step": 2530
},
{
"epoch": 17.27891156462585,
"grad_norm": 0.3642733097076416,
"learning_rate": 1.545578231292517e-05,
"loss": 0.1344,
"step": 2540
},
{
"epoch": 17.346938775510203,
"grad_norm": 0.06059624254703522,
"learning_rate": 1.5319727891156464e-05,
"loss": 0.0039,
"step": 2550
},
{
"epoch": 17.41496598639456,
"grad_norm": 0.5465549826622009,
"learning_rate": 1.5183673469387754e-05,
"loss": 0.0395,
"step": 2560
},
{
"epoch": 17.482993197278912,
"grad_norm": 0.048258326947689056,
"learning_rate": 1.5047619047619049e-05,
"loss": 0.0226,
"step": 2570
},
{
"epoch": 17.551020408163264,
"grad_norm": 0.5764261484146118,
"learning_rate": 1.4911564625850342e-05,
"loss": 0.0348,
"step": 2580
},
{
"epoch": 17.61904761904762,
"grad_norm": 1.802079439163208,
"learning_rate": 1.4775510204081633e-05,
"loss": 0.004,
"step": 2590
},
{
"epoch": 17.687074829931973,
"grad_norm": 0.03979931399226189,
"learning_rate": 1.4639455782312925e-05,
"loss": 0.004,
"step": 2600
},
{
"epoch": 17.755102040816325,
"grad_norm": 0.25388839840888977,
"learning_rate": 1.450340136054422e-05,
"loss": 0.0039,
"step": 2610
},
{
"epoch": 17.82312925170068,
"grad_norm": 0.44963565468788147,
"learning_rate": 1.436734693877551e-05,
"loss": 0.006,
"step": 2620
},
{
"epoch": 17.891156462585034,
"grad_norm": 0.0887552797794342,
"learning_rate": 1.4231292517006804e-05,
"loss": 0.0042,
"step": 2630
},
{
"epoch": 17.959183673469386,
"grad_norm": 0.11289983987808228,
"learning_rate": 1.4095238095238095e-05,
"loss": 0.0034,
"step": 2640
},
{
"epoch": 18.0,
"eval_cer": 0.24242424242424243,
"eval_loss": 0.013762996532022953,
"eval_runtime": 3.9761,
"eval_samples_per_second": 73.691,
"eval_steps_per_second": 36.971,
"step": 2646
},
{
"epoch": 18.027210884353742,
"grad_norm": 0.06268062442541122,
"learning_rate": 1.395918367346939e-05,
"loss": 0.0031,
"step": 2650
},
{
"epoch": 18.095238095238095,
"grad_norm": 0.03095332533121109,
"learning_rate": 1.3823129251700681e-05,
"loss": 0.0356,
"step": 2660
},
{
"epoch": 18.163265306122447,
"grad_norm": 0.6670628786087036,
"learning_rate": 1.3687074829931972e-05,
"loss": 0.004,
"step": 2670
},
{
"epoch": 18.231292517006803,
"grad_norm": 0.09079564362764359,
"learning_rate": 1.3551020408163265e-05,
"loss": 0.0036,
"step": 2680
},
{
"epoch": 18.299319727891156,
"grad_norm": 0.17814789712429047,
"learning_rate": 1.3414965986394558e-05,
"loss": 0.0016,
"step": 2690
},
{
"epoch": 18.367346938775512,
"grad_norm": 0.053088486194610596,
"learning_rate": 1.3278911564625852e-05,
"loss": 0.0037,
"step": 2700
},
{
"epoch": 18.435374149659864,
"grad_norm": 0.05287722125649452,
"learning_rate": 1.3142857142857143e-05,
"loss": 0.0031,
"step": 2710
},
{
"epoch": 18.503401360544217,
"grad_norm": 0.22168047726154327,
"learning_rate": 1.3006802721088434e-05,
"loss": 0.003,
"step": 2720
},
{
"epoch": 18.571428571428573,
"grad_norm": 0.2615916430950165,
"learning_rate": 1.2870748299319729e-05,
"loss": 0.0022,
"step": 2730
},
{
"epoch": 18.639455782312925,
"grad_norm": 0.04484458267688751,
"learning_rate": 1.273469387755102e-05,
"loss": 0.083,
"step": 2740
},
{
"epoch": 18.707482993197278,
"grad_norm": 0.4530847370624542,
"learning_rate": 1.2598639455782313e-05,
"loss": 0.0034,
"step": 2750
},
{
"epoch": 18.775510204081634,
"grad_norm": 0.11792109161615372,
"learning_rate": 1.2462585034013606e-05,
"loss": 0.0295,
"step": 2760
},
{
"epoch": 18.843537414965986,
"grad_norm": 0.049426767975091934,
"learning_rate": 1.2326530612244898e-05,
"loss": 0.0019,
"step": 2770
},
{
"epoch": 18.91156462585034,
"grad_norm": 0.04500193893909454,
"learning_rate": 1.219047619047619e-05,
"loss": 0.0134,
"step": 2780
},
{
"epoch": 18.979591836734695,
"grad_norm": 0.14980462193489075,
"learning_rate": 1.2054421768707484e-05,
"loss": 0.0032,
"step": 2790
},
{
"epoch": 19.0,
"eval_cer": 0.2159090909090909,
"eval_loss": 0.012968610972166061,
"eval_runtime": 3.6907,
"eval_samples_per_second": 79.389,
"eval_steps_per_second": 39.83,
"step": 2793
},
{
"epoch": 19.047619047619047,
"grad_norm": 0.5596031546592712,
"learning_rate": 1.1918367346938777e-05,
"loss": 0.004,
"step": 2800
},
{
"epoch": 19.1156462585034,
"grad_norm": 0.09450047463178635,
"learning_rate": 1.178231292517007e-05,
"loss": 0.0176,
"step": 2810
},
{
"epoch": 19.183673469387756,
"grad_norm": 0.030270878225564957,
"learning_rate": 1.1646258503401361e-05,
"loss": 0.0031,
"step": 2820
},
{
"epoch": 19.25170068027211,
"grad_norm": 0.5096073746681213,
"learning_rate": 1.1510204081632654e-05,
"loss": 0.0231,
"step": 2830
},
{
"epoch": 19.31972789115646,
"grad_norm": 0.2736698389053345,
"learning_rate": 1.1374149659863946e-05,
"loss": 0.0016,
"step": 2840
},
{
"epoch": 19.387755102040817,
"grad_norm": 0.06535348296165466,
"learning_rate": 1.1238095238095239e-05,
"loss": 0.0013,
"step": 2850
},
{
"epoch": 19.45578231292517,
"grad_norm": 0.0859360322356224,
"learning_rate": 1.1102040816326532e-05,
"loss": 0.003,
"step": 2860
},
{
"epoch": 19.523809523809526,
"grad_norm": 0.037795525044202805,
"learning_rate": 1.0965986394557825e-05,
"loss": 0.0026,
"step": 2870
},
{
"epoch": 19.591836734693878,
"grad_norm": 1.3488638401031494,
"learning_rate": 1.0829931972789116e-05,
"loss": 0.004,
"step": 2880
},
{
"epoch": 19.65986394557823,
"grad_norm": 0.05746370553970337,
"learning_rate": 1.0693877551020409e-05,
"loss": 0.0192,
"step": 2890
},
{
"epoch": 19.727891156462587,
"grad_norm": 0.025979384779930115,
"learning_rate": 1.05578231292517e-05,
"loss": 0.0091,
"step": 2900
},
{
"epoch": 19.79591836734694,
"grad_norm": 4.972421646118164,
"learning_rate": 1.0421768707482993e-05,
"loss": 0.0082,
"step": 2910
},
{
"epoch": 19.86394557823129,
"grad_norm": 0.49525704979896545,
"learning_rate": 1.0285714285714286e-05,
"loss": 0.0034,
"step": 2920
},
{
"epoch": 19.931972789115648,
"grad_norm": 0.02950323186814785,
"learning_rate": 1.014965986394558e-05,
"loss": 0.07,
"step": 2930
},
{
"epoch": 20.0,
"grad_norm": 0.047243040055036545,
"learning_rate": 1.001360544217687e-05,
"loss": 0.0019,
"step": 2940
},
{
"epoch": 20.0,
"eval_cer": 0.30176767676767674,
"eval_loss": 0.011288419365882874,
"eval_runtime": 4.0348,
"eval_samples_per_second": 72.618,
"eval_steps_per_second": 36.433,
"step": 2940
},
{
"epoch": 20.068027210884352,
"grad_norm": 8.58004093170166,
"learning_rate": 9.877551020408164e-06,
"loss": 0.02,
"step": 2950
},
{
"epoch": 20.13605442176871,
"grad_norm": 0.2544482946395874,
"learning_rate": 9.741496598639455e-06,
"loss": 0.0156,
"step": 2960
},
{
"epoch": 20.20408163265306,
"grad_norm": 0.5715163350105286,
"learning_rate": 9.60544217687075e-06,
"loss": 0.0027,
"step": 2970
},
{
"epoch": 20.272108843537413,
"grad_norm": 0.134610116481781,
"learning_rate": 9.469387755102041e-06,
"loss": 0.0394,
"step": 2980
},
{
"epoch": 20.34013605442177,
"grad_norm": 0.23469507694244385,
"learning_rate": 9.333333333333334e-06,
"loss": 0.0142,
"step": 2990
},
{
"epoch": 20.408163265306122,
"grad_norm": 0.19277207553386688,
"learning_rate": 9.197278911564626e-06,
"loss": 0.0214,
"step": 3000
},
{
"epoch": 20.476190476190474,
"grad_norm": 0.04216855764389038,
"learning_rate": 9.061224489795919e-06,
"loss": 0.0014,
"step": 3010
},
{
"epoch": 20.54421768707483,
"grad_norm": 0.02860959619283676,
"learning_rate": 8.925170068027212e-06,
"loss": 0.0049,
"step": 3020
},
{
"epoch": 20.612244897959183,
"grad_norm": 0.38055145740509033,
"learning_rate": 8.789115646258505e-06,
"loss": 0.0193,
"step": 3030
},
{
"epoch": 20.68027210884354,
"grad_norm": 0.034134916961193085,
"learning_rate": 8.653061224489796e-06,
"loss": 0.0015,
"step": 3040
},
{
"epoch": 20.74829931972789,
"grad_norm": 0.6501132845878601,
"learning_rate": 8.517006802721089e-06,
"loss": 0.003,
"step": 3050
},
{
"epoch": 20.816326530612244,
"grad_norm": 0.26927316188812256,
"learning_rate": 8.38095238095238e-06,
"loss": 0.0064,
"step": 3060
},
{
"epoch": 20.8843537414966,
"grad_norm": 0.308063805103302,
"learning_rate": 8.244897959183674e-06,
"loss": 0.0171,
"step": 3070
},
{
"epoch": 20.952380952380953,
"grad_norm": 0.0912749320268631,
"learning_rate": 8.108843537414967e-06,
"loss": 0.0034,
"step": 3080
},
{
"epoch": 21.0,
"eval_cer": 0.23863636363636365,
"eval_loss": 0.009314554743468761,
"eval_runtime": 3.7562,
"eval_samples_per_second": 78.005,
"eval_steps_per_second": 39.135,
"step": 3087
},
{
"epoch": 21.020408163265305,
"grad_norm": 0.031063944101333618,
"learning_rate": 7.97278911564626e-06,
"loss": 0.0025,
"step": 3090
},
{
"epoch": 21.08843537414966,
"grad_norm": 0.45678919553756714,
"learning_rate": 7.836734693877551e-06,
"loss": 0.0017,
"step": 3100
},
{
"epoch": 21.156462585034014,
"grad_norm": 0.06373850256204605,
"learning_rate": 7.700680272108844e-06,
"loss": 0.0203,
"step": 3110
},
{
"epoch": 21.224489795918366,
"grad_norm": 0.04051206260919571,
"learning_rate": 7.564625850340136e-06,
"loss": 0.0028,
"step": 3120
},
{
"epoch": 21.292517006802722,
"grad_norm": 0.20778831839561462,
"learning_rate": 7.428571428571429e-06,
"loss": 0.0032,
"step": 3130
},
{
"epoch": 21.360544217687075,
"grad_norm": 0.23982657492160797,
"learning_rate": 7.292517006802721e-06,
"loss": 0.0097,
"step": 3140
},
{
"epoch": 21.428571428571427,
"grad_norm": 0.30359897017478943,
"learning_rate": 7.1564625850340144e-06,
"loss": 0.002,
"step": 3150
},
{
"epoch": 21.496598639455783,
"grad_norm": 0.844930112361908,
"learning_rate": 7.020408163265306e-06,
"loss": 0.064,
"step": 3160
},
{
"epoch": 21.564625850340136,
"grad_norm": 0.2660425305366516,
"learning_rate": 6.884353741496599e-06,
"loss": 0.011,
"step": 3170
},
{
"epoch": 21.632653061224488,
"grad_norm": 0.1279953122138977,
"learning_rate": 6.748299319727891e-06,
"loss": 0.0027,
"step": 3180
},
{
"epoch": 21.700680272108844,
"grad_norm": 0.05603710934519768,
"learning_rate": 6.612244897959184e-06,
"loss": 0.0012,
"step": 3190
},
{
"epoch": 21.768707482993197,
"grad_norm": 0.05168928578495979,
"learning_rate": 6.476190476190476e-06,
"loss": 0.0176,
"step": 3200
},
{
"epoch": 21.836734693877553,
"grad_norm": 0.046198636293411255,
"learning_rate": 6.340136054421769e-06,
"loss": 0.0013,
"step": 3210
},
{
"epoch": 21.904761904761905,
"grad_norm": 0.042502377182245255,
"learning_rate": 6.2040816326530614e-06,
"loss": 0.0015,
"step": 3220
},
{
"epoch": 21.972789115646258,
"grad_norm": 0.24547749757766724,
"learning_rate": 6.0680272108843545e-06,
"loss": 0.0023,
"step": 3230
},
{
"epoch": 22.0,
"eval_cer": 0.23106060606060605,
"eval_loss": 0.009045995771884918,
"eval_runtime": 3.8812,
"eval_samples_per_second": 75.492,
"eval_steps_per_second": 37.875,
"step": 3234
},
{
"epoch": 22.040816326530614,
"grad_norm": 1.8699299097061157,
"learning_rate": 5.931972789115647e-06,
"loss": 0.0095,
"step": 3240
},
{
"epoch": 22.108843537414966,
"grad_norm": 0.028658084571361542,
"learning_rate": 5.795918367346939e-06,
"loss": 0.002,
"step": 3250
},
{
"epoch": 22.17687074829932,
"grad_norm": 0.11185970157384872,
"learning_rate": 5.659863945578232e-06,
"loss": 0.0139,
"step": 3260
},
{
"epoch": 22.244897959183675,
"grad_norm": 0.08078885078430176,
"learning_rate": 5.523809523809524e-06,
"loss": 0.0093,
"step": 3270
},
{
"epoch": 22.312925170068027,
"grad_norm": 0.033784542232751846,
"learning_rate": 5.387755102040816e-06,
"loss": 0.0035,
"step": 3280
},
{
"epoch": 22.38095238095238,
"grad_norm": 0.04999591037631035,
"learning_rate": 5.251700680272109e-06,
"loss": 0.0158,
"step": 3290
},
{
"epoch": 22.448979591836736,
"grad_norm": 0.47869572043418884,
"learning_rate": 5.1156462585034015e-06,
"loss": 0.002,
"step": 3300
},
{
"epoch": 22.517006802721088,
"grad_norm": 0.5939333438873291,
"learning_rate": 4.9795918367346945e-06,
"loss": 0.0025,
"step": 3310
},
{
"epoch": 22.58503401360544,
"grad_norm": 0.02597820572555065,
"learning_rate": 4.843537414965987e-06,
"loss": 0.017,
"step": 3320
},
{
"epoch": 22.653061224489797,
"grad_norm": 0.06343343108892441,
"learning_rate": 4.707482993197279e-06,
"loss": 0.0414,
"step": 3330
},
{
"epoch": 22.72108843537415,
"grad_norm": 0.40962278842926025,
"learning_rate": 4.571428571428572e-06,
"loss": 0.0114,
"step": 3340
},
{
"epoch": 22.7891156462585,
"grad_norm": 0.29765334725379944,
"learning_rate": 4.435374149659864e-06,
"loss": 0.0023,
"step": 3350
},
{
"epoch": 22.857142857142858,
"grad_norm": 0.17841386795043945,
"learning_rate": 4.299319727891156e-06,
"loss": 0.0016,
"step": 3360
},
{
"epoch": 22.92517006802721,
"grad_norm": 0.44667163491249084,
"learning_rate": 4.163265306122449e-06,
"loss": 0.0021,
"step": 3370
},
{
"epoch": 22.993197278911566,
"grad_norm": 0.19756975769996643,
"learning_rate": 4.0272108843537416e-06,
"loss": 0.0073,
"step": 3380
},
{
"epoch": 23.0,
"eval_cer": 0.23737373737373738,
"eval_loss": 0.008361349813640118,
"eval_runtime": 3.9088,
"eval_samples_per_second": 74.958,
"eval_steps_per_second": 37.607,
"step": 3381
},
{
"epoch": 23.06122448979592,
"grad_norm": 4.8710503578186035,
"learning_rate": 3.891156462585034e-06,
"loss": 0.0383,
"step": 3390
},
{
"epoch": 23.12925170068027,
"grad_norm": 0.05327881500124931,
"learning_rate": 3.7551020408163268e-06,
"loss": 0.0026,
"step": 3400
},
{
"epoch": 23.197278911564627,
"grad_norm": 0.4828534722328186,
"learning_rate": 3.619047619047619e-06,
"loss": 0.0024,
"step": 3410
},
{
"epoch": 23.26530612244898,
"grad_norm": 0.03481818363070488,
"learning_rate": 3.4829931972789116e-06,
"loss": 0.0068,
"step": 3420
},
{
"epoch": 23.333333333333332,
"grad_norm": 0.19071730971336365,
"learning_rate": 3.346938775510204e-06,
"loss": 0.0171,
"step": 3430
},
{
"epoch": 23.401360544217688,
"grad_norm": 0.08115135878324509,
"learning_rate": 3.210884353741497e-06,
"loss": 0.0015,
"step": 3440
},
{
"epoch": 23.46938775510204,
"grad_norm": 0.13966763019561768,
"learning_rate": 3.074829931972789e-06,
"loss": 0.0309,
"step": 3450
},
{
"epoch": 23.537414965986393,
"grad_norm": 0.08014482259750366,
"learning_rate": 2.9387755102040816e-06,
"loss": 0.0028,
"step": 3460
},
{
"epoch": 23.60544217687075,
"grad_norm": 0.7266091108322144,
"learning_rate": 2.8027210884353742e-06,
"loss": 0.0019,
"step": 3470
},
{
"epoch": 23.6734693877551,
"grad_norm": 0.763943076133728,
"learning_rate": 2.666666666666667e-06,
"loss": 0.0029,
"step": 3480
},
{
"epoch": 23.741496598639454,
"grad_norm": 0.20366428792476654,
"learning_rate": 2.5306122448979594e-06,
"loss": 0.0077,
"step": 3490
},
{
"epoch": 23.80952380952381,
"grad_norm": 0.06424231082201004,
"learning_rate": 2.394557823129252e-06,
"loss": 0.0048,
"step": 3500
},
{
"epoch": 23.877551020408163,
"grad_norm": 0.0471784844994545,
"learning_rate": 2.2585034013605447e-06,
"loss": 0.0027,
"step": 3510
},
{
"epoch": 23.94557823129252,
"grad_norm": 0.6746675968170166,
"learning_rate": 2.122448979591837e-06,
"loss": 0.0022,
"step": 3520
},
{
"epoch": 24.0,
"eval_cer": 0.2702020202020202,
"eval_loss": 0.008024842478334904,
"eval_runtime": 3.9491,
"eval_samples_per_second": 74.195,
"eval_steps_per_second": 37.224,
"step": 3528
},
{
"epoch": 24.01360544217687,
"grad_norm": 0.248748779296875,
"learning_rate": 1.9863945578231295e-06,
"loss": 0.0013,
"step": 3530
},
{
"epoch": 24.081632653061224,
"grad_norm": 5.284445762634277,
"learning_rate": 1.8503401360544219e-06,
"loss": 0.0097,
"step": 3540
},
{
"epoch": 24.14965986394558,
"grad_norm": 0.07442311942577362,
"learning_rate": 1.7142857142857145e-06,
"loss": 0.011,
"step": 3550
},
{
"epoch": 24.217687074829932,
"grad_norm": 0.1644800305366516,
"learning_rate": 1.5782312925170069e-06,
"loss": 0.0014,
"step": 3560
},
{
"epoch": 24.285714285714285,
"grad_norm": 0.05962975695729256,
"learning_rate": 1.4421768707482995e-06,
"loss": 0.0018,
"step": 3570
},
{
"epoch": 24.35374149659864,
"grad_norm": 0.039682451635599136,
"learning_rate": 1.306122448979592e-06,
"loss": 0.0025,
"step": 3580
},
{
"epoch": 24.421768707482993,
"grad_norm": 0.0817071795463562,
"learning_rate": 1.1700680272108845e-06,
"loss": 0.0032,
"step": 3590
},
{
"epoch": 24.489795918367346,
"grad_norm": 0.07747264206409454,
"learning_rate": 1.034013605442177e-06,
"loss": 0.015,
"step": 3600
},
{
"epoch": 24.5578231292517,
"grad_norm": 0.04547140747308731,
"learning_rate": 8.979591836734694e-07,
"loss": 0.0015,
"step": 3610
},
{
"epoch": 24.625850340136054,
"grad_norm": 0.04727374389767647,
"learning_rate": 7.619047619047619e-07,
"loss": 0.0023,
"step": 3620
},
{
"epoch": 24.693877551020407,
"grad_norm": 0.14271779358386993,
"learning_rate": 6.258503401360544e-07,
"loss": 0.0025,
"step": 3630
},
{
"epoch": 24.761904761904763,
"grad_norm": 0.4749351441860199,
"learning_rate": 4.897959183673469e-07,
"loss": 0.0026,
"step": 3640
},
{
"epoch": 24.829931972789115,
"grad_norm": 10.92783260345459,
"learning_rate": 3.537414965986395e-07,
"loss": 0.0217,
"step": 3650
},
{
"epoch": 24.897959183673468,
"grad_norm": 0.14225248992443085,
"learning_rate": 2.1768707482993197e-07,
"loss": 0.0016,
"step": 3660
},
{
"epoch": 24.965986394557824,
"grad_norm": 0.06080883741378784,
"learning_rate": 8.16326530612245e-08,
"loss": 0.0391,
"step": 3670
},
{
"epoch": 25.0,
"eval_cer": 0.26515151515151514,
"eval_loss": 0.007971594110131264,
"eval_runtime": 3.9963,
"eval_samples_per_second": 73.317,
"eval_steps_per_second": 36.784,
"step": 3675
}
],
"logging_steps": 10,
"max_steps": 3675,
"num_input_tokens_seen": 0,
"num_train_epochs": 25,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.759922930951168e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}