whisper-base-cantonese / trainer_state.json
Oblivion208's picture
Upload folder using huggingface_hub
d4d9183
{
"best_metric": 18.128184499865917,
"best_model_checkpoint": "./logs/whisper-base-cantonese/checkpoint-7200",
"epoch": 10.02875,
"eval_steps": 400,
"global_step": 7200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.3e-06,
"loss": 2.0581,
"step": 25
},
{
"epoch": 0.0,
"learning_rate": 4.800000000000001e-06,
"loss": 0.8569,
"step": 50
},
{
"epoch": 0.0,
"learning_rate": 7.2999999999999996e-06,
"loss": 0.4243,
"step": 75
},
{
"epoch": 0.01,
"learning_rate": 9.800000000000001e-06,
"loss": 0.3528,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 1.23e-05,
"loss": 0.3243,
"step": 125
},
{
"epoch": 0.01,
"learning_rate": 1.48e-05,
"loss": 0.3011,
"step": 150
},
{
"epoch": 0.01,
"learning_rate": 1.73e-05,
"loss": 0.2793,
"step": 175
},
{
"epoch": 0.01,
"learning_rate": 1.9800000000000004e-05,
"loss": 0.2669,
"step": 200
},
{
"epoch": 0.01,
"learning_rate": 2.23e-05,
"loss": 0.2676,
"step": 225
},
{
"epoch": 0.01,
"learning_rate": 2.48e-05,
"loss": 0.252,
"step": 250
},
{
"epoch": 0.01,
"learning_rate": 2.7300000000000003e-05,
"loss": 0.2484,
"step": 275
},
{
"epoch": 0.01,
"learning_rate": 2.98e-05,
"loss": 0.2327,
"step": 300
},
{
"epoch": 0.02,
"learning_rate": 3.2300000000000006e-05,
"loss": 0.235,
"step": 325
},
{
"epoch": 0.02,
"learning_rate": 3.48e-05,
"loss": 0.2229,
"step": 350
},
{
"epoch": 0.02,
"learning_rate": 3.73e-05,
"loss": 0.2272,
"step": 375
},
{
"epoch": 0.02,
"learning_rate": 3.9800000000000005e-05,
"loss": 0.2162,
"step": 400
},
{
"epoch": 0.02,
"eval_cer": 36.56029319746134,
"eval_loss": 1.0470175743103027,
"eval_runtime": 127.7891,
"eval_samples_per_second": 7.825,
"eval_steps_per_second": 0.493,
"step": 400
},
{
"epoch": 0.02,
"learning_rate": 4.23e-05,
"loss": 0.2131,
"step": 425
},
{
"epoch": 0.02,
"learning_rate": 4.4800000000000005e-05,
"loss": 0.2135,
"step": 450
},
{
"epoch": 0.02,
"learning_rate": 4.73e-05,
"loss": 0.211,
"step": 475
},
{
"epoch": 0.03,
"learning_rate": 4.9800000000000004e-05,
"loss": 0.2157,
"step": 500
},
{
"epoch": 0.03,
"learning_rate": 4.994102564102565e-05,
"loss": 0.2063,
"step": 525
},
{
"epoch": 0.03,
"learning_rate": 4.987692307692308e-05,
"loss": 0.2048,
"step": 550
},
{
"epoch": 0.03,
"learning_rate": 4.981282051282051e-05,
"loss": 0.6392,
"step": 575
},
{
"epoch": 0.03,
"learning_rate": 4.974871794871795e-05,
"loss": 0.5431,
"step": 600
},
{
"epoch": 0.03,
"learning_rate": 4.9684615384615384e-05,
"loss": 0.515,
"step": 625
},
{
"epoch": 0.03,
"learning_rate": 4.962051282051282e-05,
"loss": 0.4156,
"step": 650
},
{
"epoch": 1.0,
"learning_rate": 4.9556410256410256e-05,
"loss": 0.3158,
"step": 675
},
{
"epoch": 1.0,
"learning_rate": 4.9492307692307695e-05,
"loss": 0.2046,
"step": 700
},
{
"epoch": 1.0,
"learning_rate": 4.942820512820513e-05,
"loss": 0.1938,
"step": 725
},
{
"epoch": 1.0,
"learning_rate": 4.9364102564102567e-05,
"loss": 0.1776,
"step": 750
},
{
"epoch": 1.01,
"learning_rate": 4.93e-05,
"loss": 0.1726,
"step": 775
},
{
"epoch": 1.01,
"learning_rate": 4.923589743589744e-05,
"loss": 0.1652,
"step": 800
},
{
"epoch": 1.01,
"eval_cer": 26.888352552069367,
"eval_loss": 0.5993894338607788,
"eval_runtime": 126.7133,
"eval_samples_per_second": 7.892,
"eval_steps_per_second": 0.497,
"step": 800
},
{
"epoch": 1.01,
"learning_rate": 4.917179487179488e-05,
"loss": 0.162,
"step": 825
},
{
"epoch": 1.01,
"learning_rate": 4.910769230769231e-05,
"loss": 0.1467,
"step": 850
},
{
"epoch": 1.01,
"learning_rate": 4.904358974358975e-05,
"loss": 0.1524,
"step": 875
},
{
"epoch": 1.01,
"learning_rate": 4.897948717948718e-05,
"loss": 0.147,
"step": 900
},
{
"epoch": 1.01,
"learning_rate": 4.891538461538462e-05,
"loss": 0.1398,
"step": 925
},
{
"epoch": 1.01,
"learning_rate": 4.885128205128205e-05,
"loss": 0.1347,
"step": 950
},
{
"epoch": 1.02,
"learning_rate": 4.878717948717949e-05,
"loss": 0.1314,
"step": 975
},
{
"epoch": 1.02,
"learning_rate": 4.8723076923076925e-05,
"loss": 0.1215,
"step": 1000
},
{
"epoch": 1.02,
"learning_rate": 4.8658974358974364e-05,
"loss": 0.1219,
"step": 1025
},
{
"epoch": 1.02,
"learning_rate": 4.8594871794871796e-05,
"loss": 0.1199,
"step": 1050
},
{
"epoch": 1.02,
"learning_rate": 4.8530769230769236e-05,
"loss": 0.1133,
"step": 1075
},
{
"epoch": 1.02,
"learning_rate": 4.8466666666666675e-05,
"loss": 0.1101,
"step": 1100
},
{
"epoch": 1.02,
"learning_rate": 4.840256410256411e-05,
"loss": 0.1082,
"step": 1125
},
{
"epoch": 1.02,
"learning_rate": 4.833846153846154e-05,
"loss": 0.1124,
"step": 1150
},
{
"epoch": 1.03,
"learning_rate": 4.827435897435897e-05,
"loss": 0.1073,
"step": 1175
},
{
"epoch": 1.03,
"learning_rate": 4.821025641025641e-05,
"loss": 0.0978,
"step": 1200
},
{
"epoch": 1.03,
"eval_cer": 31.79583445070171,
"eval_loss": 0.7157341241836548,
"eval_runtime": 131.399,
"eval_samples_per_second": 7.61,
"eval_steps_per_second": 0.479,
"step": 1200
},
{
"epoch": 1.03,
"learning_rate": 4.8146153846153844e-05,
"loss": 0.2522,
"step": 1225
},
{
"epoch": 1.03,
"learning_rate": 4.808205128205128e-05,
"loss": 0.3732,
"step": 1250
},
{
"epoch": 1.03,
"learning_rate": 4.8017948717948715e-05,
"loss": 0.3506,
"step": 1275
},
{
"epoch": 1.03,
"learning_rate": 4.7953846153846154e-05,
"loss": 0.2876,
"step": 1300
},
{
"epoch": 1.03,
"learning_rate": 4.7889743589743594e-05,
"loss": 0.2475,
"step": 1325
},
{
"epoch": 2.0,
"learning_rate": 4.7825641025641026e-05,
"loss": 0.1111,
"step": 1350
},
{
"epoch": 2.0,
"learning_rate": 4.7761538461538465e-05,
"loss": 0.1053,
"step": 1375
},
{
"epoch": 2.0,
"learning_rate": 4.76974358974359e-05,
"loss": 0.0915,
"step": 1400
},
{
"epoch": 2.0,
"learning_rate": 4.763333333333334e-05,
"loss": 0.0941,
"step": 1425
},
{
"epoch": 2.01,
"learning_rate": 4.756923076923077e-05,
"loss": 0.0863,
"step": 1450
},
{
"epoch": 2.01,
"learning_rate": 4.750512820512821e-05,
"loss": 0.0854,
"step": 1475
},
{
"epoch": 2.01,
"learning_rate": 4.744102564102564e-05,
"loss": 0.0806,
"step": 1500
},
{
"epoch": 2.01,
"learning_rate": 4.737692307692308e-05,
"loss": 0.0788,
"step": 1525
},
{
"epoch": 2.01,
"learning_rate": 4.731282051282051e-05,
"loss": 0.0784,
"step": 1550
},
{
"epoch": 2.01,
"learning_rate": 4.724871794871795e-05,
"loss": 0.0811,
"step": 1575
},
{
"epoch": 2.01,
"learning_rate": 4.718461538461539e-05,
"loss": 0.0769,
"step": 1600
},
{
"epoch": 2.01,
"eval_cer": 25.914007329936535,
"eval_loss": 0.6011224389076233,
"eval_runtime": 131.3826,
"eval_samples_per_second": 7.611,
"eval_steps_per_second": 0.48,
"step": 1600
},
{
"epoch": 2.02,
"learning_rate": 4.7120512820512823e-05,
"loss": 0.069,
"step": 1625
},
{
"epoch": 2.02,
"learning_rate": 4.705641025641026e-05,
"loss": 0.0694,
"step": 1650
},
{
"epoch": 2.02,
"learning_rate": 4.6992307692307695e-05,
"loss": 0.0641,
"step": 1675
},
{
"epoch": 2.02,
"learning_rate": 4.6928205128205134e-05,
"loss": 0.0704,
"step": 1700
},
{
"epoch": 2.02,
"learning_rate": 4.686410256410257e-05,
"loss": 0.0638,
"step": 1725
},
{
"epoch": 2.02,
"learning_rate": 4.6800000000000006e-05,
"loss": 0.0612,
"step": 1750
},
{
"epoch": 2.02,
"learning_rate": 4.673589743589744e-05,
"loss": 0.062,
"step": 1775
},
{
"epoch": 2.02,
"learning_rate": 4.667179487179487e-05,
"loss": 0.0643,
"step": 1800
},
{
"epoch": 2.02,
"learning_rate": 4.660769230769231e-05,
"loss": 0.0608,
"step": 1825
},
{
"epoch": 2.03,
"learning_rate": 4.654358974358974e-05,
"loss": 0.0601,
"step": 1850
},
{
"epoch": 2.03,
"learning_rate": 4.647948717948718e-05,
"loss": 0.06,
"step": 1875
},
{
"epoch": 2.03,
"learning_rate": 4.6415384615384614e-05,
"loss": 0.2819,
"step": 1900
},
{
"epoch": 2.03,
"learning_rate": 4.635128205128205e-05,
"loss": 0.2377,
"step": 1925
},
{
"epoch": 2.03,
"learning_rate": 4.6287179487179486e-05,
"loss": 0.2326,
"step": 1950
},
{
"epoch": 2.03,
"learning_rate": 4.6223076923076925e-05,
"loss": 0.1679,
"step": 1975
},
{
"epoch": 3.0,
"learning_rate": 4.615897435897436e-05,
"loss": 0.1142,
"step": 2000
},
{
"epoch": 3.0,
"eval_cer": 20.872441226423526,
"eval_loss": 0.478270024061203,
"eval_runtime": 125.9954,
"eval_samples_per_second": 7.937,
"eval_steps_per_second": 0.5,
"step": 2000
},
{
"epoch": 3.0,
"learning_rate": 4.6094871794871797e-05,
"loss": 0.0557,
"step": 2025
},
{
"epoch": 3.0,
"learning_rate": 4.6030769230769236e-05,
"loss": 0.0551,
"step": 2050
},
{
"epoch": 3.0,
"learning_rate": 4.596666666666667e-05,
"loss": 0.0511,
"step": 2075
},
{
"epoch": 3.01,
"learning_rate": 4.590256410256411e-05,
"loss": 0.052,
"step": 2100
},
{
"epoch": 3.01,
"learning_rate": 4.583846153846154e-05,
"loss": 0.0462,
"step": 2125
},
{
"epoch": 3.01,
"learning_rate": 4.577435897435898e-05,
"loss": 0.0468,
"step": 2150
},
{
"epoch": 3.01,
"learning_rate": 4.5712820512820514e-05,
"loss": 0.0405,
"step": 2175
},
{
"epoch": 3.01,
"learning_rate": 4.5648717948717954e-05,
"loss": 0.0449,
"step": 2200
},
{
"epoch": 3.01,
"learning_rate": 4.5584615384615386e-05,
"loss": 0.0437,
"step": 2225
},
{
"epoch": 3.01,
"learning_rate": 4.5520512820512825e-05,
"loss": 0.0456,
"step": 2250
},
{
"epoch": 3.01,
"learning_rate": 4.545641025641026e-05,
"loss": 0.0416,
"step": 2275
},
{
"epoch": 3.02,
"learning_rate": 4.53923076923077e-05,
"loss": 0.0411,
"step": 2300
},
{
"epoch": 3.02,
"learning_rate": 4.532820512820513e-05,
"loss": 0.0363,
"step": 2325
},
{
"epoch": 3.02,
"learning_rate": 4.526410256410257e-05,
"loss": 0.0378,
"step": 2350
},
{
"epoch": 3.02,
"learning_rate": 4.52e-05,
"loss": 0.038,
"step": 2375
},
{
"epoch": 3.02,
"learning_rate": 4.513589743589744e-05,
"loss": 0.037,
"step": 2400
},
{
"epoch": 3.02,
"eval_cer": 23.688209528917493,
"eval_loss": 0.5917167067527771,
"eval_runtime": 129.3069,
"eval_samples_per_second": 7.734,
"eval_steps_per_second": 0.487,
"step": 2400
},
{
"epoch": 3.02,
"learning_rate": 4.507179487179487e-05,
"loss": 0.035,
"step": 2425
},
{
"epoch": 3.02,
"learning_rate": 4.500769230769231e-05,
"loss": 0.0372,
"step": 2450
},
{
"epoch": 3.02,
"learning_rate": 4.494358974358975e-05,
"loss": 0.04,
"step": 2475
},
{
"epoch": 3.03,
"learning_rate": 4.4879487179487183e-05,
"loss": 0.0348,
"step": 2500
},
{
"epoch": 3.03,
"learning_rate": 4.481538461538462e-05,
"loss": 0.0328,
"step": 2525
},
{
"epoch": 3.03,
"learning_rate": 4.475128205128205e-05,
"loss": 0.1175,
"step": 2550
},
{
"epoch": 3.03,
"learning_rate": 4.468717948717949e-05,
"loss": 0.1797,
"step": 2575
},
{
"epoch": 3.03,
"learning_rate": 4.462307692307692e-05,
"loss": 0.1705,
"step": 2600
},
{
"epoch": 3.03,
"learning_rate": 4.455897435897436e-05,
"loss": 0.1245,
"step": 2625
},
{
"epoch": 3.03,
"learning_rate": 4.44948717948718e-05,
"loss": 0.1061,
"step": 2650
},
{
"epoch": 4.0,
"learning_rate": 4.443076923076923e-05,
"loss": 0.0318,
"step": 2675
},
{
"epoch": 4.0,
"learning_rate": 4.436666666666667e-05,
"loss": 0.0337,
"step": 2700
},
{
"epoch": 4.0,
"learning_rate": 4.43025641025641e-05,
"loss": 0.029,
"step": 2725
},
{
"epoch": 4.0,
"learning_rate": 4.423846153846154e-05,
"loss": 0.0289,
"step": 2750
},
{
"epoch": 4.01,
"learning_rate": 4.4174358974358974e-05,
"loss": 0.0297,
"step": 2775
},
{
"epoch": 4.01,
"learning_rate": 4.411025641025641e-05,
"loss": 0.027,
"step": 2800
},
{
"epoch": 4.01,
"eval_cer": 21.936175918476803,
"eval_loss": 0.5530263185501099,
"eval_runtime": 132.1424,
"eval_samples_per_second": 7.568,
"eval_steps_per_second": 0.477,
"step": 2800
},
{
"epoch": 4.01,
"learning_rate": 4.4046153846153846e-05,
"loss": 0.0256,
"step": 2825
},
{
"epoch": 4.01,
"learning_rate": 4.3982051282051285e-05,
"loss": 0.0253,
"step": 2850
},
{
"epoch": 4.01,
"learning_rate": 4.391794871794872e-05,
"loss": 0.0247,
"step": 2875
},
{
"epoch": 4.01,
"learning_rate": 4.3853846153846156e-05,
"loss": 0.0244,
"step": 2900
},
{
"epoch": 4.01,
"learning_rate": 4.3789743589743596e-05,
"loss": 0.0253,
"step": 2925
},
{
"epoch": 4.01,
"learning_rate": 4.372564102564103e-05,
"loss": 0.0238,
"step": 2950
},
{
"epoch": 4.02,
"learning_rate": 4.366153846153847e-05,
"loss": 0.0237,
"step": 2975
},
{
"epoch": 4.02,
"learning_rate": 4.35974358974359e-05,
"loss": 0.0209,
"step": 3000
},
{
"epoch": 4.02,
"learning_rate": 4.353333333333334e-05,
"loss": 0.0236,
"step": 3025
},
{
"epoch": 4.02,
"learning_rate": 4.346923076923077e-05,
"loss": 0.0217,
"step": 3050
},
{
"epoch": 4.02,
"learning_rate": 4.340512820512821e-05,
"loss": 0.0208,
"step": 3075
},
{
"epoch": 4.02,
"learning_rate": 4.334102564102564e-05,
"loss": 0.02,
"step": 3100
},
{
"epoch": 4.02,
"learning_rate": 4.327692307692308e-05,
"loss": 0.0222,
"step": 3125
},
{
"epoch": 4.03,
"learning_rate": 4.3212820512820515e-05,
"loss": 0.0204,
"step": 3150
},
{
"epoch": 4.03,
"learning_rate": 4.314871794871795e-05,
"loss": 0.0203,
"step": 3175
},
{
"epoch": 4.03,
"learning_rate": 4.3084615384615386e-05,
"loss": 0.0199,
"step": 3200
},
{
"epoch": 4.03,
"eval_cer": 29.847144006436043,
"eval_loss": 0.6281722187995911,
"eval_runtime": 134.3999,
"eval_samples_per_second": 7.44,
"eval_steps_per_second": 0.469,
"step": 3200
},
{
"epoch": 4.03,
"learning_rate": 4.302051282051282e-05,
"loss": 0.1392,
"step": 3225
},
{
"epoch": 4.03,
"learning_rate": 4.295641025641026e-05,
"loss": 0.1092,
"step": 3250
},
{
"epoch": 4.03,
"learning_rate": 4.289230769230769e-05,
"loss": 0.1003,
"step": 3275
},
{
"epoch": 4.03,
"learning_rate": 4.282820512820513e-05,
"loss": 0.0684,
"step": 3300
},
{
"epoch": 5.0,
"learning_rate": 4.276410256410256e-05,
"loss": 0.0438,
"step": 3325
},
{
"epoch": 5.0,
"learning_rate": 4.27e-05,
"loss": 0.0163,
"step": 3350
},
{
"epoch": 5.0,
"learning_rate": 4.263589743589744e-05,
"loss": 0.0179,
"step": 3375
},
{
"epoch": 5.0,
"learning_rate": 4.257179487179487e-05,
"loss": 0.0178,
"step": 3400
},
{
"epoch": 5.01,
"learning_rate": 4.250769230769231e-05,
"loss": 0.0166,
"step": 3425
},
{
"epoch": 5.01,
"learning_rate": 4.2443589743589744e-05,
"loss": 0.0152,
"step": 3450
},
{
"epoch": 5.01,
"learning_rate": 4.2379487179487184e-05,
"loss": 0.0161,
"step": 3475
},
{
"epoch": 5.01,
"learning_rate": 4.2315384615384616e-05,
"loss": 0.0144,
"step": 3500
},
{
"epoch": 5.01,
"learning_rate": 4.2251282051282055e-05,
"loss": 0.0179,
"step": 3525
},
{
"epoch": 5.01,
"learning_rate": 4.218717948717949e-05,
"loss": 0.0154,
"step": 3550
},
{
"epoch": 5.01,
"learning_rate": 4.212307692307693e-05,
"loss": 0.0146,
"step": 3575
},
{
"epoch": 5.01,
"learning_rate": 4.205897435897436e-05,
"loss": 0.0147,
"step": 3600
},
{
"epoch": 5.01,
"eval_cer": 21.88254223652454,
"eval_loss": 0.5774866938591003,
"eval_runtime": 130.6083,
"eval_samples_per_second": 7.656,
"eval_steps_per_second": 0.482,
"step": 3600
},
{
"epoch": 5.02,
"learning_rate": 4.19948717948718e-05,
"loss": 0.0155,
"step": 3625
},
{
"epoch": 5.02,
"learning_rate": 4.193076923076924e-05,
"loss": 0.0142,
"step": 3650
},
{
"epoch": 5.02,
"learning_rate": 4.186666666666667e-05,
"loss": 0.014,
"step": 3675
},
{
"epoch": 5.02,
"learning_rate": 4.180256410256411e-05,
"loss": 0.0122,
"step": 3700
},
{
"epoch": 5.02,
"learning_rate": 4.173846153846154e-05,
"loss": 0.0119,
"step": 3725
},
{
"epoch": 5.02,
"learning_rate": 4.167435897435898e-05,
"loss": 0.0117,
"step": 3750
},
{
"epoch": 5.02,
"learning_rate": 4.161025641025641e-05,
"loss": 0.0126,
"step": 3775
},
{
"epoch": 5.02,
"learning_rate": 4.1546153846153846e-05,
"loss": 0.0141,
"step": 3800
},
{
"epoch": 5.03,
"learning_rate": 4.1482051282051285e-05,
"loss": 0.012,
"step": 3825
},
{
"epoch": 5.03,
"learning_rate": 4.141794871794872e-05,
"loss": 0.0111,
"step": 3850
},
{
"epoch": 5.03,
"learning_rate": 4.135384615384616e-05,
"loss": 0.06,
"step": 3875
},
{
"epoch": 5.03,
"learning_rate": 4.128974358974359e-05,
"loss": 0.0784,
"step": 3900
},
{
"epoch": 5.03,
"learning_rate": 4.122564102564103e-05,
"loss": 0.0748,
"step": 3925
},
{
"epoch": 5.03,
"learning_rate": 4.116153846153846e-05,
"loss": 0.0475,
"step": 3950
},
{
"epoch": 5.03,
"learning_rate": 4.10974358974359e-05,
"loss": 0.0427,
"step": 3975
},
{
"epoch": 6.0,
"learning_rate": 4.103333333333333e-05,
"loss": 0.0119,
"step": 4000
},
{
"epoch": 6.0,
"eval_cer": 20.461249664789488,
"eval_loss": 0.49904364347457886,
"eval_runtime": 131.4191,
"eval_samples_per_second": 7.609,
"eval_steps_per_second": 0.479,
"step": 4000
},
{
"epoch": 6.0,
"learning_rate": 4.096923076923077e-05,
"loss": 0.0115,
"step": 4025
},
{
"epoch": 6.0,
"learning_rate": 4.0905128205128204e-05,
"loss": 0.0107,
"step": 4050
},
{
"epoch": 6.0,
"learning_rate": 4.084102564102564e-05,
"loss": 0.0108,
"step": 4075
},
{
"epoch": 6.01,
"learning_rate": 4.077692307692308e-05,
"loss": 0.0107,
"step": 4100
},
{
"epoch": 6.01,
"learning_rate": 4.0712820512820515e-05,
"loss": 0.0091,
"step": 4125
},
{
"epoch": 6.01,
"learning_rate": 4.0648717948717954e-05,
"loss": 0.0114,
"step": 4150
},
{
"epoch": 6.01,
"learning_rate": 4.0584615384615386e-05,
"loss": 0.0102,
"step": 4175
},
{
"epoch": 6.01,
"learning_rate": 4.0520512820512826e-05,
"loss": 0.0119,
"step": 4200
},
{
"epoch": 6.01,
"learning_rate": 4.045897435897436e-05,
"loss": 0.0109,
"step": 4225
},
{
"epoch": 6.01,
"learning_rate": 4.03948717948718e-05,
"loss": 0.011,
"step": 4250
},
{
"epoch": 6.01,
"learning_rate": 4.033076923076923e-05,
"loss": 0.0098,
"step": 4275
},
{
"epoch": 6.02,
"learning_rate": 4.026666666666667e-05,
"loss": 0.0114,
"step": 4300
},
{
"epoch": 6.02,
"learning_rate": 4.0202564102564104e-05,
"loss": 0.0097,
"step": 4325
},
{
"epoch": 6.02,
"learning_rate": 4.0138461538461544e-05,
"loss": 0.0098,
"step": 4350
},
{
"epoch": 6.02,
"learning_rate": 4.0074358974358976e-05,
"loss": 0.01,
"step": 4375
},
{
"epoch": 6.02,
"learning_rate": 4.0010256410256415e-05,
"loss": 0.0088,
"step": 4400
},
{
"epoch": 6.02,
"eval_cer": 22.088138017341556,
"eval_loss": 0.5657151341438293,
"eval_runtime": 129.1878,
"eval_samples_per_second": 7.741,
"eval_steps_per_second": 0.488,
"step": 4400
},
{
"epoch": 6.02,
"learning_rate": 3.994615384615385e-05,
"loss": 0.0089,
"step": 4425
},
{
"epoch": 6.02,
"learning_rate": 3.988205128205129e-05,
"loss": 0.01,
"step": 4450
},
{
"epoch": 6.03,
"learning_rate": 3.981794871794872e-05,
"loss": 0.0089,
"step": 4475
},
{
"epoch": 6.03,
"learning_rate": 3.975384615384616e-05,
"loss": 0.0099,
"step": 4500
},
{
"epoch": 6.03,
"learning_rate": 3.96897435897436e-05,
"loss": 0.0079,
"step": 4525
},
{
"epoch": 6.03,
"learning_rate": 3.962564102564102e-05,
"loss": 0.0596,
"step": 4550
},
{
"epoch": 6.03,
"learning_rate": 3.956153846153846e-05,
"loss": 0.0518,
"step": 4575
},
{
"epoch": 6.03,
"learning_rate": 3.9497435897435895e-05,
"loss": 0.0536,
"step": 4600
},
{
"epoch": 6.03,
"learning_rate": 3.9433333333333334e-05,
"loss": 0.0309,
"step": 4625
},
{
"epoch": 7.0,
"learning_rate": 3.9369230769230767e-05,
"loss": 0.0239,
"step": 4650
},
{
"epoch": 7.0,
"learning_rate": 3.9305128205128206e-05,
"loss": 0.0096,
"step": 4675
},
{
"epoch": 7.0,
"learning_rate": 3.9241025641025645e-05,
"loss": 0.0085,
"step": 4700
},
{
"epoch": 7.0,
"learning_rate": 3.917692307692308e-05,
"loss": 0.0075,
"step": 4725
},
{
"epoch": 7.01,
"learning_rate": 3.9112820512820517e-05,
"loss": 0.0083,
"step": 4750
},
{
"epoch": 7.01,
"learning_rate": 3.904871794871795e-05,
"loss": 0.0084,
"step": 4775
},
{
"epoch": 7.01,
"learning_rate": 3.898461538461539e-05,
"loss": 0.0081,
"step": 4800
},
{
"epoch": 7.01,
"eval_cer": 20.6042728166622,
"eval_loss": 0.5471253395080566,
"eval_runtime": 129.9689,
"eval_samples_per_second": 7.694,
"eval_steps_per_second": 0.485,
"step": 4800
},
{
"epoch": 7.01,
"learning_rate": 3.892051282051282e-05,
"loss": 0.007,
"step": 4825
},
{
"epoch": 7.01,
"learning_rate": 3.885641025641026e-05,
"loss": 0.0099,
"step": 4850
},
{
"epoch": 7.01,
"learning_rate": 3.879230769230769e-05,
"loss": 0.0084,
"step": 4875
},
{
"epoch": 7.01,
"learning_rate": 3.872820512820513e-05,
"loss": 0.0087,
"step": 4900
},
{
"epoch": 7.01,
"learning_rate": 3.8664102564102564e-05,
"loss": 0.0072,
"step": 4925
},
{
"epoch": 7.02,
"learning_rate": 3.86e-05,
"loss": 0.008,
"step": 4950
},
{
"epoch": 7.02,
"learning_rate": 3.853589743589744e-05,
"loss": 0.0074,
"step": 4975
},
{
"epoch": 7.02,
"learning_rate": 3.8471794871794875e-05,
"loss": 0.0084,
"step": 5000
},
{
"epoch": 7.02,
"learning_rate": 3.8407692307692314e-05,
"loss": 0.009,
"step": 5025
},
{
"epoch": 7.02,
"learning_rate": 3.8343589743589746e-05,
"loss": 0.0078,
"step": 5050
},
{
"epoch": 7.02,
"learning_rate": 3.8279487179487186e-05,
"loss": 0.0088,
"step": 5075
},
{
"epoch": 7.02,
"learning_rate": 3.821538461538462e-05,
"loss": 0.0075,
"step": 5100
},
{
"epoch": 7.02,
"learning_rate": 3.815128205128206e-05,
"loss": 0.0076,
"step": 5125
},
{
"epoch": 7.03,
"learning_rate": 3.808717948717948e-05,
"loss": 0.0073,
"step": 5150
},
{
"epoch": 7.03,
"learning_rate": 3.802307692307692e-05,
"loss": 0.0072,
"step": 5175
},
{
"epoch": 7.03,
"learning_rate": 3.795897435897436e-05,
"loss": 0.029,
"step": 5200
},
{
"epoch": 7.03,
"eval_cer": 23.035666398498257,
"eval_loss": 0.4823973476886749,
"eval_runtime": 133.963,
"eval_samples_per_second": 7.465,
"eval_steps_per_second": 0.47,
"step": 5200
},
{
"epoch": 7.03,
"learning_rate": 3.7894871794871794e-05,
"loss": 0.0431,
"step": 5225
},
{
"epoch": 7.03,
"learning_rate": 3.783076923076923e-05,
"loss": 0.0445,
"step": 5250
},
{
"epoch": 7.03,
"learning_rate": 3.7766666666666665e-05,
"loss": 0.0228,
"step": 5275
},
{
"epoch": 7.03,
"learning_rate": 3.7702564102564105e-05,
"loss": 0.0241,
"step": 5300
},
{
"epoch": 8.0,
"learning_rate": 3.763846153846154e-05,
"loss": 0.0082,
"step": 5325
},
{
"epoch": 8.0,
"learning_rate": 3.7574358974358976e-05,
"loss": 0.0071,
"step": 5350
},
{
"epoch": 8.0,
"learning_rate": 3.751025641025641e-05,
"loss": 0.0063,
"step": 5375
},
{
"epoch": 8.01,
"learning_rate": 3.744615384615385e-05,
"loss": 0.0067,
"step": 5400
},
{
"epoch": 8.01,
"learning_rate": 3.738205128205128e-05,
"loss": 0.0066,
"step": 5425
},
{
"epoch": 8.01,
"learning_rate": 3.731794871794872e-05,
"loss": 0.0058,
"step": 5450
},
{
"epoch": 8.01,
"learning_rate": 3.725384615384616e-05,
"loss": 0.0066,
"step": 5475
},
{
"epoch": 8.01,
"learning_rate": 3.718974358974359e-05,
"loss": 0.0059,
"step": 5500
},
{
"epoch": 8.01,
"learning_rate": 3.712564102564103e-05,
"loss": 0.0076,
"step": 5525
},
{
"epoch": 8.01,
"learning_rate": 3.706153846153846e-05,
"loss": 0.0076,
"step": 5550
},
{
"epoch": 8.01,
"learning_rate": 3.69974358974359e-05,
"loss": 0.0068,
"step": 5575
},
{
"epoch": 8.02,
"learning_rate": 3.6933333333333334e-05,
"loss": 0.0051,
"step": 5600
},
{
"epoch": 8.02,
"eval_cer": 19.95172968624296,
"eval_loss": 0.5466642379760742,
"eval_runtime": 124.677,
"eval_samples_per_second": 8.021,
"eval_steps_per_second": 0.505,
"step": 5600
},
{
"epoch": 8.02,
"learning_rate": 3.6871794871794877e-05,
"loss": 0.0059,
"step": 5625
},
{
"epoch": 8.02,
"learning_rate": 3.680769230769231e-05,
"loss": 0.0052,
"step": 5650
},
{
"epoch": 8.02,
"learning_rate": 3.674358974358975e-05,
"loss": 0.0062,
"step": 5675
},
{
"epoch": 8.02,
"learning_rate": 3.667948717948718e-05,
"loss": 0.0061,
"step": 5700
},
{
"epoch": 8.02,
"learning_rate": 3.661538461538462e-05,
"loss": 0.0066,
"step": 5725
},
{
"epoch": 8.02,
"learning_rate": 3.655128205128205e-05,
"loss": 0.0053,
"step": 5750
},
{
"epoch": 8.02,
"learning_rate": 3.648717948717949e-05,
"loss": 0.0066,
"step": 5775
},
{
"epoch": 8.03,
"learning_rate": 3.6423076923076924e-05,
"loss": 0.0057,
"step": 5800
},
{
"epoch": 8.03,
"learning_rate": 3.635897435897436e-05,
"loss": 0.0074,
"step": 5825
},
{
"epoch": 8.03,
"learning_rate": 3.6294871794871795e-05,
"loss": 0.0059,
"step": 5850
},
{
"epoch": 8.03,
"learning_rate": 3.6230769230769235e-05,
"loss": 0.035,
"step": 5875
},
{
"epoch": 8.03,
"learning_rate": 3.6166666666666674e-05,
"loss": 0.037,
"step": 5900
},
{
"epoch": 8.03,
"learning_rate": 3.6102564102564106e-05,
"loss": 0.0253,
"step": 5925
},
{
"epoch": 8.03,
"learning_rate": 3.603846153846154e-05,
"loss": 0.0147,
"step": 5950
},
{
"epoch": 9.0,
"learning_rate": 3.597435897435897e-05,
"loss": 0.0148,
"step": 5975
},
{
"epoch": 9.0,
"learning_rate": 3.591025641025641e-05,
"loss": 0.0071,
"step": 6000
},
{
"epoch": 9.0,
"eval_cer": 18.557253955484043,
"eval_loss": 0.4980410635471344,
"eval_runtime": 126.109,
"eval_samples_per_second": 7.93,
"eval_steps_per_second": 0.5,
"step": 6000
},
{
"epoch": 9.0,
"learning_rate": 3.584615384615384e-05,
"loss": 0.0056,
"step": 6025
},
{
"epoch": 9.0,
"learning_rate": 3.578205128205128e-05,
"loss": 0.0049,
"step": 6050
},
{
"epoch": 9.01,
"learning_rate": 3.571794871794872e-05,
"loss": 0.0053,
"step": 6075
},
{
"epoch": 9.01,
"learning_rate": 3.5653846153846154e-05,
"loss": 0.0046,
"step": 6100
},
{
"epoch": 9.01,
"learning_rate": 3.558974358974359e-05,
"loss": 0.0047,
"step": 6125
},
{
"epoch": 9.01,
"learning_rate": 3.5525641025641025e-05,
"loss": 0.0046,
"step": 6150
},
{
"epoch": 9.01,
"learning_rate": 3.5461538461538464e-05,
"loss": 0.0063,
"step": 6175
},
{
"epoch": 9.01,
"learning_rate": 3.53974358974359e-05,
"loss": 0.0056,
"step": 6200
},
{
"epoch": 9.01,
"learning_rate": 3.5333333333333336e-05,
"loss": 0.006,
"step": 6225
},
{
"epoch": 9.01,
"learning_rate": 3.526923076923077e-05,
"loss": 0.005,
"step": 6250
},
{
"epoch": 9.02,
"learning_rate": 3.520512820512821e-05,
"loss": 0.0046,
"step": 6275
},
{
"epoch": 9.02,
"learning_rate": 3.514102564102564e-05,
"loss": 0.0047,
"step": 6300
},
{
"epoch": 9.02,
"learning_rate": 3.507692307692308e-05,
"loss": 0.0049,
"step": 6325
},
{
"epoch": 9.02,
"learning_rate": 3.501282051282052e-05,
"loss": 0.0048,
"step": 6350
},
{
"epoch": 9.02,
"learning_rate": 3.494871794871795e-05,
"loss": 0.0045,
"step": 6375
},
{
"epoch": 9.02,
"learning_rate": 3.488461538461539e-05,
"loss": 0.0046,
"step": 6400
},
{
"epoch": 9.02,
"eval_cer": 20.872441226423526,
"eval_loss": 0.5338811278343201,
"eval_runtime": 125.9373,
"eval_samples_per_second": 7.94,
"eval_steps_per_second": 0.5,
"step": 6400
},
{
"epoch": 9.02,
"learning_rate": 3.482051282051282e-05,
"loss": 0.0039,
"step": 6425
},
{
"epoch": 9.02,
"learning_rate": 3.475641025641026e-05,
"loss": 0.0042,
"step": 6450
},
{
"epoch": 9.03,
"learning_rate": 3.4692307692307694e-05,
"loss": 0.0048,
"step": 6475
},
{
"epoch": 9.03,
"learning_rate": 3.4628205128205133e-05,
"loss": 0.0064,
"step": 6500
},
{
"epoch": 9.03,
"learning_rate": 3.4564102564102566e-05,
"loss": 0.0158,
"step": 6525
},
{
"epoch": 9.03,
"learning_rate": 3.45e-05,
"loss": 0.0284,
"step": 6550
},
{
"epoch": 9.03,
"learning_rate": 3.443589743589744e-05,
"loss": 0.0272,
"step": 6575
},
{
"epoch": 9.03,
"learning_rate": 3.437179487179487e-05,
"loss": 0.0141,
"step": 6600
},
{
"epoch": 9.03,
"learning_rate": 3.430769230769231e-05,
"loss": 0.0147,
"step": 6625
},
{
"epoch": 10.0,
"learning_rate": 3.424358974358974e-05,
"loss": 0.0062,
"step": 6650
},
{
"epoch": 10.0,
"learning_rate": 3.417948717948718e-05,
"loss": 0.0068,
"step": 6675
},
{
"epoch": 10.0,
"learning_rate": 3.411538461538461e-05,
"loss": 0.0047,
"step": 6700
},
{
"epoch": 10.01,
"learning_rate": 3.405128205128205e-05,
"loss": 0.0041,
"step": 6725
},
{
"epoch": 10.01,
"learning_rate": 3.3987179487179485e-05,
"loss": 0.0041,
"step": 6750
},
{
"epoch": 10.01,
"learning_rate": 3.3923076923076924e-05,
"loss": 0.0043,
"step": 6775
},
{
"epoch": 10.01,
"learning_rate": 3.385897435897436e-05,
"loss": 0.0045,
"step": 6800
},
{
"epoch": 10.01,
"eval_cer": 18.432108697595424,
"eval_loss": 0.5061790943145752,
"eval_runtime": 125.241,
"eval_samples_per_second": 7.985,
"eval_steps_per_second": 0.503,
"step": 6800
},
{
"epoch": 10.01,
"learning_rate": 3.3794871794871796e-05,
"loss": 0.0046,
"step": 6825
},
{
"epoch": 10.01,
"learning_rate": 3.3730769230769235e-05,
"loss": 0.0052,
"step": 6850
},
{
"epoch": 10.01,
"learning_rate": 3.366666666666667e-05,
"loss": 0.0055,
"step": 6875
},
{
"epoch": 10.01,
"learning_rate": 3.3602564102564107e-05,
"loss": 0.0053,
"step": 6900
},
{
"epoch": 10.02,
"learning_rate": 3.353846153846154e-05,
"loss": 0.0049,
"step": 6925
},
{
"epoch": 10.02,
"learning_rate": 3.347435897435898e-05,
"loss": 0.0052,
"step": 6950
},
{
"epoch": 10.02,
"learning_rate": 3.341025641025641e-05,
"loss": 0.0044,
"step": 6975
},
{
"epoch": 10.02,
"learning_rate": 3.334615384615385e-05,
"loss": 0.0046,
"step": 7000
},
{
"epoch": 10.02,
"learning_rate": 3.328205128205128e-05,
"loss": 0.0044,
"step": 7025
},
{
"epoch": 10.02,
"learning_rate": 3.321794871794872e-05,
"loss": 0.0053,
"step": 7050
},
{
"epoch": 10.02,
"learning_rate": 3.315384615384616e-05,
"loss": 0.0038,
"step": 7075
},
{
"epoch": 10.02,
"learning_rate": 3.308974358974359e-05,
"loss": 0.0039,
"step": 7100
},
{
"epoch": 10.03,
"learning_rate": 3.302564102564103e-05,
"loss": 0.0049,
"step": 7125
},
{
"epoch": 10.03,
"learning_rate": 3.296153846153846e-05,
"loss": 0.0056,
"step": 7150
},
{
"epoch": 10.03,
"learning_rate": 3.28974358974359e-05,
"loss": 0.0051,
"step": 7175
},
{
"epoch": 10.03,
"learning_rate": 3.283333333333333e-05,
"loss": 0.0186,
"step": 7200
},
{
"epoch": 10.03,
"eval_cer": 18.128184499865917,
"eval_loss": 0.47741541266441345,
"eval_runtime": 124.6703,
"eval_samples_per_second": 8.021,
"eval_steps_per_second": 0.505,
"step": 7200
}
],
"logging_steps": 25,
"max_steps": 20000,
"num_train_epochs": 9223372036854775807,
"save_steps": 800,
"total_flos": 5.97730266611712e+19,
"trial_name": null,
"trial_params": null
}