TenzinGayche's picture
Training in progress, step 3100
3ff632d
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 21.08843537414966,
"global_step": 3100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.34,
"learning_rate": 2.8199999999999998e-05,
"loss": 14.6495,
"step": 50
},
{
"epoch": 0.68,
"learning_rate": 5.82e-05,
"loss": 8.9314,
"step": 100
},
{
"epoch": 0.68,
"eval_cer": 0.9940322843632806,
"eval_loss": 4.78579044342041,
"eval_runtime": 24.1442,
"eval_samples_per_second": 24.354,
"eval_steps_per_second": 3.065,
"step": 100
},
{
"epoch": 1.02,
"learning_rate": 8.819999999999999e-05,
"loss": 4.2225,
"step": 150
},
{
"epoch": 1.36,
"learning_rate": 0.0001182,
"loss": 3.2937,
"step": 200
},
{
"epoch": 1.36,
"eval_cer": 0.9940322843632806,
"eval_loss": 3.2489850521087646,
"eval_runtime": 24.3318,
"eval_samples_per_second": 24.166,
"eval_steps_per_second": 3.041,
"step": 200
},
{
"epoch": 1.7,
"learning_rate": 0.0001482,
"loss": 3.2508,
"step": 250
},
{
"epoch": 2.04,
"learning_rate": 0.00017819999999999997,
"loss": 3.2412,
"step": 300
},
{
"epoch": 2.04,
"eval_cer": 0.9940322843632806,
"eval_loss": 3.222844123840332,
"eval_runtime": 24.3579,
"eval_samples_per_second": 24.14,
"eval_steps_per_second": 3.038,
"step": 300
},
{
"epoch": 2.38,
"learning_rate": 0.00020819999999999996,
"loss": 3.1855,
"step": 350
},
{
"epoch": 2.72,
"learning_rate": 0.0002382,
"loss": 3.0985,
"step": 400
},
{
"epoch": 2.72,
"eval_cer": 0.9940322843632806,
"eval_loss": 3.1458029747009277,
"eval_runtime": 24.7673,
"eval_samples_per_second": 23.741,
"eval_steps_per_second": 2.988,
"step": 400
},
{
"epoch": 3.06,
"learning_rate": 0.00026819999999999996,
"loss": 3.0739,
"step": 450
},
{
"epoch": 3.4,
"learning_rate": 0.0002982,
"loss": 3.0209,
"step": 500
},
{
"epoch": 3.4,
"eval_cer": 0.9938692320234795,
"eval_loss": 3.004169225692749,
"eval_runtime": 24.3542,
"eval_samples_per_second": 24.144,
"eval_steps_per_second": 3.038,
"step": 500
},
{
"epoch": 3.74,
"learning_rate": 0.0002955590551181102,
"loss": 2.9536,
"step": 550
},
{
"epoch": 4.08,
"learning_rate": 0.0002908346456692913,
"loss": 2.518,
"step": 600
},
{
"epoch": 4.08,
"eval_cer": 0.5400945703570846,
"eval_loss": 1.770609736442566,
"eval_runtime": 24.8378,
"eval_samples_per_second": 23.674,
"eval_steps_per_second": 2.979,
"step": 600
},
{
"epoch": 4.42,
"learning_rate": 0.00028611023622047245,
"loss": 1.6291,
"step": 650
},
{
"epoch": 4.76,
"learning_rate": 0.0002813858267716535,
"loss": 1.2291,
"step": 700
},
{
"epoch": 4.76,
"eval_cer": 0.30572313712701776,
"eval_loss": 0.8816090226173401,
"eval_runtime": 24.4305,
"eval_samples_per_second": 24.068,
"eval_steps_per_second": 3.029,
"step": 700
},
{
"epoch": 5.1,
"learning_rate": 0.0002766614173228346,
"loss": 1.0153,
"step": 750
},
{
"epoch": 5.44,
"learning_rate": 0.0002719370078740157,
"loss": 0.8879,
"step": 800
},
{
"epoch": 5.44,
"eval_cer": 0.23668677645524214,
"eval_loss": 0.6693652868270874,
"eval_runtime": 24.5154,
"eval_samples_per_second": 23.985,
"eval_steps_per_second": 3.019,
"step": 800
},
{
"epoch": 5.78,
"learning_rate": 0.00026721259842519684,
"loss": 0.8337,
"step": 850
},
{
"epoch": 6.12,
"learning_rate": 0.00026248818897637796,
"loss": 0.7717,
"step": 900
},
{
"epoch": 6.12,
"eval_cer": 0.20955486711234306,
"eval_loss": 0.5637524127960205,
"eval_runtime": 24.5794,
"eval_samples_per_second": 23.922,
"eval_steps_per_second": 3.011,
"step": 900
},
{
"epoch": 6.46,
"learning_rate": 0.00025776377952755903,
"loss": 0.6711,
"step": 950
},
{
"epoch": 6.8,
"learning_rate": 0.00025303937007874016,
"loss": 0.7003,
"step": 1000
},
{
"epoch": 6.8,
"eval_cer": 0.19279308658079244,
"eval_loss": 0.5100580453872681,
"eval_runtime": 24.6049,
"eval_samples_per_second": 23.898,
"eval_steps_per_second": 3.008,
"step": 1000
},
{
"epoch": 7.14,
"learning_rate": 0.0002483149606299212,
"loss": 0.6349,
"step": 1050
},
{
"epoch": 7.48,
"learning_rate": 0.00024359055118110235,
"loss": 0.5935,
"step": 1100
},
{
"epoch": 7.48,
"eval_cer": 0.17746616663949127,
"eval_loss": 0.46862325072288513,
"eval_runtime": 24.2024,
"eval_samples_per_second": 24.295,
"eval_steps_per_second": 3.058,
"step": 1100
},
{
"epoch": 7.82,
"learning_rate": 0.00023886614173228342,
"loss": 0.576,
"step": 1150
},
{
"epoch": 8.16,
"learning_rate": 0.00023414173228346455,
"loss": 0.5239,
"step": 1200
},
{
"epoch": 8.16,
"eval_cer": 0.1918799934779064,
"eval_loss": 0.4271094799041748,
"eval_runtime": 24.6968,
"eval_samples_per_second": 23.809,
"eval_steps_per_second": 2.996,
"step": 1200
},
{
"epoch": 8.5,
"learning_rate": 0.00022941732283464564,
"loss": 0.5207,
"step": 1250
},
{
"epoch": 8.84,
"learning_rate": 0.00022469291338582677,
"loss": 0.4855,
"step": 1300
},
{
"epoch": 8.84,
"eval_cer": 0.17550953856187837,
"eval_loss": 0.40729257464408875,
"eval_runtime": 24.885,
"eval_samples_per_second": 23.629,
"eval_steps_per_second": 2.974,
"step": 1300
},
{
"epoch": 9.18,
"learning_rate": 0.00021996850393700784,
"loss": 0.4621,
"step": 1350
},
{
"epoch": 9.52,
"learning_rate": 0.00021524409448818896,
"loss": 0.4583,
"step": 1400
},
{
"epoch": 9.52,
"eval_cer": 0.1760313060492418,
"eval_loss": 0.41417357325553894,
"eval_runtime": 24.8425,
"eval_samples_per_second": 23.669,
"eval_steps_per_second": 2.979,
"step": 1400
},
{
"epoch": 9.86,
"learning_rate": 0.00021051968503937006,
"loss": 0.4415,
"step": 1450
},
{
"epoch": 10.2,
"learning_rate": 0.00020579527559055118,
"loss": 0.417,
"step": 1500
},
{
"epoch": 10.2,
"eval_cer": 0.17156367193869232,
"eval_loss": 0.3833578824996948,
"eval_runtime": 24.455,
"eval_samples_per_second": 24.044,
"eval_steps_per_second": 3.026,
"step": 1500
},
{
"epoch": 10.54,
"learning_rate": 0.00020107086614173225,
"loss": 0.3959,
"step": 1550
},
{
"epoch": 10.88,
"learning_rate": 0.00019634645669291338,
"loss": 0.4074,
"step": 1600
},
{
"epoch": 10.88,
"eval_cer": 0.16301972933311593,
"eval_loss": 0.3626195192337036,
"eval_runtime": 24.5184,
"eval_samples_per_second": 23.982,
"eval_steps_per_second": 3.018,
"step": 1600
},
{
"epoch": 11.22,
"learning_rate": 0.00019162204724409448,
"loss": 0.3584,
"step": 1650
},
{
"epoch": 11.56,
"learning_rate": 0.0001868976377952756,
"loss": 0.3682,
"step": 1700
},
{
"epoch": 11.56,
"eval_cer": 0.15685635088863525,
"eval_loss": 0.36927542090415955,
"eval_runtime": 24.3751,
"eval_samples_per_second": 24.123,
"eval_steps_per_second": 3.036,
"step": 1700
},
{
"epoch": 11.9,
"learning_rate": 0.00018217322834645667,
"loss": 0.3455,
"step": 1750
},
{
"epoch": 12.24,
"learning_rate": 0.00017744881889763777,
"loss": 0.3245,
"step": 1800
},
{
"epoch": 12.24,
"eval_cer": 0.15816076960704387,
"eval_loss": 0.3740461468696594,
"eval_runtime": 24.7162,
"eval_samples_per_second": 23.79,
"eval_steps_per_second": 2.994,
"step": 1800
},
{
"epoch": 12.59,
"learning_rate": 0.0001727244094488189,
"loss": 0.3208,
"step": 1850
},
{
"epoch": 12.93,
"learning_rate": 0.000168,
"loss": 0.3063,
"step": 1900
},
{
"epoch": 12.93,
"eval_cer": 0.15904125224196966,
"eval_loss": 0.3622555434703827,
"eval_runtime": 24.4729,
"eval_samples_per_second": 24.027,
"eval_steps_per_second": 3.024,
"step": 1900
},
{
"epoch": 13.27,
"learning_rate": 0.0001632755905511811,
"loss": 0.3019,
"step": 1950
},
{
"epoch": 13.61,
"learning_rate": 0.00015855118110236219,
"loss": 0.2945,
"step": 2000
},
{
"epoch": 13.61,
"eval_cer": 0.16634599706505787,
"eval_loss": 0.3725011348724365,
"eval_runtime": 25.0023,
"eval_samples_per_second": 23.518,
"eval_steps_per_second": 2.96,
"step": 2000
},
{
"epoch": 13.95,
"learning_rate": 0.0001538267716535433,
"loss": 0.279,
"step": 2050
},
{
"epoch": 14.29,
"learning_rate": 0.0001491023622047244,
"loss": 0.2674,
"step": 2100
},
{
"epoch": 14.29,
"eval_cer": 0.15731289744007826,
"eval_loss": 0.3531067371368408,
"eval_runtime": 24.8381,
"eval_samples_per_second": 23.673,
"eval_steps_per_second": 2.979,
"step": 2100
},
{
"epoch": 14.63,
"learning_rate": 0.0001443779527559055,
"loss": 0.2584,
"step": 2150
},
{
"epoch": 14.97,
"learning_rate": 0.0001396535433070866,
"loss": 0.2796,
"step": 2200
},
{
"epoch": 14.97,
"eval_cer": 0.14808413500733736,
"eval_loss": 0.3606802523136139,
"eval_runtime": 24.8151,
"eval_samples_per_second": 23.695,
"eval_steps_per_second": 2.982,
"step": 2200
},
{
"epoch": 15.31,
"learning_rate": 0.0001349291338582677,
"loss": 0.2462,
"step": 2250
},
{
"epoch": 15.65,
"learning_rate": 0.0001302047244094488,
"loss": 0.256,
"step": 2300
},
{
"epoch": 15.65,
"eval_cer": 0.15819338007500408,
"eval_loss": 0.3580550253391266,
"eval_runtime": 24.5695,
"eval_samples_per_second": 23.932,
"eval_steps_per_second": 3.012,
"step": 2300
},
{
"epoch": 15.99,
"learning_rate": 0.00012548031496062992,
"loss": 0.2524,
"step": 2350
},
{
"epoch": 16.33,
"learning_rate": 0.00012075590551181102,
"loss": 0.2219,
"step": 2400
},
{
"epoch": 16.33,
"eval_cer": 0.14801891407141693,
"eval_loss": 0.35925593972206116,
"eval_runtime": 24.982,
"eval_samples_per_second": 23.537,
"eval_steps_per_second": 2.962,
"step": 2400
},
{
"epoch": 16.67,
"learning_rate": 0.0001160314960629921,
"loss": 0.2364,
"step": 2450
},
{
"epoch": 17.01,
"learning_rate": 0.00011130708661417321,
"loss": 0.2291,
"step": 2500
},
{
"epoch": 17.01,
"eval_cer": 0.1471058209685309,
"eval_loss": 0.35567909479141235,
"eval_runtime": 24.4749,
"eval_samples_per_second": 24.025,
"eval_steps_per_second": 3.024,
"step": 2500
},
{
"epoch": 17.35,
"learning_rate": 0.00010658267716535431,
"loss": 0.2045,
"step": 2550
},
{
"epoch": 17.69,
"learning_rate": 0.00010185826771653542,
"loss": 0.2172,
"step": 2600
},
{
"epoch": 17.69,
"eval_cer": 0.14792108266753629,
"eval_loss": 0.3606509566307068,
"eval_runtime": 25.1105,
"eval_samples_per_second": 23.416,
"eval_steps_per_second": 2.947,
"step": 2600
},
{
"epoch": 18.03,
"learning_rate": 9.713385826771652e-05,
"loss": 0.2271,
"step": 2650
},
{
"epoch": 18.37,
"learning_rate": 9.240944881889763e-05,
"loss": 0.1858,
"step": 2700
},
{
"epoch": 18.37,
"eval_cer": 0.15144301320723952,
"eval_loss": 0.3589307963848114,
"eval_runtime": 24.5005,
"eval_samples_per_second": 24.0,
"eval_steps_per_second": 3.02,
"step": 2700
},
{
"epoch": 18.71,
"learning_rate": 8.768503937007873e-05,
"loss": 0.1995,
"step": 2750
},
{
"epoch": 19.05,
"learning_rate": 8.296062992125984e-05,
"loss": 0.1872,
"step": 2800
},
{
"epoch": 19.05,
"eval_cer": 0.14766019892385457,
"eval_loss": 0.36663514375686646,
"eval_runtime": 24.4383,
"eval_samples_per_second": 24.061,
"eval_steps_per_second": 3.028,
"step": 2800
},
{
"epoch": 19.39,
"learning_rate": 7.823622047244094e-05,
"loss": 0.177,
"step": 2850
},
{
"epoch": 19.73,
"learning_rate": 7.351181102362205e-05,
"loss": 0.1855,
"step": 2900
},
{
"epoch": 19.73,
"eval_cer": 0.1432577857492255,
"eval_loss": 0.3651330769062042,
"eval_runtime": 24.7826,
"eval_samples_per_second": 23.726,
"eval_steps_per_second": 2.986,
"step": 2900
},
{
"epoch": 20.07,
"learning_rate": 6.878740157480315e-05,
"loss": 0.1773,
"step": 2950
},
{
"epoch": 20.41,
"learning_rate": 6.406299212598424e-05,
"loss": 0.185,
"step": 3000
},
{
"epoch": 20.41,
"eval_cer": 0.1447578672753954,
"eval_loss": 0.36554473638534546,
"eval_runtime": 24.7061,
"eval_samples_per_second": 23.8,
"eval_steps_per_second": 2.995,
"step": 3000
},
{
"epoch": 20.75,
"learning_rate": 5.933858267716535e-05,
"loss": 0.1735,
"step": 3050
},
{
"epoch": 21.09,
"learning_rate": 5.461417322834645e-05,
"loss": 0.1599,
"step": 3100
},
{
"epoch": 21.09,
"eval_cer": 0.14867112343062122,
"eval_loss": 0.37344449758529663,
"eval_runtime": 24.8752,
"eval_samples_per_second": 23.638,
"eval_steps_per_second": 2.975,
"step": 3100
}
],
"max_steps": 3675,
"num_train_epochs": 25,
"total_flos": 1.2047450649580579e+19,
"trial_name": null,
"trial_params": null
}