wav2vec2-BERT-cantonese / trainer_state.json
alvanli
Added 16.26 version
4c428af
raw
history blame
No virus
30.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 29.930420896543858,
"eval_steps": 300,
"global_step": 24600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.37,
"learning_rate": 9.900000000000002e-06,
"loss": 19.7382,
"step": 300
},
{
"epoch": 0.37,
"eval_cer": 1.0,
"eval_loss": 6.821648120880127,
"eval_runtime": 52.3193,
"eval_samples_per_second": 42.451,
"eval_steps_per_second": 5.314,
"step": 300
},
{
"epoch": 0.73,
"learning_rate": 1.9900000000000003e-05,
"loss": 9.8181,
"step": 600
},
{
"epoch": 0.73,
"eval_cer": 1.0,
"eval_loss": 6.651111602783203,
"eval_runtime": 43.577,
"eval_samples_per_second": 50.967,
"eval_steps_per_second": 6.38,
"step": 600
},
{
"epoch": 1.1,
"learning_rate": 2.9900000000000002e-05,
"loss": 9.5191,
"step": 900
},
{
"epoch": 1.1,
"eval_cer": 0.9806448402826152,
"eval_loss": 6.58424186706543,
"eval_runtime": 43.1655,
"eval_samples_per_second": 51.453,
"eval_steps_per_second": 6.44,
"step": 900
},
{
"epoch": 1.46,
"learning_rate": 3.99e-05,
"loss": 8.6238,
"step": 1200
},
{
"epoch": 1.46,
"eval_cer": 0.9216837496268285,
"eval_loss": 6.142301082611084,
"eval_runtime": 42.9764,
"eval_samples_per_second": 51.68,
"eval_steps_per_second": 6.469,
"step": 1200
},
{
"epoch": 1.83,
"learning_rate": 4.99e-05,
"loss": 6.883,
"step": 1500
},
{
"epoch": 1.83,
"eval_cer": 0.850830928450592,
"eval_loss": 3.596842050552368,
"eval_runtime": 42.8348,
"eval_samples_per_second": 51.85,
"eval_steps_per_second": 6.49,
"step": 1500
},
{
"epoch": 2.19,
"learning_rate": 4.93579766536965e-05,
"loss": 4.0838,
"step": 1800
},
{
"epoch": 2.19,
"eval_cer": 0.48343118718280426,
"eval_loss": 2.5516390800476074,
"eval_runtime": 42.9354,
"eval_samples_per_second": 51.729,
"eval_steps_per_second": 6.475,
"step": 1800
},
{
"epoch": 2.56,
"learning_rate": 4.87094682230869e-05,
"loss": 3.167,
"step": 2100
},
{
"epoch": 2.56,
"eval_cer": 0.4450691611105583,
"eval_loss": 2.2739391326904297,
"eval_runtime": 42.8894,
"eval_samples_per_second": 51.784,
"eval_steps_per_second": 6.482,
"step": 2100
},
{
"epoch": 2.92,
"learning_rate": 4.806312148724601e-05,
"loss": 2.826,
"step": 2400
},
{
"epoch": 2.92,
"eval_cer": 0.4178525226390686,
"eval_loss": 2.0223917961120605,
"eval_runtime": 42.9677,
"eval_samples_per_second": 51.69,
"eval_steps_per_second": 6.47,
"step": 2400
},
{
"epoch": 3.29,
"learning_rate": 4.7414613056636405e-05,
"loss": 2.6955,
"step": 2700
},
{
"epoch": 3.29,
"eval_cer": 0.4174544730818987,
"eval_loss": 1.9600275754928589,
"eval_runtime": 42.8412,
"eval_samples_per_second": 51.843,
"eval_steps_per_second": 6.489,
"step": 2700
},
{
"epoch": 3.65,
"learning_rate": 4.676610462602681e-05,
"loss": 2.5812,
"step": 3000
},
{
"epoch": 3.65,
"eval_cer": 0.40994128769031746,
"eval_loss": 1.769142985343933,
"eval_runtime": 42.9361,
"eval_samples_per_second": 51.728,
"eval_steps_per_second": 6.475,
"step": 3000
},
{
"epoch": 4.02,
"learning_rate": 4.611975789018591e-05,
"loss": 2.4952,
"step": 3300
},
{
"epoch": 4.02,
"eval_cer": 0.4013832222111653,
"eval_loss": 1.8323670625686646,
"eval_runtime": 42.7115,
"eval_samples_per_second": 52.0,
"eval_steps_per_second": 6.509,
"step": 3300
},
{
"epoch": 4.38,
"learning_rate": 4.547124945957631e-05,
"loss": 2.3938,
"step": 3600
},
{
"epoch": 4.38,
"eval_cer": 0.37799781072743555,
"eval_loss": 1.7351980209350586,
"eval_runtime": 42.7436,
"eval_samples_per_second": 51.961,
"eval_steps_per_second": 6.504,
"step": 3600
},
{
"epoch": 4.75,
"learning_rate": 4.482490272373541e-05,
"loss": 2.3584,
"step": 3900
},
{
"epoch": 4.75,
"eval_cer": 0.3678475470196039,
"eval_loss": 1.64540696144104,
"eval_runtime": 42.7445,
"eval_samples_per_second": 51.96,
"eval_steps_per_second": 6.504,
"step": 3900
},
{
"epoch": 5.11,
"learning_rate": 4.417639429312581e-05,
"loss": 2.325,
"step": 4200
},
{
"epoch": 5.11,
"eval_cer": 0.38352074833316746,
"eval_loss": 1.6946874856948853,
"eval_runtime": 42.4882,
"eval_samples_per_second": 52.273,
"eval_steps_per_second": 6.543,
"step": 4200
},
{
"epoch": 5.48,
"learning_rate": 4.3527885862516214e-05,
"loss": 2.2454,
"step": 4500
},
{
"epoch": 5.48,
"eval_cer": 0.34078017713205294,
"eval_loss": 1.5765234231948853,
"eval_runtime": 42.1243,
"eval_samples_per_second": 52.725,
"eval_steps_per_second": 6.6,
"step": 4500
},
{
"epoch": 5.84,
"learning_rate": 4.287937743190661e-05,
"loss": 2.1954,
"step": 4800
},
{
"epoch": 5.84,
"eval_cer": 0.37093243108767043,
"eval_loss": 1.603211760520935,
"eval_runtime": 42.6116,
"eval_samples_per_second": 52.122,
"eval_steps_per_second": 6.524,
"step": 4800
},
{
"epoch": 6.21,
"learning_rate": 4.223086900129702e-05,
"loss": 2.1492,
"step": 5100
},
{
"epoch": 6.21,
"eval_cer": 0.3447606727037516,
"eval_loss": 1.6078611612319946,
"eval_runtime": 42.9188,
"eval_samples_per_second": 51.749,
"eval_steps_per_second": 6.477,
"step": 5100
},
{
"epoch": 6.57,
"learning_rate": 4.1582360570687426e-05,
"loss": 2.1655,
"step": 5400
},
{
"epoch": 6.57,
"eval_cer": 0.33555577669419845,
"eval_loss": 1.4955742359161377,
"eval_runtime": 42.6136,
"eval_samples_per_second": 52.12,
"eval_steps_per_second": 6.524,
"step": 5400
},
{
"epoch": 6.94,
"learning_rate": 4.093385214007782e-05,
"loss": 2.1393,
"step": 5700
},
{
"epoch": 6.94,
"eval_cer": 0.331625037317146,
"eval_loss": 1.4772567749023438,
"eval_runtime": 42.6929,
"eval_samples_per_second": 52.023,
"eval_steps_per_second": 6.512,
"step": 5700
},
{
"epoch": 7.3,
"learning_rate": 4.028534370946823e-05,
"loss": 2.1027,
"step": 6000
},
{
"epoch": 7.3,
"eval_cer": 0.341427007662454,
"eval_loss": 1.5089548826217651,
"eval_runtime": 42.7699,
"eval_samples_per_second": 51.929,
"eval_steps_per_second": 6.5,
"step": 6000
},
{
"epoch": 7.67,
"learning_rate": 3.9636835278858624e-05,
"loss": 2.0824,
"step": 6300
},
{
"epoch": 7.67,
"eval_cer": 0.34575579659667627,
"eval_loss": 1.5948169231414795,
"eval_runtime": 42.6031,
"eval_samples_per_second": 52.132,
"eval_steps_per_second": 6.525,
"step": 6300
},
{
"epoch": 8.03,
"learning_rate": 3.899048854301773e-05,
"loss": 2.061,
"step": 6600
},
{
"epoch": 8.03,
"eval_cer": 0.35058214747736094,
"eval_loss": 1.4923882484436035,
"eval_runtime": 42.6516,
"eval_samples_per_second": 52.073,
"eval_steps_per_second": 6.518,
"step": 6600
},
{
"epoch": 8.4,
"learning_rate": 3.8341980112408135e-05,
"loss": 2.0212,
"step": 6900
},
{
"epoch": 8.4,
"eval_cer": 0.33247089262613194,
"eval_loss": 1.4590569734573364,
"eval_runtime": 42.5489,
"eval_samples_per_second": 52.199,
"eval_steps_per_second": 6.534,
"step": 6900
},
{
"epoch": 8.76,
"learning_rate": 3.769347168179853e-05,
"loss": 2.0504,
"step": 7200
},
{
"epoch": 8.76,
"eval_cer": 0.3344611404119813,
"eval_loss": 1.4551000595092773,
"eval_runtime": 42.7689,
"eval_samples_per_second": 51.93,
"eval_steps_per_second": 6.5,
"step": 7200
},
{
"epoch": 9.13,
"learning_rate": 3.7044963251188936e-05,
"loss": 2.0113,
"step": 7500
},
{
"epoch": 9.13,
"eval_cer": 0.3344113842173351,
"eval_loss": 1.4068984985351562,
"eval_runtime": 42.6741,
"eval_samples_per_second": 52.046,
"eval_steps_per_second": 6.514,
"step": 7500
},
{
"epoch": 9.49,
"learning_rate": 3.639645482057933e-05,
"loss": 2.0057,
"step": 7800
},
{
"epoch": 9.49,
"eval_cer": 0.3230669718379938,
"eval_loss": 1.443265676498413,
"eval_runtime": 42.7855,
"eval_samples_per_second": 51.91,
"eval_steps_per_second": 6.498,
"step": 7800
},
{
"epoch": 9.86,
"learning_rate": 3.574794638996974e-05,
"loss": 1.9741,
"step": 8100
},
{
"epoch": 9.86,
"eval_cer": 0.3216240421932531,
"eval_loss": 1.426885724067688,
"eval_runtime": 42.8301,
"eval_samples_per_second": 51.856,
"eval_steps_per_second": 6.491,
"step": 8100
},
{
"epoch": 10.22,
"learning_rate": 3.509943795936014e-05,
"loss": 1.936,
"step": 8400
},
{
"epoch": 10.22,
"eval_cer": 0.3227186784754702,
"eval_loss": 1.3612221479415894,
"eval_runtime": 43.0542,
"eval_samples_per_second": 51.586,
"eval_steps_per_second": 6.457,
"step": 8400
},
{
"epoch": 10.59,
"learning_rate": 3.4450929528750544e-05,
"loss": 1.9416,
"step": 8700
},
{
"epoch": 10.59,
"eval_cer": 0.3027664444223306,
"eval_loss": 1.363100290298462,
"eval_runtime": 43.1271,
"eval_samples_per_second": 51.499,
"eval_steps_per_second": 6.446,
"step": 8700
},
{
"epoch": 10.95,
"learning_rate": 3.380242109814095e-05,
"loss": 1.9425,
"step": 9000
},
{
"epoch": 10.95,
"eval_cer": 0.3038610807045477,
"eval_loss": 1.3716000318527222,
"eval_runtime": 43.286,
"eval_samples_per_second": 51.31,
"eval_steps_per_second": 6.422,
"step": 9000
},
{
"epoch": 11.32,
"learning_rate": 3.3153912667531345e-05,
"loss": 1.9351,
"step": 9300
},
{
"epoch": 11.32,
"eval_cer": 0.31490695591601153,
"eval_loss": 1.3932286500930786,
"eval_runtime": 43.3029,
"eval_samples_per_second": 51.29,
"eval_steps_per_second": 6.42,
"step": 9300
},
{
"epoch": 11.68,
"learning_rate": 3.250540423692175e-05,
"loss": 1.9046,
"step": 9600
},
{
"epoch": 11.68,
"eval_cer": 0.3329684545725943,
"eval_loss": 1.4470584392547607,
"eval_runtime": 42.9923,
"eval_samples_per_second": 51.66,
"eval_steps_per_second": 6.466,
"step": 9600
},
{
"epoch": 12.05,
"learning_rate": 3.185905750108085e-05,
"loss": 1.8587,
"step": 9900
},
{
"epoch": 12.05,
"eval_cer": 0.3056523037118121,
"eval_loss": 1.3519924879074097,
"eval_runtime": 42.7004,
"eval_samples_per_second": 52.014,
"eval_steps_per_second": 6.51,
"step": 9900
},
{
"epoch": 12.41,
"learning_rate": 3.1210549070471253e-05,
"loss": 1.8699,
"step": 10200
},
{
"epoch": 12.41,
"eval_cer": 0.3289879590008956,
"eval_loss": 1.4434651136398315,
"eval_runtime": 42.904,
"eval_samples_per_second": 51.767,
"eval_steps_per_second": 6.48,
"step": 10200
},
{
"epoch": 12.78,
"learning_rate": 3.056204063986166e-05,
"loss": 1.8328,
"step": 10500
},
{
"epoch": 12.78,
"eval_cer": 0.31356353866056325,
"eval_loss": 1.361649751663208,
"eval_runtime": 42.7673,
"eval_samples_per_second": 51.932,
"eval_steps_per_second": 6.5,
"step": 10500
},
{
"epoch": 13.14,
"learning_rate": 2.9913532209252054e-05,
"loss": 1.8136,
"step": 10800
},
{
"epoch": 13.14,
"eval_cer": 0.2943078913324709,
"eval_loss": 1.3512203693389893,
"eval_runtime": 42.5723,
"eval_samples_per_second": 52.17,
"eval_steps_per_second": 6.53,
"step": 10800
},
{
"epoch": 13.51,
"learning_rate": 2.9265023778642458e-05,
"loss": 1.8099,
"step": 11100
},
{
"epoch": 13.51,
"eval_cer": 0.2956513085879192,
"eval_loss": 1.3534834384918213,
"eval_runtime": 42.854,
"eval_samples_per_second": 51.827,
"eval_steps_per_second": 6.487,
"step": 11100
},
{
"epoch": 13.87,
"learning_rate": 2.861651534803286e-05,
"loss": 1.8021,
"step": 11400
},
{
"epoch": 13.87,
"eval_cer": 0.2981888745148771,
"eval_loss": 1.3732918500900269,
"eval_runtime": 42.5792,
"eval_samples_per_second": 52.162,
"eval_steps_per_second": 6.529,
"step": 11400
},
{
"epoch": 14.24,
"learning_rate": 2.7968006917423263e-05,
"loss": 1.7809,
"step": 11700
},
{
"epoch": 14.24,
"eval_cer": 0.30804060105483133,
"eval_loss": 1.3088232278823853,
"eval_runtime": 42.9072,
"eval_samples_per_second": 51.763,
"eval_steps_per_second": 6.479,
"step": 11700
},
{
"epoch": 14.6,
"learning_rate": 2.731949848681366e-05,
"loss": 1.7734,
"step": 12000
},
{
"epoch": 14.6,
"eval_cer": 0.28858592894815405,
"eval_loss": 1.320089340209961,
"eval_runtime": 42.6671,
"eval_samples_per_second": 52.054,
"eval_steps_per_second": 6.516,
"step": 12000
},
{
"epoch": 14.97,
"learning_rate": 2.6670990056204063e-05,
"loss": 1.7646,
"step": 12300
},
{
"epoch": 14.97,
"eval_cer": 0.3268981988257538,
"eval_loss": 1.3471167087554932,
"eval_runtime": 42.7924,
"eval_samples_per_second": 51.902,
"eval_steps_per_second": 6.496,
"step": 12300
},
{
"epoch": 15.33,
"learning_rate": 2.602248162559447e-05,
"loss": 1.733,
"step": 12600
},
{
"epoch": 15.33,
"eval_cer": 0.30321425017414666,
"eval_loss": 1.3437916040420532,
"eval_runtime": 42.7066,
"eval_samples_per_second": 52.006,
"eval_steps_per_second": 6.51,
"step": 12600
},
{
"epoch": 15.7,
"learning_rate": 2.5373973194984868e-05,
"loss": 1.7182,
"step": 12900
},
{
"epoch": 15.7,
"eval_cer": 0.2999800975221415,
"eval_loss": 1.3310909271240234,
"eval_runtime": 42.787,
"eval_samples_per_second": 51.908,
"eval_steps_per_second": 6.497,
"step": 12900
},
{
"epoch": 16.06,
"learning_rate": 2.472546476437527e-05,
"loss": 1.7071,
"step": 13200
},
{
"epoch": 16.06,
"eval_cer": 0.3073937705244303,
"eval_loss": 1.2641910314559937,
"eval_runtime": 42.6973,
"eval_samples_per_second": 52.017,
"eval_steps_per_second": 6.511,
"step": 13200
},
{
"epoch": 16.43,
"learning_rate": 2.4076956333765675e-05,
"loss": 1.7196,
"step": 13500
},
{
"epoch": 16.43,
"eval_cer": 0.2859488506319037,
"eval_loss": 1.2662409543991089,
"eval_runtime": 42.6819,
"eval_samples_per_second": 52.036,
"eval_steps_per_second": 6.513,
"step": 13500
},
{
"epoch": 16.79,
"learning_rate": 2.3428447903156076e-05,
"loss": 1.7264,
"step": 13800
},
{
"epoch": 16.79,
"eval_cer": 0.2878893422231068,
"eval_loss": 1.2460156679153442,
"eval_runtime": 42.7771,
"eval_samples_per_second": 51.92,
"eval_steps_per_second": 6.499,
"step": 13800
},
{
"epoch": 17.16,
"learning_rate": 2.2782101167315176e-05,
"loss": 1.6875,
"step": 14100
},
{
"epoch": 17.16,
"eval_cer": 0.2931137426609613,
"eval_loss": 1.3022774457931519,
"eval_runtime": 42.5345,
"eval_samples_per_second": 52.216,
"eval_steps_per_second": 6.536,
"step": 14100
},
{
"epoch": 17.52,
"learning_rate": 2.2133592736705577e-05,
"loss": 1.6659,
"step": 14400
},
{
"epoch": 17.52,
"eval_cer": 0.2927654492984377,
"eval_loss": 1.32107675075531,
"eval_runtime": 42.821,
"eval_samples_per_second": 51.867,
"eval_steps_per_second": 6.492,
"step": 14400
},
{
"epoch": 17.89,
"learning_rate": 2.148508430609598e-05,
"loss": 1.6694,
"step": 14700
},
{
"epoch": 17.89,
"eval_cer": 0.2882873917802766,
"eval_loss": 1.3291140794754028,
"eval_runtime": 42.7715,
"eval_samples_per_second": 51.927,
"eval_steps_per_second": 6.5,
"step": 14700
},
{
"epoch": 18.25,
"learning_rate": 2.0836575875486384e-05,
"loss": 1.643,
"step": 15000
},
{
"epoch": 18.25,
"eval_cer": 0.294755697084287,
"eval_loss": 1.2615532875061035,
"eval_runtime": 42.8646,
"eval_samples_per_second": 51.814,
"eval_steps_per_second": 6.486,
"step": 15000
},
{
"epoch": 18.62,
"learning_rate": 2.0188067444876785e-05,
"loss": 1.676,
"step": 15300
},
{
"epoch": 18.62,
"eval_cer": 0.2835107970942382,
"eval_loss": 1.2185758352279663,
"eval_runtime": 42.7823,
"eval_samples_per_second": 51.914,
"eval_steps_per_second": 6.498,
"step": 15300
},
{
"epoch": 18.98,
"learning_rate": 1.9539559014267185e-05,
"loss": 1.6397,
"step": 15600
},
{
"epoch": 18.98,
"eval_cer": 0.2810727435565728,
"eval_loss": 1.3059513568878174,
"eval_runtime": 42.9668,
"eval_samples_per_second": 51.691,
"eval_steps_per_second": 6.47,
"step": 15600
},
{
"epoch": 19.35,
"learning_rate": 1.8893212278426286e-05,
"loss": 1.6347,
"step": 15900
},
{
"epoch": 19.35,
"eval_cer": 0.28838690416956914,
"eval_loss": 1.2377227544784546,
"eval_runtime": 42.8094,
"eval_samples_per_second": 51.881,
"eval_steps_per_second": 6.494,
"step": 15900
},
{
"epoch": 19.71,
"learning_rate": 1.824470384781669e-05,
"loss": 1.6328,
"step": 16200
},
{
"epoch": 19.71,
"eval_cer": 0.27599761170265696,
"eval_loss": 1.2721112966537476,
"eval_runtime": 42.6795,
"eval_samples_per_second": 52.039,
"eval_steps_per_second": 6.514,
"step": 16200
},
{
"epoch": 20.08,
"learning_rate": 1.7596195417207094e-05,
"loss": 1.6092,
"step": 16500
},
{
"epoch": 20.08,
"eval_cer": 0.28231664842272863,
"eval_loss": 1.2696741819381714,
"eval_runtime": 42.6768,
"eval_samples_per_second": 52.042,
"eval_steps_per_second": 6.514,
"step": 16500
},
{
"epoch": 20.44,
"learning_rate": 1.6947686986597494e-05,
"loss": 1.5737,
"step": 16800
},
{
"epoch": 20.44,
"eval_cer": 0.28306299134242213,
"eval_loss": 1.2230887413024902,
"eval_runtime": 42.9425,
"eval_samples_per_second": 51.72,
"eval_steps_per_second": 6.474,
"step": 16800
},
{
"epoch": 20.81,
"learning_rate": 1.6299178555987894e-05,
"loss": 1.6166,
"step": 17100
},
{
"epoch": 20.81,
"eval_cer": 0.2663449099412877,
"eval_loss": 1.2277541160583496,
"eval_runtime": 42.813,
"eval_samples_per_second": 51.877,
"eval_steps_per_second": 6.493,
"step": 17100
},
{
"epoch": 21.17,
"learning_rate": 1.56506701253783e-05,
"loss": 1.5964,
"step": 17400
},
{
"epoch": 21.17,
"eval_cer": 0.27355955816499156,
"eval_loss": 1.2313120365142822,
"eval_runtime": 42.7309,
"eval_samples_per_second": 51.976,
"eval_steps_per_second": 6.506,
"step": 17400
},
{
"epoch": 21.54,
"learning_rate": 1.5002161694768699e-05,
"loss": 1.5237,
"step": 17700
},
{
"epoch": 21.54,
"eval_cer": 0.27863469001890734,
"eval_loss": 1.2411593198776245,
"eval_runtime": 42.9368,
"eval_samples_per_second": 51.727,
"eval_steps_per_second": 6.475,
"step": 17700
},
{
"epoch": 21.9,
"learning_rate": 1.4353653264159101e-05,
"loss": 1.5419,
"step": 18000
},
{
"epoch": 21.9,
"eval_cer": 0.28634690018907355,
"eval_loss": 1.2718561887741089,
"eval_runtime": 42.5781,
"eval_samples_per_second": 52.163,
"eval_steps_per_second": 6.529,
"step": 18000
},
{
"epoch": 22.27,
"learning_rate": 1.3705144833549505e-05,
"loss": 1.5654,
"step": 18300
},
{
"epoch": 22.27,
"eval_cer": 0.26734003383421234,
"eval_loss": 1.2373576164245605,
"eval_runtime": 42.5574,
"eval_samples_per_second": 52.188,
"eval_steps_per_second": 6.532,
"step": 18300
},
{
"epoch": 22.63,
"learning_rate": 1.3060959792477304e-05,
"loss": 1.5331,
"step": 18600
},
{
"epoch": 22.63,
"eval_cer": 0.2708727236540949,
"eval_loss": 1.197614073753357,
"eval_runtime": 42.6921,
"eval_samples_per_second": 52.024,
"eval_steps_per_second": 6.512,
"step": 18600
},
{
"epoch": 23.0,
"learning_rate": 1.2412451361867706e-05,
"loss": 1.5378,
"step": 18900
},
{
"epoch": 23.0,
"eval_cer": 0.26838491392178326,
"eval_loss": 1.1672557592391968,
"eval_runtime": 42.9497,
"eval_samples_per_second": 51.712,
"eval_steps_per_second": 6.473,
"step": 18900
},
{
"epoch": 23.36,
"learning_rate": 1.1763942931258106e-05,
"loss": 1.4972,
"step": 19200
},
{
"epoch": 23.36,
"eval_cer": 0.26938003781470793,
"eval_loss": 1.1548832654953003,
"eval_runtime": 42.5425,
"eval_samples_per_second": 52.207,
"eval_steps_per_second": 6.535,
"step": 19200
},
{
"epoch": 23.73,
"learning_rate": 1.1115434500648508e-05,
"loss": 1.5112,
"step": 19500
},
{
"epoch": 23.73,
"eval_cer": 0.2684844263110757,
"eval_loss": 1.2580962181091309,
"eval_runtime": 43.0178,
"eval_samples_per_second": 51.63,
"eval_steps_per_second": 6.462,
"step": 19500
},
{
"epoch": 24.09,
"learning_rate": 1.046692607003891e-05,
"loss": 1.5026,
"step": 19800
},
{
"epoch": 24.09,
"eval_cer": 0.26957906259329284,
"eval_loss": 1.2475780248641968,
"eval_runtime": 42.8521,
"eval_samples_per_second": 51.829,
"eval_steps_per_second": 6.487,
"step": 19800
},
{
"epoch": 24.46,
"learning_rate": 9.818417639429313e-06,
"loss": 1.5062,
"step": 20100
},
{
"epoch": 24.46,
"eval_cer": 0.2755995621454871,
"eval_loss": 1.2111254930496216,
"eval_runtime": 42.9059,
"eval_samples_per_second": 51.764,
"eval_steps_per_second": 6.479,
"step": 20100
},
{
"epoch": 24.82,
"learning_rate": 9.169909208819715e-06,
"loss": 1.4816,
"step": 20400
},
{
"epoch": 24.82,
"eval_cer": 0.26345905065180614,
"eval_loss": 1.2007070779800415,
"eval_runtime": 42.6871,
"eval_samples_per_second": 52.03,
"eval_steps_per_second": 6.513,
"step": 20400
},
{
"epoch": 25.19,
"learning_rate": 8.521400778210117e-06,
"loss": 1.4836,
"step": 20700
},
{
"epoch": 25.19,
"eval_cer": 0.27435565727933126,
"eval_loss": 1.2548900842666626,
"eval_runtime": 42.6925,
"eval_samples_per_second": 52.023,
"eval_steps_per_second": 6.512,
"step": 20700
},
{
"epoch": 25.55,
"learning_rate": 7.87289234760052e-06,
"loss": 1.479,
"step": 21000
},
{
"epoch": 25.55,
"eval_cer": 0.26987759976117026,
"eval_loss": 1.1535056829452515,
"eval_runtime": 42.5922,
"eval_samples_per_second": 52.146,
"eval_steps_per_second": 6.527,
"step": 21000
},
{
"epoch": 25.92,
"learning_rate": 7.2243839169909205e-06,
"loss": 1.493,
"step": 21300
},
{
"epoch": 25.92,
"eval_cer": 0.26972833117723155,
"eval_loss": 1.198728084564209,
"eval_runtime": 42.6034,
"eval_samples_per_second": 52.132,
"eval_steps_per_second": 6.525,
"step": 21300
},
{
"epoch": 26.28,
"learning_rate": 6.5758754863813235e-06,
"loss": 1.4524,
"step": 21600
},
{
"epoch": 26.28,
"eval_cer": 0.27470395064185493,
"eval_loss": 1.2245545387268066,
"eval_runtime": 42.9242,
"eval_samples_per_second": 51.742,
"eval_steps_per_second": 6.477,
"step": 21600
},
{
"epoch": 26.65,
"learning_rate": 5.927367055771725e-06,
"loss": 1.4569,
"step": 21900
},
{
"epoch": 26.65,
"eval_cer": 0.2605234351676784,
"eval_loss": 1.1879122257232666,
"eval_runtime": 42.634,
"eval_samples_per_second": 52.095,
"eval_steps_per_second": 6.521,
"step": 21900
},
{
"epoch": 27.01,
"learning_rate": 5.278858625162128e-06,
"loss": 1.4535,
"step": 22200
},
{
"epoch": 27.01,
"eval_cer": 0.27281321524529806,
"eval_loss": 1.2265853881835938,
"eval_runtime": 42.7816,
"eval_samples_per_second": 51.915,
"eval_steps_per_second": 6.498,
"step": 22200
},
{
"epoch": 27.38,
"learning_rate": 4.63035019455253e-06,
"loss": 1.4452,
"step": 22500
},
{
"epoch": 27.38,
"eval_cer": 0.2566424519852722,
"eval_loss": 1.1812487840652466,
"eval_runtime": 42.5285,
"eval_samples_per_second": 52.224,
"eval_steps_per_second": 6.537,
"step": 22500
},
{
"epoch": 27.74,
"learning_rate": 3.981841763942931e-06,
"loss": 1.4513,
"step": 22800
},
{
"epoch": 27.74,
"eval_cer": 0.26286197631605135,
"eval_loss": 1.1672886610031128,
"eval_runtime": 43.0029,
"eval_samples_per_second": 51.648,
"eval_steps_per_second": 6.465,
"step": 22800
},
{
"epoch": 28.11,
"learning_rate": 3.3333333333333333e-06,
"loss": 1.4561,
"step": 23100
},
{
"epoch": 28.11,
"eval_cer": 0.26181709622848043,
"eval_loss": 1.1963270902633667,
"eval_runtime": 42.7473,
"eval_samples_per_second": 51.957,
"eval_steps_per_second": 6.503,
"step": 23100
},
{
"epoch": 28.47,
"learning_rate": 2.6848249027237355e-06,
"loss": 1.4357,
"step": 23400
},
{
"epoch": 28.47,
"eval_cer": 0.26913125684147676,
"eval_loss": 1.201293706893921,
"eval_runtime": 42.6759,
"eval_samples_per_second": 52.043,
"eval_steps_per_second": 6.514,
"step": 23400
},
{
"epoch": 28.84,
"learning_rate": 2.0363164721141376e-06,
"loss": 1.4427,
"step": 23700
},
{
"epoch": 28.84,
"eval_cer": 0.2726141904667131,
"eval_loss": 1.2448346614837646,
"eval_runtime": 42.4761,
"eval_samples_per_second": 52.288,
"eval_steps_per_second": 6.545,
"step": 23700
},
{
"epoch": 29.2,
"learning_rate": 1.3899697362732382e-06,
"loss": 1.4171,
"step": 24000
},
{
"epoch": 29.2,
"eval_cer": 0.26684247188775,
"eval_loss": 1.2063277959823608,
"eval_runtime": 42.7033,
"eval_samples_per_second": 52.01,
"eval_steps_per_second": 6.51,
"step": 24000
},
{
"epoch": 29.57,
"learning_rate": 7.414613056636403e-07,
"loss": 1.4639,
"step": 24300
},
{
"epoch": 29.57,
"eval_cer": 0.26694198427704247,
"eval_loss": 1.2228556871414185,
"eval_runtime": 42.5785,
"eval_samples_per_second": 52.162,
"eval_steps_per_second": 6.529,
"step": 24300
},
{
"epoch": 29.93,
"learning_rate": 9.295287505404236e-08,
"loss": 1.4234,
"step": 24600
},
{
"epoch": 29.93,
"eval_cer": 0.2594785550801075,
"eval_loss": 1.1955249309539795,
"eval_runtime": 43.02,
"eval_samples_per_second": 51.627,
"eval_steps_per_second": 6.462,
"step": 24600
}
],
"logging_steps": 300,
"max_steps": 24630,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 600,
"total_flos": 3.9184197928838064e+20,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}