xls-r-uyghur-cv7 / .ipynb_checkpoints /trainer_state-checkpoint.json
lucio's picture
Training in progress, step 500
20d6d7c
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 50.0,
"global_step": 9150,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.55,
"learning_rate": 3.7125e-06,
"loss": 14.9109,
"step": 100
},
{
"epoch": 1.09,
"learning_rate": 7.4625e-06,
"loss": 7.5771,
"step": 200
},
{
"epoch": 1.64,
"learning_rate": 1.1212499999999998e-05,
"loss": 4.939,
"step": 300
},
{
"epoch": 2.19,
"learning_rate": 1.49625e-05,
"loss": 4.1873,
"step": 400
},
{
"epoch": 2.73,
"learning_rate": 1.8712499999999997e-05,
"loss": 3.6863,
"step": 500
},
{
"epoch": 2.73,
"eval_loss": 3.5362327098846436,
"eval_runtime": 132.4771,
"eval_samples_per_second": 19.777,
"eval_steps_per_second": 2.476,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 3.28,
"learning_rate": 2.2462499999999997e-05,
"loss": 3.3718,
"step": 600
},
{
"epoch": 3.83,
"learning_rate": 2.6212499999999997e-05,
"loss": 3.2608,
"step": 700
},
{
"epoch": 4.37,
"learning_rate": 2.99625e-05,
"loss": 3.2251,
"step": 800
},
{
"epoch": 4.92,
"learning_rate": 3.37125e-05,
"loss": 3.1741,
"step": 900
},
{
"epoch": 5.46,
"learning_rate": 3.7462499999999996e-05,
"loss": 3.1409,
"step": 1000
},
{
"epoch": 5.46,
"eval_loss": 3.132805347442627,
"eval_runtime": 132.2993,
"eval_samples_per_second": 19.804,
"eval_steps_per_second": 2.479,
"eval_wer": 1.0,
"step": 1000
},
{
"epoch": 6.01,
"learning_rate": 4.12125e-05,
"loss": 3.1047,
"step": 1100
},
{
"epoch": 6.56,
"learning_rate": 4.4962499999999995e-05,
"loss": 2.8994,
"step": 1200
},
{
"epoch": 7.1,
"learning_rate": 4.871249999999999e-05,
"loss": 2.5085,
"step": 1300
},
{
"epoch": 7.65,
"learning_rate": 5.2462499999999994e-05,
"loss": 2.1499,
"step": 1400
},
{
"epoch": 8.2,
"learning_rate": 5.6175e-05,
"loss": 1.8979,
"step": 1500
},
{
"epoch": 8.2,
"eval_loss": 0.9715010523796082,
"eval_runtime": 131.829,
"eval_samples_per_second": 19.874,
"eval_steps_per_second": 2.488,
"eval_wer": 0.8863658371259805,
"step": 1500
},
{
"epoch": 8.74,
"learning_rate": 5.9925e-05,
"loss": 1.7702,
"step": 1600
},
{
"epoch": 9.29,
"learning_rate": 6.367499999999999e-05,
"loss": 1.6541,
"step": 1700
},
{
"epoch": 9.84,
"learning_rate": 6.738749999999999e-05,
"loss": 1.6072,
"step": 1800
},
{
"epoch": 10.38,
"learning_rate": 7.11375e-05,
"loss": 1.5188,
"step": 1900
},
{
"epoch": 10.93,
"learning_rate": 7.48875e-05,
"loss": 1.4859,
"step": 2000
},
{
"epoch": 10.93,
"eval_loss": 0.5233765244483948,
"eval_runtime": 130.5844,
"eval_samples_per_second": 20.064,
"eval_steps_per_second": 2.512,
"eval_wer": 0.7062554468868016,
"step": 2000
},
{
"epoch": 11.48,
"learning_rate": 7.398251748251747e-05,
"loss": 1.4386,
"step": 2100
},
{
"epoch": 12.02,
"learning_rate": 7.293356643356642e-05,
"loss": 1.4333,
"step": 2200
},
{
"epoch": 12.57,
"learning_rate": 7.188461538461538e-05,
"loss": 1.3923,
"step": 2300
},
{
"epoch": 13.11,
"learning_rate": 7.083566433566433e-05,
"loss": 1.3623,
"step": 2400
},
{
"epoch": 13.66,
"learning_rate": 6.978671328671328e-05,
"loss": 1.3388,
"step": 2500
},
{
"epoch": 13.66,
"eval_loss": 0.4093627333641052,
"eval_runtime": 131.4899,
"eval_samples_per_second": 19.925,
"eval_steps_per_second": 2.494,
"eval_wer": 0.6203156773506343,
"step": 2500
},
{
"epoch": 14.21,
"learning_rate": 6.873776223776224e-05,
"loss": 1.3338,
"step": 2600
},
{
"epoch": 14.75,
"learning_rate": 6.768881118881118e-05,
"loss": 1.3039,
"step": 2700
},
{
"epoch": 15.3,
"learning_rate": 6.663986013986013e-05,
"loss": 1.2935,
"step": 2800
},
{
"epoch": 15.85,
"learning_rate": 6.559090909090908e-05,
"loss": 1.2818,
"step": 2900
},
{
"epoch": 16.39,
"learning_rate": 6.454195804195804e-05,
"loss": 1.2531,
"step": 3000
},
{
"epoch": 16.39,
"eval_loss": 0.3596232533454895,
"eval_runtime": 131.7209,
"eval_samples_per_second": 19.891,
"eval_steps_per_second": 2.49,
"eval_wer": 0.5184952067396146,
"step": 3000
},
{
"epoch": 16.94,
"learning_rate": 6.349300699300699e-05,
"loss": 1.2531,
"step": 3100
},
{
"epoch": 17.49,
"learning_rate": 6.244405594405593e-05,
"loss": 1.2282,
"step": 3200
},
{
"epoch": 18.03,
"learning_rate": 6.139510489510488e-05,
"loss": 1.2088,
"step": 3300
},
{
"epoch": 18.58,
"learning_rate": 6.034615384615384e-05,
"loss": 1.2041,
"step": 3400
},
{
"epoch": 19.13,
"learning_rate": 5.9297202797202796e-05,
"loss": 1.1992,
"step": 3500
},
{
"epoch": 19.13,
"eval_loss": 0.32211676239967346,
"eval_runtime": 130.9541,
"eval_samples_per_second": 20.007,
"eval_steps_per_second": 2.505,
"eval_wer": 0.4854265517575288,
"step": 3500
},
{
"epoch": 19.67,
"learning_rate": 5.8248251748251737e-05,
"loss": 1.1776,
"step": 3600
},
{
"epoch": 20.22,
"learning_rate": 5.719930069930069e-05,
"loss": 1.1815,
"step": 3700
},
{
"epoch": 20.77,
"learning_rate": 5.6160839160839156e-05,
"loss": 1.1624,
"step": 3800
},
{
"epoch": 21.31,
"learning_rate": 5.511188811188811e-05,
"loss": 1.1576,
"step": 3900
},
{
"epoch": 21.86,
"learning_rate": 5.4062937062937064e-05,
"loss": 1.1589,
"step": 4000
},
{
"epoch": 21.86,
"eval_loss": 0.30402758717536926,
"eval_runtime": 130.3078,
"eval_samples_per_second": 20.106,
"eval_steps_per_second": 2.517,
"eval_wer": 0.4610244988864143,
"step": 4000
},
{
"epoch": 22.4,
"learning_rate": 5.3013986013986004e-05,
"loss": 1.1705,
"step": 4100
},
{
"epoch": 22.95,
"learning_rate": 5.196503496503496e-05,
"loss": 1.1486,
"step": 4200
},
{
"epoch": 23.5,
"learning_rate": 5.091608391608391e-05,
"loss": 1.1266,
"step": 4300
},
{
"epoch": 24.04,
"learning_rate": 4.9867132867132866e-05,
"loss": 1.1348,
"step": 4400
},
{
"epoch": 24.59,
"learning_rate": 4.8818181818181813e-05,
"loss": 1.1345,
"step": 4500
},
{
"epoch": 24.59,
"eval_loss": 0.2906973361968994,
"eval_runtime": 132.6128,
"eval_samples_per_second": 19.757,
"eval_steps_per_second": 2.473,
"eval_wer": 0.4449501307252832,
"step": 4500
},
{
"epoch": 25.14,
"learning_rate": 4.776923076923077e-05,
"loss": 1.108,
"step": 4600
},
{
"epoch": 25.68,
"learning_rate": 4.672027972027972e-05,
"loss": 1.1114,
"step": 4700
},
{
"epoch": 26.23,
"learning_rate": 4.567132867132866e-05,
"loss": 1.1071,
"step": 4800
},
{
"epoch": 26.78,
"learning_rate": 4.4622377622377616e-05,
"loss": 1.1009,
"step": 4900
},
{
"epoch": 27.32,
"learning_rate": 4.357342657342657e-05,
"loss": 1.086,
"step": 5000
},
{
"epoch": 27.32,
"eval_loss": 0.27438458800315857,
"eval_runtime": 131.0041,
"eval_samples_per_second": 19.999,
"eval_steps_per_second": 2.504,
"eval_wer": 0.4298925147671153,
"step": 5000
},
{
"epoch": 27.87,
"learning_rate": 4.2524475524475524e-05,
"loss": 1.0843,
"step": 5100
},
{
"epoch": 28.42,
"learning_rate": 4.147552447552447e-05,
"loss": 1.0761,
"step": 5200
},
{
"epoch": 28.96,
"learning_rate": 4.0426573426573425e-05,
"loss": 1.0779,
"step": 5300
},
{
"epoch": 29.51,
"learning_rate": 3.937762237762237e-05,
"loss": 1.074,
"step": 5400
},
{
"epoch": 30.05,
"learning_rate": 3.832867132867133e-05,
"loss": 1.0697,
"step": 5500
},
{
"epoch": 30.05,
"eval_loss": 0.2617148756980896,
"eval_runtime": 131.2229,
"eval_samples_per_second": 19.966,
"eval_steps_per_second": 2.5,
"eval_wer": 0.4147864820373777,
"step": 5500
},
{
"epoch": 30.6,
"learning_rate": 3.727972027972028e-05,
"loss": 1.0599,
"step": 5600
},
{
"epoch": 31.15,
"learning_rate": 3.623076923076923e-05,
"loss": 1.0666,
"step": 5700
},
{
"epoch": 31.69,
"learning_rate": 3.518181818181818e-05,
"loss": 1.0574,
"step": 5800
},
{
"epoch": 32.24,
"learning_rate": 3.413286713286713e-05,
"loss": 1.0534,
"step": 5900
},
{
"epoch": 32.79,
"learning_rate": 3.308391608391608e-05,
"loss": 1.0518,
"step": 6000
},
{
"epoch": 32.79,
"eval_loss": 0.25627899169921875,
"eval_runtime": 130.5236,
"eval_samples_per_second": 20.073,
"eval_steps_per_second": 2.513,
"eval_wer": 0.4033117071753655,
"step": 6000
},
{
"epoch": 33.33,
"learning_rate": 3.204545454545454e-05,
"loss": 1.0304,
"step": 6100
},
{
"epoch": 33.88,
"learning_rate": 3.0996503496503495e-05,
"loss": 1.0336,
"step": 6200
},
{
"epoch": 34.43,
"learning_rate": 2.9947552447552443e-05,
"loss": 1.0289,
"step": 6300
},
{
"epoch": 34.97,
"learning_rate": 2.8898601398601397e-05,
"loss": 1.027,
"step": 6400
},
{
"epoch": 35.52,
"learning_rate": 2.7849650349650347e-05,
"loss": 1.0101,
"step": 6500
},
{
"epoch": 35.52,
"eval_loss": 0.24797162413597107,
"eval_runtime": 130.8457,
"eval_samples_per_second": 20.024,
"eval_steps_per_second": 2.507,
"eval_wer": 0.3934346857751525,
"step": 6500
},
{
"epoch": 36.07,
"learning_rate": 2.6800699300699298e-05,
"loss": 1.0278,
"step": 6600
},
{
"epoch": 36.61,
"learning_rate": 2.575174825174825e-05,
"loss": 0.9947,
"step": 6700
},
{
"epoch": 37.16,
"learning_rate": 2.4702797202797203e-05,
"loss": 1.0011,
"step": 6800
},
{
"epoch": 37.7,
"learning_rate": 2.365384615384615e-05,
"loss": 1.0041,
"step": 6900
},
{
"epoch": 38.25,
"learning_rate": 2.2604895104895104e-05,
"loss": 1.0013,
"step": 7000
},
{
"epoch": 38.25,
"eval_loss": 0.2412029653787613,
"eval_runtime": 130.9316,
"eval_samples_per_second": 20.01,
"eval_steps_per_second": 2.505,
"eval_wer": 0.38549433523772636,
"step": 7000
},
{
"epoch": 38.8,
"learning_rate": 2.1555944055944055e-05,
"loss": 0.9881,
"step": 7100
},
{
"epoch": 39.34,
"learning_rate": 2.0506993006993005e-05,
"loss": 0.9897,
"step": 7200
},
{
"epoch": 39.89,
"learning_rate": 1.9458041958041956e-05,
"loss": 0.9853,
"step": 7300
},
{
"epoch": 40.44,
"learning_rate": 1.8409090909090907e-05,
"loss": 0.9873,
"step": 7400
},
{
"epoch": 40.98,
"learning_rate": 1.7360139860139857e-05,
"loss": 0.9845,
"step": 7500
},
{
"epoch": 40.98,
"eval_loss": 0.23970751464366913,
"eval_runtime": 131.9142,
"eval_samples_per_second": 19.861,
"eval_steps_per_second": 2.486,
"eval_wer": 0.37711823375617315,
"step": 7500
},
{
"epoch": 41.53,
"learning_rate": 1.631118881118881e-05,
"loss": 0.9802,
"step": 7600
},
{
"epoch": 42.08,
"learning_rate": 1.5262237762237762e-05,
"loss": 0.9856,
"step": 7700
},
{
"epoch": 42.62,
"learning_rate": 1.4213286713286713e-05,
"loss": 0.9738,
"step": 7800
},
{
"epoch": 43.17,
"learning_rate": 1.3164335664335663e-05,
"loss": 0.9703,
"step": 7900
},
{
"epoch": 43.72,
"learning_rate": 1.2115384615384615e-05,
"loss": 0.9739,
"step": 8000
},
{
"epoch": 43.72,
"eval_loss": 0.2302592694759369,
"eval_runtime": 132.2355,
"eval_samples_per_second": 19.813,
"eval_steps_per_second": 2.48,
"eval_wer": 0.3726154740001937,
"step": 8000
},
{
"epoch": 44.26,
"learning_rate": 1.1066433566433566e-05,
"loss": 0.9687,
"step": 8100
},
{
"epoch": 44.81,
"learning_rate": 1.0038461538461537e-05,
"loss": 0.9697,
"step": 8200
},
{
"epoch": 45.36,
"learning_rate": 8.98951048951049e-06,
"loss": 0.9671,
"step": 8300
},
{
"epoch": 45.9,
"learning_rate": 7.94055944055944e-06,
"loss": 0.9578,
"step": 8400
},
{
"epoch": 46.45,
"learning_rate": 6.891608391608391e-06,
"loss": 0.9636,
"step": 8500
},
{
"epoch": 46.45,
"eval_loss": 0.22845281660556793,
"eval_runtime": 132.3979,
"eval_samples_per_second": 19.789,
"eval_steps_per_second": 2.477,
"eval_wer": 0.36869371550305025,
"step": 8500
},
{
"epoch": 46.99,
"learning_rate": 5.842657342657342e-06,
"loss": 0.955,
"step": 8600
},
{
"epoch": 47.54,
"learning_rate": 4.7937062937062936e-06,
"loss": 0.9495,
"step": 8700
},
{
"epoch": 48.09,
"learning_rate": 3.7447552447552446e-06,
"loss": 0.9612,
"step": 8800
},
{
"epoch": 48.63,
"learning_rate": 2.6958041958041957e-06,
"loss": 0.9415,
"step": 8900
},
{
"epoch": 49.18,
"learning_rate": 1.6468531468531468e-06,
"loss": 0.9466,
"step": 9000
},
{
"epoch": 49.18,
"eval_loss": 0.2261359691619873,
"eval_runtime": 131.8096,
"eval_samples_per_second": 19.877,
"eval_steps_per_second": 2.488,
"eval_wer": 0.36477195700590687,
"step": 9000
},
{
"epoch": 49.73,
"learning_rate": 5.979020979020978e-07,
"loss": 0.9533,
"step": 9100
},
{
"epoch": 50.0,
"step": 9150,
"total_flos": 5.14420411350113e+19,
"train_loss": 1.6428977303426773,
"train_runtime": 23684.6778,
"train_samples_per_second": 12.36,
"train_steps_per_second": 0.386
}
],
"max_steps": 9150,
"num_train_epochs": 50,
"total_flos": 5.14420411350113e+19,
"trial_name": null,
"trial_params": null
}