wav2vec2-xlsr-53-ft-btb-ccv-cy / trainer_state.json
DewiBrynJones's picture
End of training
93d2d61 verified
raw
history blame
No virus
15.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.739938080495356,
"eval_steps": 100,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.15479876160990713,
"eval_loss": 3.5586698055267334,
"eval_runtime": 163.4718,
"eval_samples_per_second": 34.599,
"eval_steps_per_second": 4.325,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 0.30959752321981426,
"eval_loss": 3.2505505084991455,
"eval_runtime": 161.1897,
"eval_samples_per_second": 35.089,
"eval_steps_per_second": 4.386,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 0.46439628482972134,
"eval_loss": 2.774010181427002,
"eval_runtime": 161.9005,
"eval_samples_per_second": 34.935,
"eval_steps_per_second": 4.367,
"eval_wer": 0.9999518544077289,
"step": 300
},
{
"epoch": 0.6191950464396285,
"eval_loss": 1.11959707736969,
"eval_runtime": 162.1425,
"eval_samples_per_second": 34.883,
"eval_steps_per_second": 4.36,
"eval_wer": 0.7807449727977404,
"step": 400
},
{
"epoch": 0.7739938080495357,
"grad_norm": 2.676631212234497,
"learning_rate": 0.00029699999999999996,
"loss": 3.6484,
"step": 500
},
{
"epoch": 0.7739938080495357,
"eval_loss": 0.9134386777877808,
"eval_runtime": 162.7792,
"eval_samples_per_second": 34.746,
"eval_steps_per_second": 4.343,
"eval_wer": 0.6538813371635827,
"step": 500
},
{
"epoch": 0.9287925696594427,
"eval_loss": 0.7674785256385803,
"eval_runtime": 162.7057,
"eval_samples_per_second": 34.762,
"eval_steps_per_second": 4.345,
"eval_wer": 0.5923191731796954,
"step": 600
},
{
"epoch": 1.08359133126935,
"eval_loss": 0.7207810282707214,
"eval_runtime": 163.9828,
"eval_samples_per_second": 34.491,
"eval_steps_per_second": 4.311,
"eval_wer": 0.5289595737510231,
"step": 700
},
{
"epoch": 1.238390092879257,
"eval_loss": 0.6209472417831421,
"eval_runtime": 163.6218,
"eval_samples_per_second": 34.568,
"eval_steps_per_second": 4.321,
"eval_wer": 0.4744748118309769,
"step": 800
},
{
"epoch": 1.3931888544891642,
"eval_loss": 0.6220189332962036,
"eval_runtime": 162.6081,
"eval_samples_per_second": 34.783,
"eval_steps_per_second": 4.348,
"eval_wer": 0.47879186660461237,
"step": 900
},
{
"epoch": 1.5479876160990713,
"grad_norm": 0.3950090706348419,
"learning_rate": 0.0002672,
"loss": 0.6286,
"step": 1000
},
{
"epoch": 1.5479876160990713,
"eval_loss": 0.5738953351974487,
"eval_runtime": 162.8483,
"eval_samples_per_second": 34.732,
"eval_steps_per_second": 4.341,
"eval_wer": 0.4588114458121359,
"step": 1000
},
{
"epoch": 1.7027863777089784,
"eval_loss": 0.564153790473938,
"eval_runtime": 164.0752,
"eval_samples_per_second": 34.472,
"eval_steps_per_second": 4.309,
"eval_wer": 0.4262008313138932,
"step": 1100
},
{
"epoch": 1.8575851393188856,
"eval_loss": 0.5511888265609741,
"eval_runtime": 164.5846,
"eval_samples_per_second": 34.365,
"eval_steps_per_second": 4.296,
"eval_wer": 0.42080852497953813,
"step": 1200
},
{
"epoch": 2.0123839009287927,
"eval_loss": 0.527522623538971,
"eval_runtime": 162.1572,
"eval_samples_per_second": 34.88,
"eval_steps_per_second": 4.36,
"eval_wer": 0.38652886328256647,
"step": 1300
},
{
"epoch": 2.1671826625387,
"eval_loss": 0.4955059587955475,
"eval_runtime": 160.6015,
"eval_samples_per_second": 35.218,
"eval_steps_per_second": 4.402,
"eval_wer": 0.37545537706023013,
"step": 1400
},
{
"epoch": 2.321981424148607,
"grad_norm": 0.4153783619403839,
"learning_rate": 0.00023393333333333332,
"loss": 0.4816,
"step": 1500
},
{
"epoch": 2.321981424148607,
"eval_loss": 0.4908938407897949,
"eval_runtime": 160.2948,
"eval_samples_per_second": 35.285,
"eval_steps_per_second": 4.411,
"eval_wer": 0.37325672834651985,
"step": 1500
},
{
"epoch": 2.476780185758514,
"eval_loss": 0.4982919991016388,
"eval_runtime": 162.1379,
"eval_samples_per_second": 34.884,
"eval_steps_per_second": 4.36,
"eval_wer": 0.37279132095456663,
"step": 1600
},
{
"epoch": 2.6315789473684212,
"eval_loss": 0.48909762501716614,
"eval_runtime": 161.7559,
"eval_samples_per_second": 34.966,
"eval_steps_per_second": 4.371,
"eval_wer": 0.36550528799088444,
"step": 1700
},
{
"epoch": 2.7863777089783284,
"eval_loss": 0.47961312532424927,
"eval_runtime": 160.2886,
"eval_samples_per_second": 35.286,
"eval_steps_per_second": 4.411,
"eval_wer": 0.3570958578742116,
"step": 1800
},
{
"epoch": 2.9411764705882355,
"eval_loss": 0.46432051062583923,
"eval_runtime": 162.1185,
"eval_samples_per_second": 34.888,
"eval_steps_per_second": 4.361,
"eval_wer": 0.3591982154033798,
"step": 1900
},
{
"epoch": 3.0959752321981426,
"grad_norm": 0.8931769728660583,
"learning_rate": 0.00020079999999999997,
"loss": 0.4017,
"step": 2000
},
{
"epoch": 3.0959752321981426,
"eval_loss": 0.5084750652313232,
"eval_runtime": 162.3398,
"eval_samples_per_second": 34.841,
"eval_steps_per_second": 4.355,
"eval_wer": 0.3697902457030059,
"step": 2000
},
{
"epoch": 3.2507739938080498,
"eval_loss": 0.6755269169807434,
"eval_runtime": 163.0508,
"eval_samples_per_second": 34.689,
"eval_steps_per_second": 4.336,
"eval_wer": 0.4530018776780986,
"step": 2100
},
{
"epoch": 3.405572755417957,
"eval_loss": 0.710012674331665,
"eval_runtime": 161.8251,
"eval_samples_per_second": 34.951,
"eval_steps_per_second": 4.369,
"eval_wer": 0.5108247339956027,
"step": 2200
},
{
"epoch": 3.560371517027864,
"eval_loss": 0.8310704231262207,
"eval_runtime": 162.1264,
"eval_samples_per_second": 34.886,
"eval_steps_per_second": 4.361,
"eval_wer": 0.5642984384779574,
"step": 2300
},
{
"epoch": 3.715170278637771,
"eval_loss": 0.7031980156898499,
"eval_runtime": 166.0393,
"eval_samples_per_second": 34.064,
"eval_steps_per_second": 4.258,
"eval_wer": 0.5028807112708832,
"step": 2400
},
{
"epoch": 3.8699690402476783,
"grad_norm": 4.817399024963379,
"learning_rate": 0.00016766666666666666,
"loss": 0.6839,
"step": 2500
},
{
"epoch": 3.8699690402476783,
"eval_loss": 0.7070674896240234,
"eval_runtime": 164.0683,
"eval_samples_per_second": 34.473,
"eval_steps_per_second": 4.309,
"eval_wer": 0.5006660140264159,
"step": 2500
},
{
"epoch": 4.024767801857585,
"eval_loss": 0.8223607540130615,
"eval_runtime": 164.3114,
"eval_samples_per_second": 34.422,
"eval_steps_per_second": 4.303,
"eval_wer": 0.5069409895524065,
"step": 2600
},
{
"epoch": 4.179566563467493,
"eval_loss": 0.8343736529350281,
"eval_runtime": 163.8342,
"eval_samples_per_second": 34.523,
"eval_steps_per_second": 4.315,
"eval_wer": 0.5162491373914718,
"step": 2700
},
{
"epoch": 4.3343653250774,
"eval_loss": 0.9089197516441345,
"eval_runtime": 162.2031,
"eval_samples_per_second": 34.87,
"eval_steps_per_second": 4.359,
"eval_wer": 0.5620195471104621,
"step": 2800
},
{
"epoch": 4.489164086687307,
"eval_loss": 0.9664539098739624,
"eval_runtime": 163.3849,
"eval_samples_per_second": 34.618,
"eval_steps_per_second": 4.327,
"eval_wer": 0.5640095649243312,
"step": 2900
},
{
"epoch": 4.643962848297214,
"grad_norm": 7.185929775238037,
"learning_rate": 0.00013446666666666666,
"loss": 0.8292,
"step": 3000
},
{
"epoch": 4.643962848297214,
"eval_loss": 0.9127740859985352,
"eval_runtime": 162.7094,
"eval_samples_per_second": 34.761,
"eval_steps_per_second": 4.345,
"eval_wer": 0.5414774277414902,
"step": 3000
},
{
"epoch": 4.798761609907121,
"eval_loss": 1.1924536228179932,
"eval_runtime": 161.8545,
"eval_samples_per_second": 34.945,
"eval_steps_per_second": 4.368,
"eval_wer": 0.5938598321323683,
"step": 3100
},
{
"epoch": 4.953560371517028,
"eval_loss": 1.4327375888824463,
"eval_runtime": 167.5589,
"eval_samples_per_second": 33.755,
"eval_steps_per_second": 4.219,
"eval_wer": 0.6999406204361991,
"step": 3200
},
{
"epoch": 5.108359133126935,
"eval_loss": 1.2741221189498901,
"eval_runtime": 166.0465,
"eval_samples_per_second": 34.063,
"eval_steps_per_second": 4.258,
"eval_wer": 0.7826707964885815,
"step": 3300
},
{
"epoch": 5.2631578947368425,
"eval_loss": 1.9348175525665283,
"eval_runtime": 163.4154,
"eval_samples_per_second": 34.611,
"eval_steps_per_second": 4.326,
"eval_wer": 0.8741795188650479,
"step": 3400
},
{
"epoch": 5.41795665634675,
"grad_norm": 0.6057500839233398,
"learning_rate": 0.00010126666666666666,
"loss": 1.4131,
"step": 3500
},
{
"epoch": 5.41795665634675,
"eval_loss": 1.9216177463531494,
"eval_runtime": 161.6687,
"eval_samples_per_second": 34.985,
"eval_steps_per_second": 4.373,
"eval_wer": 0.9870167386175795,
"step": 3500
},
{
"epoch": 5.572755417956657,
"eval_loss": 1.8565247058868408,
"eval_runtime": 164.0672,
"eval_samples_per_second": 34.474,
"eval_steps_per_second": 4.309,
"eval_wer": 0.9367045946943557,
"step": 3600
},
{
"epoch": 5.727554179566564,
"eval_loss": 1.7827845811843872,
"eval_runtime": 163.0587,
"eval_samples_per_second": 34.687,
"eval_steps_per_second": 4.336,
"eval_wer": 0.8240278602493941,
"step": 3700
},
{
"epoch": 5.882352941176471,
"eval_loss": 1.6846531629562378,
"eval_runtime": 162.8654,
"eval_samples_per_second": 34.728,
"eval_steps_per_second": 4.341,
"eval_wer": 0.8059090690247308,
"step": 3800
},
{
"epoch": 6.037151702786378,
"eval_loss": 1.6440324783325195,
"eval_runtime": 163.0292,
"eval_samples_per_second": 34.693,
"eval_steps_per_second": 4.337,
"eval_wer": 0.7983823080996935,
"step": 3900
},
{
"epoch": 6.191950464396285,
"grad_norm": 0.5905019640922546,
"learning_rate": 6.806666666666666e-05,
"loss": 1.7728,
"step": 4000
},
{
"epoch": 6.191950464396285,
"eval_loss": 1.6765395402908325,
"eval_runtime": 166.4929,
"eval_samples_per_second": 33.971,
"eval_steps_per_second": 4.246,
"eval_wer": 0.8053313219174785,
"step": 4000
},
{
"epoch": 6.346749226006192,
"eval_loss": 1.6733070611953735,
"eval_runtime": 163.1125,
"eval_samples_per_second": 34.675,
"eval_steps_per_second": 4.334,
"eval_wer": 0.8024265378504598,
"step": 4100
},
{
"epoch": 6.5015479876160995,
"eval_loss": 1.6601324081420898,
"eval_runtime": 163.6995,
"eval_samples_per_second": 34.551,
"eval_steps_per_second": 4.319,
"eval_wer": 0.7899568294522636,
"step": 4200
},
{
"epoch": 6.656346749226007,
"eval_loss": 1.6604827642440796,
"eval_runtime": 162.8495,
"eval_samples_per_second": 34.731,
"eval_steps_per_second": 4.341,
"eval_wer": 0.7972910080082168,
"step": 4300
},
{
"epoch": 6.811145510835914,
"eval_loss": 1.6598834991455078,
"eval_runtime": 163.8109,
"eval_samples_per_second": 34.528,
"eval_steps_per_second": 4.316,
"eval_wer": 0.7804560992441142,
"step": 4400
},
{
"epoch": 6.965944272445821,
"grad_norm": 0.9840993881225586,
"learning_rate": 3.493333333333333e-05,
"loss": 1.6777,
"step": 4500
},
{
"epoch": 6.965944272445821,
"eval_loss": 1.635949730873108,
"eval_runtime": 163.8566,
"eval_samples_per_second": 34.518,
"eval_steps_per_second": 4.315,
"eval_wer": 0.7693184188987499,
"step": 4500
},
{
"epoch": 7.120743034055727,
"eval_loss": 1.6399564743041992,
"eval_runtime": 164.6612,
"eval_samples_per_second": 34.349,
"eval_steps_per_second": 4.294,
"eval_wer": 0.7651137038404134,
"step": 4600
},
{
"epoch": 7.275541795665634,
"eval_loss": 1.6759086847305298,
"eval_runtime": 161.5503,
"eval_samples_per_second": 35.011,
"eval_steps_per_second": 4.376,
"eval_wer": 0.7672481584310956,
"step": 4700
},
{
"epoch": 7.430340557275541,
"eval_loss": 1.6848710775375366,
"eval_runtime": 162.0136,
"eval_samples_per_second": 34.911,
"eval_steps_per_second": 4.364,
"eval_wer": 0.7686283320761984,
"step": 4800
},
{
"epoch": 7.585139318885449,
"eval_loss": 1.6857717037200928,
"eval_runtime": 161.3723,
"eval_samples_per_second": 35.049,
"eval_steps_per_second": 4.381,
"eval_wer": 0.7689974482836096,
"step": 4900
},
{
"epoch": 7.739938080495356,
"grad_norm": 1.753746747970581,
"learning_rate": 1.8e-06,
"loss": 1.683,
"step": 5000
},
{
"epoch": 7.739938080495356,
"eval_loss": 1.6955511569976807,
"eval_runtime": 160.8529,
"eval_samples_per_second": 35.163,
"eval_steps_per_second": 4.395,
"eval_wer": 0.7701529424981143,
"step": 5000
},
{
"epoch": 7.739938080495356,
"step": 5000,
"total_flos": 4.528229347801944e+19,
"train_loss": 1.3220068420410156,
"train_runtime": 20543.6538,
"train_samples_per_second": 15.577,
"train_steps_per_second": 0.243
}
],
"logging_steps": 500,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 400,
"total_flos": 4.528229347801944e+19,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}