infinitejoy's picture
End of training
458e515
raw
history blame
13.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"global_step": 7900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.27,
"learning_rate": 1.3719999999999999e-05,
"loss": 10.7972,
"step": 100
},
{
"epoch": 2.53,
"learning_rate": 2.772e-05,
"loss": 4.2661,
"step": 200
},
{
"epoch": 3.8,
"learning_rate": 4.1719999999999994e-05,
"loss": 3.3145,
"step": 300
},
{
"epoch": 5.06,
"learning_rate": 5.5719999999999995e-05,
"loss": 3.039,
"step": 400
},
{
"epoch": 6.33,
"learning_rate": 6.971999999999999e-05,
"loss": 2.9774,
"step": 500
},
{
"epoch": 6.33,
"eval_loss": 2.9768528938293457,
"eval_runtime": 42.649,
"eval_samples_per_second": 22.345,
"eval_steps_per_second": 0.703,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 7.59,
"learning_rate": 6.907297297297297e-05,
"loss": 2.9216,
"step": 600
},
{
"epoch": 8.86,
"learning_rate": 6.812702702702703e-05,
"loss": 2.453,
"step": 700
},
{
"epoch": 10.13,
"learning_rate": 6.718108108108108e-05,
"loss": 1.7073,
"step": 800
},
{
"epoch": 11.39,
"learning_rate": 6.623513513513513e-05,
"loss": 1.4457,
"step": 900
},
{
"epoch": 12.66,
"learning_rate": 6.528918918918918e-05,
"loss": 1.3453,
"step": 1000
},
{
"epoch": 12.66,
"eval_loss": 0.6522914171218872,
"eval_runtime": 42.9729,
"eval_samples_per_second": 22.177,
"eval_steps_per_second": 0.698,
"eval_wer": 0.6979675258317247,
"step": 1000
},
{
"epoch": 13.92,
"learning_rate": 6.434324324324325e-05,
"loss": 1.2867,
"step": 1100
},
{
"epoch": 15.19,
"learning_rate": 6.339729729729728e-05,
"loss": 1.2507,
"step": 1200
},
{
"epoch": 16.46,
"learning_rate": 6.245135135135135e-05,
"loss": 1.21,
"step": 1300
},
{
"epoch": 17.72,
"learning_rate": 6.15054054054054e-05,
"loss": 1.1863,
"step": 1400
},
{
"epoch": 18.99,
"learning_rate": 6.055945945945946e-05,
"loss": 1.1658,
"step": 1500
},
{
"epoch": 18.99,
"eval_loss": 0.5635793805122375,
"eval_runtime": 43.1337,
"eval_samples_per_second": 22.094,
"eval_steps_per_second": 0.696,
"eval_wer": 0.6358578403542636,
"step": 1500
},
{
"epoch": 20.25,
"learning_rate": 5.9613513513513504e-05,
"loss": 1.1476,
"step": 1600
},
{
"epoch": 21.52,
"learning_rate": 5.866756756756756e-05,
"loss": 1.1346,
"step": 1700
},
{
"epoch": 22.78,
"learning_rate": 5.772162162162162e-05,
"loss": 1.1074,
"step": 1800
},
{
"epoch": 24.05,
"learning_rate": 5.677567567567567e-05,
"loss": 1.0892,
"step": 1900
},
{
"epoch": 25.32,
"learning_rate": 5.5829729729729724e-05,
"loss": 1.0797,
"step": 2000
},
{
"epoch": 25.32,
"eval_loss": 0.5003558993339539,
"eval_runtime": 43.4309,
"eval_samples_per_second": 21.943,
"eval_steps_per_second": 0.691,
"eval_wer": 0.5759055296922903,
"step": 2000
},
{
"epoch": 26.58,
"learning_rate": 5.489324324324324e-05,
"loss": 1.0695,
"step": 2100
},
{
"epoch": 27.85,
"learning_rate": 5.394729729729729e-05,
"loss": 1.065,
"step": 2200
},
{
"epoch": 29.11,
"learning_rate": 5.300135135135134e-05,
"loss": 1.0534,
"step": 2300
},
{
"epoch": 30.38,
"learning_rate": 5.20554054054054e-05,
"loss": 1.0323,
"step": 2400
},
{
"epoch": 31.65,
"learning_rate": 5.110945945945946e-05,
"loss": 1.044,
"step": 2500
},
{
"epoch": 31.65,
"eval_loss": 0.49577832221984863,
"eval_runtime": 42.5512,
"eval_samples_per_second": 22.397,
"eval_steps_per_second": 0.705,
"eval_wer": 0.5569433405245827,
"step": 2500
},
{
"epoch": 32.91,
"learning_rate": 5.0163513513513505e-05,
"loss": 1.0187,
"step": 2600
},
{
"epoch": 34.18,
"learning_rate": 4.921756756756756e-05,
"loss": 1.0094,
"step": 2700
},
{
"epoch": 35.44,
"learning_rate": 4.8271621621621615e-05,
"loss": 1.0002,
"step": 2800
},
{
"epoch": 36.71,
"learning_rate": 4.732567567567567e-05,
"loss": 0.9978,
"step": 2900
},
{
"epoch": 37.97,
"learning_rate": 4.6379729729729725e-05,
"loss": 0.9915,
"step": 3000
},
{
"epoch": 37.97,
"eval_loss": 0.49712273478507996,
"eval_runtime": 41.1197,
"eval_samples_per_second": 23.176,
"eval_steps_per_second": 0.73,
"eval_wer": 0.5350289542409447,
"step": 3000
},
{
"epoch": 39.24,
"learning_rate": 4.5433783783783776e-05,
"loss": 0.9786,
"step": 3100
},
{
"epoch": 40.51,
"learning_rate": 4.4487837837837834e-05,
"loss": 0.9819,
"step": 3200
},
{
"epoch": 41.77,
"learning_rate": 4.354189189189189e-05,
"loss": 0.9735,
"step": 3300
},
{
"epoch": 43.04,
"learning_rate": 4.259594594594594e-05,
"loss": 0.9624,
"step": 3400
},
{
"epoch": 44.3,
"learning_rate": 4.1649999999999996e-05,
"loss": 0.9429,
"step": 3500
},
{
"epoch": 44.3,
"eval_loss": 0.48288407921791077,
"eval_runtime": 41.6865,
"eval_samples_per_second": 22.861,
"eval_steps_per_second": 0.72,
"eval_wer": 0.5228795276484615,
"step": 3500
},
{
"epoch": 45.57,
"learning_rate": 4.070405405405405e-05,
"loss": 0.9448,
"step": 3600
},
{
"epoch": 46.84,
"learning_rate": 3.9758108108108106e-05,
"loss": 0.9383,
"step": 3700
},
{
"epoch": 48.1,
"learning_rate": 3.8812162162162164e-05,
"loss": 0.939,
"step": 3800
},
{
"epoch": 49.37,
"learning_rate": 3.786621621621621e-05,
"loss": 0.9322,
"step": 3900
},
{
"epoch": 50.63,
"learning_rate": 3.692027027027027e-05,
"loss": 0.9266,
"step": 4000
},
{
"epoch": 50.63,
"eval_loss": 0.4515039622783661,
"eval_runtime": 41.6834,
"eval_samples_per_second": 22.863,
"eval_steps_per_second": 0.72,
"eval_wer": 0.5074372658112865,
"step": 4000
},
{
"epoch": 51.9,
"learning_rate": 3.5974324324324326e-05,
"loss": 0.9147,
"step": 4100
},
{
"epoch": 53.16,
"learning_rate": 3.502837837837838e-05,
"loss": 0.9129,
"step": 4200
},
{
"epoch": 54.43,
"learning_rate": 3.4091891891891893e-05,
"loss": 0.9071,
"step": 4300
},
{
"epoch": 55.7,
"learning_rate": 3.3145945945945945e-05,
"loss": 0.9098,
"step": 4400
},
{
"epoch": 56.96,
"learning_rate": 3.220945945945946e-05,
"loss": 0.8965,
"step": 4500
},
{
"epoch": 56.96,
"eval_loss": 0.45992422103881836,
"eval_runtime": 41.9829,
"eval_samples_per_second": 22.7,
"eval_steps_per_second": 0.715,
"eval_wer": 0.5039173384807539,
"step": 4500
},
{
"epoch": 58.23,
"learning_rate": 3.126351351351351e-05,
"loss": 0.8992,
"step": 4600
},
{
"epoch": 59.49,
"learning_rate": 3.0327027027027026e-05,
"loss": 0.9006,
"step": 4700
},
{
"epoch": 60.76,
"learning_rate": 2.9381081081081077e-05,
"loss": 0.8931,
"step": 4800
},
{
"epoch": 62.03,
"learning_rate": 2.8435135135135132e-05,
"loss": 0.8846,
"step": 4900
},
{
"epoch": 63.29,
"learning_rate": 2.7489189189189187e-05,
"loss": 0.878,
"step": 5000
},
{
"epoch": 63.29,
"eval_loss": 0.4735390543937683,
"eval_runtime": 41.3368,
"eval_samples_per_second": 23.054,
"eval_steps_per_second": 0.726,
"eval_wer": 0.4954013852617236,
"step": 5000
},
{
"epoch": 64.56,
"learning_rate": 2.6543243243243242e-05,
"loss": 0.8696,
"step": 5100
},
{
"epoch": 65.82,
"learning_rate": 2.5597297297297294e-05,
"loss": 0.8756,
"step": 5200
},
{
"epoch": 67.09,
"learning_rate": 2.465135135135135e-05,
"loss": 0.8588,
"step": 5300
},
{
"epoch": 68.35,
"learning_rate": 2.3705405405405404e-05,
"loss": 0.8616,
"step": 5400
},
{
"epoch": 69.62,
"learning_rate": 2.275945945945946e-05,
"loss": 0.8494,
"step": 5500
},
{
"epoch": 69.62,
"eval_loss": 0.44596442580223083,
"eval_runtime": 41.8864,
"eval_samples_per_second": 22.752,
"eval_steps_per_second": 0.716,
"eval_wer": 0.4877938003860565,
"step": 5500
},
{
"epoch": 70.89,
"learning_rate": 2.181351351351351e-05,
"loss": 0.8535,
"step": 5600
},
{
"epoch": 72.15,
"learning_rate": 2.0867567567567565e-05,
"loss": 0.8511,
"step": 5700
},
{
"epoch": 73.42,
"learning_rate": 1.992162162162162e-05,
"loss": 0.8436,
"step": 5800
},
{
"epoch": 74.68,
"learning_rate": 1.8975675675675675e-05,
"loss": 0.8527,
"step": 5900
},
{
"epoch": 75.95,
"learning_rate": 1.8029729729729727e-05,
"loss": 0.8343,
"step": 6000
},
{
"epoch": 75.95,
"eval_loss": 0.4510088264942169,
"eval_runtime": 41.5079,
"eval_samples_per_second": 22.959,
"eval_steps_per_second": 0.723,
"eval_wer": 0.479504939252867,
"step": 6000
},
{
"epoch": 77.22,
"learning_rate": 1.708378378378378e-05,
"loss": 0.8474,
"step": 6100
},
{
"epoch": 78.48,
"learning_rate": 1.6137837837837837e-05,
"loss": 0.83,
"step": 6200
},
{
"epoch": 79.75,
"learning_rate": 1.5191891891891891e-05,
"loss": 0.8168,
"step": 6300
},
{
"epoch": 81.01,
"learning_rate": 1.4245945945945945e-05,
"loss": 0.8363,
"step": 6400
},
{
"epoch": 82.28,
"learning_rate": 1.33e-05,
"loss": 0.8236,
"step": 6500
},
{
"epoch": 82.28,
"eval_loss": 0.4537811577320099,
"eval_runtime": 40.81,
"eval_samples_per_second": 23.352,
"eval_steps_per_second": 0.735,
"eval_wer": 0.47893720903826503,
"step": 6500
},
{
"epoch": 83.54,
"learning_rate": 1.2354054054054053e-05,
"loss": 0.8189,
"step": 6600
},
{
"epoch": 84.81,
"learning_rate": 1.1408108108108108e-05,
"loss": 0.8121,
"step": 6700
},
{
"epoch": 86.08,
"learning_rate": 1.0462162162162161e-05,
"loss": 0.8098,
"step": 6800
},
{
"epoch": 87.34,
"learning_rate": 9.516216216216216e-06,
"loss": 0.8091,
"step": 6900
},
{
"epoch": 88.61,
"learning_rate": 8.57027027027027e-06,
"loss": 0.8069,
"step": 7000
},
{
"epoch": 88.61,
"eval_loss": 0.45264118909835815,
"eval_runtime": 41.4122,
"eval_samples_per_second": 23.013,
"eval_steps_per_second": 0.724,
"eval_wer": 0.47484955149313046,
"step": 7000
},
{
"epoch": 89.87,
"learning_rate": 7.6243243243243236e-06,
"loss": 0.8042,
"step": 7100
},
{
"epoch": 91.14,
"learning_rate": 6.678378378378378e-06,
"loss": 0.8042,
"step": 7200
},
{
"epoch": 92.41,
"learning_rate": 5.732432432432432e-06,
"loss": 0.7968,
"step": 7300
},
{
"epoch": 93.67,
"learning_rate": 4.786486486486486e-06,
"loss": 0.798,
"step": 7400
},
{
"epoch": 94.94,
"learning_rate": 3.84054054054054e-06,
"loss": 0.7958,
"step": 7500
},
{
"epoch": 94.94,
"eval_loss": 0.449596643447876,
"eval_runtime": 41.0608,
"eval_samples_per_second": 23.209,
"eval_steps_per_second": 0.731,
"eval_wer": 0.4699670716475531,
"step": 7500
},
{
"epoch": 96.2,
"learning_rate": 2.8945945945945945e-06,
"loss": 0.7953,
"step": 7600
},
{
"epoch": 97.47,
"learning_rate": 1.9486486486486487e-06,
"loss": 0.7917,
"step": 7700
},
{
"epoch": 98.73,
"learning_rate": 1.0027027027027026e-06,
"loss": 0.7881,
"step": 7800
},
{
"epoch": 100.0,
"learning_rate": 5.675675675675675e-08,
"loss": 0.795,
"step": 7900
},
{
"epoch": 100.0,
"step": 7900,
"total_flos": 3.986125981994791e+19,
"train_loss": 1.2487838832034341,
"train_runtime": 15247.9542,
"train_samples_per_second": 16.383,
"train_steps_per_second": 0.518
}
],
"max_steps": 7900,
"num_train_epochs": 100,
"total_flos": 3.986125981994791e+19,
"trial_name": null,
"trial_params": null
}