{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 50.0,
  "global_step": 11250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.44,
      "learning_rate": 3.4299999999999998e-06,
      "loss": 12.3928,
      "step": 100
    },
    {
      "epoch": 0.89,
      "learning_rate": 6.93e-06,
      "loss": 7.13,
      "step": 200
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.0429999999999998e-05,
      "loss": 4.5055,
      "step": 300
    },
    {
      "epoch": 1.78,
      "learning_rate": 1.3929999999999999e-05,
      "loss": 3.8494,
      "step": 400
    },
    {
      "epoch": 2.22,
      "learning_rate": 1.7429999999999997e-05,
      "loss": 3.4447,
      "step": 500
    },
    {
      "epoch": 2.67,
      "learning_rate": 2.0929999999999998e-05,
      "loss": 3.2021,
      "step": 600
    },
    {
      "epoch": 3.11,
      "learning_rate": 2.4429999999999995e-05,
      "loss": 3.0761,
      "step": 700
    },
    {
      "epoch": 3.56,
      "learning_rate": 2.793e-05,
      "loss": 3.0165,
      "step": 800
    },
    {
      "epoch": 4.0,
      "learning_rate": 3.1429999999999996e-05,
      "loss": 2.9623,
      "step": 900
    },
    {
      "epoch": 4.44,
      "learning_rate": 3.493e-05,
      "loss": 2.9094,
      "step": 1000
    },
    {
      "epoch": 4.89,
      "learning_rate": 3.843e-05,
      "loss": 2.8831,
      "step": 1100
    },
    {
      "epoch": 5.33,
      "learning_rate": 4.192999999999999e-05,
      "loss": 2.8667,
      "step": 1200
    },
    {
      "epoch": 5.78,
      "learning_rate": 4.543e-05,
      "loss": 2.7722,
      "step": 1300
    },
    {
      "epoch": 6.22,
      "learning_rate": 4.8929999999999994e-05,
      "loss": 2.2496,
      "step": 1400
    },
    {
      "epoch": 6.67,
      "learning_rate": 5.243e-05,
      "loss": 1.7213,
      "step": 1500
    },
    {
      "epoch": 7.11,
      "learning_rate": 5.593e-05,
      "loss": 1.5195,
      "step": 1600
    },
    {
      "epoch": 7.56,
      "learning_rate": 5.942999999999999e-05,
      "loss": 1.3847,
      "step": 1700
    },
    {
      "epoch": 8.0,
      "learning_rate": 6.293e-05,
      "loss": 1.3296,
      "step": 1800
    },
    {
      "epoch": 8.44,
      "learning_rate": 6.642999999999999e-05,
      "loss": 1.2533,
      "step": 1900
    },
    {
      "epoch": 8.89,
      "learning_rate": 6.992999999999999e-05,
      "loss": 1.1973,
      "step": 2000
    },
    {
      "epoch": 8.89,
      "eval_loss": 0.44813069701194763,
      "eval_runtime": 151.5294,
      "eval_samples_per_second": 20.933,
      "eval_steps_per_second": 20.933,
      "eval_wer": 0.4849491021888243,
      "step": 2000
    },
    {
      "epoch": 9.33,
      "learning_rate": 6.925837837837837e-05,
      "loss": 1.08,
      "step": 2100
    },
    {
      "epoch": 9.78,
      "learning_rate": 6.850162162162162e-05,
      "loss": 0.9484,
      "step": 2200
    },
    {
      "epoch": 10.22,
      "learning_rate": 6.774486486486486e-05,
      "loss": 0.8818,
      "step": 2300
    },
    {
      "epoch": 10.67,
      "learning_rate": 6.698810810810811e-05,
      "loss": 0.8043,
      "step": 2400
    },
    {
      "epoch": 11.11,
      "learning_rate": 6.623135135135134e-05,
      "loss": 0.7752,
      "step": 2500
    },
    {
      "epoch": 11.56,
      "learning_rate": 6.547459459459459e-05,
      "loss": 0.7497,
      "step": 2600
    },
    {
      "epoch": 12.0,
      "learning_rate": 6.471783783783783e-05,
      "loss": 0.7486,
      "step": 2700
    },
    {
      "epoch": 12.44,
      "learning_rate": 6.396108108108108e-05,
      "loss": 0.7016,
      "step": 2800
    },
    {
      "epoch": 12.89,
      "learning_rate": 6.320432432432433e-05,
      "loss": 0.6927,
      "step": 2900
    },
    {
      "epoch": 13.33,
      "learning_rate": 6.244756756756756e-05,
      "loss": 0.6609,
      "step": 3000
    },
    {
      "epoch": 13.78,
      "learning_rate": 6.169081081081081e-05,
      "loss": 0.6633,
      "step": 3100
    },
    {
      "epoch": 14.22,
      "learning_rate": 6.093405405405405e-05,
      "loss": 0.6781,
      "step": 3200
    },
    {
      "epoch": 14.67,
      "learning_rate": 6.017729729729729e-05,
      "loss": 0.6486,
      "step": 3300
    },
    {
      "epoch": 15.11,
      "learning_rate": 5.942054054054054e-05,
      "loss": 0.6217,
      "step": 3400
    },
    {
      "epoch": 15.56,
      "learning_rate": 5.866378378378378e-05,
      "loss": 0.6348,
      "step": 3500
    },
    {
      "epoch": 16.0,
      "learning_rate": 5.7907027027027026e-05,
      "loss": 0.6555,
      "step": 3600
    },
    {
      "epoch": 16.44,
      "learning_rate": 5.715027027027027e-05,
      "loss": 0.6179,
      "step": 3700
    },
    {
      "epoch": 16.89,
      "learning_rate": 5.639351351351351e-05,
      "loss": 0.6116,
      "step": 3800
    },
    {
      "epoch": 17.33,
      "learning_rate": 5.5636756756756754e-05,
      "loss": 0.586,
      "step": 3900
    },
    {
      "epoch": 17.78,
      "learning_rate": 5.4879999999999996e-05,
      "loss": 0.6005,
      "step": 4000
    },
    {
      "epoch": 17.78,
      "eval_loss": 0.1420038342475891,
      "eval_runtime": 156.5701,
      "eval_samples_per_second": 20.259,
      "eval_steps_per_second": 20.259,
      "eval_wer": 0.17772729258595832,
      "step": 4000
    },
    {
      "epoch": 18.22,
      "learning_rate": 5.412324324324324e-05,
      "loss": 0.6199,
      "step": 4100
    },
    {
      "epoch": 18.67,
      "learning_rate": 5.336648648648648e-05,
      "loss": 0.6017,
      "step": 4200
    },
    {
      "epoch": 19.11,
      "learning_rate": 5.2609729729729724e-05,
      "loss": 0.5722,
      "step": 4300
    },
    {
      "epoch": 19.56,
      "learning_rate": 5.1852972972972974e-05,
      "loss": 0.5755,
      "step": 4400
    },
    {
      "epoch": 20.0,
      "learning_rate": 5.1096216216216216e-05,
      "loss": 0.6083,
      "step": 4500
    },
    {
      "epoch": 20.44,
      "learning_rate": 5.033945945945946e-05,
      "loss": 0.5677,
      "step": 4600
    },
    {
      "epoch": 20.89,
      "learning_rate": 4.9590270270270266e-05,
      "loss": 0.5652,
      "step": 4700
    },
    {
      "epoch": 21.33,
      "learning_rate": 4.8833513513513516e-05,
      "loss": 0.5509,
      "step": 4800
    },
    {
      "epoch": 21.78,
      "learning_rate": 4.807675675675676e-05,
      "loss": 0.5526,
      "step": 4900
    },
    {
      "epoch": 22.22,
      "learning_rate": 4.732e-05,
      "loss": 0.5589,
      "step": 5000
    },
    {
      "epoch": 22.67,
      "learning_rate": 4.6563243243243244e-05,
      "loss": 0.5394,
      "step": 5100
    },
    {
      "epoch": 23.11,
      "learning_rate": 4.5806486486486486e-05,
      "loss": 0.5329,
      "step": 5200
    },
    {
      "epoch": 23.56,
      "learning_rate": 4.504972972972973e-05,
      "loss": 0.5353,
      "step": 5300
    },
    {
      "epoch": 24.0,
      "learning_rate": 4.429297297297297e-05,
      "loss": 0.5643,
      "step": 5400
    },
    {
      "epoch": 24.44,
      "learning_rate": 4.3536216216216214e-05,
      "loss": 0.537,
      "step": 5500
    },
    {
      "epoch": 24.89,
      "learning_rate": 4.277945945945946e-05,
      "loss": 0.5502,
      "step": 5600
    },
    {
      "epoch": 25.33,
      "learning_rate": 4.20227027027027e-05,
      "loss": 0.5126,
      "step": 5700
    },
    {
      "epoch": 25.78,
      "learning_rate": 4.126594594594594e-05,
      "loss": 0.5315,
      "step": 5800
    },
    {
      "epoch": 26.22,
      "learning_rate": 4.050918918918919e-05,
      "loss": 0.5424,
      "step": 5900
    },
    {
      "epoch": 26.67,
      "learning_rate": 3.9752432432432434e-05,
      "loss": 0.5248,
      "step": 6000
    },
    {
      "epoch": 26.67,
      "eval_loss": 0.13026614487171173,
      "eval_runtime": 153.4664,
      "eval_samples_per_second": 20.669,
      "eval_steps_per_second": 20.669,
      "eval_wer": 0.16505745117742146,
      "step": 6000
    },
    {
      "epoch": 27.11,
      "learning_rate": 3.8995675675675676e-05,
      "loss": 0.5111,
      "step": 6100
    },
    {
      "epoch": 27.56,
      "learning_rate": 3.823891891891892e-05,
      "loss": 0.5226,
      "step": 6200
    },
    {
      "epoch": 28.0,
      "learning_rate": 3.748216216216216e-05,
      "loss": 0.5335,
      "step": 6300
    },
    {
      "epoch": 28.44,
      "learning_rate": 3.6725405405405404e-05,
      "loss": 0.5031,
      "step": 6400
    },
    {
      "epoch": 28.89,
      "learning_rate": 3.596864864864865e-05,
      "loss": 0.5219,
      "step": 6500
    },
    {
      "epoch": 29.33,
      "learning_rate": 3.521189189189189e-05,
      "loss": 0.4853,
      "step": 6600
    },
    {
      "epoch": 29.78,
      "learning_rate": 3.445513513513513e-05,
      "loss": 0.5062,
      "step": 6700
    },
    {
      "epoch": 30.22,
      "learning_rate": 3.370594594594594e-05,
      "loss": 0.5395,
      "step": 6800
    },
    {
      "epoch": 30.67,
      "learning_rate": 3.294918918918919e-05,
      "loss": 0.4876,
      "step": 6900
    },
    {
      "epoch": 31.11,
      "learning_rate": 3.219243243243243e-05,
      "loss": 0.4981,
      "step": 7000
    },
    {
      "epoch": 31.56,
      "learning_rate": 3.1435675675675674e-05,
      "loss": 0.5011,
      "step": 7100
    },
    {
      "epoch": 32.0,
      "learning_rate": 3.067891891891892e-05,
      "loss": 0.511,
      "step": 7200
    },
    {
      "epoch": 32.44,
      "learning_rate": 2.992216216216216e-05,
      "loss": 0.4935,
      "step": 7300
    },
    {
      "epoch": 32.89,
      "learning_rate": 2.9165405405405402e-05,
      "loss": 0.4951,
      "step": 7400
    },
    {
      "epoch": 33.33,
      "learning_rate": 2.8408648648648645e-05,
      "loss": 0.4655,
      "step": 7500
    },
    {
      "epoch": 33.78,
      "learning_rate": 2.765189189189189e-05,
      "loss": 0.4926,
      "step": 7600
    },
    {
      "epoch": 34.22,
      "learning_rate": 2.6895135135135133e-05,
      "loss": 0.5083,
      "step": 7700
    },
    {
      "epoch": 34.67,
      "learning_rate": 2.6138378378378376e-05,
      "loss": 0.4849,
      "step": 7800
    },
    {
      "epoch": 35.11,
      "learning_rate": 2.538162162162162e-05,
      "loss": 0.4673,
      "step": 7900
    },
    {
      "epoch": 35.56,
      "learning_rate": 2.462486486486486e-05,
      "loss": 0.4871,
      "step": 8000
    },
    {
      "epoch": 35.56,
      "eval_loss": 0.12074683606624603,
      "eval_runtime": 154.3437,
      "eval_samples_per_second": 20.552,
      "eval_steps_per_second": 20.552,
      "eval_wer": 0.1523439206605793,
      "step": 8000
    },
    {
      "epoch": 36.0,
      "learning_rate": 2.3875675675675676e-05,
      "loss": 0.4911,
      "step": 8100
    },
    {
      "epoch": 36.44,
      "learning_rate": 2.3118918918918918e-05,
      "loss": 0.4724,
      "step": 8200
    },
    {
      "epoch": 36.89,
      "learning_rate": 2.236216216216216e-05,
      "loss": 0.4784,
      "step": 8300
    },
    {
      "epoch": 37.33,
      "learning_rate": 2.1605405405405403e-05,
      "loss": 0.466,
      "step": 8400
    },
    {
      "epoch": 37.78,
      "learning_rate": 2.0848648648648646e-05,
      "loss": 0.4761,
      "step": 8500
    },
    {
      "epoch": 38.22,
      "learning_rate": 2.0091891891891892e-05,
      "loss": 0.4772,
      "step": 8600
    },
    {
      "epoch": 38.67,
      "learning_rate": 1.9335135135135135e-05,
      "loss": 0.4524,
      "step": 8700
    },
    {
      "epoch": 39.11,
      "learning_rate": 1.8578378378378377e-05,
      "loss": 0.4436,
      "step": 8800
    },
    {
      "epoch": 39.56,
      "learning_rate": 1.782162162162162e-05,
      "loss": 0.4673,
      "step": 8900
    },
    {
      "epoch": 40.0,
      "learning_rate": 1.7064864864864862e-05,
      "loss": 0.4848,
      "step": 9000
    },
    {
      "epoch": 40.44,
      "learning_rate": 1.630810810810811e-05,
      "loss": 0.461,
      "step": 9100
    },
    {
      "epoch": 40.89,
      "learning_rate": 1.555135135135135e-05,
      "loss": 0.465,
      "step": 9200
    },
    {
      "epoch": 41.33,
      "learning_rate": 1.4794594594594594e-05,
      "loss": 0.4398,
      "step": 9300
    },
    {
      "epoch": 41.78,
      "learning_rate": 1.4045405405405405e-05,
      "loss": 0.4552,
      "step": 9400
    },
    {
      "epoch": 42.22,
      "learning_rate": 1.3288648648648647e-05,
      "loss": 0.47,
      "step": 9500
    },
    {
      "epoch": 42.67,
      "learning_rate": 1.253189189189189e-05,
      "loss": 0.4599,
      "step": 9600
    },
    {
      "epoch": 43.11,
      "learning_rate": 1.1775135135135134e-05,
      "loss": 0.4273,
      "step": 9700
    },
    {
      "epoch": 43.56,
      "learning_rate": 1.1018378378378377e-05,
      "loss": 0.4533,
      "step": 9800
    },
    {
      "epoch": 44.0,
      "learning_rate": 1.0261621621621621e-05,
      "loss": 0.4573,
      "step": 9900
    },
    {
      "epoch": 44.44,
      "learning_rate": 9.504864864864864e-06,
      "loss": 0.4428,
      "step": 10000
    },
    {
      "epoch": 44.44,
      "eval_loss": 0.11431078612804413,
      "eval_runtime": 152.8495,
      "eval_samples_per_second": 20.752,
      "eval_steps_per_second": 20.752,
      "eval_wer": 0.14247018218358162,
      "step": 10000
    },
    {
      "epoch": 44.89,
      "learning_rate": 8.748108108108106e-06,
      "loss": 0.4431,
      "step": 10100
    },
    {
      "epoch": 45.33,
      "learning_rate": 7.99135135135135e-06,
      "loss": 0.4124,
      "step": 10200
    },
    {
      "epoch": 45.78,
      "learning_rate": 7.234594594594593e-06,
      "loss": 0.4437,
      "step": 10300
    },
    {
      "epoch": 46.22,
      "learning_rate": 6.4778378378378375e-06,
      "loss": 0.4694,
      "step": 10400
    },
    {
      "epoch": 46.67,
      "learning_rate": 5.721081081081081e-06,
      "loss": 0.4408,
      "step": 10500
    },
    {
      "epoch": 47.11,
      "learning_rate": 4.9643243243243245e-06,
      "loss": 0.428,
      "step": 10600
    },
    {
      "epoch": 47.56,
      "learning_rate": 4.207567567567567e-06,
      "loss": 0.4418,
      "step": 10700
    },
    {
      "epoch": 48.0,
      "learning_rate": 3.4508108108108105e-06,
      "loss": 0.4527,
      "step": 10800
    },
    {
      "epoch": 48.44,
      "learning_rate": 2.6940540540540536e-06,
      "loss": 0.448,
      "step": 10900
    },
    {
      "epoch": 48.89,
      "learning_rate": 1.937297297297297e-06,
      "loss": 0.4399,
      "step": 11000
    },
    {
      "epoch": 49.33,
      "learning_rate": 1.1805405405405403e-06,
      "loss": 0.4111,
      "step": 11100
    },
    {
      "epoch": 49.78,
      "learning_rate": 4.237837837837838e-07,
      "loss": 0.4214,
      "step": 11200
    },
    {
      "epoch": 50.0,
      "step": 11250,
      "total_flos": 4.148416605366081e+19,
      "train_loss": 1.0303706246270075,
      "train_runtime": 15356.7914,
      "train_samples_per_second": 23.358,
      "train_steps_per_second": 0.733
    }
  ],
  "max_steps": 11250,
  "num_train_epochs": 50,
  "total_flos": 4.148416605366081e+19,
  "trial_name": null,
  "trial_params": null
}