{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "global_step": 11250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.44, "learning_rate": 3.4299999999999998e-06, "loss": 12.3928, "step": 100 }, { "epoch": 0.89, "learning_rate": 6.93e-06, "loss": 7.13, "step": 200 }, { "epoch": 1.33, "learning_rate": 1.0429999999999998e-05, "loss": 4.5055, "step": 300 }, { "epoch": 1.78, "learning_rate": 1.3929999999999999e-05, "loss": 3.8494, "step": 400 }, { "epoch": 2.22, "learning_rate": 1.7429999999999997e-05, "loss": 3.4447, "step": 500 }, { "epoch": 2.67, "learning_rate": 2.0929999999999998e-05, "loss": 3.2021, "step": 600 }, { "epoch": 3.11, "learning_rate": 2.4429999999999995e-05, "loss": 3.0761, "step": 700 }, { "epoch": 3.56, "learning_rate": 2.793e-05, "loss": 3.0165, "step": 800 }, { "epoch": 4.0, "learning_rate": 3.1429999999999996e-05, "loss": 2.9623, "step": 900 }, { "epoch": 4.44, "learning_rate": 3.493e-05, "loss": 2.9094, "step": 1000 }, { "epoch": 4.89, "learning_rate": 3.843e-05, "loss": 2.8831, "step": 1100 }, { "epoch": 5.33, "learning_rate": 4.192999999999999e-05, "loss": 2.8667, "step": 1200 }, { "epoch": 5.78, "learning_rate": 4.543e-05, "loss": 2.7722, "step": 1300 }, { "epoch": 6.22, "learning_rate": 4.8929999999999994e-05, "loss": 2.2496, "step": 1400 }, { "epoch": 6.67, "learning_rate": 5.243e-05, "loss": 1.7213, "step": 1500 }, { "epoch": 7.11, "learning_rate": 5.593e-05, "loss": 1.5195, "step": 1600 }, { "epoch": 7.56, "learning_rate": 5.942999999999999e-05, "loss": 1.3847, "step": 1700 }, { "epoch": 8.0, "learning_rate": 6.293e-05, "loss": 1.3296, "step": 1800 }, { "epoch": 8.44, "learning_rate": 6.642999999999999e-05, "loss": 1.2533, "step": 1900 }, { "epoch": 8.89, "learning_rate": 6.992999999999999e-05, "loss": 1.1973, "step": 2000 }, { "epoch": 8.89, "eval_loss": 0.44813069701194763, "eval_runtime": 151.5294, "eval_samples_per_second": 20.933, "eval_steps_per_second": 20.933, "eval_wer": 0.4849491021888243, "step": 2000 }, { "epoch": 9.33, "learning_rate": 6.925837837837837e-05, "loss": 1.08, "step": 2100 }, { "epoch": 9.78, "learning_rate": 6.850162162162162e-05, "loss": 0.9484, "step": 2200 }, { "epoch": 10.22, "learning_rate": 6.774486486486486e-05, "loss": 0.8818, "step": 2300 }, { "epoch": 10.67, "learning_rate": 6.698810810810811e-05, "loss": 0.8043, "step": 2400 }, { "epoch": 11.11, "learning_rate": 6.623135135135134e-05, "loss": 0.7752, "step": 2500 }, { "epoch": 11.56, "learning_rate": 6.547459459459459e-05, "loss": 0.7497, "step": 2600 }, { "epoch": 12.0, "learning_rate": 6.471783783783783e-05, "loss": 0.7486, "step": 2700 }, { "epoch": 12.44, "learning_rate": 6.396108108108108e-05, "loss": 0.7016, "step": 2800 }, { "epoch": 12.89, "learning_rate": 6.320432432432433e-05, "loss": 0.6927, "step": 2900 }, { "epoch": 13.33, "learning_rate": 6.244756756756756e-05, "loss": 0.6609, "step": 3000 }, { "epoch": 13.78, "learning_rate": 6.169081081081081e-05, "loss": 0.6633, "step": 3100 }, { "epoch": 14.22, "learning_rate": 6.093405405405405e-05, "loss": 0.6781, "step": 3200 }, { "epoch": 14.67, "learning_rate": 6.017729729729729e-05, "loss": 0.6486, "step": 3300 }, { "epoch": 15.11, "learning_rate": 5.942054054054054e-05, "loss": 0.6217, "step": 3400 }, { "epoch": 15.56, "learning_rate": 5.866378378378378e-05, "loss": 0.6348, "step": 3500 }, { "epoch": 16.0, "learning_rate": 5.7907027027027026e-05, "loss": 0.6555, "step": 3600 }, { "epoch": 16.44, "learning_rate": 5.715027027027027e-05, "loss": 0.6179, "step": 3700 }, { "epoch": 16.89, "learning_rate": 5.639351351351351e-05, "loss": 0.6116, "step": 3800 }, { "epoch": 17.33, "learning_rate": 5.5636756756756754e-05, "loss": 0.586, "step": 3900 }, { "epoch": 17.78, "learning_rate": 5.4879999999999996e-05, "loss": 0.6005, "step": 4000 }, { "epoch": 17.78, "eval_loss": 0.1420038342475891, "eval_runtime": 156.5701, "eval_samples_per_second": 20.259, "eval_steps_per_second": 20.259, "eval_wer": 0.17772729258595832, "step": 4000 }, { "epoch": 18.22, "learning_rate": 5.412324324324324e-05, "loss": 0.6199, "step": 4100 }, { "epoch": 18.67, "learning_rate": 5.336648648648648e-05, "loss": 0.6017, "step": 4200 }, { "epoch": 19.11, "learning_rate": 5.2609729729729724e-05, "loss": 0.5722, "step": 4300 }, { "epoch": 19.56, "learning_rate": 5.1852972972972974e-05, "loss": 0.5755, "step": 4400 }, { "epoch": 20.0, "learning_rate": 5.1096216216216216e-05, "loss": 0.6083, "step": 4500 }, { "epoch": 20.44, "learning_rate": 5.033945945945946e-05, "loss": 0.5677, "step": 4600 }, { "epoch": 20.89, "learning_rate": 4.9590270270270266e-05, "loss": 0.5652, "step": 4700 }, { "epoch": 21.33, "learning_rate": 4.8833513513513516e-05, "loss": 0.5509, "step": 4800 }, { "epoch": 21.78, "learning_rate": 4.807675675675676e-05, "loss": 0.5526, "step": 4900 }, { "epoch": 22.22, "learning_rate": 4.732e-05, "loss": 0.5589, "step": 5000 }, { "epoch": 22.67, "learning_rate": 4.6563243243243244e-05, "loss": 0.5394, "step": 5100 }, { "epoch": 23.11, "learning_rate": 4.5806486486486486e-05, "loss": 0.5329, "step": 5200 }, { "epoch": 23.56, "learning_rate": 4.504972972972973e-05, "loss": 0.5353, "step": 5300 }, { "epoch": 24.0, "learning_rate": 4.429297297297297e-05, "loss": 0.5643, "step": 5400 }, { "epoch": 24.44, "learning_rate": 4.3536216216216214e-05, "loss": 0.537, "step": 5500 }, { "epoch": 24.89, "learning_rate": 4.277945945945946e-05, "loss": 0.5502, "step": 5600 }, { "epoch": 25.33, "learning_rate": 4.20227027027027e-05, "loss": 0.5126, "step": 5700 }, { "epoch": 25.78, "learning_rate": 4.126594594594594e-05, "loss": 0.5315, "step": 5800 }, { "epoch": 26.22, "learning_rate": 4.050918918918919e-05, "loss": 0.5424, "step": 5900 }, { "epoch": 26.67, "learning_rate": 3.9752432432432434e-05, "loss": 0.5248, "step": 6000 }, { "epoch": 26.67, "eval_loss": 0.13026614487171173, "eval_runtime": 153.4664, "eval_samples_per_second": 20.669, "eval_steps_per_second": 20.669, "eval_wer": 0.16505745117742146, "step": 6000 }, { "epoch": 27.11, "learning_rate": 3.8995675675675676e-05, "loss": 0.5111, "step": 6100 }, { "epoch": 27.56, "learning_rate": 3.823891891891892e-05, "loss": 0.5226, "step": 6200 }, { "epoch": 28.0, "learning_rate": 3.748216216216216e-05, "loss": 0.5335, "step": 6300 }, { "epoch": 28.44, "learning_rate": 3.6725405405405404e-05, "loss": 0.5031, "step": 6400 }, { "epoch": 28.89, "learning_rate": 3.596864864864865e-05, "loss": 0.5219, "step": 6500 }, { "epoch": 29.33, "learning_rate": 3.521189189189189e-05, "loss": 0.4853, "step": 6600 }, { "epoch": 29.78, "learning_rate": 3.445513513513513e-05, "loss": 0.5062, "step": 6700 }, { "epoch": 30.22, "learning_rate": 3.370594594594594e-05, "loss": 0.5395, "step": 6800 }, { "epoch": 30.67, "learning_rate": 3.294918918918919e-05, "loss": 0.4876, "step": 6900 }, { "epoch": 31.11, "learning_rate": 3.219243243243243e-05, "loss": 0.4981, "step": 7000 }, { "epoch": 31.56, "learning_rate": 3.1435675675675674e-05, "loss": 0.5011, "step": 7100 }, { "epoch": 32.0, "learning_rate": 3.067891891891892e-05, "loss": 0.511, "step": 7200 }, { "epoch": 32.44, "learning_rate": 2.992216216216216e-05, "loss": 0.4935, "step": 7300 }, { "epoch": 32.89, "learning_rate": 2.9165405405405402e-05, "loss": 0.4951, "step": 7400 }, { "epoch": 33.33, "learning_rate": 2.8408648648648645e-05, "loss": 0.4655, "step": 7500 }, { "epoch": 33.78, "learning_rate": 2.765189189189189e-05, "loss": 0.4926, "step": 7600 }, { "epoch": 34.22, "learning_rate": 2.6895135135135133e-05, "loss": 0.5083, "step": 7700 }, { "epoch": 34.67, "learning_rate": 2.6138378378378376e-05, "loss": 0.4849, "step": 7800 }, { "epoch": 35.11, "learning_rate": 2.538162162162162e-05, "loss": 0.4673, "step": 7900 }, { "epoch": 35.56, "learning_rate": 2.462486486486486e-05, "loss": 0.4871, "step": 8000 }, { "epoch": 35.56, "eval_loss": 0.12074683606624603, "eval_runtime": 154.3437, "eval_samples_per_second": 20.552, "eval_steps_per_second": 20.552, "eval_wer": 0.1523439206605793, "step": 8000 }, { "epoch": 36.0, "learning_rate": 2.3875675675675676e-05, "loss": 0.4911, "step": 8100 }, { "epoch": 36.44, "learning_rate": 2.3118918918918918e-05, "loss": 0.4724, "step": 8200 }, { "epoch": 36.89, "learning_rate": 2.236216216216216e-05, "loss": 0.4784, "step": 8300 }, { "epoch": 37.33, "learning_rate": 2.1605405405405403e-05, "loss": 0.466, "step": 8400 }, { "epoch": 37.78, "learning_rate": 2.0848648648648646e-05, "loss": 0.4761, "step": 8500 }, { "epoch": 38.22, "learning_rate": 2.0091891891891892e-05, "loss": 0.4772, "step": 8600 }, { "epoch": 38.67, "learning_rate": 1.9335135135135135e-05, "loss": 0.4524, "step": 8700 }, { "epoch": 39.11, "learning_rate": 1.8578378378378377e-05, "loss": 0.4436, "step": 8800 }, { "epoch": 39.56, "learning_rate": 1.782162162162162e-05, "loss": 0.4673, "step": 8900 }, { "epoch": 40.0, "learning_rate": 1.7064864864864862e-05, "loss": 0.4848, "step": 9000 }, { "epoch": 40.44, "learning_rate": 1.630810810810811e-05, "loss": 0.461, "step": 9100 }, { "epoch": 40.89, "learning_rate": 1.555135135135135e-05, "loss": 0.465, "step": 9200 }, { "epoch": 41.33, "learning_rate": 1.4794594594594594e-05, "loss": 0.4398, "step": 9300 }, { "epoch": 41.78, "learning_rate": 1.4045405405405405e-05, "loss": 0.4552, "step": 9400 }, { "epoch": 42.22, "learning_rate": 1.3288648648648647e-05, "loss": 0.47, "step": 9500 }, { "epoch": 42.67, "learning_rate": 1.253189189189189e-05, "loss": 0.4599, "step": 9600 }, { "epoch": 43.11, "learning_rate": 1.1775135135135134e-05, "loss": 0.4273, "step": 9700 }, { "epoch": 43.56, "learning_rate": 1.1018378378378377e-05, "loss": 0.4533, "step": 9800 }, { "epoch": 44.0, "learning_rate": 1.0261621621621621e-05, "loss": 0.4573, "step": 9900 }, { "epoch": 44.44, "learning_rate": 9.504864864864864e-06, "loss": 0.4428, "step": 10000 }, { "epoch": 44.44, "eval_loss": 0.11431078612804413, "eval_runtime": 152.8495, "eval_samples_per_second": 20.752, "eval_steps_per_second": 20.752, "eval_wer": 0.14247018218358162, "step": 10000 }, { "epoch": 44.89, "learning_rate": 8.748108108108106e-06, "loss": 0.4431, "step": 10100 }, { "epoch": 45.33, "learning_rate": 7.99135135135135e-06, "loss": 0.4124, "step": 10200 }, { "epoch": 45.78, "learning_rate": 7.234594594594593e-06, "loss": 0.4437, "step": 10300 }, { "epoch": 46.22, "learning_rate": 6.4778378378378375e-06, "loss": 0.4694, "step": 10400 }, { "epoch": 46.67, "learning_rate": 5.721081081081081e-06, "loss": 0.4408, "step": 10500 }, { "epoch": 47.11, "learning_rate": 4.9643243243243245e-06, "loss": 0.428, "step": 10600 }, { "epoch": 47.56, "learning_rate": 4.207567567567567e-06, "loss": 0.4418, "step": 10700 }, { "epoch": 48.0, "learning_rate": 3.4508108108108105e-06, "loss": 0.4527, "step": 10800 }, { "epoch": 48.44, "learning_rate": 2.6940540540540536e-06, "loss": 0.448, "step": 10900 }, { "epoch": 48.89, "learning_rate": 1.937297297297297e-06, "loss": 0.4399, "step": 11000 }, { "epoch": 49.33, "learning_rate": 1.1805405405405403e-06, "loss": 0.4111, "step": 11100 }, { "epoch": 49.78, "learning_rate": 4.237837837837838e-07, "loss": 0.4214, "step": 11200 }, { "epoch": 50.0, "step": 11250, "total_flos": 4.148416605366081e+19, "train_loss": 1.0303706246270075, "train_runtime": 15356.7914, "train_samples_per_second": 23.358, "train_steps_per_second": 0.733 } ], "max_steps": 11250, "num_train_epochs": 50, "total_flos": 4.148416605366081e+19, "trial_name": null, "trial_params": null }