{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.99602649006623, "global_step": 9400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.53, "learning_rate": 3.7125e-06, "loss": 15.287, "step": 100 }, { "epoch": 1.06, "learning_rate": 7.4625e-06, "loss": 7.6558, "step": 200 }, { "epoch": 1.59, "learning_rate": 1.1212499999999998e-05, "loss": 4.9409, "step": 300 }, { "epoch": 2.13, "learning_rate": 1.49625e-05, "loss": 4.2283, "step": 400 }, { "epoch": 2.66, "learning_rate": 1.8712499999999997e-05, "loss": 3.6871, "step": 500 }, { "epoch": 2.66, "eval_loss": 3.5374293327331543, "eval_runtime": 140.96, "eval_samples_per_second": 19.467, "eval_steps_per_second": 2.433, "eval_wer": 1.0, "step": 500 }, { "epoch": 3.19, "learning_rate": 2.2462499999999997e-05, "loss": 3.4073, "step": 600 }, { "epoch": 3.72, "learning_rate": 2.6212499999999997e-05, "loss": 3.2613, "step": 700 }, { "epoch": 4.25, "learning_rate": 2.99625e-05, "loss": 3.2504, "step": 800 }, { "epoch": 4.78, "learning_rate": 3.37125e-05, "loss": 3.1732, "step": 900 }, { "epoch": 5.32, "learning_rate": 3.7462499999999996e-05, "loss": 3.1501, "step": 1000 }, { "epoch": 5.32, "eval_loss": 3.127795934677124, "eval_runtime": 140.7753, "eval_samples_per_second": 19.492, "eval_steps_per_second": 2.437, "eval_wer": 1.0, "step": 1000 }, { "epoch": 5.85, "learning_rate": 4.12125e-05, "loss": 3.0666, "step": 1100 }, { "epoch": 6.38, "learning_rate": 4.4962499999999995e-05, "loss": 2.7047, "step": 1200 }, { "epoch": 6.91, "learning_rate": 4.871249999999999e-05, "loss": 2.0895, "step": 1300 }, { "epoch": 7.45, "learning_rate": 5.2462499999999994e-05, "loss": 1.7459, "step": 1400 }, { "epoch": 7.97, "learning_rate": 5.62125e-05, "loss": 1.5843, "step": 1500 }, { "epoch": 7.97, "eval_loss": 0.6358404755592346, "eval_runtime": 138.8212, "eval_samples_per_second": 19.766, "eval_steps_per_second": 2.471, "eval_wer": 0.6914246512691516, "step": 1500 }, { "epoch": 8.51, "learning_rate": 5.9962499999999994e-05, "loss": 1.5166, "step": 1600 }, { "epoch": 9.04, "learning_rate": 6.37125e-05, "loss": 1.4555, "step": 1700 }, { "epoch": 9.57, "learning_rate": 6.746249999999999e-05, "loss": 1.4013, "step": 1800 }, { "epoch": 10.11, "learning_rate": 7.121249999999999e-05, "loss": 1.3841, "step": 1900 }, { "epoch": 10.64, "learning_rate": 7.49625e-05, "loss": 1.3378, "step": 2000 }, { "epoch": 10.64, "eval_loss": 0.442169189453125, "eval_runtime": 140.0078, "eval_samples_per_second": 19.599, "eval_steps_per_second": 2.45, "eval_wer": 0.5924536931168534, "step": 2000 }, { "epoch": 11.17, "learning_rate": 7.400675675675676e-05, "loss": 1.3249, "step": 2100 }, { "epoch": 11.7, "learning_rate": 7.299324324324324e-05, "loss": 1.2996, "step": 2200 }, { "epoch": 12.23, "learning_rate": 7.197972972972972e-05, "loss": 1.2952, "step": 2300 }, { "epoch": 12.76, "learning_rate": 7.096621621621621e-05, "loss": 1.2626, "step": 2400 }, { "epoch": 13.3, "learning_rate": 6.99527027027027e-05, "loss": 1.2595, "step": 2500 }, { "epoch": 13.3, "eval_loss": 0.3921487033367157, "eval_runtime": 140.1536, "eval_samples_per_second": 19.579, "eval_steps_per_second": 2.447, "eval_wer": 0.5511548136290876, "step": 2500 }, { "epoch": 13.83, "learning_rate": 6.893918918918919e-05, "loss": 1.2282, "step": 2600 }, { "epoch": 14.36, "learning_rate": 6.792567567567567e-05, "loss": 1.2352, "step": 2700 }, { "epoch": 14.89, "learning_rate": 6.691216216216216e-05, "loss": 1.2129, "step": 2800 }, { "epoch": 15.42, "learning_rate": 6.589864864864864e-05, "loss": 1.2184, "step": 2900 }, { "epoch": 15.95, "learning_rate": 6.488513513513514e-05, "loss": 1.1643, "step": 3000 }, { "epoch": 15.95, "eval_loss": 0.35074228048324585, "eval_runtime": 142.5452, "eval_samples_per_second": 19.25, "eval_steps_per_second": 2.406, "eval_wer": 0.514932540589984, "step": 3000 }, { "epoch": 16.49, "learning_rate": 6.388175675675675e-05, "loss": 1.1704, "step": 3100 }, { "epoch": 17.02, "learning_rate": 6.287837837837837e-05, "loss": 1.1774, "step": 3200 }, { "epoch": 17.55, "learning_rate": 6.186486486486485e-05, "loss": 1.157, "step": 3300 }, { "epoch": 18.08, "learning_rate": 6.085135135135135e-05, "loss": 1.1605, "step": 3400 }, { "epoch": 18.61, "learning_rate": 5.983783783783783e-05, "loss": 1.1352, "step": 3500 }, { "epoch": 18.61, "eval_loss": 0.3350585401058197, "eval_runtime": 140.4732, "eval_samples_per_second": 19.534, "eval_steps_per_second": 2.442, "eval_wer": 0.5018980105190944, "step": 3500 }, { "epoch": 19.15, "learning_rate": 5.882432432432432e-05, "loss": 1.1474, "step": 3600 }, { "epoch": 19.68, "learning_rate": 5.781081081081081e-05, "loss": 1.1273, "step": 3700 }, { "epoch": 20.21, "learning_rate": 5.679729729729729e-05, "loss": 1.133, "step": 3800 }, { "epoch": 20.74, "learning_rate": 5.578378378378378e-05, "loss": 1.1112, "step": 3900 }, { "epoch": 21.28, "learning_rate": 5.477027027027026e-05, "loss": 1.1113, "step": 4000 }, { "epoch": 21.28, "eval_loss": 0.3152759373188019, "eval_runtime": 139.3947, "eval_samples_per_second": 19.685, "eval_steps_per_second": 2.461, "eval_wer": 0.4845186370912417, "step": 4000 }, { "epoch": 21.81, "learning_rate": 5.375675675675675e-05, "loss": 1.1029, "step": 4100 }, { "epoch": 22.34, "learning_rate": 5.274324324324324e-05, "loss": 1.1124, "step": 4200 }, { "epoch": 22.87, "learning_rate": 5.172972972972972e-05, "loss": 1.091, "step": 4300 }, { "epoch": 23.4, "learning_rate": 5.071621621621621e-05, "loss": 1.097, "step": 4400 }, { "epoch": 23.93, "learning_rate": 4.97027027027027e-05, "loss": 1.0914, "step": 4500 }, { "epoch": 23.93, "eval_loss": 0.3050296902656555, "eval_runtime": 137.2603, "eval_samples_per_second": 19.991, "eval_steps_per_second": 2.499, "eval_wer": 0.4594100160073176, "step": 4500 }, { "epoch": 24.47, "learning_rate": 4.8689189189189184e-05, "loss": 1.0749, "step": 4600 }, { "epoch": 25.0, "learning_rate": 4.767567567567567e-05, "loss": 1.0807, "step": 4700 }, { "epoch": 25.53, "learning_rate": 4.666216216216216e-05, "loss": 1.0647, "step": 4800 }, { "epoch": 26.06, "learning_rate": 4.564864864864864e-05, "loss": 1.068, "step": 4900 }, { "epoch": 26.59, "learning_rate": 4.463513513513513e-05, "loss": 1.0468, "step": 5000 }, { "epoch": 26.59, "eval_loss": 0.2889558672904968, "eval_runtime": 137.6066, "eval_samples_per_second": 19.941, "eval_steps_per_second": 2.493, "eval_wer": 0.44701577864166475, "step": 5000 }, { "epoch": 27.13, "learning_rate": 4.3621621621621624e-05, "loss": 1.0548, "step": 5100 }, { "epoch": 27.66, "learning_rate": 4.26081081081081e-05, "loss": 1.0423, "step": 5200 }, { "epoch": 28.19, "learning_rate": 4.160472972972973e-05, "loss": 1.0436, "step": 5300 }, { "epoch": 28.72, "learning_rate": 4.059121621621621e-05, "loss": 1.0338, "step": 5400 }, { "epoch": 29.25, "learning_rate": 3.95777027027027e-05, "loss": 1.0473, "step": 5500 }, { "epoch": 29.25, "eval_loss": 0.27552109956741333, "eval_runtime": 139.7371, "eval_samples_per_second": 19.637, "eval_steps_per_second": 2.455, "eval_wer": 0.43306654470615136, "step": 5500 }, { "epoch": 29.78, "learning_rate": 3.856418918918919e-05, "loss": 1.027, "step": 5600 }, { "epoch": 30.32, "learning_rate": 3.755067567567568e-05, "loss": 1.0418, "step": 5700 }, { "epoch": 30.85, "learning_rate": 3.653716216216216e-05, "loss": 1.0147, "step": 5800 }, { "epoch": 31.38, "learning_rate": 3.552364864864865e-05, "loss": 1.0197, "step": 5900 }, { "epoch": 31.91, "learning_rate": 3.451013513513513e-05, "loss": 1.0065, "step": 6000 }, { "epoch": 31.91, "eval_loss": 0.27181389927864075, "eval_runtime": 138.4335, "eval_samples_per_second": 19.822, "eval_steps_per_second": 2.478, "eval_wer": 0.42638920649439743, "step": 6000 }, { "epoch": 32.45, "learning_rate": 3.3496621621621615e-05, "loss": 1.0138, "step": 6100 }, { "epoch": 32.97, "learning_rate": 3.2483108108108105e-05, "loss": 1.0027, "step": 6200 }, { "epoch": 33.51, "learning_rate": 3.146959459459459e-05, "loss": 1.0157, "step": 6300 }, { "epoch": 34.04, "learning_rate": 3.045608108108108e-05, "loss": 1.0034, "step": 6400 }, { "epoch": 34.57, "learning_rate": 2.9442567567567563e-05, "loss": 0.9794, "step": 6500 }, { "epoch": 34.57, "eval_loss": 0.2646064758300781, "eval_runtime": 138.3623, "eval_samples_per_second": 19.832, "eval_steps_per_second": 2.479, "eval_wer": 0.41934598673679396, "step": 6500 }, { "epoch": 35.11, "learning_rate": 2.8429054054054054e-05, "loss": 0.9911, "step": 6600 }, { "epoch": 35.64, "learning_rate": 2.7415540540540538e-05, "loss": 0.9807, "step": 6700 }, { "epoch": 36.17, "learning_rate": 2.6402027027027025e-05, "loss": 0.9843, "step": 6800 }, { "epoch": 36.7, "learning_rate": 2.538851351351351e-05, "loss": 0.9735, "step": 6900 }, { "epoch": 37.23, "learning_rate": 2.4375e-05, "loss": 0.9849, "step": 7000 }, { "epoch": 37.23, "eval_loss": 0.2609545886516571, "eval_runtime": 144.7382, "eval_samples_per_second": 18.958, "eval_steps_per_second": 2.37, "eval_wer": 0.4058083695403613, "step": 7000 }, { "epoch": 37.76, "learning_rate": 2.3361486486486483e-05, "loss": 0.983, "step": 7100 }, { "epoch": 38.3, "learning_rate": 2.234797297297297e-05, "loss": 0.9775, "step": 7200 }, { "epoch": 38.83, "learning_rate": 2.1334459459459458e-05, "loss": 0.9597, "step": 7300 }, { "epoch": 39.36, "learning_rate": 2.0320945945945945e-05, "loss": 0.9606, "step": 7400 }, { "epoch": 39.89, "learning_rate": 1.930743243243243e-05, "loss": 0.9496, "step": 7500 }, { "epoch": 39.89, "eval_loss": 0.2522386610507965, "eval_runtime": 139.7036, "eval_samples_per_second": 19.642, "eval_steps_per_second": 2.455, "eval_wer": 0.3984907386233707, "step": 7500 }, { "epoch": 40.42, "learning_rate": 1.829391891891892e-05, "loss": 0.9581, "step": 7600 }, { "epoch": 40.95, "learning_rate": 1.7280405405405403e-05, "loss": 0.9477, "step": 7700 }, { "epoch": 41.49, "learning_rate": 1.626689189189189e-05, "loss": 0.9538, "step": 7800 }, { "epoch": 42.02, "learning_rate": 1.5253378378378378e-05, "loss": 0.9631, "step": 7900 }, { "epoch": 42.55, "learning_rate": 1.4239864864864863e-05, "loss": 0.9367, "step": 8000 }, { "epoch": 42.55, "eval_loss": 0.25142449140548706, "eval_runtime": 138.6762, "eval_samples_per_second": 19.787, "eval_steps_per_second": 2.473, "eval_wer": 0.3946947175851818, "step": 8000 }, { "epoch": 43.08, "learning_rate": 1.322635135135135e-05, "loss": 0.9498, "step": 8100 }, { "epoch": 43.61, "learning_rate": 1.2212837837837838e-05, "loss": 0.9389, "step": 8200 }, { "epoch": 44.15, "learning_rate": 1.1199324324324323e-05, "loss": 0.937, "step": 8300 }, { "epoch": 44.68, "learning_rate": 1.0195945945945945e-05, "loss": 0.9394, "step": 8400 }, { "epoch": 45.21, "learning_rate": 9.182432432432432e-06, "loss": 0.9295, "step": 8500 }, { "epoch": 45.21, "eval_loss": 0.24582630395889282, "eval_runtime": 140.7119, "eval_samples_per_second": 19.501, "eval_steps_per_second": 2.438, "eval_wer": 0.3883375257260462, "step": 8500 }, { "epoch": 45.74, "learning_rate": 8.168918918918917e-06, "loss": 0.9244, "step": 8600 }, { "epoch": 46.28, "learning_rate": 7.1554054054054045e-06, "loss": 0.9273, "step": 8700 }, { "epoch": 46.81, "learning_rate": 6.141891891891891e-06, "loss": 0.9229, "step": 8800 }, { "epoch": 47.34, "learning_rate": 5.128378378378377e-06, "loss": 0.9392, "step": 8900 }, { "epoch": 47.87, "learning_rate": 4.1148648648648645e-06, "loss": 0.9187, "step": 9000 }, { "epoch": 47.87, "eval_loss": 0.24390903115272522, "eval_runtime": 139.6323, "eval_samples_per_second": 19.652, "eval_steps_per_second": 2.456, "eval_wer": 0.3832609192773839, "step": 9000 }, { "epoch": 48.4, "learning_rate": 3.1013513513513513e-06, "loss": 0.9194, "step": 9100 }, { "epoch": 48.93, "learning_rate": 2.0878378378378376e-06, "loss": 0.9234, "step": 9200 }, { "epoch": 49.47, "learning_rate": 1.0743243243243242e-06, "loss": 0.9257, "step": 9300 }, { "epoch": 50.0, "learning_rate": 6.08108108108108e-08, "loss": 0.9097, "step": 9400 }, { "epoch": 50.0, "step": 9400, "total_flos": 5.3948756860365595e+19, "train_loss": 1.5738848333155855, "train_runtime": 24806.553, "train_samples_per_second": 12.162, "train_steps_per_second": 0.379 } ], "max_steps": 9400, "num_train_epochs": 50, "total_flos": 5.3948756860365595e+19, "trial_name": null, "trial_params": null }