{ "best_metric": null, "best_model_checkpoint": null, "epoch": 500.0, "global_step": 11000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.55, "learning_rate": 3.675e-06, "loss": 15.8714, "step": 100 }, { "epoch": 9.09, "learning_rate": 7.425e-06, "loss": 8.905, "step": 200 }, { "epoch": 13.64, "learning_rate": 1.1174999999999999e-05, "loss": 5.8648, "step": 300 }, { "epoch": 18.18, "learning_rate": 1.4925e-05, "loss": 4.9504, "step": 400 }, { "epoch": 18.18, "eval_loss": 4.673031330108643, "eval_runtime": 13.8453, "eval_samples_per_second": 22.101, "eval_steps_per_second": 1.445, "eval_wer": 1.0, "step": 400 }, { "epoch": 22.73, "learning_rate": 1.8675e-05, "loss": 4.2429, "step": 500 }, { "epoch": 27.27, "learning_rate": 2.2424999999999996e-05, "loss": 3.72, "step": 600 }, { "epoch": 31.82, "learning_rate": 2.6174999999999996e-05, "loss": 3.4596, "step": 700 }, { "epoch": 36.36, "learning_rate": 2.9925e-05, "loss": 3.3766, "step": 800 }, { "epoch": 36.36, "eval_loss": 3.3464324474334717, "eval_runtime": 13.5196, "eval_samples_per_second": 22.634, "eval_steps_per_second": 1.479, "eval_wer": 1.0, "step": 800 }, { "epoch": 40.91, "learning_rate": 3.3675e-05, "loss": 3.3035, "step": 900 }, { "epoch": 45.45, "learning_rate": 3.7424999999999995e-05, "loss": 3.2559, "step": 1000 }, { "epoch": 50.0, "learning_rate": 4.1175e-05, "loss": 3.2202, "step": 1100 }, { "epoch": 54.55, "learning_rate": 4.4924999999999994e-05, "loss": 3.1128, "step": 1200 }, { "epoch": 54.55, "eval_loss": 3.0176751613616943, "eval_runtime": 13.6849, "eval_samples_per_second": 22.36, "eval_steps_per_second": 1.461, "eval_wer": 0.9979943842759728, "step": 1200 }, { "epoch": 59.09, "learning_rate": 4.8675e-05, "loss": 2.9121, "step": 1300 }, { "epoch": 63.64, "learning_rate": 5.2424999999999994e-05, "loss": 2.4741, "step": 1400 }, { "epoch": 68.18, "learning_rate": 5.6175e-05, "loss": 2.0461, "step": 1500 }, { "epoch": 72.73, "learning_rate": 5.9925e-05, "loss": 1.7966, "step": 1600 }, { "epoch": 72.73, "eval_loss": 0.8733049631118774, "eval_runtime": 13.6227, "eval_samples_per_second": 22.463, "eval_steps_per_second": 1.468, "eval_wer": 0.8038507821901324, "step": 1600 }, { "epoch": 77.27, "learning_rate": 6.367499999999999e-05, "loss": 1.6388, "step": 1700 }, { "epoch": 81.82, "learning_rate": 6.7425e-05, "loss": 1.5466, "step": 1800 }, { "epoch": 86.36, "learning_rate": 7.1175e-05, "loss": 1.461, "step": 1900 }, { "epoch": 90.91, "learning_rate": 7.48875e-05, "loss": 1.4085, "step": 2000 }, { "epoch": 90.91, "eval_loss": 0.5555232763290405, "eval_runtime": 13.4681, "eval_samples_per_second": 22.72, "eval_steps_per_second": 1.485, "eval_wer": 0.645808263136783, "step": 2000 }, { "epoch": 95.45, "learning_rate": 7.419166666666666e-05, "loss": 1.3233, "step": 2100 }, { "epoch": 100.0, "learning_rate": 7.335833333333333e-05, "loss": 1.2821, "step": 2200 }, { "epoch": 104.55, "learning_rate": 7.252499999999999e-05, "loss": 1.2194, "step": 2300 }, { "epoch": 109.09, "learning_rate": 7.169166666666666e-05, "loss": 1.1731, "step": 2400 }, { "epoch": 109.09, "eval_loss": 0.4929651618003845, "eval_runtime": 13.4716, "eval_samples_per_second": 22.715, "eval_steps_per_second": 1.485, "eval_wer": 0.6438026474127557, "step": 2400 }, { "epoch": 113.64, "learning_rate": 7.085833333333333e-05, "loss": 1.135, "step": 2500 }, { "epoch": 118.18, "learning_rate": 7.0025e-05, "loss": 1.1009, "step": 2600 }, { "epoch": 122.73, "learning_rate": 6.919166666666666e-05, "loss": 1.0712, "step": 2700 }, { "epoch": 127.27, "learning_rate": 6.835833333333332e-05, "loss": 1.0271, "step": 2800 }, { "epoch": 127.27, "eval_loss": 0.47801950573921204, "eval_runtime": 13.4955, "eval_samples_per_second": 22.674, "eval_steps_per_second": 1.482, "eval_wer": 0.6093060569594866, "step": 2800 }, { "epoch": 131.82, "learning_rate": 6.753333333333333e-05, "loss": 1.0016, "step": 2900 }, { "epoch": 136.36, "learning_rate": 6.67e-05, "loss": 0.9638, "step": 3000 }, { "epoch": 140.91, "learning_rate": 6.586666666666666e-05, "loss": 0.9327, "step": 3100 }, { "epoch": 145.45, "learning_rate": 6.503333333333333e-05, "loss": 0.9045, "step": 3200 }, { "epoch": 145.45, "eval_loss": 0.46472442150115967, "eval_runtime": 13.3444, "eval_samples_per_second": 22.931, "eval_steps_per_second": 1.499, "eval_wer": 0.6578419574809466, "step": 3200 }, { "epoch": 150.0, "learning_rate": 6.419999999999999e-05, "loss": 0.8865, "step": 3300 }, { "epoch": 154.55, "learning_rate": 6.336666666666666e-05, "loss": 0.8703, "step": 3400 }, { "epoch": 159.09, "learning_rate": 6.253333333333333e-05, "loss": 0.8379, "step": 3500 }, { "epoch": 163.64, "learning_rate": 6.17e-05, "loss": 0.807, "step": 3600 }, { "epoch": 163.64, "eval_loss": 0.4505322575569153, "eval_runtime": 13.3663, "eval_samples_per_second": 22.893, "eval_steps_per_second": 1.496, "eval_wer": 0.5924588848776574, "step": 3600 }, { "epoch": 168.18, "learning_rate": 6.0866666666666664e-05, "loss": 0.805, "step": 3700 }, { "epoch": 172.73, "learning_rate": 6.003333333333333e-05, "loss": 0.7889, "step": 3800 }, { "epoch": 177.27, "learning_rate": 5.9199999999999996e-05, "loss": 0.764, "step": 3900 }, { "epoch": 181.82, "learning_rate": 5.8366666666666665e-05, "loss": 0.741, "step": 4000 }, { "epoch": 181.82, "eval_loss": 0.4745561182498932, "eval_runtime": 13.2681, "eval_samples_per_second": 23.063, "eval_steps_per_second": 1.507, "eval_wer": 0.6024869634977938, "step": 4000 }, { "epoch": 186.36, "learning_rate": 5.753333333333333e-05, "loss": 0.7192, "step": 4100 }, { "epoch": 190.91, "learning_rate": 5.6699999999999996e-05, "loss": 0.6977, "step": 4200 }, { "epoch": 195.45, "learning_rate": 5.5866666666666665e-05, "loss": 0.6882, "step": 4300 }, { "epoch": 200.0, "learning_rate": 5.503333333333333e-05, "loss": 0.6706, "step": 4400 }, { "epoch": 200.0, "eval_loss": 0.5003653168678284, "eval_runtime": 13.3324, "eval_samples_per_second": 22.952, "eval_steps_per_second": 1.5, "eval_wer": 0.5844364219815483, "step": 4400 }, { "epoch": 204.55, "learning_rate": 5.4199999999999996e-05, "loss": 0.6627, "step": 4500 }, { "epoch": 209.09, "learning_rate": 5.3366666666666665e-05, "loss": 0.6453, "step": 4600 }, { "epoch": 213.64, "learning_rate": 5.253333333333333e-05, "loss": 0.6311, "step": 4700 }, { "epoch": 218.18, "learning_rate": 5.1699999999999996e-05, "loss": 0.6186, "step": 4800 }, { "epoch": 218.18, "eval_loss": 0.4983522891998291, "eval_runtime": 13.4916, "eval_samples_per_second": 22.681, "eval_steps_per_second": 1.482, "eval_wer": 0.5996791014841556, "step": 4800 }, { "epoch": 222.73, "learning_rate": 5.086666666666666e-05, "loss": 0.5976, "step": 4900 }, { "epoch": 227.27, "learning_rate": 5.003333333333333e-05, "loss": 0.5832, "step": 5000 }, { "epoch": 231.82, "learning_rate": 4.9199999999999997e-05, "loss": 0.5663, "step": 5100 }, { "epoch": 236.36, "learning_rate": 4.836666666666666e-05, "loss": 0.5508, "step": 5200 }, { "epoch": 236.36, "eval_loss": 0.5298363566398621, "eval_runtime": 13.4008, "eval_samples_per_second": 22.834, "eval_steps_per_second": 1.492, "eval_wer": 0.5635780184516647, "step": 5200 }, { "epoch": 240.91, "learning_rate": 4.753333333333333e-05, "loss": 0.554, "step": 5300 }, { "epoch": 245.45, "learning_rate": 4.67e-05, "loss": 0.543, "step": 5400 }, { "epoch": 250.0, "learning_rate": 4.586666666666666e-05, "loss": 0.5221, "step": 5500 }, { "epoch": 254.55, "learning_rate": 4.503333333333333e-05, "loss": 0.5123, "step": 5600 }, { "epoch": 254.55, "eval_loss": 0.5410219430923462, "eval_runtime": 13.3891, "eval_samples_per_second": 22.854, "eval_steps_per_second": 1.494, "eval_wer": 0.51103088648215, "step": 5600 }, { "epoch": 259.09, "learning_rate": 4.42e-05, "loss": 0.5023, "step": 5700 }, { "epoch": 263.64, "learning_rate": 4.336666666666666e-05, "loss": 0.4854, "step": 5800 }, { "epoch": 268.18, "learning_rate": 4.253333333333333e-05, "loss": 0.4795, "step": 5900 }, { "epoch": 272.73, "learning_rate": 4.17e-05, "loss": 0.4623, "step": 6000 }, { "epoch": 272.73, "eval_loss": 0.5591160655021667, "eval_runtime": 13.3234, "eval_samples_per_second": 22.967, "eval_steps_per_second": 1.501, "eval_wer": 0.538307260328921, "step": 6000 }, { "epoch": 277.27, "learning_rate": 4.086666666666666e-05, "loss": 0.4593, "step": 6100 }, { "epoch": 281.82, "learning_rate": 4.003333333333333e-05, "loss": 0.4435, "step": 6200 }, { "epoch": 286.36, "learning_rate": 3.919999999999999e-05, "loss": 0.4432, "step": 6300 }, { "epoch": 290.91, "learning_rate": 3.836666666666666e-05, "loss": 0.4281, "step": 6400 }, { "epoch": 290.91, "eval_loss": 0.5775493383407593, "eval_runtime": 13.3759, "eval_samples_per_second": 22.877, "eval_steps_per_second": 1.495, "eval_wer": 0.5599679101484155, "step": 6400 }, { "epoch": 295.45, "learning_rate": 3.753333333333333e-05, "loss": 0.4264, "step": 6500 }, { "epoch": 300.0, "learning_rate": 3.67e-05, "loss": 0.4187, "step": 6600 }, { "epoch": 304.55, "learning_rate": 3.586666666666666e-05, "loss": 0.3965, "step": 6700 }, { "epoch": 309.09, "learning_rate": 3.503333333333333e-05, "loss": 0.4045, "step": 6800 }, { "epoch": 309.09, "eval_loss": 0.5923808217048645, "eval_runtime": 13.6647, "eval_samples_per_second": 22.394, "eval_steps_per_second": 1.464, "eval_wer": 0.5579622944243883, "step": 6800 }, { "epoch": 313.64, "learning_rate": 3.42e-05, "loss": 0.3857, "step": 6900 }, { "epoch": 318.18, "learning_rate": 3.336666666666667e-05, "loss": 0.373, "step": 7000 }, { "epoch": 322.73, "learning_rate": 3.253333333333333e-05, "loss": 0.3752, "step": 7100 }, { "epoch": 327.27, "learning_rate": 3.17e-05, "loss": 0.3651, "step": 7200 }, { "epoch": 327.27, "eval_loss": 0.567070484161377, "eval_runtime": 13.4998, "eval_samples_per_second": 22.667, "eval_steps_per_second": 1.482, "eval_wer": 0.5683914961893302, "step": 7200 }, { "epoch": 331.82, "learning_rate": 3.086666666666667e-05, "loss": 0.3567, "step": 7300 }, { "epoch": 336.36, "learning_rate": 3.003333333333333e-05, "loss": 0.3551, "step": 7400 }, { "epoch": 340.91, "learning_rate": 2.9199999999999995e-05, "loss": 0.351, "step": 7500 }, { "epoch": 345.45, "learning_rate": 2.8366666666666664e-05, "loss": 0.343, "step": 7600 }, { "epoch": 345.45, "eval_loss": 0.6082874536514282, "eval_runtime": 13.5565, "eval_samples_per_second": 22.572, "eval_steps_per_second": 1.475, "eval_wer": 0.5944645006016848, "step": 7600 }, { "epoch": 350.0, "learning_rate": 2.7541666666666664e-05, "loss": 0.3379, "step": 7700 }, { "epoch": 354.55, "learning_rate": 2.6716666666666664e-05, "loss": 0.3257, "step": 7800 }, { "epoch": 359.09, "learning_rate": 2.588333333333333e-05, "loss": 0.3285, "step": 7900 }, { "epoch": 363.64, "learning_rate": 2.505e-05, "loss": 0.3085, "step": 8000 }, { "epoch": 363.64, "eval_loss": 0.6242865324020386, "eval_runtime": 13.4343, "eval_samples_per_second": 22.778, "eval_steps_per_second": 1.489, "eval_wer": 0.5728038507821901, "step": 8000 }, { "epoch": 368.18, "learning_rate": 2.4216666666666665e-05, "loss": 0.3127, "step": 8100 }, { "epoch": 372.73, "learning_rate": 2.338333333333333e-05, "loss": 0.3149, "step": 8200 }, { "epoch": 377.27, "learning_rate": 2.255e-05, "loss": 0.307, "step": 8300 }, { "epoch": 381.82, "learning_rate": 2.1716666666666665e-05, "loss": 0.2941, "step": 8400 }, { "epoch": 381.82, "eval_loss": 0.6244971752166748, "eval_runtime": 13.5184, "eval_samples_per_second": 22.636, "eval_steps_per_second": 1.479, "eval_wer": 0.5579622944243883, "step": 8400 }, { "epoch": 386.36, "learning_rate": 2.088333333333333e-05, "loss": 0.295, "step": 8500 }, { "epoch": 390.91, "learning_rate": 2.0049999999999996e-05, "loss": 0.2842, "step": 8600 }, { "epoch": 395.45, "learning_rate": 1.9216666666666665e-05, "loss": 0.2817, "step": 8700 }, { "epoch": 400.0, "learning_rate": 1.838333333333333e-05, "loss": 0.2735, "step": 8800 }, { "epoch": 400.0, "eval_loss": 0.6457676291465759, "eval_runtime": 13.5206, "eval_samples_per_second": 22.632, "eval_steps_per_second": 1.479, "eval_wer": 0.5804251905334937, "step": 8800 }, { "epoch": 404.55, "learning_rate": 1.755e-05, "loss": 0.2786, "step": 8900 }, { "epoch": 409.09, "learning_rate": 1.6716666666666665e-05, "loss": 0.2708, "step": 9000 }, { "epoch": 413.64, "learning_rate": 1.5883333333333334e-05, "loss": 0.2661, "step": 9100 }, { "epoch": 418.18, "learning_rate": 1.5049999999999998e-05, "loss": 0.262, "step": 9200 }, { "epoch": 418.18, "eval_loss": 0.656562328338623, "eval_runtime": 13.3117, "eval_samples_per_second": 22.987, "eval_steps_per_second": 1.502, "eval_wer": 0.5824308062575211, "step": 9200 }, { "epoch": 422.73, "learning_rate": 1.4216666666666666e-05, "loss": 0.2577, "step": 9300 }, { "epoch": 427.27, "learning_rate": 1.3383333333333331e-05, "loss": 0.2576, "step": 9400 }, { "epoch": 431.82, "learning_rate": 1.2549999999999998e-05, "loss": 0.2573, "step": 9500 }, { "epoch": 436.36, "learning_rate": 1.1716666666666666e-05, "loss": 0.2578, "step": 9600 }, { "epoch": 436.36, "eval_loss": 0.6557896733283997, "eval_runtime": 13.599, "eval_samples_per_second": 22.502, "eval_steps_per_second": 1.471, "eval_wer": 0.596470116325712, "step": 9600 }, { "epoch": 440.91, "learning_rate": 1.0883333333333331e-05, "loss": 0.2445, "step": 9700 }, { "epoch": 445.45, "learning_rate": 1.005e-05, "loss": 0.2422, "step": 9800 }, { "epoch": 450.0, "learning_rate": 9.216666666666666e-06, "loss": 0.2481, "step": 9900 }, { "epoch": 454.55, "learning_rate": 8.383333333333333e-06, "loss": 0.2388, "step": 10000 }, { "epoch": 454.55, "eval_loss": 0.659763514995575, "eval_runtime": 13.4458, "eval_samples_per_second": 22.758, "eval_steps_per_second": 1.487, "eval_wer": 0.5992779783393501, "step": 10000 }, { "epoch": 459.09, "learning_rate": 7.55e-06, "loss": 0.2362, "step": 10100 }, { "epoch": 463.64, "learning_rate": 6.716666666666666e-06, "loss": 0.2374, "step": 10200 }, { "epoch": 468.18, "learning_rate": 5.883333333333333e-06, "loss": 0.2352, "step": 10300 }, { "epoch": 472.73, "learning_rate": 5.049999999999999e-06, "loss": 0.2328, "step": 10400 }, { "epoch": 472.73, "eval_loss": 0.6699528694152832, "eval_runtime": 13.3932, "eval_samples_per_second": 22.847, "eval_steps_per_second": 1.493, "eval_wer": 0.6040914560770156, "step": 10400 }, { "epoch": 477.27, "learning_rate": 4.216666666666666e-06, "loss": 0.2286, "step": 10500 }, { "epoch": 481.82, "learning_rate": 3.3916666666666662e-06, "loss": 0.232, "step": 10600 }, { "epoch": 486.36, "learning_rate": 2.558333333333333e-06, "loss": 0.2323, "step": 10700 }, { "epoch": 490.91, "learning_rate": 1.7249999999999998e-06, "loss": 0.2286, "step": 10800 }, { "epoch": 490.91, "eval_loss": 0.6684302091598511, "eval_runtime": 13.5254, "eval_samples_per_second": 22.624, "eval_steps_per_second": 1.479, "eval_wer": 0.5956678700361011, "step": 10800 }, { "epoch": 495.45, "learning_rate": 8.916666666666666e-07, "loss": 0.229, "step": 10900 }, { "epoch": 500.0, "learning_rate": 5.8333333333333326e-08, "loss": 0.2255, "step": 11000 }, { "epoch": 500.0, "step": 11000, "total_flos": 6.534167693112984e+19, "train_loss": 1.1389288659529253, "train_runtime": 23734.473, "train_samples_per_second": 14.704, "train_steps_per_second": 0.463 } ], "max_steps": 11000, "num_train_epochs": 500, "total_flos": 6.534167693112984e+19, "trial_name": null, "trial_params": null }