{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.998955067920583, "global_step": 14340, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.84, "learning_rate": 0.000148875, "loss": 6.9674, "step": 400 }, { "epoch": 0.84, "eval_loss": 3.427687168121338, "eval_runtime": 218.9995, "eval_samples_per_second": 15.53, "eval_steps_per_second": 0.973, "eval_wer": 1.0, "step": 400 }, { "epoch": 1.67, "learning_rate": 0.000298875, "loss": 2.566, "step": 800 }, { "epoch": 1.67, "eval_loss": 1.0569312572479248, "eval_runtime": 218.6402, "eval_samples_per_second": 15.555, "eval_steps_per_second": 0.974, "eval_wer": 0.7833242711949475, "step": 800 }, { "epoch": 2.51, "learning_rate": 0.0002912038404726735, "loss": 1.0118, "step": 1200 }, { "epoch": 2.51, "eval_loss": 0.6927362680435181, "eval_runtime": 220.9263, "eval_samples_per_second": 15.394, "eval_steps_per_second": 0.964, "eval_wer": 0.5602283658873227, "step": 1200 }, { "epoch": 3.35, "learning_rate": 0.000282341211225997, "loss": 0.7536, "step": 1600 }, { "epoch": 3.35, "eval_loss": 0.5688415765762329, "eval_runtime": 210.7589, "eval_samples_per_second": 16.137, "eval_steps_per_second": 1.011, "eval_wer": 0.5082395750554185, "step": 1600 }, { "epoch": 4.18, "learning_rate": 0.00027347858197932054, "loss": 0.6251, "step": 2000 }, { "epoch": 4.18, "eval_loss": 0.5367330312728882, "eval_runtime": 211.3794, "eval_samples_per_second": 16.09, "eval_steps_per_second": 1.008, "eval_wer": 0.46095612530846125, "step": 2000 }, { "epoch": 5.02, "learning_rate": 0.00026461595273264397, "loss": 0.5453, "step": 2400 }, { "epoch": 5.02, "eval_loss": 0.47355735301971436, "eval_runtime": 211.7978, "eval_samples_per_second": 16.058, "eval_steps_per_second": 1.006, "eval_wer": 0.4430758291856623, "step": 2400 }, { "epoch": 5.86, "learning_rate": 0.0002557533234859675, "loss": 0.4779, "step": 2800 }, { "epoch": 5.86, "eval_loss": 0.4465107023715973, "eval_runtime": 211.1636, "eval_samples_per_second": 16.106, "eval_steps_per_second": 1.009, "eval_wer": 0.4200719394370321, "step": 2800 }, { "epoch": 6.69, "learning_rate": 0.00024689069423929094, "loss": 0.4458, "step": 3200 }, { "epoch": 6.69, "eval_loss": 0.4270594120025635, "eval_runtime": 210.6558, "eval_samples_per_second": 16.145, "eval_steps_per_second": 1.011, "eval_wer": 0.4025262453469405, "step": 3200 }, { "epoch": 7.53, "learning_rate": 0.00023802806499261444, "loss": 0.4036, "step": 3600 }, { "epoch": 7.53, "eval_loss": 0.4414581060409546, "eval_runtime": 212.0272, "eval_samples_per_second": 16.04, "eval_steps_per_second": 1.005, "eval_wer": 0.3957505541846167, "step": 3600 }, { "epoch": 8.37, "learning_rate": 0.00022916543574593795, "loss": 0.377, "step": 4000 }, { "epoch": 8.37, "eval_loss": 0.4214448034763336, "eval_runtime": 210.8625, "eval_samples_per_second": 16.129, "eval_steps_per_second": 1.01, "eval_wer": 0.39071061106696225, "step": 4000 }, { "epoch": 9.2, "learning_rate": 0.0002203028064992614, "loss": 0.347, "step": 4400 }, { "epoch": 9.2, "eval_loss": 0.42906612157821655, "eval_runtime": 212.1547, "eval_samples_per_second": 16.031, "eval_steps_per_second": 1.004, "eval_wer": 0.38757371701033083, "step": 4400 }, { "epoch": 10.04, "learning_rate": 0.00021144017725258492, "loss": 0.3322, "step": 4800 }, { "epoch": 10.04, "eval_loss": 0.4414941370487213, "eval_runtime": 210.3342, "eval_samples_per_second": 16.17, "eval_steps_per_second": 1.013, "eval_wer": 0.3664728763227237, "step": 4800 }, { "epoch": 10.88, "learning_rate": 0.00020257754800590837, "loss": 0.311, "step": 5200 }, { "epoch": 10.88, "eval_loss": 0.40909305214881897, "eval_runtime": 211.7535, "eval_samples_per_second": 16.061, "eval_steps_per_second": 1.006, "eval_wer": 0.37214019825170436, "step": 5200 }, { "epoch": 11.71, "learning_rate": 0.00019371491875923188, "loss": 0.2956, "step": 5600 }, { "epoch": 11.71, "eval_loss": 0.46582677960395813, "eval_runtime": 210.74, "eval_samples_per_second": 16.138, "eval_steps_per_second": 1.011, "eval_wer": 0.3568530678823874, "step": 5600 }, { "epoch": 12.55, "learning_rate": 0.0001848522895125554, "loss": 0.2811, "step": 6000 }, { "epoch": 12.55, "eval_loss": 0.44131794571876526, "eval_runtime": 210.7883, "eval_samples_per_second": 16.135, "eval_steps_per_second": 1.01, "eval_wer": 0.3576268350830231, "step": 6000 }, { "epoch": 13.39, "learning_rate": 0.00017598966026587885, "loss": 0.2732, "step": 6400 }, { "epoch": 13.39, "eval_loss": 0.48606938123703003, "eval_runtime": 210.0412, "eval_samples_per_second": 16.192, "eval_steps_per_second": 1.014, "eval_wer": 0.3552846208540717, "step": 6400 }, { "epoch": 14.23, "learning_rate": 0.00016712703101920236, "loss": 0.2672, "step": 6800 }, { "epoch": 14.23, "eval_loss": 0.4633455276489258, "eval_runtime": 212.5303, "eval_samples_per_second": 16.002, "eval_steps_per_second": 1.002, "eval_wer": 0.3534861349282697, "step": 6800 }, { "epoch": 15.06, "learning_rate": 0.00015826440177252584, "loss": 0.2497, "step": 7200 }, { "epoch": 15.06, "eval_loss": 0.468420147895813, "eval_runtime": 210.9196, "eval_samples_per_second": 16.125, "eval_steps_per_second": 1.01, "eval_wer": 0.3575850098289347, "step": 7200 }, { "epoch": 15.9, "learning_rate": 0.00014940177252584932, "loss": 0.2334, "step": 7600 }, { "epoch": 15.9, "eval_loss": 0.4702986776828766, "eval_runtime": 211.8705, "eval_samples_per_second": 16.052, "eval_steps_per_second": 1.005, "eval_wer": 0.34524655987285124, "step": 7600 }, { "epoch": 16.74, "learning_rate": 0.0001405391432791728, "loss": 0.2324, "step": 8000 }, { "epoch": 16.74, "eval_loss": 0.4267388880252838, "eval_runtime": 211.5039, "eval_samples_per_second": 16.08, "eval_steps_per_second": 1.007, "eval_wer": 0.35070475553138986, "step": 8000 }, { "epoch": 17.57, "learning_rate": 0.0001316765140324963, "loss": 0.2166, "step": 8400 }, { "epoch": 17.57, "eval_loss": 0.4422346353530884, "eval_runtime": 213.9215, "eval_samples_per_second": 15.898, "eval_steps_per_second": 0.996, "eval_wer": 0.3400393157388431, "step": 8400 }, { "epoch": 18.41, "learning_rate": 0.00012281388478581977, "loss": 0.2116, "step": 8800 }, { "epoch": 18.41, "eval_loss": 0.4669197201728821, "eval_runtime": 213.3022, "eval_samples_per_second": 15.945, "eval_steps_per_second": 0.999, "eval_wer": 0.33359822660922667, "step": 8800 }, { "epoch": 19.25, "learning_rate": 0.00011395125553914327, "loss": 0.2055, "step": 9200 }, { "epoch": 19.25, "eval_loss": 0.46659788489341736, "eval_runtime": 209.6264, "eval_samples_per_second": 16.224, "eval_steps_per_second": 1.016, "eval_wer": 0.3343510811828182, "step": 9200 }, { "epoch": 20.08, "learning_rate": 0.00010508862629246675, "loss": 0.2, "step": 9600 }, { "epoch": 20.08, "eval_loss": 0.4791451096534729, "eval_runtime": 212.2536, "eval_samples_per_second": 16.023, "eval_steps_per_second": 1.004, "eval_wer": 0.3353967125350287, "step": 9600 }, { "epoch": 20.92, "learning_rate": 9.622599704579024e-05, "loss": 0.1851, "step": 10000 }, { "epoch": 20.92, "eval_loss": 0.4670654237270355, "eval_runtime": 209.9735, "eval_samples_per_second": 16.197, "eval_steps_per_second": 1.014, "eval_wer": 0.33182065331046884, "step": 10000 }, { "epoch": 21.76, "learning_rate": 8.736336779911373e-05, "loss": 0.1768, "step": 10400 }, { "epoch": 21.76, "eval_loss": 0.48615434765815735, "eval_runtime": 211.2759, "eval_samples_per_second": 16.097, "eval_steps_per_second": 1.008, "eval_wer": 0.33194612907273413, "step": 10400 }, { "epoch": 22.59, "learning_rate": 7.850073855243721e-05, "loss": 0.1759, "step": 10800 }, { "epoch": 22.59, "eval_loss": 0.4796726107597351, "eval_runtime": 210.8467, "eval_samples_per_second": 16.13, "eval_steps_per_second": 1.01, "eval_wer": 0.32908109916767747, "step": 10800 }, { "epoch": 23.43, "learning_rate": 6.96381093057607e-05, "loss": 0.1697, "step": 11200 }, { "epoch": 23.43, "eval_loss": 0.5016443133354187, "eval_runtime": 210.1285, "eval_samples_per_second": 16.185, "eval_steps_per_second": 1.014, "eval_wer": 0.32728261324187546, "step": 11200 }, { "epoch": 24.27, "learning_rate": 6.077548005908419e-05, "loss": 0.162, "step": 11600 }, { "epoch": 24.27, "eval_loss": 0.4838166832923889, "eval_runtime": 210.5105, "eval_samples_per_second": 16.156, "eval_steps_per_second": 1.012, "eval_wer": 0.32224267012422103, "step": 11600 }, { "epoch": 25.1, "learning_rate": 5.191285081240768e-05, "loss": 0.1552, "step": 12000 }, { "epoch": 25.1, "eval_loss": 0.4953179657459259, "eval_runtime": 214.2232, "eval_samples_per_second": 15.876, "eval_steps_per_second": 0.994, "eval_wer": 0.3248985737588356, "step": 12000 }, { "epoch": 25.94, "learning_rate": 4.3050221565731165e-05, "loss": 0.1505, "step": 12400 }, { "epoch": 25.94, "eval_loss": 0.5147430300712585, "eval_runtime": 213.2541, "eval_samples_per_second": 15.948, "eval_steps_per_second": 0.999, "eval_wer": 0.320444184198419, "step": 12400 }, { "epoch": 26.78, "learning_rate": 3.418759231905465e-05, "loss": 0.1505, "step": 12800 }, { "epoch": 26.78, "eval_loss": 0.5216009616851807, "eval_runtime": 214.5736, "eval_samples_per_second": 15.85, "eval_steps_per_second": 0.993, "eval_wer": 0.32184533021038103, "step": 12800 }, { "epoch": 27.61, "learning_rate": 2.534711964549483e-05, "loss": 0.1441, "step": 13200 }, { "epoch": 27.61, "eval_loss": 0.5204435586929321, "eval_runtime": 210.4984, "eval_samples_per_second": 16.157, "eval_steps_per_second": 1.012, "eval_wer": 0.32084152411225897, "step": 13200 }, { "epoch": 28.45, "learning_rate": 1.6506646971935004e-05, "loss": 0.1432, "step": 13600 }, { "epoch": 28.45, "eval_loss": 0.5269867777824402, "eval_runtime": 210.8697, "eval_samples_per_second": 16.128, "eval_steps_per_second": 1.01, "eval_wer": 0.31854113513739596, "step": 13600 }, { "epoch": 29.29, "learning_rate": 7.644017725258493e-06, "loss": 0.1379, "step": 14000 }, { "epoch": 29.29, "eval_loss": 0.5423755049705505, "eval_runtime": 212.2394, "eval_samples_per_second": 16.024, "eval_steps_per_second": 1.004, "eval_wer": 0.3176209795474508, "step": 14000 }, { "epoch": 30.0, "step": 14340, "total_flos": 2.519939618068477e+20, "train_loss": 0.5493880579162342, "train_runtime": 54395.2991, "train_samples_per_second": 16.879, "train_steps_per_second": 0.264 } ], "max_steps": 14340, "num_train_epochs": 30, "total_flos": 2.519939618068477e+20, "trial_name": null, "trial_params": null }