{ "best_metric": null, "best_model_checkpoint": null, "epoch": 200.0, "global_step": 3600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.56, "learning_rate": 5.82e-05, "loss": 9.8118, "step": 100 }, { "epoch": 11.11, "learning_rate": 0.0001182, "loss": 3.4789, "step": 200 }, { "epoch": 16.67, "learning_rate": 0.00017819999999999997, "loss": 3.114, "step": 300 }, { "epoch": 22.22, "learning_rate": 0.0002382, "loss": 2.721, "step": 400 }, { "epoch": 27.78, "learning_rate": 0.0002982, "loss": 1.7294, "step": 500 }, { "epoch": 27.78, "eval_loss": 0.8540233373641968, "eval_runtime": 10.9769, "eval_samples_per_second": 19.313, "eval_steps_per_second": 0.638, "eval_wer": 0.9943609022556391, "step": 500 }, { "epoch": 33.33, "learning_rate": 0.0002906129032258064, "loss": 1.351, "step": 600 }, { "epoch": 38.89, "learning_rate": 0.0002809354838709677, "loss": 1.1775, "step": 700 }, { "epoch": 44.44, "learning_rate": 0.000271258064516129, "loss": 1.0539, "step": 800 }, { "epoch": 50.0, "learning_rate": 0.0002615806451612903, "loss": 0.9587, "step": 900 }, { "epoch": 55.56, "learning_rate": 0.0002519032258064516, "loss": 0.8863, "step": 1000 }, { "epoch": 55.56, "eval_loss": 0.7282317280769348, "eval_runtime": 11.4744, "eval_samples_per_second": 18.476, "eval_steps_per_second": 0.61, "eval_wer": 0.731203007518797, "step": 1000 }, { "epoch": 61.11, "learning_rate": 0.00024222580645161287, "loss": 0.7985, "step": 1100 }, { "epoch": 66.67, "learning_rate": 0.00023254838709677416, "loss": 0.7323, "step": 1200 }, { "epoch": 72.22, "learning_rate": 0.00022287096774193548, "loss": 0.6712, "step": 1300 }, { "epoch": 77.78, "learning_rate": 0.00021319354838709678, "loss": 0.6214, "step": 1400 }, { "epoch": 83.33, "learning_rate": 0.00020351612903225804, "loss": 0.5789, "step": 1500 }, { "epoch": 83.33, "eval_loss": 0.817844033241272, "eval_runtime": 11.023, "eval_samples_per_second": 19.232, "eval_steps_per_second": 0.635, "eval_wer": 0.8101503759398496, "step": 1500 }, { "epoch": 88.89, "learning_rate": 0.00019383870967741934, "loss": 0.5519, "step": 1600 }, { "epoch": 94.44, "learning_rate": 0.00018416129032258063, "loss": 0.4957, "step": 1700 }, { "epoch": 100.0, "learning_rate": 0.00017448387096774193, "loss": 0.4599, "step": 1800 }, { "epoch": 105.56, "learning_rate": 0.0001648064516129032, "loss": 0.4272, "step": 1900 }, { "epoch": 111.11, "learning_rate": 0.0001551290322580645, "loss": 0.3899, "step": 2000 }, { "epoch": 111.11, "eval_loss": 0.803404688835144, "eval_runtime": 10.8752, "eval_samples_per_second": 19.494, "eval_steps_per_second": 0.644, "eval_wer": 0.7700501253132832, "step": 2000 }, { "epoch": 116.67, "learning_rate": 0.0001454516129032258, "loss": 0.3725, "step": 2100 }, { "epoch": 122.22, "learning_rate": 0.00013577419354838708, "loss": 0.3373, "step": 2200 }, { "epoch": 127.78, "learning_rate": 0.00012609677419354837, "loss": 0.3134, "step": 2300 }, { "epoch": 133.33, "learning_rate": 0.00011641935483870967, "loss": 0.3057, "step": 2400 }, { "epoch": 138.89, "learning_rate": 0.00010674193548387096, "loss": 0.2869, "step": 2500 }, { "epoch": 138.89, "eval_loss": 0.9060825705528259, "eval_runtime": 10.9301, "eval_samples_per_second": 19.396, "eval_steps_per_second": 0.64, "eval_wer": 0.699874686716792, "step": 2500 }, { "epoch": 144.44, "learning_rate": 9.706451612903224e-05, "loss": 0.2579, "step": 2600 }, { "epoch": 150.0, "learning_rate": 8.738709677419355e-05, "loss": 0.2383, "step": 2700 }, { "epoch": 155.56, "learning_rate": 7.770967741935483e-05, "loss": 0.2253, "step": 2800 }, { "epoch": 161.11, "learning_rate": 6.803225806451613e-05, "loss": 0.2144, "step": 2900 }, { "epoch": 166.67, "learning_rate": 5.835483870967742e-05, "loss": 0.1934, "step": 3000 }, { "epoch": 166.67, "eval_loss": 0.9400397539138794, "eval_runtime": 11.0289, "eval_samples_per_second": 19.222, "eval_steps_per_second": 0.635, "eval_wer": 0.7105263157894737, "step": 3000 }, { "epoch": 172.22, "learning_rate": 4.8677419354838705e-05, "loss": 0.1874, "step": 3100 }, { "epoch": 177.78, "learning_rate": 3.9e-05, "loss": 0.1778, "step": 3200 }, { "epoch": 183.33, "learning_rate": 2.932258064516129e-05, "loss": 0.171, "step": 3300 }, { "epoch": 188.89, "learning_rate": 1.9645161290322582e-05, "loss": 0.1568, "step": 3400 }, { "epoch": 194.44, "learning_rate": 9.96774193548387e-06, "loss": 0.1551, "step": 3500 }, { "epoch": 194.44, "eval_loss": 0.966656506061554, "eval_runtime": 11.4149, "eval_samples_per_second": 18.572, "eval_steps_per_second": 0.613, "eval_wer": 0.6954887218045113, "step": 3500 }, { "epoch": 200.0, "learning_rate": 2.903225806451613e-07, "loss": 0.145, "step": 3600 }, { "epoch": 200.0, "step": 3600, "total_flos": 2.405940879390409e+19, "train_loss": 0.9929841698540581, "train_runtime": 8863.3906, "train_samples_per_second": 12.501, "train_steps_per_second": 0.406 } ], "max_steps": 3600, "num_train_epochs": 200, "total_flos": 2.405940879390409e+19, "trial_name": null, "trial_params": null }