{ "best_metric": 19.97153700189753, "best_model_checkpoint": "./checkpoint-800", "epoch": 15.016666666666667, "global_step": 900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 2e-08, "loss": 1.2498, "step": 25 }, { "epoch": 0.06, "learning_rate": 4.5e-08, "loss": 0.9037, "step": 50 }, { "epoch": 1.02, "learning_rate": 6.999999999999999e-08, "loss": 1.0907, "step": 75 }, { "epoch": 1.05, "learning_rate": 9.499999999999999e-08, "loss": 0.927, "step": 100 }, { "epoch": 2.01, "learning_rate": 9.983007697636658e-08, "loss": 0.8001, "step": 125 }, { "epoch": 2.04, "learning_rate": 9.918643144747679e-08, "loss": 0.8719, "step": 150 }, { "epoch": 2.06, "learning_rate": 9.812276182268236e-08, "loss": 0.5706, "step": 175 }, { "epoch": 3.03, "learning_rate": 9.656074673794016e-08, "loss": 0.7919, "step": 200 }, { "epoch": 3.03, "eval_loss": 0.279296875, "eval_runtime": 701.0908, "eval_samples_per_second": 3.164, "eval_steps_per_second": 0.1, "eval_wer": 22.58064516129032, "step": 200 }, { "epoch": 3.05, "learning_rate": 9.455032620941839e-08, "loss": 0.5192, "step": 225 }, { "epoch": 4.02, "learning_rate": 9.200467769494708e-08, "loss": 0.6054, "step": 250 }, { "epoch": 4.04, "learning_rate": 8.914398527981508e-08, "loss": 0.5903, "step": 275 }, { "epoch": 5.01, "learning_rate": 8.590631488815944e-08, "loss": 0.469, "step": 300 }, { "epoch": 5.03, "learning_rate": 8.23228470557425e-08, "loss": 0.6128, "step": 325 }, { "epoch": 5.06, "learning_rate": 7.84280925367132e-08, "loss": 0.4026, "step": 350 }, { "epoch": 6.02, "learning_rate": 7.408768370508576e-08, "loss": 0.5923, "step": 375 }, { "epoch": 6.05, "learning_rate": 6.967704118273256e-08, "loss": 0.4409, "step": 400 }, { "epoch": 6.05, "eval_loss": 0.26513671875, "eval_runtime": 702.5872, "eval_samples_per_second": 3.157, "eval_steps_per_second": 0.1, "eval_wer": 20.60721062618596, "step": 400 }, { "epoch": 7.01, "learning_rate": 6.507689799722478e-08, "loss": 0.4854, "step": 425 }, { "epoch": 7.04, "learning_rate": 6.03315560421329e-08, "loss": 0.4989, "step": 450 }, { "epoch": 8.0, "learning_rate": 5.5291509972967234e-08, "loss": 0.3982, "step": 475 }, { "epoch": 8.03, "learning_rate": 5.0392695044435566e-08, "loss": 0.5425, "step": 500 }, { "epoch": 8.06, "learning_rate": 4.5490098247957034e-08, "loss": 0.3773, "step": 525 }, { "epoch": 9.02, "learning_rate": 4.0630934270713755e-08, "loss": 0.5168, "step": 550 }, { "epoch": 9.05, "learning_rate": 3.586199951809582e-08, "loss": 0.4259, "step": 575 }, { "epoch": 10.01, "learning_rate": 3.1047378773808274e-08, "loss": 0.4393, "step": 600 }, { "epoch": 10.01, "eval_loss": 0.260009765625, "eval_runtime": 701.0779, "eval_samples_per_second": 3.164, "eval_steps_per_second": 0.1, "eval_wer": 20.06641366223909, "step": 600 }, { "epoch": 10.04, "learning_rate": 2.6603509286971338e-08, "loss": 0.497, "step": 625 }, { "epoch": 11.0, "learning_rate": 2.238496079552367e-08, "loss": 0.3649, "step": 650 }, { "epoch": 11.03, "learning_rate": 1.8432360227531113e-08, "loss": 0.5444, "step": 675 }, { "epoch": 11.06, "learning_rate": 1.4783773287174683e-08, "loss": 0.3612, "step": 700 }, { "epoch": 12.02, "learning_rate": 1.134947733186315e-08, "loss": 0.4897, "step": 725 }, { "epoch": 12.05, "learning_rate": 8.426519384872732e-09, "loss": 0.4383, "step": 750 }, { "epoch": 13.01, "learning_rate": 5.903936782582253e-09, "loss": 0.4205, "step": 775 }, { "epoch": 13.04, "learning_rate": 3.8060233744356625e-09, "loss": 0.4975, "step": 800 }, { "epoch": 13.04, "eval_loss": 0.25927734375, "eval_runtime": 700.0881, "eval_samples_per_second": 3.168, "eval_steps_per_second": 0.1, "eval_wer": 19.97153700189753, "step": 800 }, { "epoch": 13.06, "learning_rate": 2.152983213389559e-09, "loss": 0.3587, "step": 825 }, { "epoch": 14.03, "learning_rate": 9.228083061983804e-10, "loss": 0.5509, "step": 850 }, { "epoch": 14.05, "learning_rate": 2.2190176984600017e-10, "loss": 0.3727, "step": 875 }, { "epoch": 15.02, "learning_rate": 3.8553092647219954e-13, "loss": 0.4754, "step": 900 }, { "epoch": 15.02, "step": 900, "total_flos": 1.2318638198947106e+20, "train_loss": 0.5692690700954861, "train_runtime": 21274.2497, "train_samples_per_second": 2.707, "train_steps_per_second": 0.042 } ], "max_steps": 900, "num_train_epochs": 9223372036854775807, "total_flos": 1.2318638198947106e+20, "trial_name": null, "trial_params": null }