{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.997830802603037, "global_step": 6900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.43, "eval_loss": 3.3600032329559326, "eval_runtime": 177.4912, "eval_samples_per_second": 27.601, "eval_steps_per_second": 0.868, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.87, "eval_loss": 3.088737726211548, "eval_runtime": 176.8112, "eval_samples_per_second": 27.708, "eval_steps_per_second": 0.871, "eval_wer": 1.0, "step": 200 }, { "epoch": 1.3, "eval_loss": 3.0779149532318115, "eval_runtime": 176.3294, "eval_samples_per_second": 27.783, "eval_steps_per_second": 0.873, "eval_wer": 1.0, "step": 300 }, { "epoch": 1.74, "eval_loss": 3.05513334274292, "eval_runtime": 176.7029, "eval_samples_per_second": 27.724, "eval_steps_per_second": 0.872, "eval_wer": 1.0, "step": 400 }, { "epoch": 2.17, "learning_rate": 0.00029699999999999996, "loss": 4.8553, "step": 500 }, { "epoch": 2.17, "eval_loss": 3.052617311477661, "eval_runtime": 176.6413, "eval_samples_per_second": 27.734, "eval_steps_per_second": 0.872, "eval_wer": 1.0, "step": 500 }, { "epoch": 2.61, "eval_loss": 3.0559935569763184, "eval_runtime": 175.4042, "eval_samples_per_second": 27.93, "eval_steps_per_second": 0.878, "eval_wer": 1.0, "step": 600 }, { "epoch": 3.04, "eval_loss": 3.125081777572632, "eval_runtime": 174.6598, "eval_samples_per_second": 28.049, "eval_steps_per_second": 0.882, "eval_wer": 1.0, "step": 700 }, { "epoch": 3.48, "eval_loss": 3.087021589279175, "eval_runtime": 177.8472, "eval_samples_per_second": 27.546, "eval_steps_per_second": 0.866, "eval_wer": 1.0, "step": 800 }, { "epoch": 3.91, "eval_loss": 3.08219575881958, "eval_runtime": 180.0973, "eval_samples_per_second": 27.202, "eval_steps_per_second": 0.855, "eval_wer": 1.0, "step": 900 }, { "epoch": 4.35, "learning_rate": 0.00027679687499999997, "loss": 3.1133, "step": 1000 }, { "epoch": 4.35, "eval_loss": 3.048403739929199, "eval_runtime": 177.987, "eval_samples_per_second": 27.524, "eval_steps_per_second": 0.865, "eval_wer": 1.0, "step": 1000 }, { "epoch": 4.78, "eval_loss": 3.0558109283447266, "eval_runtime": 176.2514, "eval_samples_per_second": 27.796, "eval_steps_per_second": 0.874, "eval_wer": 1.0, "step": 1100 }, { "epoch": 5.22, "eval_loss": 3.1018614768981934, "eval_runtime": 174.5505, "eval_samples_per_second": 28.066, "eval_steps_per_second": 0.882, "eval_wer": 1.0, "step": 1200 }, { "epoch": 5.65, "eval_loss": 3.0914077758789062, "eval_runtime": 174.0307, "eval_samples_per_second": 28.15, "eval_steps_per_second": 0.885, "eval_wer": 1.0, "step": 1300 }, { "epoch": 6.09, "eval_loss": 3.069120168685913, "eval_runtime": 175.4381, "eval_samples_per_second": 27.924, "eval_steps_per_second": 0.878, "eval_wer": 1.0, "step": 1400 }, { "epoch": 6.52, "learning_rate": 0.00025335937499999995, "loss": 3.109, "step": 1500 }, { "epoch": 6.52, "eval_loss": 3.0588901042938232, "eval_runtime": 175.6572, "eval_samples_per_second": 27.89, "eval_steps_per_second": 0.877, "eval_wer": 1.0, "step": 1500 }, { "epoch": 6.95, "eval_loss": 3.050849199295044, "eval_runtime": 175.685, "eval_samples_per_second": 27.885, "eval_steps_per_second": 0.877, "eval_wer": 1.0, "step": 1600 }, { "epoch": 7.39, "eval_loss": 3.054013252258301, "eval_runtime": 177.236, "eval_samples_per_second": 27.641, "eval_steps_per_second": 0.869, "eval_wer": 1.0, "step": 1700 }, { "epoch": 7.82, "eval_loss": 3.0545613765716553, "eval_runtime": 177.8822, "eval_samples_per_second": 27.541, "eval_steps_per_second": 0.866, "eval_wer": 1.0, "step": 1800 }, { "epoch": 8.26, "eval_loss": 3.0523643493652344, "eval_runtime": 172.9222, "eval_samples_per_second": 28.331, "eval_steps_per_second": 0.891, "eval_wer": 1.0, "step": 1900 }, { "epoch": 8.69, "learning_rate": 0.00022992187499999996, "loss": 3.1106, "step": 2000 }, { "epoch": 8.69, "eval_loss": 3.056912422180176, "eval_runtime": 175.8694, "eval_samples_per_second": 27.856, "eval_steps_per_second": 0.876, "eval_wer": 1.0, "step": 2000 }, { "epoch": 9.13, "eval_loss": 3.0621554851531982, "eval_runtime": 175.7147, "eval_samples_per_second": 27.88, "eval_steps_per_second": 0.876, "eval_wer": 1.0, "step": 2100 }, { "epoch": 9.56, "eval_loss": 3.0517823696136475, "eval_runtime": 174.9875, "eval_samples_per_second": 27.996, "eval_steps_per_second": 0.88, "eval_wer": 1.0, "step": 2200 }, { "epoch": 10.0, "eval_loss": 3.0749499797821045, "eval_runtime": 176.3933, "eval_samples_per_second": 27.773, "eval_steps_per_second": 0.873, "eval_wer": 1.0, "step": 2300 }, { "epoch": 10.43, "eval_loss": 3.0697524547576904, "eval_runtime": 175.8486, "eval_samples_per_second": 27.859, "eval_steps_per_second": 0.876, "eval_wer": 1.0, "step": 2400 }, { "epoch": 10.87, "learning_rate": 0.00020648437499999996, "loss": 3.1058, "step": 2500 }, { "epoch": 10.87, "eval_loss": 3.0664749145507812, "eval_runtime": 176.8396, "eval_samples_per_second": 27.703, "eval_steps_per_second": 0.871, "eval_wer": 1.0, "step": 2500 }, { "epoch": 11.3, "eval_loss": 3.055528402328491, "eval_runtime": 176.8954, "eval_samples_per_second": 27.694, "eval_steps_per_second": 0.871, "eval_wer": 1.0, "step": 2600 }, { "epoch": 11.74, "eval_loss": 3.0589022636413574, "eval_runtime": 177.2054, "eval_samples_per_second": 27.646, "eval_steps_per_second": 0.869, "eval_wer": 1.0, "step": 2700 }, { "epoch": 12.17, "eval_loss": 3.061063051223755, "eval_runtime": 176.5606, "eval_samples_per_second": 27.747, "eval_steps_per_second": 0.872, "eval_wer": 1.0, "step": 2800 }, { "epoch": 12.61, "eval_loss": 3.056131601333618, "eval_runtime": 175.9193, "eval_samples_per_second": 27.848, "eval_steps_per_second": 0.875, "eval_wer": 1.0, "step": 2900 }, { "epoch": 13.04, "learning_rate": 0.00018304687499999997, "loss": 3.1071, "step": 3000 }, { "epoch": 13.04, "eval_loss": 3.0480217933654785, "eval_runtime": 175.6518, "eval_samples_per_second": 27.89, "eval_steps_per_second": 0.877, "eval_wer": 1.0, "step": 3000 }, { "epoch": 13.48, "eval_loss": 3.0491693019866943, "eval_runtime": 173.0223, "eval_samples_per_second": 28.314, "eval_steps_per_second": 0.89, "eval_wer": 1.0, "step": 3100 }, { "epoch": 13.91, "eval_loss": 3.057448387145996, "eval_runtime": 175.4684, "eval_samples_per_second": 27.92, "eval_steps_per_second": 0.878, "eval_wer": 1.0, "step": 3200 }, { "epoch": 14.35, "eval_loss": 3.053784132003784, "eval_runtime": 176.0074, "eval_samples_per_second": 27.834, "eval_steps_per_second": 0.875, "eval_wer": 1.0, "step": 3300 }, { "epoch": 14.78, "eval_loss": 3.050539016723633, "eval_runtime": 175.3243, "eval_samples_per_second": 27.943, "eval_steps_per_second": 0.878, "eval_wer": 1.0, "step": 3400 }, { "epoch": 15.22, "learning_rate": 0.00015960937499999997, "loss": 3.1061, "step": 3500 }, { "epoch": 15.22, "eval_loss": 3.059952735900879, "eval_runtime": 176.5589, "eval_samples_per_second": 27.747, "eval_steps_per_second": 0.872, "eval_wer": 1.0, "step": 3500 }, { "epoch": 15.65, "eval_loss": 3.0595669746398926, "eval_runtime": 177.5778, "eval_samples_per_second": 27.588, "eval_steps_per_second": 0.867, "eval_wer": 1.0, "step": 3600 }, { "epoch": 16.09, "eval_loss": 3.0623462200164795, "eval_runtime": 174.5228, "eval_samples_per_second": 28.071, "eval_steps_per_second": 0.882, "eval_wer": 1.0, "step": 3700 }, { "epoch": 16.52, "eval_loss": 3.079986095428467, "eval_runtime": 175.7994, "eval_samples_per_second": 27.867, "eval_steps_per_second": 0.876, "eval_wer": 1.0, "step": 3800 }, { "epoch": 16.95, "eval_loss": 3.0583465099334717, "eval_runtime": 176.6021, "eval_samples_per_second": 27.74, "eval_steps_per_second": 0.872, "eval_wer": 1.0, "step": 3900 }, { "epoch": 17.39, "learning_rate": 0.00013617187499999998, "loss": 3.1036, "step": 4000 }, { "epoch": 17.39, "eval_loss": 3.053365468978882, "eval_runtime": 175.9714, "eval_samples_per_second": 27.84, "eval_steps_per_second": 0.875, "eval_wer": 1.0, "step": 4000 }, { "epoch": 17.82, "eval_loss": 3.0563225746154785, "eval_runtime": 175.7204, "eval_samples_per_second": 27.88, "eval_steps_per_second": 0.876, "eval_wer": 1.0, "step": 4100 }, { "epoch": 18.26, "eval_loss": 3.0481250286102295, "eval_runtime": 175.8171, "eval_samples_per_second": 27.864, "eval_steps_per_second": 0.876, "eval_wer": 1.0, "step": 4200 }, { "epoch": 18.69, "eval_loss": 3.0476744174957275, "eval_runtime": 175.2697, "eval_samples_per_second": 27.951, "eval_steps_per_second": 0.879, "eval_wer": 1.0, "step": 4300 }, { "epoch": 19.13, "eval_loss": 3.0504729747772217, "eval_runtime": 175.8225, "eval_samples_per_second": 27.863, "eval_steps_per_second": 0.876, "eval_wer": 1.0, "step": 4400 }, { "epoch": 19.56, "learning_rate": 0.00011273437499999999, "loss": 3.1086, "step": 4500 }, { "epoch": 19.56, "eval_loss": 3.0484793186187744, "eval_runtime": 176.184, "eval_samples_per_second": 27.806, "eval_steps_per_second": 0.874, "eval_wer": 1.0, "step": 4500 }, { "epoch": 20.0, "eval_loss": 3.0480639934539795, "eval_runtime": 175.5218, "eval_samples_per_second": 27.911, "eval_steps_per_second": 0.877, "eval_wer": 1.0, "step": 4600 }, { "epoch": 20.43, "eval_loss": 3.061495780944824, "eval_runtime": 176.4313, "eval_samples_per_second": 27.767, "eval_steps_per_second": 0.873, "eval_wer": 1.0, "step": 4700 }, { "epoch": 20.87, "eval_loss": 3.0657691955566406, "eval_runtime": 175.5853, "eval_samples_per_second": 27.901, "eval_steps_per_second": 0.877, "eval_wer": 1.0, "step": 4800 }, { "epoch": 21.3, "eval_loss": 3.050532341003418, "eval_runtime": 176.2169, "eval_samples_per_second": 27.801, "eval_steps_per_second": 0.874, "eval_wer": 1.0, "step": 4900 }, { "epoch": 21.74, "learning_rate": 8.9296875e-05, "loss": 3.1028, "step": 5000 }, { "epoch": 21.74, "eval_loss": 3.0491702556610107, "eval_runtime": 175.9502, "eval_samples_per_second": 27.843, "eval_steps_per_second": 0.875, "eval_wer": 1.0, "step": 5000 }, { "epoch": 22.17, "eval_loss": 3.048527479171753, "eval_runtime": 174.9586, "eval_samples_per_second": 28.001, "eval_steps_per_second": 0.88, "eval_wer": 1.0, "step": 5100 }, { "epoch": 22.61, "eval_loss": 3.0482711791992188, "eval_runtime": 176.76, "eval_samples_per_second": 27.716, "eval_steps_per_second": 0.871, "eval_wer": 1.0, "step": 5200 }, { "epoch": 23.04, "eval_loss": 3.0478527545928955, "eval_runtime": 174.8893, "eval_samples_per_second": 28.012, "eval_steps_per_second": 0.881, "eval_wer": 1.0, "step": 5300 }, { "epoch": 23.48, "eval_loss": 3.05094313621521, "eval_runtime": 175.0794, "eval_samples_per_second": 27.982, "eval_steps_per_second": 0.88, "eval_wer": 1.0, "step": 5400 }, { "epoch": 23.91, "learning_rate": 6.5859375e-05, "loss": 3.1087, "step": 5500 }, { "epoch": 23.91, "eval_loss": 3.0529990196228027, "eval_runtime": 176.1904, "eval_samples_per_second": 27.805, "eval_steps_per_second": 0.874, "eval_wer": 1.0, "step": 5500 }, { "epoch": 24.35, "eval_loss": 3.048621654510498, "eval_runtime": 175.2104, "eval_samples_per_second": 27.961, "eval_steps_per_second": 0.879, "eval_wer": 1.0, "step": 5600 }, { "epoch": 24.78, "eval_loss": 3.051391124725342, "eval_runtime": 175.9351, "eval_samples_per_second": 27.845, "eval_steps_per_second": 0.875, "eval_wer": 1.0, "step": 5700 }, { "epoch": 25.22, "eval_loss": 3.050508499145508, "eval_runtime": 175.1722, "eval_samples_per_second": 27.967, "eval_steps_per_second": 0.879, "eval_wer": 1.0, "step": 5800 }, { "epoch": 25.65, "eval_loss": 3.050753355026245, "eval_runtime": 175.581, "eval_samples_per_second": 27.902, "eval_steps_per_second": 0.877, "eval_wer": 1.0, "step": 5900 }, { "epoch": 26.09, "learning_rate": 4.2421875e-05, "loss": 3.1043, "step": 6000 }, { "epoch": 26.09, "eval_loss": 3.050074815750122, "eval_runtime": 175.9337, "eval_samples_per_second": 27.846, "eval_steps_per_second": 0.875, "eval_wer": 1.0, "step": 6000 }, { "epoch": 26.52, "eval_loss": 3.046748638153076, "eval_runtime": 176.2651, "eval_samples_per_second": 27.793, "eval_steps_per_second": 0.874, "eval_wer": 1.0, "step": 6100 }, { "epoch": 26.95, "eval_loss": 3.046581268310547, "eval_runtime": 175.4906, "eval_samples_per_second": 27.916, "eval_steps_per_second": 0.878, "eval_wer": 1.0, "step": 6200 }, { "epoch": 27.39, "eval_loss": 3.0465457439422607, "eval_runtime": 174.3257, "eval_samples_per_second": 28.103, "eval_steps_per_second": 0.883, "eval_wer": 1.0, "step": 6300 }, { "epoch": 27.82, "eval_loss": 3.0464954376220703, "eval_runtime": 174.3395, "eval_samples_per_second": 28.1, "eval_steps_per_second": 0.883, "eval_wer": 1.0, "step": 6400 }, { "epoch": 28.26, "learning_rate": 1.8984375e-05, "loss": 3.1175, "step": 6500 }, { "epoch": 28.26, "eval_loss": 3.046614170074463, "eval_runtime": 174.756, "eval_samples_per_second": 28.033, "eval_steps_per_second": 0.881, "eval_wer": 1.0, "step": 6500 }, { "epoch": 28.69, "eval_loss": 3.046605110168457, "eval_runtime": 174.8316, "eval_samples_per_second": 28.021, "eval_steps_per_second": 0.881, "eval_wer": 1.0, "step": 6600 }, { "epoch": 29.13, "eval_loss": 3.0464911460876465, "eval_runtime": 174.5543, "eval_samples_per_second": 28.066, "eval_steps_per_second": 0.882, "eval_wer": 1.0, "step": 6700 }, { "epoch": 29.56, "eval_loss": 3.046463966369629, "eval_runtime": 175.0973, "eval_samples_per_second": 27.979, "eval_steps_per_second": 0.88, "eval_wer": 1.0, "step": 6800 }, { "epoch": 30.0, "eval_loss": 3.046398878097534, "eval_runtime": 185.0373, "eval_samples_per_second": 26.476, "eval_steps_per_second": 0.832, "eval_wer": 1.0, "step": 6900 }, { "epoch": 30.0, "step": 6900, "total_flos": 1.7654566052477592e+19, "train_loss": 0.04483215774314991, "train_runtime": 852.37, "train_samples_per_second": 518.437, "train_steps_per_second": 8.095 } ], "max_steps": 6900, "num_train_epochs": 30, "total_flos": 1.7654566052477592e+19, "trial_name": null, "trial_params": null }