{ "best_metric": 5.074450392391248, "best_model_checkpoint": "./checkpoint-1500", "epoch": 1.3533333333333333, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 9.87983978638184e-07, "loss": 0.676, "step": 25 }, { "epoch": 0.03, "learning_rate": 9.712950600801067e-07, "loss": 0.3062, "step": 50 }, { "epoch": 0.05, "learning_rate": 9.546061415220292e-07, "loss": 0.2604, "step": 75 }, { "epoch": 0.07, "learning_rate": 9.379172229639519e-07, "loss": 0.2175, "step": 100 }, { "epoch": 0.08, "learning_rate": 9.212283044058745e-07, "loss": 0.2147, "step": 125 }, { "epoch": 0.1, "learning_rate": 9.04539385847797e-07, "loss": 0.1825, "step": 150 }, { "epoch": 0.12, "learning_rate": 8.885180240320427e-07, "loss": 0.168, "step": 175 }, { "epoch": 0.13, "learning_rate": 8.718291054739653e-07, "loss": 0.1771, "step": 200 }, { "epoch": 0.15, "learning_rate": 8.551401869158879e-07, "loss": 0.1862, "step": 225 }, { "epoch": 0.17, "learning_rate": 8.384512683578104e-07, "loss": 0.1867, "step": 250 }, { "epoch": 0.18, "learning_rate": 8.21762349799733e-07, "loss": 0.1652, "step": 275 }, { "epoch": 0.2, "learning_rate": 8.050734312416555e-07, "loss": 0.1583, "step": 300 }, { "epoch": 0.22, "learning_rate": 7.883845126835781e-07, "loss": 0.1562, "step": 325 }, { "epoch": 0.23, "learning_rate": 7.716955941255007e-07, "loss": 0.1607, "step": 350 }, { "epoch": 0.25, "learning_rate": 7.550066755674232e-07, "loss": 0.1422, "step": 375 }, { "epoch": 0.27, "learning_rate": 7.383177570093458e-07, "loss": 0.1508, "step": 400 }, { "epoch": 0.28, "learning_rate": 7.216288384512683e-07, "loss": 0.1556, "step": 425 }, { "epoch": 0.3, "learning_rate": 7.049399198931909e-07, "loss": 0.1581, "step": 450 }, { "epoch": 0.32, "learning_rate": 6.882510013351135e-07, "loss": 0.1738, "step": 475 }, { "epoch": 0.33, "learning_rate": 6.71562082777036e-07, "loss": 0.151, "step": 500 }, { "epoch": 0.35, "learning_rate": 6.548731642189586e-07, "loss": 0.1471, "step": 525 }, { "epoch": 0.37, "learning_rate": 6.381842456608812e-07, "loss": 0.1491, "step": 550 }, { "epoch": 0.38, "learning_rate": 6.214953271028037e-07, "loss": 0.1388, "step": 575 }, { "epoch": 0.4, "learning_rate": 6.048064085447263e-07, "loss": 0.1521, "step": 600 }, { "epoch": 0.42, "learning_rate": 5.887850467289718e-07, "loss": 0.1572, "step": 625 }, { "epoch": 0.43, "learning_rate": 5.720961281708944e-07, "loss": 0.1577, "step": 650 }, { "epoch": 0.45, "learning_rate": 5.55407209612817e-07, "loss": 0.1416, "step": 675 }, { "epoch": 0.47, "learning_rate": 5.387182910547396e-07, "loss": 0.1375, "step": 700 }, { "epoch": 0.48, "learning_rate": 5.220293724966622e-07, "loss": 0.1228, "step": 725 }, { "epoch": 0.5, "learning_rate": 5.053404539385847e-07, "loss": 0.1556, "step": 750 }, { "epoch": 0.5, "eval_loss": 0.1683349609375, "eval_runtime": 4390.4106, "eval_samples_per_second": 3.535, "eval_steps_per_second": 0.221, "eval_wer": 5.095888987058885, "step": 750 }, { "epoch": 0.52, "learning_rate": 4.886515353805072e-07, "loss": 0.1318, "step": 775 }, { "epoch": 0.53, "learning_rate": 4.719626168224299e-07, "loss": 0.1586, "step": 800 }, { "epoch": 0.55, "learning_rate": 4.5527369826435247e-07, "loss": 0.1403, "step": 825 }, { "epoch": 0.57, "learning_rate": 4.38584779706275e-07, "loss": 0.144, "step": 850 }, { "epoch": 0.58, "learning_rate": 4.225634178905207e-07, "loss": 0.1417, "step": 875 }, { "epoch": 0.6, "learning_rate": 4.058744993324432e-07, "loss": 0.1544, "step": 900 }, { "epoch": 0.62, "learning_rate": 3.891855807743658e-07, "loss": 0.1483, "step": 925 }, { "epoch": 0.63, "learning_rate": 3.724966622162884e-07, "loss": 0.1469, "step": 950 }, { "epoch": 1.0, "learning_rate": 3.558077436582109e-07, "loss": 0.1782, "step": 975 }, { "epoch": 1.02, "learning_rate": 3.391188251001335e-07, "loss": 0.2161, "step": 1000 }, { "epoch": 1.04, "learning_rate": 3.22429906542056e-07, "loss": 0.1903, "step": 1025 }, { "epoch": 1.05, "learning_rate": 3.057409879839786e-07, "loss": 0.2003, "step": 1050 }, { "epoch": 1.07, "learning_rate": 2.890520694259012e-07, "loss": 0.2123, "step": 1075 }, { "epoch": 1.09, "learning_rate": 2.723631508678237e-07, "loss": 0.2347, "step": 1100 }, { "epoch": 1.1, "learning_rate": 2.5567423230974635e-07, "loss": 0.1982, "step": 1125 }, { "epoch": 1.12, "learning_rate": 2.389853137516689e-07, "loss": 0.1899, "step": 1150 }, { "epoch": 1.14, "learning_rate": 2.2229639519359144e-07, "loss": 0.1865, "step": 1175 }, { "epoch": 1.15, "learning_rate": 2.05607476635514e-07, "loss": 0.204, "step": 1200 }, { "epoch": 1.17, "learning_rate": 1.8891855807743656e-07, "loss": 0.2119, "step": 1225 }, { "epoch": 1.19, "learning_rate": 1.7222963951935914e-07, "loss": 0.2108, "step": 1250 }, { "epoch": 1.2, "learning_rate": 1.555407209612817e-07, "loss": 0.229, "step": 1275 }, { "epoch": 1.22, "learning_rate": 1.3885180240320425e-07, "loss": 0.2851, "step": 1300 }, { "epoch": 1.24, "learning_rate": 1.221628838451268e-07, "loss": 0.1927, "step": 1325 }, { "epoch": 1.25, "learning_rate": 1.054739652870494e-07, "loss": 0.1675, "step": 1350 }, { "epoch": 1.27, "learning_rate": 8.878504672897195e-08, "loss": 0.167, "step": 1375 }, { "epoch": 1.29, "learning_rate": 7.209612817089452e-08, "loss": 0.176, "step": 1400 }, { "epoch": 1.3, "learning_rate": 5.5407209612817086e-08, "loss": 0.1718, "step": 1425 }, { "epoch": 1.32, "learning_rate": 3.871829105473965e-08, "loss": 0.1831, "step": 1450 }, { "epoch": 1.34, "learning_rate": 2.2029372496662217e-08, "loss": 0.1793, "step": 1475 }, { "epoch": 1.35, "learning_rate": 5.3404539385847795e-09, "loss": 0.1732, "step": 1500 }, { "epoch": 1.35, "eval_loss": 0.164794921875, "eval_runtime": 4364.6715, "eval_samples_per_second": 3.556, "eval_steps_per_second": 0.222, "eval_wer": 5.074450392391248, "step": 1500 }, { "epoch": 1.35, "step": 1500, "total_flos": 1.0191220995495297e+20, "train_loss": 0.1855107421875, "train_runtime": 20533.0269, "train_samples_per_second": 2.338, "train_steps_per_second": 0.073 } ], "max_steps": 1500, "num_train_epochs": 9223372036854775807, "total_flos": 1.0191220995495297e+20, "trial_name": null, "trial_params": null }