{ "best_metric": 15.901153962951717, "best_model_checkpoint": "./checkpoint-3000", "epoch": 9.049, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.2e-06, "loss": 0.9692, "step": 25 }, { "epoch": 0.02, "learning_rate": 4.7e-06, "loss": 0.309, "step": 50 }, { "epoch": 0.03, "learning_rate": 7.2e-06, "loss": 0.1594, "step": 75 }, { "epoch": 0.03, "learning_rate": 9.7e-06, "loss": 0.1527, "step": 100 }, { "epoch": 0.04, "learning_rate": 1.22e-05, "loss": 0.1466, "step": 125 }, { "epoch": 0.05, "learning_rate": 1.47e-05, "loss": 0.1414, "step": 150 }, { "epoch": 0.06, "learning_rate": 1.7199999999999998e-05, "loss": 0.1506, "step": 175 }, { "epoch": 0.07, "learning_rate": 1.97e-05, "loss": 0.1633, "step": 200 }, { "epoch": 0.07, "learning_rate": 2.22e-05, "loss": 0.1928, "step": 225 }, { "epoch": 0.08, "learning_rate": 2.47e-05, "loss": 0.187, "step": 250 }, { "epoch": 0.09, "learning_rate": 2.7200000000000004e-05, "loss": 0.1939, "step": 275 }, { "epoch": 0.1, "learning_rate": 2.97e-05, "loss": 0.1699, "step": 300 }, { "epoch": 1.0, "learning_rate": 3.2200000000000003e-05, "loss": 0.198, "step": 325 }, { "epoch": 1.01, "learning_rate": 3.4699999999999996e-05, "loss": 0.2113, "step": 350 }, { "epoch": 1.02, "learning_rate": 3.72e-05, "loss": 0.1882, "step": 375 }, { "epoch": 1.03, "learning_rate": 3.97e-05, "loss": 0.144, "step": 400 }, { "epoch": 1.04, "learning_rate": 4.22e-05, "loss": 0.1408, "step": 425 }, { "epoch": 1.04, "learning_rate": 4.47e-05, "loss": 0.1388, "step": 450 }, { "epoch": 1.05, "learning_rate": 4.72e-05, "loss": 0.1444, "step": 475 }, { "epoch": 1.06, "learning_rate": 4.97e-05, "loss": 0.1456, "step": 500 }, { "epoch": 1.07, "learning_rate": 4.956e-05, "loss": 0.16, "step": 525 }, { "epoch": 1.08, "learning_rate": 4.906e-05, "loss": 0.1578, "step": 550 }, { "epoch": 1.09, "learning_rate": 4.856e-05, "loss": 0.1387, "step": 575 }, { "epoch": 1.09, "learning_rate": 4.8060000000000004e-05, "loss": 0.1499, "step": 600 }, { "epoch": 1.1, "learning_rate": 4.7560000000000005e-05, "loss": 0.1323, "step": 625 }, { "epoch": 2.01, "learning_rate": 4.706000000000001e-05, "loss": 0.158, "step": 650 }, { "epoch": 2.01, "learning_rate": 4.656e-05, "loss": 0.1479, "step": 675 }, { "epoch": 2.02, "learning_rate": 4.606e-05, "loss": 0.1252, "step": 700 }, { "epoch": 2.03, "learning_rate": 4.5560000000000004e-05, "loss": 0.1246, "step": 725 }, { "epoch": 2.04, "learning_rate": 4.506e-05, "loss": 0.1201, "step": 750 }, { "epoch": 2.05, "learning_rate": 4.456e-05, "loss": 0.0986, "step": 775 }, { "epoch": 2.06, "learning_rate": 4.406e-05, "loss": 0.1088, "step": 800 }, { "epoch": 2.06, "learning_rate": 4.356e-05, "loss": 0.1078, "step": 825 }, { "epoch": 2.07, "learning_rate": 4.306e-05, "loss": 0.1135, "step": 850 }, { "epoch": 2.08, "learning_rate": 4.256e-05, "loss": 0.1058, "step": 875 }, { "epoch": 2.09, "learning_rate": 4.206e-05, "loss": 0.0886, "step": 900 }, { "epoch": 2.1, "learning_rate": 4.156e-05, "loss": 0.0906, "step": 925 }, { "epoch": 2.11, "learning_rate": 4.106e-05, "loss": 0.0828, "step": 950 }, { "epoch": 3.01, "learning_rate": 4.0560000000000005e-05, "loss": 0.0957, "step": 975 }, { "epoch": 3.02, "learning_rate": 4.0060000000000006e-05, "loss": 0.0856, "step": 1000 }, { "epoch": 3.02, "eval_loss": 0.3732326328754425, "eval_runtime": 7430.1269, "eval_samples_per_second": 1.365, "eval_steps_per_second": 0.043, "eval_wer": 20.67643486182812, "step": 1000 }, { "epoch": 3.02, "learning_rate": 3.956e-05, "loss": 0.0715, "step": 1025 }, { "epoch": 3.03, "learning_rate": 3.906e-05, "loss": 0.0813, "step": 1050 }, { "epoch": 3.04, "learning_rate": 3.8560000000000004e-05, "loss": 0.0681, "step": 1075 }, { "epoch": 3.05, "learning_rate": 3.806e-05, "loss": 0.0654, "step": 1100 }, { "epoch": 3.06, "learning_rate": 3.756e-05, "loss": 0.0698, "step": 1125 }, { "epoch": 3.07, "learning_rate": 3.706e-05, "loss": 0.0633, "step": 1150 }, { "epoch": 3.07, "learning_rate": 3.656e-05, "loss": 0.0657, "step": 1175 }, { "epoch": 3.08, "learning_rate": 3.606e-05, "loss": 0.0532, "step": 1200 }, { "epoch": 3.09, "learning_rate": 3.5560000000000005e-05, "loss": 0.0545, "step": 1225 }, { "epoch": 3.1, "learning_rate": 3.5060000000000007e-05, "loss": 0.0511, "step": 1250 }, { "epoch": 4.0, "learning_rate": 3.456e-05, "loss": 0.0654, "step": 1275 }, { "epoch": 4.01, "learning_rate": 3.406e-05, "loss": 0.063, "step": 1300 }, { "epoch": 4.02, "learning_rate": 3.3560000000000004e-05, "loss": 0.0532, "step": 1325 }, { "epoch": 4.03, "learning_rate": 3.3060000000000005e-05, "loss": 0.0427, "step": 1350 }, { "epoch": 4.04, "learning_rate": 3.256e-05, "loss": 0.0483, "step": 1375 }, { "epoch": 4.04, "learning_rate": 3.206e-05, "loss": 0.0462, "step": 1400 }, { "epoch": 4.05, "learning_rate": 3.156e-05, "loss": 0.0417, "step": 1425 }, { "epoch": 4.06, "learning_rate": 3.106e-05, "loss": 0.0408, "step": 1450 }, { "epoch": 4.07, "learning_rate": 3.056e-05, "loss": 0.0355, "step": 1475 }, { "epoch": 4.08, "learning_rate": 3.006e-05, "loss": 0.0426, "step": 1500 }, { "epoch": 4.09, "learning_rate": 2.9559999999999998e-05, "loss": 0.033, "step": 1525 }, { "epoch": 4.09, "learning_rate": 2.9060000000000003e-05, "loss": 0.0403, "step": 1550 }, { "epoch": 4.1, "learning_rate": 2.8560000000000004e-05, "loss": 0.0274, "step": 1575 }, { "epoch": 5.0, "learning_rate": 2.8060000000000002e-05, "loss": 0.0333, "step": 1600 }, { "epoch": 5.01, "learning_rate": 2.7560000000000004e-05, "loss": 0.0323, "step": 1625 }, { "epoch": 5.02, "learning_rate": 2.7060000000000002e-05, "loss": 0.0286, "step": 1650 }, { "epoch": 5.03, "learning_rate": 2.6560000000000003e-05, "loss": 0.0263, "step": 1675 }, { "epoch": 5.04, "learning_rate": 2.606e-05, "loss": 0.0249, "step": 1700 }, { "epoch": 5.05, "learning_rate": 2.556e-05, "loss": 0.0218, "step": 1725 }, { "epoch": 5.05, "learning_rate": 2.506e-05, "loss": 0.0201, "step": 1750 }, { "epoch": 5.06, "learning_rate": 2.4560000000000002e-05, "loss": 0.0219, "step": 1775 }, { "epoch": 5.07, "learning_rate": 2.4060000000000003e-05, "loss": 0.0219, "step": 1800 }, { "epoch": 5.08, "learning_rate": 2.356e-05, "loss": 0.0217, "step": 1825 }, { "epoch": 5.09, "learning_rate": 2.306e-05, "loss": 0.0176, "step": 1850 }, { "epoch": 5.1, "learning_rate": 2.256e-05, "loss": 0.0211, "step": 1875 }, { "epoch": 5.11, "learning_rate": 2.206e-05, "loss": 0.0169, "step": 1900 }, { "epoch": 6.01, "learning_rate": 2.1560000000000004e-05, "loss": 0.0222, "step": 1925 }, { "epoch": 6.02, "learning_rate": 2.106e-05, "loss": 0.0172, "step": 1950 }, { "epoch": 6.02, "learning_rate": 2.0560000000000003e-05, "loss": 0.0152, "step": 1975 }, { "epoch": 6.03, "learning_rate": 2.006e-05, "loss": 0.0119, "step": 2000 }, { "epoch": 6.03, "eval_loss": 0.3684307634830475, "eval_runtime": 6884.4626, "eval_samples_per_second": 1.473, "eval_steps_per_second": 0.046, "eval_wer": 17.535302156088672, "step": 2000 }, { "epoch": 6.04, "learning_rate": 1.956e-05, "loss": 0.0107, "step": 2025 }, { "epoch": 6.05, "learning_rate": 1.906e-05, "loss": 0.009, "step": 2050 }, { "epoch": 6.06, "learning_rate": 1.856e-05, "loss": 0.0155, "step": 2075 }, { "epoch": 6.07, "learning_rate": 1.8060000000000003e-05, "loss": 0.0129, "step": 2100 }, { "epoch": 6.07, "learning_rate": 1.756e-05, "loss": 0.0119, "step": 2125 }, { "epoch": 6.08, "learning_rate": 1.706e-05, "loss": 0.0074, "step": 2150 }, { "epoch": 6.09, "learning_rate": 1.656e-05, "loss": 0.0062, "step": 2175 }, { "epoch": 6.1, "learning_rate": 1.606e-05, "loss": 0.0052, "step": 2200 }, { "epoch": 7.0, "learning_rate": 1.556e-05, "loss": 0.0091, "step": 2225 }, { "epoch": 7.01, "learning_rate": 1.5060000000000001e-05, "loss": 0.0086, "step": 2250 }, { "epoch": 7.02, "learning_rate": 1.4560000000000001e-05, "loss": 0.0079, "step": 2275 }, { "epoch": 7.03, "learning_rate": 1.4060000000000001e-05, "loss": 0.0076, "step": 2300 }, { "epoch": 7.04, "learning_rate": 1.356e-05, "loss": 0.0052, "step": 2325 }, { "epoch": 7.04, "learning_rate": 1.306e-05, "loss": 0.0041, "step": 2350 }, { "epoch": 7.05, "learning_rate": 1.256e-05, "loss": 0.004, "step": 2375 }, { "epoch": 7.06, "learning_rate": 1.206e-05, "loss": 0.0032, "step": 2400 }, { "epoch": 7.07, "learning_rate": 1.156e-05, "loss": 0.0073, "step": 2425 }, { "epoch": 7.08, "learning_rate": 1.106e-05, "loss": 0.0048, "step": 2450 }, { "epoch": 7.09, "learning_rate": 1.056e-05, "loss": 0.0027, "step": 2475 }, { "epoch": 7.09, "learning_rate": 1.006e-05, "loss": 0.0032, "step": 2500 }, { "epoch": 7.1, "learning_rate": 9.560000000000002e-06, "loss": 0.0024, "step": 2525 }, { "epoch": 8.0, "learning_rate": 9.06e-06, "loss": 0.0033, "step": 2550 }, { "epoch": 8.01, "learning_rate": 8.56e-06, "loss": 0.0022, "step": 2575 }, { "epoch": 8.02, "learning_rate": 8.06e-06, "loss": 0.0027, "step": 2600 }, { "epoch": 8.03, "learning_rate": 7.5600000000000005e-06, "loss": 0.0014, "step": 2625 }, { "epoch": 8.04, "learning_rate": 7.06e-06, "loss": 0.0014, "step": 2650 }, { "epoch": 8.05, "learning_rate": 6.560000000000001e-06, "loss": 0.0025, "step": 2675 }, { "epoch": 8.05, "learning_rate": 6.0600000000000004e-06, "loss": 0.0011, "step": 2700 }, { "epoch": 8.06, "learning_rate": 5.56e-06, "loss": 0.0008, "step": 2725 }, { "epoch": 8.07, "learning_rate": 5.06e-06, "loss": 0.0016, "step": 2750 }, { "epoch": 8.08, "learning_rate": 4.56e-06, "loss": 0.001, "step": 2775 }, { "epoch": 8.09, "learning_rate": 4.06e-06, "loss": 0.0005, "step": 2800 }, { "epoch": 8.1, "learning_rate": 3.5600000000000002e-06, "loss": 0.0009, "step": 2825 }, { "epoch": 8.1, "learning_rate": 3.06e-06, "loss": 0.0005, "step": 2850 }, { "epoch": 9.01, "learning_rate": 2.56e-06, "loss": 0.0007, "step": 2875 }, { "epoch": 9.02, "learning_rate": 2.06e-06, "loss": 0.0005, "step": 2900 }, { "epoch": 9.02, "learning_rate": 1.56e-06, "loss": 0.0007, "step": 2925 }, { "epoch": 9.03, "learning_rate": 1.06e-06, "loss": 0.0004, "step": 2950 }, { "epoch": 9.04, "learning_rate": 5.6e-07, "loss": 0.0008, "step": 2975 }, { "epoch": 9.05, "learning_rate": 6e-08, "loss": 0.001, "step": 3000 }, { "epoch": 9.05, "eval_loss": 0.361074298620224, "eval_runtime": 6919.6339, "eval_samples_per_second": 1.466, "eval_steps_per_second": 0.046, "eval_wer": 15.901153962951717, "step": 3000 }, { "epoch": 9.05, "step": 3000, "total_flos": 1.9564383714213888e+20, "train_loss": 0.07008407963408778, "train_runtime": 69355.5101, "train_samples_per_second": 2.768, "train_steps_per_second": 0.043 } ], "max_steps": 3000, "num_train_epochs": 9223372036854775807, "total_flos": 1.9564383714213888e+20, "trial_name": null, "trial_params": null }