{ "best_metric": 11.732222816934506, "best_model_checkpoint": "./checkpoint-3000", "epoch": 4.09, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.4e-07, "loss": 1.3874, "step": 25 }, { "epoch": 0.01, "learning_rate": 9.200000000000001e-07, "loss": 1.224, "step": 50 }, { "epoch": 0.02, "learning_rate": 1.42e-06, "loss": 0.7884, "step": 75 }, { "epoch": 0.03, "learning_rate": 1.9200000000000003e-06, "loss": 0.466, "step": 100 }, { "epoch": 0.03, "learning_rate": 2.42e-06, "loss": 0.2788, "step": 125 }, { "epoch": 0.04, "learning_rate": 2.92e-06, "loss": 0.2233, "step": 150 }, { "epoch": 0.04, "learning_rate": 3.4200000000000007e-06, "loss": 0.2411, "step": 175 }, { "epoch": 0.05, "learning_rate": 3.920000000000001e-06, "loss": 0.2389, "step": 200 }, { "epoch": 0.06, "learning_rate": 4.42e-06, "loss": 0.2536, "step": 225 }, { "epoch": 0.06, "learning_rate": 4.92e-06, "loss": 0.2358, "step": 250 }, { "epoch": 0.07, "learning_rate": 5.420000000000001e-06, "loss": 0.2, "step": 275 }, { "epoch": 0.07, "learning_rate": 5.92e-06, "loss": 0.1946, "step": 300 }, { "epoch": 0.08, "learning_rate": 6.42e-06, "loss": 0.215, "step": 325 }, { "epoch": 0.09, "learning_rate": 6.92e-06, "loss": 0.1802, "step": 350 }, { "epoch": 0.09, "learning_rate": 7.420000000000001e-06, "loss": 0.2226, "step": 375 }, { "epoch": 0.1, "learning_rate": 7.92e-06, "loss": 0.2374, "step": 400 }, { "epoch": 0.11, "learning_rate": 8.42e-06, "loss": 0.2663, "step": 425 }, { "epoch": 0.11, "learning_rate": 8.920000000000001e-06, "loss": 0.3244, "step": 450 }, { "epoch": 0.12, "learning_rate": 9.42e-06, "loss": 0.4516, "step": 475 }, { "epoch": 0.12, "learning_rate": 9.920000000000002e-06, "loss": 0.5582, "step": 500 }, { "epoch": 0.12, "eval_loss": 0.3659963607788086, "eval_runtime": 1037.0778, "eval_samples_per_second": 2.468, "eval_steps_per_second": 0.077, "eval_wer": 14.817038942050365, "step": 500 }, { "epoch": 0.13, "learning_rate": 9.940000000000001e-06, "loss": 0.6142, "step": 525 }, { "epoch": 0.14, "learning_rate": 9.871428571428572e-06, "loss": 0.5451, "step": 550 }, { "epoch": 0.14, "learning_rate": 9.800000000000001e-06, "loss": 0.3885, "step": 575 }, { "epoch": 0.15, "learning_rate": 9.72857142857143e-06, "loss": 0.2781, "step": 600 }, { "epoch": 0.16, "learning_rate": 9.657142857142859e-06, "loss": 0.2405, "step": 625 }, { "epoch": 0.16, "learning_rate": 9.585714285714286e-06, "loss": 0.2455, "step": 650 }, { "epoch": 0.17, "learning_rate": 9.514285714285715e-06, "loss": 0.2063, "step": 675 }, { "epoch": 0.17, "learning_rate": 9.442857142857144e-06, "loss": 0.2097, "step": 700 }, { "epoch": 0.18, "learning_rate": 9.371428571428572e-06, "loss": 0.2479, "step": 725 }, { "epoch": 0.19, "learning_rate": 9.3e-06, "loss": 0.2491, "step": 750 }, { "epoch": 0.19, "learning_rate": 9.22857142857143e-06, "loss": 0.2182, "step": 775 }, { "epoch": 0.2, "learning_rate": 9.157142857142857e-06, "loss": 0.2302, "step": 800 }, { "epoch": 0.21, "learning_rate": 9.085714285714286e-06, "loss": 0.2773, "step": 825 }, { "epoch": 0.21, "learning_rate": 9.014285714285715e-06, "loss": 0.2446, "step": 850 }, { "epoch": 0.22, "learning_rate": 8.942857142857142e-06, "loss": 0.2213, "step": 875 }, { "epoch": 0.23, "learning_rate": 8.871428571428571e-06, "loss": 0.1809, "step": 900 }, { "epoch": 1.0, "learning_rate": 8.8e-06, "loss": 0.2946, "step": 925 }, { "epoch": 1.01, "learning_rate": 8.72857142857143e-06, "loss": 0.3836, "step": 950 }, { "epoch": 1.02, "learning_rate": 8.657142857142858e-06, "loss": 0.2838, "step": 975 }, { "epoch": 1.02, "learning_rate": 8.585714285714286e-06, "loss": 0.2285, "step": 1000 }, { "epoch": 1.02, "eval_loss": 0.2918657660484314, "eval_runtime": 1060.8322, "eval_samples_per_second": 2.412, "eval_steps_per_second": 0.075, "eval_wer": 12.630383415360114, "step": 1000 }, { "epoch": 1.03, "learning_rate": 8.514285714285715e-06, "loss": 0.1869, "step": 1025 }, { "epoch": 1.03, "learning_rate": 8.442857142857144e-06, "loss": 0.1679, "step": 1050 }, { "epoch": 1.04, "learning_rate": 8.371428571428573e-06, "loss": 0.1406, "step": 1075 }, { "epoch": 1.05, "learning_rate": 8.3e-06, "loss": 0.1489, "step": 1100 }, { "epoch": 1.05, "learning_rate": 8.22857142857143e-06, "loss": 0.1214, "step": 1125 }, { "epoch": 1.06, "learning_rate": 8.157142857142858e-06, "loss": 0.12, "step": 1150 }, { "epoch": 1.07, "learning_rate": 8.085714285714287e-06, "loss": 0.1122, "step": 1175 }, { "epoch": 1.07, "learning_rate": 8.014285714285715e-06, "loss": 0.1117, "step": 1200 }, { "epoch": 1.08, "learning_rate": 7.942857142857144e-06, "loss": 0.1437, "step": 1225 }, { "epoch": 1.08, "learning_rate": 7.871428571428573e-06, "loss": 0.177, "step": 1250 }, { "epoch": 1.09, "learning_rate": 7.800000000000002e-06, "loss": 0.1541, "step": 1275 }, { "epoch": 1.1, "learning_rate": 7.72857142857143e-06, "loss": 0.1613, "step": 1300 }, { "epoch": 1.1, "learning_rate": 7.657142857142858e-06, "loss": 0.1439, "step": 1325 }, { "epoch": 1.11, "learning_rate": 7.585714285714286e-06, "loss": 0.144, "step": 1350 }, { "epoch": 1.12, "learning_rate": 7.514285714285715e-06, "loss": 0.1429, "step": 1375 }, { "epoch": 1.12, "learning_rate": 7.442857142857144e-06, "loss": 0.145, "step": 1400 }, { "epoch": 1.13, "learning_rate": 7.371428571428571e-06, "loss": 0.2797, "step": 1425 }, { "epoch": 1.14, "learning_rate": 7.3e-06, "loss": 0.3132, "step": 1450 }, { "epoch": 1.14, "learning_rate": 7.2285714285714294e-06, "loss": 0.2924, "step": 1475 }, { "epoch": 1.15, "learning_rate": 7.1571428571428584e-06, "loss": 0.2038, "step": 1500 }, { "epoch": 1.15, "eval_loss": 0.2794933021068573, "eval_runtime": 1001.5942, "eval_samples_per_second": 2.555, "eval_steps_per_second": 0.08, "eval_wer": 11.384968864636232, "step": 1500 }, { "epoch": 1.15, "learning_rate": 7.085714285714286e-06, "loss": 0.137, "step": 1525 }, { "epoch": 1.16, "learning_rate": 7.014285714285715e-06, "loss": 0.1311, "step": 1550 }, { "epoch": 1.17, "learning_rate": 6.942857142857144e-06, "loss": 0.112, "step": 1575 }, { "epoch": 1.17, "learning_rate": 6.871428571428572e-06, "loss": 0.1416, "step": 1600 }, { "epoch": 1.18, "learning_rate": 6.800000000000001e-06, "loss": 0.1226, "step": 1625 }, { "epoch": 1.19, "learning_rate": 6.7314285714285724e-06, "loss": 0.12, "step": 1650 }, { "epoch": 1.19, "learning_rate": 6.660000000000001e-06, "loss": 0.1247, "step": 1675 }, { "epoch": 1.2, "learning_rate": 6.588571428571429e-06, "loss": 0.2006, "step": 1700 }, { "epoch": 1.2, "learning_rate": 6.517142857142858e-06, "loss": 0.3048, "step": 1725 }, { "epoch": 1.21, "learning_rate": 6.445714285714286e-06, "loss": 0.2532, "step": 1750 }, { "epoch": 1.22, "learning_rate": 6.374285714285715e-06, "loss": 0.1975, "step": 1775 }, { "epoch": 1.22, "learning_rate": 6.302857142857144e-06, "loss": 0.1865, "step": 1800 }, { "epoch": 2.0, "learning_rate": 6.231428571428571e-06, "loss": 0.1657, "step": 1825 }, { "epoch": 2.01, "learning_rate": 6.16e-06, "loss": 0.0563, "step": 1850 }, { "epoch": 2.01, "learning_rate": 6.088571428571429e-06, "loss": 0.0617, "step": 1875 }, { "epoch": 2.02, "learning_rate": 6.017142857142858e-06, "loss": 0.0556, "step": 1900 }, { "epoch": 2.03, "learning_rate": 5.945714285714286e-06, "loss": 0.0645, "step": 1925 }, { "epoch": 2.03, "learning_rate": 5.874285714285715e-06, "loss": 0.0642, "step": 1950 }, { "epoch": 2.04, "learning_rate": 5.802857142857144e-06, "loss": 0.0786, "step": 1975 }, { "epoch": 2.04, "learning_rate": 5.731428571428572e-06, "loss": 0.074, "step": 2000 }, { "epoch": 2.04, "eval_loss": 0.3149697780609131, "eval_runtime": 1006.1929, "eval_samples_per_second": 2.543, "eval_steps_per_second": 0.08, "eval_wer": 12.10428062296837, "step": 2000 }, { "epoch": 2.05, "learning_rate": 5.66e-06, "loss": 0.0721, "step": 2025 }, { "epoch": 2.06, "learning_rate": 5.588571428571429e-06, "loss": 0.0649, "step": 2050 }, { "epoch": 2.06, "learning_rate": 5.517142857142857e-06, "loss": 0.0571, "step": 2075 }, { "epoch": 2.07, "learning_rate": 5.445714285714286e-06, "loss": 0.0728, "step": 2100 }, { "epoch": 2.08, "learning_rate": 5.374285714285715e-06, "loss": 0.0524, "step": 2125 }, { "epoch": 2.08, "learning_rate": 5.3028571428571425e-06, "loss": 0.077, "step": 2150 }, { "epoch": 2.09, "learning_rate": 5.2314285714285716e-06, "loss": 0.0717, "step": 2175 }, { "epoch": 2.1, "learning_rate": 5.1600000000000006e-06, "loss": 0.0977, "step": 2200 }, { "epoch": 2.1, "learning_rate": 5.08857142857143e-06, "loss": 0.1385, "step": 2225 }, { "epoch": 2.11, "learning_rate": 5.017142857142857e-06, "loss": 0.1941, "step": 2250 }, { "epoch": 2.11, "learning_rate": 4.945714285714286e-06, "loss": 0.1323, "step": 2275 }, { "epoch": 2.12, "learning_rate": 4.874285714285715e-06, "loss": 0.1016, "step": 2300 }, { "epoch": 2.13, "learning_rate": 4.802857142857143e-06, "loss": 0.1343, "step": 2325 }, { "epoch": 2.13, "learning_rate": 4.731428571428572e-06, "loss": 0.2062, "step": 2350 }, { "epoch": 2.14, "learning_rate": 4.66e-06, "loss": 0.1978, "step": 2375 }, { "epoch": 2.15, "learning_rate": 4.588571428571429e-06, "loss": 0.1342, "step": 2400 }, { "epoch": 2.15, "learning_rate": 4.5171428571428575e-06, "loss": 0.1188, "step": 2425 }, { "epoch": 2.16, "learning_rate": 4.445714285714286e-06, "loss": 0.1035, "step": 2450 }, { "epoch": 2.16, "learning_rate": 4.374285714285715e-06, "loss": 0.1606, "step": 2475 }, { "epoch": 2.17, "learning_rate": 4.302857142857143e-06, "loss": 0.2165, "step": 2500 }, { "epoch": 2.17, "eval_loss": 0.2978155016899109, "eval_runtime": 939.5906, "eval_samples_per_second": 2.724, "eval_steps_per_second": 0.085, "eval_wer": 12.851007167008262, "step": 2500 }, { "epoch": 2.18, "learning_rate": 4.231428571428572e-06, "loss": 0.204, "step": 2525 }, { "epoch": 2.18, "learning_rate": 4.16e-06, "loss": 0.1276, "step": 2550 }, { "epoch": 2.19, "learning_rate": 4.088571428571429e-06, "loss": 0.1049, "step": 2575 }, { "epoch": 2.19, "learning_rate": 4.017142857142857e-06, "loss": 0.1006, "step": 2600 }, { "epoch": 2.2, "learning_rate": 3.945714285714286e-06, "loss": 0.0767, "step": 2625 }, { "epoch": 2.21, "learning_rate": 3.874285714285715e-06, "loss": 0.083, "step": 2650 }, { "epoch": 2.21, "learning_rate": 3.802857142857143e-06, "loss": 0.0746, "step": 2675 }, { "epoch": 2.22, "learning_rate": 3.731428571428572e-06, "loss": 0.0573, "step": 2700 }, { "epoch": 2.23, "learning_rate": 3.66e-06, "loss": 0.0699, "step": 2725 }, { "epoch": 3.0, "learning_rate": 3.588571428571429e-06, "loss": 0.0473, "step": 2750 }, { "epoch": 3.01, "learning_rate": 3.5171428571428573e-06, "loss": 0.0565, "step": 2775 }, { "epoch": 3.02, "learning_rate": 3.4457142857142863e-06, "loss": 0.1134, "step": 2800 }, { "epoch": 3.02, "learning_rate": 3.3742857142857145e-06, "loss": 0.1051, "step": 2825 }, { "epoch": 3.03, "learning_rate": 3.302857142857143e-06, "loss": 0.0684, "step": 2850 }, { "epoch": 3.04, "learning_rate": 3.2314285714285716e-06, "loss": 0.0543, "step": 2875 }, { "epoch": 3.04, "learning_rate": 3.1600000000000002e-06, "loss": 0.039, "step": 2900 }, { "epoch": 3.05, "learning_rate": 3.0885714285714284e-06, "loss": 0.0344, "step": 2925 }, { "epoch": 3.06, "learning_rate": 3.0171428571428574e-06, "loss": 0.0298, "step": 2950 }, { "epoch": 3.06, "learning_rate": 2.945714285714286e-06, "loss": 0.0299, "step": 2975 }, { "epoch": 3.07, "learning_rate": 2.8742857142857146e-06, "loss": 0.0399, "step": 3000 }, { "epoch": 3.07, "eval_loss": 0.34670406579971313, "eval_runtime": 923.9331, "eval_samples_per_second": 2.77, "eval_steps_per_second": 0.087, "eval_wer": 11.732222816934506, "step": 3000 }, { "epoch": 3.07, "learning_rate": 2.802857142857143e-06, "loss": 0.0472, "step": 3025 }, { "epoch": 3.08, "learning_rate": 2.7314285714285714e-06, "loss": 0.0461, "step": 3050 }, { "epoch": 3.09, "learning_rate": 2.6600000000000004e-06, "loss": 0.0263, "step": 3075 }, { "epoch": 3.09, "learning_rate": 2.5885714285714285e-06, "loss": 0.0246, "step": 3100 }, { "epoch": 3.1, "learning_rate": 2.5171428571428575e-06, "loss": 0.0296, "step": 3125 }, { "epoch": 3.1, "learning_rate": 2.445714285714286e-06, "loss": 0.0223, "step": 3150 }, { "epoch": 3.11, "learning_rate": 2.3742857142857147e-06, "loss": 0.0278, "step": 3175 }, { "epoch": 3.12, "learning_rate": 2.302857142857143e-06, "loss": 0.0387, "step": 3200 }, { "epoch": 3.12, "learning_rate": 2.2314285714285715e-06, "loss": 0.0411, "step": 3225 }, { "epoch": 3.13, "learning_rate": 2.16e-06, "loss": 0.0396, "step": 3250 }, { "epoch": 3.14, "learning_rate": 2.0885714285714287e-06, "loss": 0.0308, "step": 3275 }, { "epoch": 3.14, "learning_rate": 2.0171428571428573e-06, "loss": 0.0369, "step": 3300 }, { "epoch": 3.15, "learning_rate": 1.945714285714286e-06, "loss": 0.0376, "step": 3325 }, { "epoch": 3.15, "learning_rate": 1.8742857142857142e-06, "loss": 0.0439, "step": 3350 }, { "epoch": 3.16, "learning_rate": 1.8028571428571432e-06, "loss": 0.0563, "step": 3375 }, { "epoch": 3.17, "learning_rate": 1.7314285714285716e-06, "loss": 0.1056, "step": 3400 }, { "epoch": 3.17, "learning_rate": 1.6600000000000002e-06, "loss": 0.1284, "step": 3425 }, { "epoch": 3.18, "learning_rate": 1.5885714285714288e-06, "loss": 0.0671, "step": 3450 }, { "epoch": 3.19, "learning_rate": 1.5171428571428574e-06, "loss": 0.0579, "step": 3475 }, { "epoch": 3.19, "learning_rate": 1.4457142857142858e-06, "loss": 0.045, "step": 3500 }, { "epoch": 3.19, "eval_loss": 0.3500906527042389, "eval_runtime": 931.3693, "eval_samples_per_second": 2.748, "eval_steps_per_second": 0.086, "eval_wer": 11.721779089045835, "step": 3500 }, { "epoch": 3.2, "learning_rate": 1.3742857142857143e-06, "loss": 0.0313, "step": 3525 }, { "epoch": 3.21, "learning_rate": 1.302857142857143e-06, "loss": 0.0386, "step": 3550 }, { "epoch": 3.21, "learning_rate": 1.2314285714285715e-06, "loss": 0.0584, "step": 3575 }, { "epoch": 3.22, "learning_rate": 1.1600000000000001e-06, "loss": 0.0967, "step": 3600 }, { "epoch": 3.22, "learning_rate": 1.0885714285714287e-06, "loss": 0.1048, "step": 3625 }, { "epoch": 4.0, "learning_rate": 1.0171428571428573e-06, "loss": 0.0759, "step": 3650 }, { "epoch": 4.01, "learning_rate": 9.457142857142858e-07, "loss": 0.0125, "step": 3675 }, { "epoch": 4.01, "learning_rate": 8.742857142857144e-07, "loss": 0.0108, "step": 3700 }, { "epoch": 4.02, "learning_rate": 8.028571428571429e-07, "loss": 0.0093, "step": 3725 }, { "epoch": 4.03, "learning_rate": 7.314285714285715e-07, "loss": 0.0094, "step": 3750 }, { "epoch": 4.03, "learning_rate": 6.6e-07, "loss": 0.0164, "step": 3775 }, { "epoch": 4.04, "learning_rate": 5.885714285714286e-07, "loss": 0.02, "step": 3800 }, { "epoch": 4.05, "learning_rate": 5.171428571428572e-07, "loss": 0.0165, "step": 3825 }, { "epoch": 4.05, "learning_rate": 4.457142857142858e-07, "loss": 0.0171, "step": 3850 }, { "epoch": 4.06, "learning_rate": 3.7428571428571434e-07, "loss": 0.0227, "step": 3875 }, { "epoch": 4.07, "learning_rate": 3.028571428571429e-07, "loss": 0.0413, "step": 3900 }, { "epoch": 4.07, "learning_rate": 2.3142857142857144e-07, "loss": 0.0547, "step": 3925 }, { "epoch": 4.08, "learning_rate": 1.6e-07, "loss": 0.037, "step": 3950 }, { "epoch": 4.08, "learning_rate": 8.857142857142858e-08, "loss": 0.0281, "step": 3975 }, { "epoch": 4.09, "learning_rate": 1.7142857142857143e-08, "loss": 0.0187, "step": 4000 }, { "epoch": 4.09, "eval_loss": 0.38336294889450073, "eval_runtime": 948.6243, "eval_samples_per_second": 2.698, "eval_steps_per_second": 0.084, "eval_wer": 11.888878735264553, "step": 4000 }, { "epoch": 4.09, "step": 4000, "total_flos": 1.358659599630336e+20, "train_loss": 0.1602003634274006, "train_runtime": 36777.8553, "train_samples_per_second": 1.74, "train_steps_per_second": 0.109 } ], "max_steps": 4000, "num_train_epochs": 9223372036854775807, "total_flos": 1.358659599630336e+20, "trial_name": null, "trial_params": null }