|
{ |
|
"best_metric": 9.396988496177238, |
|
"best_model_checkpoint": "./checkpoint-4000", |
|
"epoch": 2.23, |
|
"global_step": 4000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.2000000000000006e-07, |
|
"loss": 1.5387, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.200000000000001e-07, |
|
"loss": 1.3488, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.42e-06, |
|
"loss": 0.8161, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9200000000000003e-06, |
|
"loss": 0.498, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.42e-06, |
|
"loss": 0.3711, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.92e-06, |
|
"loss": 0.3092, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.4200000000000007e-06, |
|
"loss": 0.2652, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.920000000000001e-06, |
|
"loss": 0.2263, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.42e-06, |
|
"loss": 0.2451, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9000000000000005e-06, |
|
"loss": 0.227, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5.400000000000001e-06, |
|
"loss": 0.2083, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5.9e-06, |
|
"loss": 0.1479, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 0.1947, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 6.9e-06, |
|
"loss": 0.3045, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.4e-06, |
|
"loss": 0.3406, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.9e-06, |
|
"loss": 0.3461, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 0.3167, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.900000000000001e-06, |
|
"loss": 0.2418, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.4e-06, |
|
"loss": 0.3031, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.9e-06, |
|
"loss": 0.6144, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 0.2794555425643921, |
|
"eval_runtime": 1731.7352, |
|
"eval_samples_per_second": 2.587, |
|
"eval_steps_per_second": 0.081, |
|
"eval_wer": 14.073652895247186, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.942857142857145e-06, |
|
"loss": 0.594, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.871428571428572e-06, |
|
"loss": 0.4331, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 0.3505, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.72857142857143e-06, |
|
"loss": 0.2652, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.657142857142859e-06, |
|
"loss": 0.2585, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.585714285714286e-06, |
|
"loss": 0.4135, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.514285714285715e-06, |
|
"loss": 0.5708, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.442857142857144e-06, |
|
"loss": 0.3726, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.374285714285715e-06, |
|
"loss": 0.2625, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.302857142857144e-06, |
|
"loss": 0.2396, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.231428571428573e-06, |
|
"loss": 0.2445, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.16e-06, |
|
"loss": 0.2828, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.08857142857143e-06, |
|
"loss": 0.2256, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.017142857142858e-06, |
|
"loss": 0.2214, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 8.945714285714286e-06, |
|
"loss": 0.1915, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.874285714285715e-06, |
|
"loss": 0.2015, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.802857142857144e-06, |
|
"loss": 0.2196, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.731428571428571e-06, |
|
"loss": 0.2346, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.66e-06, |
|
"loss": 0.2013, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.588571428571429e-06, |
|
"loss": 0.1643, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 0.2213420271873474, |
|
"eval_runtime": 1735.8972, |
|
"eval_samples_per_second": 2.581, |
|
"eval_steps_per_second": 0.081, |
|
"eval_wer": 11.491638660154942, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.517142857142858e-06, |
|
"loss": 0.1507, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.445714285714285e-06, |
|
"loss": 0.2044, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.374285714285714e-06, |
|
"loss": 0.1698, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.302857142857143e-06, |
|
"loss": 0.1577, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.231428571428572e-06, |
|
"loss": 0.2725, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.16e-06, |
|
"loss": 0.354, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.088571428571429e-06, |
|
"loss": 0.4204, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.017142857142858e-06, |
|
"loss": 0.2924, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.945714285714287e-06, |
|
"loss": 0.2252, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.874285714285716e-06, |
|
"loss": 0.2221, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.802857142857143e-06, |
|
"loss": 0.1945, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.731428571428572e-06, |
|
"loss": 0.2151, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.660000000000001e-06, |
|
"loss": 0.2168, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.588571428571429e-06, |
|
"loss": 0.2256, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.5171428571428575e-06, |
|
"loss": 0.1883, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 7.445714285714286e-06, |
|
"loss": 0.1894, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.374285714285715e-06, |
|
"loss": 0.1798, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.302857142857144e-06, |
|
"loss": 0.2096, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.231428571428573e-06, |
|
"loss": 0.1906, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.16e-06, |
|
"loss": 0.2175, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 0.2009447067975998, |
|
"eval_runtime": 1739.6156, |
|
"eval_samples_per_second": 2.575, |
|
"eval_steps_per_second": 0.08, |
|
"eval_wer": 10.002132217811125, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.088571428571429e-06, |
|
"loss": 0.2645, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 7.017142857142858e-06, |
|
"loss": 0.1904, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 6.945714285714287e-06, |
|
"loss": 0.109, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 6.874285714285714e-06, |
|
"loss": 0.1449, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 6.8028571428571434e-06, |
|
"loss": 0.1633, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 6.7314285714285724e-06, |
|
"loss": 0.2946, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 6.660000000000001e-06, |
|
"loss": 0.3049, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 6.588571428571429e-06, |
|
"loss": 0.1726, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 6.517142857142858e-06, |
|
"loss": 0.1232, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 6.445714285714286e-06, |
|
"loss": 0.0528, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 6.374285714285715e-06, |
|
"loss": 0.0516, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 6.302857142857144e-06, |
|
"loss": 0.0326, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 6.231428571428571e-06, |
|
"loss": 0.0588, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 6.16e-06, |
|
"loss": 0.0802, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 6.088571428571429e-06, |
|
"loss": 0.0913, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 6.017142857142858e-06, |
|
"loss": 0.1228, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5.945714285714286e-06, |
|
"loss": 0.119, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5.874285714285715e-06, |
|
"loss": 0.1236, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5.802857142857144e-06, |
|
"loss": 0.1413, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5.731428571428572e-06, |
|
"loss": 0.1512, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 0.1980048269033432, |
|
"eval_runtime": 1706.7639, |
|
"eval_samples_per_second": 2.625, |
|
"eval_steps_per_second": 0.082, |
|
"eval_wer": 11.263186751820001, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5.66e-06, |
|
"loss": 0.1528, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5.588571428571429e-06, |
|
"loss": 0.1917, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5.517142857142857e-06, |
|
"loss": 0.2796, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5.445714285714286e-06, |
|
"loss": 0.2391, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5.374285714285715e-06, |
|
"loss": 0.2025, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5.3028571428571425e-06, |
|
"loss": 0.1377, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5.2314285714285716e-06, |
|
"loss": 0.1342, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5.1600000000000006e-06, |
|
"loss": 0.1284, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5.08857142857143e-06, |
|
"loss": 0.1275, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5.017142857142857e-06, |
|
"loss": 0.1113, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.945714285714286e-06, |
|
"loss": 0.0745, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.874285714285715e-06, |
|
"loss": 0.0842, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.802857142857143e-06, |
|
"loss": 0.1308, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.731428571428572e-06, |
|
"loss": 0.1669, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.66e-06, |
|
"loss": 0.1911, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.588571428571429e-06, |
|
"loss": 0.1787, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.5171428571428575e-06, |
|
"loss": 0.2368, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.445714285714286e-06, |
|
"loss": 0.1796, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.374285714285715e-06, |
|
"loss": 0.1194, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 4.302857142857143e-06, |
|
"loss": 0.1527, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 0.1915724277496338, |
|
"eval_runtime": 1733.4878, |
|
"eval_samples_per_second": 2.584, |
|
"eval_steps_per_second": 0.081, |
|
"eval_wer": 10.846896607742996, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.231428571428572e-06, |
|
"loss": 0.1488, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.16e-06, |
|
"loss": 0.1485, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.088571428571429e-06, |
|
"loss": 0.1154, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.017142857142857e-06, |
|
"loss": 0.1026, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.945714285714286e-06, |
|
"loss": 0.1128, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.874285714285715e-06, |
|
"loss": 0.2235, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.802857142857143e-06, |
|
"loss": 0.3356, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.731428571428572e-06, |
|
"loss": 0.3351, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.66e-06, |
|
"loss": 0.2546, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.588571428571429e-06, |
|
"loss": 0.1517, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.5171428571428573e-06, |
|
"loss": 0.1087, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.4457142857142863e-06, |
|
"loss": 0.0966, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.3742857142857145e-06, |
|
"loss": 0.1031, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.302857142857143e-06, |
|
"loss": 0.0992, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.2314285714285716e-06, |
|
"loss": 0.0913, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.1600000000000002e-06, |
|
"loss": 0.1051, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0885714285714284e-06, |
|
"loss": 0.102, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0171428571428574e-06, |
|
"loss": 0.0885, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.945714285714286e-06, |
|
"loss": 0.063, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.8742857142857146e-06, |
|
"loss": 0.0918, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_loss": 0.1890150010585785, |
|
"eval_runtime": 1728.8601, |
|
"eval_samples_per_second": 2.591, |
|
"eval_steps_per_second": 0.081, |
|
"eval_wer": 9.649808608067906, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.802857142857143e-06, |
|
"loss": 0.116, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.7314285714285714e-06, |
|
"loss": 0.1168, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.6600000000000004e-06, |
|
"loss": 0.1147, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.5885714285714285e-06, |
|
"loss": 0.033, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.5171428571428575e-06, |
|
"loss": 0.014, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 2.445714285714286e-06, |
|
"loss": 0.0493, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 2.3742857142857147e-06, |
|
"loss": 0.0763, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 2.302857142857143e-06, |
|
"loss": 0.0583, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 2.2314285714285715e-06, |
|
"loss": 0.0483, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 2.16e-06, |
|
"loss": 0.0386, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 2.0885714285714287e-06, |
|
"loss": 0.0472, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 2.0171428571428573e-06, |
|
"loss": 0.0519, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.945714285714286e-06, |
|
"loss": 0.0701, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.8742857142857142e-06, |
|
"loss": 0.0581, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.8028571428571432e-06, |
|
"loss": 0.0485, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7314285714285716e-06, |
|
"loss": 0.0431, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6600000000000002e-06, |
|
"loss": 0.0569, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.5885714285714288e-06, |
|
"loss": 0.0906, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.5171428571428574e-06, |
|
"loss": 0.069, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.4457142857142858e-06, |
|
"loss": 0.047, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_loss": 0.20336349308490753, |
|
"eval_runtime": 1725.5815, |
|
"eval_samples_per_second": 2.596, |
|
"eval_steps_per_second": 0.081, |
|
"eval_wer": 9.427448750621897, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.3742857142857143e-06, |
|
"loss": 0.0271, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.302857142857143e-06, |
|
"loss": 0.0164, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.2314285714285715e-06, |
|
"loss": 0.036, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.1600000000000001e-06, |
|
"loss": 0.0201, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.0885714285714287e-06, |
|
"loss": 0.0334, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.0171428571428573e-06, |
|
"loss": 0.0561, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.457142857142858e-07, |
|
"loss": 0.0577, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.742857142857144e-07, |
|
"loss": 0.0606, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.028571428571429e-07, |
|
"loss": 0.071, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 7.314285714285715e-07, |
|
"loss": 0.0644, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 6.6e-07, |
|
"loss": 0.0576, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 5.885714285714286e-07, |
|
"loss": 0.052, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 5.171428571428572e-07, |
|
"loss": 0.0633, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 4.457142857142858e-07, |
|
"loss": 0.0583, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.7428571428571434e-07, |
|
"loss": 0.0635, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.028571428571429e-07, |
|
"loss": 0.0828, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.3142857142857144e-07, |
|
"loss": 0.0765, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.6e-07, |
|
"loss": 0.0596, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 8.857142857142858e-08, |
|
"loss": 0.0581, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.7142857142857143e-08, |
|
"loss": 0.0822, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_loss": 0.19694200158119202, |
|
"eval_runtime": 1711.536, |
|
"eval_samples_per_second": 2.618, |
|
"eval_steps_per_second": 0.082, |
|
"eval_wer": 9.396988496177238, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"step": 4000, |
|
"total_flos": 1.358319892267008e+20, |
|
"train_loss": 0.19088249330222606, |
|
"train_runtime": 43144.9916, |
|
"train_samples_per_second": 1.483, |
|
"train_steps_per_second": 0.093 |
|
} |
|
], |
|
"max_steps": 4000, |
|
"num_train_epochs": 9223372036854775807, |
|
"total_flos": 1.358319892267008e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|