{
  "best_metric": 0.4691739691294871,
  "best_model_checkpoint": "./whisper-small-ar_tsize_0.8/checkpoint-5000",
  "epoch": 1.6103059581320451,
  "eval_steps": 1000,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "grad_norm": 74.5753402709961,
      "learning_rate": 5.000000000000001e-07,
      "loss": 3.3084,
      "step": 25
    },
    {
      "epoch": 0.02,
      "grad_norm": 23.875898361206055,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 2.626,
      "step": 50
    },
    {
      "epoch": 0.02,
      "grad_norm": 22.36936378479004,
      "learning_rate": 1.5e-06,
      "loss": 1.9536,
      "step": 75
    },
    {
      "epoch": 0.03,
      "grad_norm": 18.3158016204834,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 1.315,
      "step": 100
    },
    {
      "epoch": 0.04,
      "grad_norm": 14.385392189025879,
      "learning_rate": 2.5e-06,
      "loss": 1.1058,
      "step": 125
    },
    {
      "epoch": 0.05,
      "grad_norm": 14.316064834594727,
      "learning_rate": 3e-06,
      "loss": 1.0312,
      "step": 150
    },
    {
      "epoch": 0.06,
      "grad_norm": 16.03868293762207,
      "learning_rate": 3.5e-06,
      "loss": 0.9277,
      "step": 175
    },
    {
      "epoch": 0.06,
      "grad_norm": 16.684885025024414,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.867,
      "step": 200
    },
    {
      "epoch": 0.07,
      "grad_norm": 14.64684772491455,
      "learning_rate": 4.5e-06,
      "loss": 0.8164,
      "step": 225
    },
    {
      "epoch": 0.08,
      "grad_norm": 16.835796356201172,
      "learning_rate": 5e-06,
      "loss": 0.7126,
      "step": 250
    },
    {
      "epoch": 0.09,
      "grad_norm": 17.927705764770508,
      "learning_rate": 5.500000000000001e-06,
      "loss": 0.6566,
      "step": 275
    },
    {
      "epoch": 0.1,
      "grad_norm": 20.162878036499023,
      "learning_rate": 6e-06,
      "loss": 0.5585,
      "step": 300
    },
    {
      "epoch": 0.1,
      "grad_norm": 15.939558982849121,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 0.4888,
      "step": 325
    },
    {
      "epoch": 0.11,
      "grad_norm": 10.801262855529785,
      "learning_rate": 7e-06,
      "loss": 0.4609,
      "step": 350
    },
    {
      "epoch": 0.12,
      "grad_norm": 14.835463523864746,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.4432,
      "step": 375
    },
    {
      "epoch": 0.13,
      "grad_norm": 13.00851821899414,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.4361,
      "step": 400
    },
    {
      "epoch": 0.14,
      "grad_norm": 15.16860294342041,
      "learning_rate": 8.5e-06,
      "loss": 0.4177,
      "step": 425
    },
    {
      "epoch": 0.14,
      "grad_norm": 15.027597427368164,
      "learning_rate": 9e-06,
      "loss": 0.4447,
      "step": 450
    },
    {
      "epoch": 0.15,
      "grad_norm": 15.810965538024902,
      "learning_rate": 9.5e-06,
      "loss": 0.4198,
      "step": 475
    },
    {
      "epoch": 0.16,
      "grad_norm": 16.27136993408203,
      "learning_rate": 1e-05,
      "loss": 0.4265,
      "step": 500
    },
    {
      "epoch": 0.17,
      "grad_norm": 12.16751766204834,
      "learning_rate": 9.944444444444445e-06,
      "loss": 0.4191,
      "step": 525
    },
    {
      "epoch": 0.18,
      "grad_norm": 10.931772232055664,
      "learning_rate": 9.88888888888889e-06,
      "loss": 0.3924,
      "step": 550
    },
    {
      "epoch": 0.19,
      "grad_norm": 12.04465389251709,
      "learning_rate": 9.833333333333333e-06,
      "loss": 0.4176,
      "step": 575
    },
    {
      "epoch": 0.19,
      "grad_norm": 12.589746475219727,
      "learning_rate": 9.777777777777779e-06,
      "loss": 0.4358,
      "step": 600
    },
    {
      "epoch": 0.2,
      "grad_norm": 14.16213607788086,
      "learning_rate": 9.722222222222223e-06,
      "loss": 0.3828,
      "step": 625
    },
    {
      "epoch": 0.21,
      "grad_norm": 15.552674293518066,
      "learning_rate": 9.666666666666667e-06,
      "loss": 0.4139,
      "step": 650
    },
    {
      "epoch": 0.22,
      "grad_norm": 12.745460510253906,
      "learning_rate": 9.611111111111112e-06,
      "loss": 0.3915,
      "step": 675
    },
    {
      "epoch": 0.23,
      "grad_norm": 10.602766990661621,
      "learning_rate": 9.555555555555556e-06,
      "loss": 0.365,
      "step": 700
    },
    {
      "epoch": 0.23,
      "grad_norm": 14.100479125976562,
      "learning_rate": 9.5e-06,
      "loss": 0.4218,
      "step": 725
    },
    {
      "epoch": 0.24,
      "grad_norm": 11.610438346862793,
      "learning_rate": 9.444444444444445e-06,
      "loss": 0.3358,
      "step": 750
    },
    {
      "epoch": 0.25,
      "grad_norm": 11.454590797424316,
      "learning_rate": 9.38888888888889e-06,
      "loss": 0.4012,
      "step": 775
    },
    {
      "epoch": 0.26,
      "grad_norm": 12.808696746826172,
      "learning_rate": 9.333333333333334e-06,
      "loss": 0.3754,
      "step": 800
    },
    {
      "epoch": 0.27,
      "grad_norm": 10.447662353515625,
      "learning_rate": 9.277777777777778e-06,
      "loss": 0.3407,
      "step": 825
    },
    {
      "epoch": 0.27,
      "grad_norm": 9.822239875793457,
      "learning_rate": 9.222222222222224e-06,
      "loss": 0.3557,
      "step": 850
    },
    {
      "epoch": 0.28,
      "grad_norm": 10.579741477966309,
      "learning_rate": 9.166666666666666e-06,
      "loss": 0.3557,
      "step": 875
    },
    {
      "epoch": 0.29,
      "grad_norm": 13.126590728759766,
      "learning_rate": 9.111111111111112e-06,
      "loss": 0.3588,
      "step": 900
    },
    {
      "epoch": 0.3,
      "grad_norm": 10.920930862426758,
      "learning_rate": 9.055555555555556e-06,
      "loss": 0.3761,
      "step": 925
    },
    {
      "epoch": 0.31,
      "grad_norm": 16.436307907104492,
      "learning_rate": 9e-06,
      "loss": 0.3613,
      "step": 950
    },
    {
      "epoch": 0.31,
      "grad_norm": 16.763071060180664,
      "learning_rate": 8.944444444444446e-06,
      "loss": 0.3718,
      "step": 975
    },
    {
      "epoch": 0.32,
      "grad_norm": 7.7616376876831055,
      "learning_rate": 8.888888888888888e-06,
      "loss": 0.2768,
      "step": 1000
    },
    {
      "epoch": 0.32,
      "eval_loss": 0.3424592614173889,
      "eval_runtime": 2548.0078,
      "eval_samples_per_second": 3.046,
      "eval_steps_per_second": 0.381,
      "eval_wer": 0.5745963257862194,
      "step": 1000
    },
    {
      "epoch": 0.33,
      "grad_norm": 9.518533706665039,
      "learning_rate": 8.833333333333334e-06,
      "loss": 0.3837,
      "step": 1025
    },
    {
      "epoch": 0.34,
      "grad_norm": 15.829094886779785,
      "learning_rate": 8.777777777777778e-06,
      "loss": 0.324,
      "step": 1050
    },
    {
      "epoch": 0.35,
      "grad_norm": 12.936345100402832,
      "learning_rate": 8.722222222222224e-06,
      "loss": 0.3251,
      "step": 1075
    },
    {
      "epoch": 0.35,
      "grad_norm": 10.695411682128906,
      "learning_rate": 8.666666666666668e-06,
      "loss": 0.3596,
      "step": 1100
    },
    {
      "epoch": 0.36,
      "grad_norm": 10.362096786499023,
      "learning_rate": 8.611111111111112e-06,
      "loss": 0.3336,
      "step": 1125
    },
    {
      "epoch": 0.37,
      "grad_norm": 11.786664009094238,
      "learning_rate": 8.555555555555556e-06,
      "loss": 0.3427,
      "step": 1150
    },
    {
      "epoch": 0.38,
      "grad_norm": 10.013471603393555,
      "learning_rate": 8.5e-06,
      "loss": 0.3265,
      "step": 1175
    },
    {
      "epoch": 0.39,
      "grad_norm": 11.64340877532959,
      "learning_rate": 8.444444444444446e-06,
      "loss": 0.3561,
      "step": 1200
    },
    {
      "epoch": 0.39,
      "grad_norm": 11.622845649719238,
      "learning_rate": 8.38888888888889e-06,
      "loss": 0.3457,
      "step": 1225
    },
    {
      "epoch": 0.4,
      "grad_norm": 13.175597190856934,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.3605,
      "step": 1250
    },
    {
      "epoch": 0.41,
      "grad_norm": 13.138216018676758,
      "learning_rate": 8.277777777777778e-06,
      "loss": 0.3493,
      "step": 1275
    },
    {
      "epoch": 0.42,
      "grad_norm": 13.549818992614746,
      "learning_rate": 8.222222222222222e-06,
      "loss": 0.2964,
      "step": 1300
    },
    {
      "epoch": 0.43,
      "grad_norm": 11.058015823364258,
      "learning_rate": 8.166666666666668e-06,
      "loss": 0.3219,
      "step": 1325
    },
    {
      "epoch": 0.43,
      "grad_norm": 13.490798950195312,
      "learning_rate": 8.111111111111112e-06,
      "loss": 0.2941,
      "step": 1350
    },
    {
      "epoch": 0.44,
      "grad_norm": 9.73831844329834,
      "learning_rate": 8.055555555555557e-06,
      "loss": 0.329,
      "step": 1375
    },
    {
      "epoch": 0.45,
      "grad_norm": 6.883594512939453,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.3076,
      "step": 1400
    },
    {
      "epoch": 0.46,
      "grad_norm": 12.517281532287598,
      "learning_rate": 7.944444444444445e-06,
      "loss": 0.3153,
      "step": 1425
    },
    {
      "epoch": 0.47,
      "grad_norm": 7.685423851013184,
      "learning_rate": 7.88888888888889e-06,
      "loss": 0.2991,
      "step": 1450
    },
    {
      "epoch": 0.48,
      "grad_norm": 9.92092514038086,
      "learning_rate": 7.833333333333333e-06,
      "loss": 0.3064,
      "step": 1475
    },
    {
      "epoch": 0.48,
      "grad_norm": 13.166866302490234,
      "learning_rate": 7.77777777777778e-06,
      "loss": 0.3195,
      "step": 1500
    },
    {
      "epoch": 0.49,
      "grad_norm": 7.179815292358398,
      "learning_rate": 7.722222222222223e-06,
      "loss": 0.3214,
      "step": 1525
    },
    {
      "epoch": 0.5,
      "grad_norm": 12.424599647521973,
      "learning_rate": 7.666666666666667e-06,
      "loss": 0.3062,
      "step": 1550
    },
    {
      "epoch": 0.51,
      "grad_norm": 13.103777885437012,
      "learning_rate": 7.611111111111111e-06,
      "loss": 0.2858,
      "step": 1575
    },
    {
      "epoch": 0.52,
      "grad_norm": 8.597440719604492,
      "learning_rate": 7.555555555555556e-06,
      "loss": 0.3194,
      "step": 1600
    },
    {
      "epoch": 0.52,
      "grad_norm": 10.370293617248535,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.3285,
      "step": 1625
    },
    {
      "epoch": 0.53,
      "grad_norm": 10.53341007232666,
      "learning_rate": 7.444444444444445e-06,
      "loss": 0.2848,
      "step": 1650
    },
    {
      "epoch": 0.54,
      "grad_norm": 13.32474422454834,
      "learning_rate": 7.38888888888889e-06,
      "loss": 0.304,
      "step": 1675
    },
    {
      "epoch": 0.55,
      "grad_norm": 10.368809700012207,
      "learning_rate": 7.333333333333333e-06,
      "loss": 0.3135,
      "step": 1700
    },
    {
      "epoch": 0.56,
      "grad_norm": 13.282475471496582,
      "learning_rate": 7.277777777777778e-06,
      "loss": 0.2797,
      "step": 1725
    },
    {
      "epoch": 0.56,
      "grad_norm": 10.41550350189209,
      "learning_rate": 7.222222222222223e-06,
      "loss": 0.3019,
      "step": 1750
    },
    {
      "epoch": 0.57,
      "grad_norm": 12.789618492126465,
      "learning_rate": 7.166666666666667e-06,
      "loss": 0.3085,
      "step": 1775
    },
    {
      "epoch": 0.58,
      "grad_norm": 6.927020072937012,
      "learning_rate": 7.111111111111112e-06,
      "loss": 0.271,
      "step": 1800
    },
    {
      "epoch": 0.59,
      "grad_norm": 14.091415405273438,
      "learning_rate": 7.055555555555557e-06,
      "loss": 0.2985,
      "step": 1825
    },
    {
      "epoch": 0.6,
      "grad_norm": 8.496126174926758,
      "learning_rate": 7e-06,
      "loss": 0.3029,
      "step": 1850
    },
    {
      "epoch": 0.6,
      "grad_norm": 5.859543323516846,
      "learning_rate": 6.944444444444445e-06,
      "loss": 0.271,
      "step": 1875
    },
    {
      "epoch": 0.61,
      "grad_norm": 16.182022094726562,
      "learning_rate": 6.88888888888889e-06,
      "loss": 0.3039,
      "step": 1900
    },
    {
      "epoch": 0.62,
      "grad_norm": 6.995250225067139,
      "learning_rate": 6.833333333333334e-06,
      "loss": 0.2867,
      "step": 1925
    },
    {
      "epoch": 0.63,
      "grad_norm": 11.519378662109375,
      "learning_rate": 6.777777777777779e-06,
      "loss": 0.3339,
      "step": 1950
    },
    {
      "epoch": 0.64,
      "grad_norm": 10.880196571350098,
      "learning_rate": 6.7222222222222235e-06,
      "loss": 0.2873,
      "step": 1975
    },
    {
      "epoch": 0.64,
      "grad_norm": 16.335350036621094,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.2977,
      "step": 2000
    },
    {
      "epoch": 0.64,
      "eval_loss": 0.2855232059955597,
      "eval_runtime": 2086.2971,
      "eval_samples_per_second": 3.72,
      "eval_steps_per_second": 0.465,
      "eval_wer": 0.5296694986877808,
      "step": 2000
    },
    {
      "epoch": 0.65,
      "grad_norm": 12.254558563232422,
      "learning_rate": 6.6111111111111115e-06,
      "loss": 0.2668,
      "step": 2025
    },
    {
      "epoch": 0.66,
      "grad_norm": 14.6058931350708,
      "learning_rate": 6.555555555555556e-06,
      "loss": 0.3302,
      "step": 2050
    },
    {
      "epoch": 0.67,
      "grad_norm": 9.349474906921387,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 0.3228,
      "step": 2075
    },
    {
      "epoch": 0.68,
      "grad_norm": 10.22000789642334,
      "learning_rate": 6.444444444444445e-06,
      "loss": 0.2944,
      "step": 2100
    },
    {
      "epoch": 0.68,
      "grad_norm": 7.740598201751709,
      "learning_rate": 6.3888888888888885e-06,
      "loss": 0.2875,
      "step": 2125
    },
    {
      "epoch": 0.69,
      "grad_norm": 5.818449974060059,
      "learning_rate": 6.333333333333333e-06,
      "loss": 0.2918,
      "step": 2150
    },
    {
      "epoch": 0.7,
      "grad_norm": 17.033401489257812,
      "learning_rate": 6.277777777777778e-06,
      "loss": 0.2803,
      "step": 2175
    },
    {
      "epoch": 0.71,
      "grad_norm": 8.139738082885742,
      "learning_rate": 6.222222222222223e-06,
      "loss": 0.2964,
      "step": 2200
    },
    {
      "epoch": 0.72,
      "grad_norm": 4.335383892059326,
      "learning_rate": 6.166666666666667e-06,
      "loss": 0.2548,
      "step": 2225
    },
    {
      "epoch": 0.72,
      "grad_norm": 8.209503173828125,
      "learning_rate": 6.111111111111112e-06,
      "loss": 0.2768,
      "step": 2250
    },
    {
      "epoch": 0.73,
      "grad_norm": 8.060270309448242,
      "learning_rate": 6.055555555555555e-06,
      "loss": 0.3085,
      "step": 2275
    },
    {
      "epoch": 0.74,
      "grad_norm": 7.265747547149658,
      "learning_rate": 6e-06,
      "loss": 0.2695,
      "step": 2300
    },
    {
      "epoch": 0.75,
      "grad_norm": 14.466809272766113,
      "learning_rate": 5.944444444444445e-06,
      "loss": 0.2711,
      "step": 2325
    },
    {
      "epoch": 0.76,
      "grad_norm": 9.156736373901367,
      "learning_rate": 5.88888888888889e-06,
      "loss": 0.2893,
      "step": 2350
    },
    {
      "epoch": 0.76,
      "grad_norm": 11.08906364440918,
      "learning_rate": 5.833333333333334e-06,
      "loss": 0.2621,
      "step": 2375
    },
    {
      "epoch": 0.77,
      "grad_norm": 14.466202735900879,
      "learning_rate": 5.777777777777778e-06,
      "loss": 0.2988,
      "step": 2400
    },
    {
      "epoch": 0.78,
      "grad_norm": 11.54973030090332,
      "learning_rate": 5.722222222222222e-06,
      "loss": 0.2747,
      "step": 2425
    },
    {
      "epoch": 0.79,
      "grad_norm": 9.84890079498291,
      "learning_rate": 5.666666666666667e-06,
      "loss": 0.2845,
      "step": 2450
    },
    {
      "epoch": 0.8,
      "grad_norm": 7.711421966552734,
      "learning_rate": 5.611111111111112e-06,
      "loss": 0.2513,
      "step": 2475
    },
    {
      "epoch": 0.81,
      "grad_norm": 13.456315994262695,
      "learning_rate": 5.555555555555557e-06,
      "loss": 0.2653,
      "step": 2500
    },
    {
      "epoch": 0.81,
      "grad_norm": 12.786430358886719,
      "learning_rate": 5.500000000000001e-06,
      "loss": 0.2562,
      "step": 2525
    },
    {
      "epoch": 0.82,
      "grad_norm": 5.972217559814453,
      "learning_rate": 5.444444444444445e-06,
      "loss": 0.2499,
      "step": 2550
    },
    {
      "epoch": 0.83,
      "grad_norm": 4.539140224456787,
      "learning_rate": 5.388888888888889e-06,
      "loss": 0.2587,
      "step": 2575
    },
    {
      "epoch": 0.84,
      "grad_norm": 8.782011032104492,
      "learning_rate": 5.333333333333334e-06,
      "loss": 0.2563,
      "step": 2600
    },
    {
      "epoch": 0.85,
      "grad_norm": 9.317708015441895,
      "learning_rate": 5.2777777777777785e-06,
      "loss": 0.2669,
      "step": 2625
    },
    {
      "epoch": 0.85,
      "grad_norm": 7.227994441986084,
      "learning_rate": 5.2222222222222226e-06,
      "loss": 0.2516,
      "step": 2650
    },
    {
      "epoch": 0.86,
      "grad_norm": 7.91267728805542,
      "learning_rate": 5.1666666666666675e-06,
      "loss": 0.2708,
      "step": 2675
    },
    {
      "epoch": 0.87,
      "grad_norm": 9.71472454071045,
      "learning_rate": 5.1111111111111115e-06,
      "loss": 0.2752,
      "step": 2700
    },
    {
      "epoch": 0.88,
      "grad_norm": 9.538637161254883,
      "learning_rate": 5.0555555555555555e-06,
      "loss": 0.2541,
      "step": 2725
    },
    {
      "epoch": 0.89,
      "grad_norm": 10.09244441986084,
      "learning_rate": 5e-06,
      "loss": 0.2667,
      "step": 2750
    },
    {
      "epoch": 0.89,
      "grad_norm": 7.292573928833008,
      "learning_rate": 4.944444444444445e-06,
      "loss": 0.3007,
      "step": 2775
    },
    {
      "epoch": 0.9,
      "grad_norm": 7.321602821350098,
      "learning_rate": 4.888888888888889e-06,
      "loss": 0.245,
      "step": 2800
    },
    {
      "epoch": 0.91,
      "grad_norm": 9.209299087524414,
      "learning_rate": 4.833333333333333e-06,
      "loss": 0.2337,
      "step": 2825
    },
    {
      "epoch": 0.92,
      "grad_norm": 11.65815258026123,
      "learning_rate": 4.777777777777778e-06,
      "loss": 0.2553,
      "step": 2850
    },
    {
      "epoch": 0.93,
      "grad_norm": 9.273401260375977,
      "learning_rate": 4.722222222222222e-06,
      "loss": 0.247,
      "step": 2875
    },
    {
      "epoch": 0.93,
      "grad_norm": 9.994607925415039,
      "learning_rate": 4.666666666666667e-06,
      "loss": 0.2761,
      "step": 2900
    },
    {
      "epoch": 0.94,
      "grad_norm": 6.799399375915527,
      "learning_rate": 4.611111111111112e-06,
      "loss": 0.2637,
      "step": 2925
    },
    {
      "epoch": 0.95,
      "grad_norm": 9.812761306762695,
      "learning_rate": 4.555555555555556e-06,
      "loss": 0.2926,
      "step": 2950
    },
    {
      "epoch": 0.96,
      "grad_norm": 9.971693992614746,
      "learning_rate": 4.5e-06,
      "loss": 0.2571,
      "step": 2975
    },
    {
      "epoch": 0.97,
      "grad_norm": 9.44893741607666,
      "learning_rate": 4.444444444444444e-06,
      "loss": 0.2541,
      "step": 3000
    },
    {
      "epoch": 0.97,
      "eval_loss": 0.254996120929718,
      "eval_runtime": 2083.5916,
      "eval_samples_per_second": 3.725,
      "eval_steps_per_second": 0.466,
      "eval_wer": 0.5009118811440773,
      "step": 3000
    },
    {
      "epoch": 0.97,
      "grad_norm": 8.639594078063965,
      "learning_rate": 4.388888888888889e-06,
      "loss": 0.2846,
      "step": 3025
    },
    {
      "epoch": 0.98,
      "grad_norm": 9.723342895507812,
      "learning_rate": 4.333333333333334e-06,
      "loss": 0.2692,
      "step": 3050
    },
    {
      "epoch": 0.99,
      "grad_norm": 7.255222320556641,
      "learning_rate": 4.277777777777778e-06,
      "loss": 0.2406,
      "step": 3075
    },
    {
      "epoch": 1.0,
      "grad_norm": 9.54223346710205,
      "learning_rate": 4.222222222222223e-06,
      "loss": 0.2596,
      "step": 3100
    },
    {
      "epoch": 1.01,
      "grad_norm": 7.074874401092529,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.2201,
      "step": 3125
    },
    {
      "epoch": 1.01,
      "grad_norm": 5.8523640632629395,
      "learning_rate": 4.111111111111111e-06,
      "loss": 0.1695,
      "step": 3150
    },
    {
      "epoch": 1.02,
      "grad_norm": 6.729598522186279,
      "learning_rate": 4.055555555555556e-06,
      "loss": 0.1396,
      "step": 3175
    },
    {
      "epoch": 1.03,
      "grad_norm": 7.361201286315918,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.1583,
      "step": 3200
    },
    {
      "epoch": 1.04,
      "grad_norm": 5.176375389099121,
      "learning_rate": 3.944444444444445e-06,
      "loss": 0.1543,
      "step": 3225
    },
    {
      "epoch": 1.05,
      "grad_norm": 10.241930961608887,
      "learning_rate": 3.88888888888889e-06,
      "loss": 0.1555,
      "step": 3250
    },
    {
      "epoch": 1.05,
      "grad_norm": 6.182642936706543,
      "learning_rate": 3.833333333333334e-06,
      "loss": 0.1441,
      "step": 3275
    },
    {
      "epoch": 1.06,
      "grad_norm": 13.024508476257324,
      "learning_rate": 3.777777777777778e-06,
      "loss": 0.1503,
      "step": 3300
    },
    {
      "epoch": 1.07,
      "grad_norm": 7.090968608856201,
      "learning_rate": 3.7222222222222225e-06,
      "loss": 0.1714,
      "step": 3325
    },
    {
      "epoch": 1.08,
      "grad_norm": 8.179136276245117,
      "learning_rate": 3.6666666666666666e-06,
      "loss": 0.1663,
      "step": 3350
    },
    {
      "epoch": 1.09,
      "grad_norm": 9.39560604095459,
      "learning_rate": 3.6111111111111115e-06,
      "loss": 0.1643,
      "step": 3375
    },
    {
      "epoch": 1.1,
      "grad_norm": 6.854971408843994,
      "learning_rate": 3.555555555555556e-06,
      "loss": 0.1767,
      "step": 3400
    },
    {
      "epoch": 1.1,
      "grad_norm": 7.17493200302124,
      "learning_rate": 3.5e-06,
      "loss": 0.1408,
      "step": 3425
    },
    {
      "epoch": 1.11,
      "grad_norm": 5.211622714996338,
      "learning_rate": 3.444444444444445e-06,
      "loss": 0.1592,
      "step": 3450
    },
    {
      "epoch": 1.12,
      "grad_norm": 4.639506816864014,
      "learning_rate": 3.3888888888888893e-06,
      "loss": 0.1428,
      "step": 3475
    },
    {
      "epoch": 1.13,
      "grad_norm": 7.832518100738525,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.1564,
      "step": 3500
    },
    {
      "epoch": 1.14,
      "grad_norm": 5.339199542999268,
      "learning_rate": 3.277777777777778e-06,
      "loss": 0.1722,
      "step": 3525
    },
    {
      "epoch": 1.14,
      "grad_norm": 7.612499713897705,
      "learning_rate": 3.2222222222222227e-06,
      "loss": 0.1667,
      "step": 3550
    },
    {
      "epoch": 1.15,
      "grad_norm": 10.03056812286377,
      "learning_rate": 3.1666666666666667e-06,
      "loss": 0.1636,
      "step": 3575
    },
    {
      "epoch": 1.16,
      "grad_norm": 6.799817085266113,
      "learning_rate": 3.1111111111111116e-06,
      "loss": 0.1454,
      "step": 3600
    },
    {
      "epoch": 1.17,
      "grad_norm": 6.477158069610596,
      "learning_rate": 3.055555555555556e-06,
      "loss": 0.1604,
      "step": 3625
    },
    {
      "epoch": 1.18,
      "grad_norm": 5.446293354034424,
      "learning_rate": 3e-06,
      "loss": 0.1545,
      "step": 3650
    },
    {
      "epoch": 1.18,
      "grad_norm": 5.985503673553467,
      "learning_rate": 2.944444444444445e-06,
      "loss": 0.1508,
      "step": 3675
    },
    {
      "epoch": 1.19,
      "grad_norm": 6.837751388549805,
      "learning_rate": 2.888888888888889e-06,
      "loss": 0.1587,
      "step": 3700
    },
    {
      "epoch": 1.2,
      "grad_norm": 5.51887845993042,
      "learning_rate": 2.8333333333333335e-06,
      "loss": 0.1485,
      "step": 3725
    },
    {
      "epoch": 1.21,
      "grad_norm": 8.242449760437012,
      "learning_rate": 2.7777777777777783e-06,
      "loss": 0.1656,
      "step": 3750
    },
    {
      "epoch": 1.22,
      "grad_norm": 8.832524299621582,
      "learning_rate": 2.7222222222222224e-06,
      "loss": 0.1594,
      "step": 3775
    },
    {
      "epoch": 1.22,
      "grad_norm": 5.77171516418457,
      "learning_rate": 2.666666666666667e-06,
      "loss": 0.1495,
      "step": 3800
    },
    {
      "epoch": 1.23,
      "grad_norm": 6.725485801696777,
      "learning_rate": 2.6111111111111113e-06,
      "loss": 0.1473,
      "step": 3825
    },
    {
      "epoch": 1.24,
      "grad_norm": 10.141379356384277,
      "learning_rate": 2.5555555555555557e-06,
      "loss": 0.1421,
      "step": 3850
    },
    {
      "epoch": 1.25,
      "grad_norm": 5.970383167266846,
      "learning_rate": 2.5e-06,
      "loss": 0.1423,
      "step": 3875
    },
    {
      "epoch": 1.26,
      "grad_norm": 6.767889499664307,
      "learning_rate": 2.4444444444444447e-06,
      "loss": 0.1741,
      "step": 3900
    },
    {
      "epoch": 1.26,
      "grad_norm": 7.690521240234375,
      "learning_rate": 2.388888888888889e-06,
      "loss": 0.1614,
      "step": 3925
    },
    {
      "epoch": 1.27,
      "grad_norm": 4.897395133972168,
      "learning_rate": 2.3333333333333336e-06,
      "loss": 0.153,
      "step": 3950
    },
    {
      "epoch": 1.28,
      "grad_norm": 7.075612545013428,
      "learning_rate": 2.277777777777778e-06,
      "loss": 0.1563,
      "step": 3975
    },
    {
      "epoch": 1.29,
      "grad_norm": 4.673244953155518,
      "learning_rate": 2.222222222222222e-06,
      "loss": 0.1512,
      "step": 4000
    },
    {
      "epoch": 1.29,
      "eval_loss": 0.2438921481370926,
      "eval_runtime": 2435.1055,
      "eval_samples_per_second": 3.187,
      "eval_steps_per_second": 0.399,
      "eval_wer": 0.47462301499043635,
      "step": 4000
    },
    {
      "epoch": 1.3,
      "grad_norm": 12.350273132324219,
      "learning_rate": 2.166666666666667e-06,
      "loss": 0.1631,
      "step": 4025
    },
    {
      "epoch": 1.3,
      "grad_norm": 10.359116554260254,
      "learning_rate": 2.1111111111111114e-06,
      "loss": 0.149,
      "step": 4050
    },
    {
      "epoch": 1.31,
      "grad_norm": 6.066259860992432,
      "learning_rate": 2.0555555555555555e-06,
      "loss": 0.1642,
      "step": 4075
    },
    {
      "epoch": 1.32,
      "grad_norm": 5.781357765197754,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.1451,
      "step": 4100
    },
    {
      "epoch": 1.33,
      "grad_norm": 8.008292198181152,
      "learning_rate": 1.944444444444445e-06,
      "loss": 0.1498,
      "step": 4125
    },
    {
      "epoch": 1.34,
      "grad_norm": 8.501078605651855,
      "learning_rate": 1.888888888888889e-06,
      "loss": 0.1614,
      "step": 4150
    },
    {
      "epoch": 1.34,
      "grad_norm": 5.542455673217773,
      "learning_rate": 1.8333333333333333e-06,
      "loss": 0.1538,
      "step": 4175
    },
    {
      "epoch": 1.35,
      "grad_norm": 6.979973316192627,
      "learning_rate": 1.777777777777778e-06,
      "loss": 0.151,
      "step": 4200
    },
    {
      "epoch": 1.36,
      "grad_norm": 8.080177307128906,
      "learning_rate": 1.7222222222222224e-06,
      "loss": 0.1462,
      "step": 4225
    },
    {
      "epoch": 1.37,
      "grad_norm": 7.01210355758667,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.1578,
      "step": 4250
    },
    {
      "epoch": 1.38,
      "grad_norm": 8.484158515930176,
      "learning_rate": 1.6111111111111113e-06,
      "loss": 0.1468,
      "step": 4275
    },
    {
      "epoch": 1.38,
      "grad_norm": 4.0456132888793945,
      "learning_rate": 1.5555555555555558e-06,
      "loss": 0.1509,
      "step": 4300
    },
    {
      "epoch": 1.39,
      "grad_norm": 10.766803741455078,
      "learning_rate": 1.5e-06,
      "loss": 0.1495,
      "step": 4325
    },
    {
      "epoch": 1.4,
      "grad_norm": 9.22208023071289,
      "learning_rate": 1.4444444444444445e-06,
      "loss": 0.1621,
      "step": 4350
    },
    {
      "epoch": 1.41,
      "grad_norm": 8.47338581085205,
      "learning_rate": 1.3888888888888892e-06,
      "loss": 0.1585,
      "step": 4375
    },
    {
      "epoch": 1.42,
      "grad_norm": 7.198250770568848,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 0.1726,
      "step": 4400
    },
    {
      "epoch": 1.43,
      "grad_norm": 5.910817623138428,
      "learning_rate": 1.2777777777777779e-06,
      "loss": 0.1451,
      "step": 4425
    },
    {
      "epoch": 1.43,
      "grad_norm": 5.221580982208252,
      "learning_rate": 1.2222222222222223e-06,
      "loss": 0.1469,
      "step": 4450
    },
    {
      "epoch": 1.44,
      "grad_norm": 7.902259826660156,
      "learning_rate": 1.1666666666666668e-06,
      "loss": 0.1522,
      "step": 4475
    },
    {
      "epoch": 1.45,
      "grad_norm": 7.130344390869141,
      "learning_rate": 1.111111111111111e-06,
      "loss": 0.1477,
      "step": 4500
    },
    {
      "epoch": 1.46,
      "grad_norm": 4.117496490478516,
      "learning_rate": 1.0555555555555557e-06,
      "loss": 0.1308,
      "step": 4525
    },
    {
      "epoch": 1.47,
      "grad_norm": 4.9565935134887695,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.143,
      "step": 4550
    },
    {
      "epoch": 1.47,
      "grad_norm": 9.52632999420166,
      "learning_rate": 9.444444444444445e-07,
      "loss": 0.1488,
      "step": 4575
    },
    {
      "epoch": 1.48,
      "grad_norm": 7.335168838500977,
      "learning_rate": 8.88888888888889e-07,
      "loss": 0.1467,
      "step": 4600
    },
    {
      "epoch": 1.49,
      "grad_norm": 4.118008613586426,
      "learning_rate": 8.333333333333333e-07,
      "loss": 0.1602,
      "step": 4625
    },
    {
      "epoch": 1.5,
      "grad_norm": 5.684025764465332,
      "learning_rate": 7.777777777777779e-07,
      "loss": 0.1767,
      "step": 4650
    },
    {
      "epoch": 1.51,
      "grad_norm": 7.431414604187012,
      "learning_rate": 7.222222222222222e-07,
      "loss": 0.1622,
      "step": 4675
    },
    {
      "epoch": 1.51,
      "grad_norm": 11.669175148010254,
      "learning_rate": 6.666666666666667e-07,
      "loss": 0.1587,
      "step": 4700
    },
    {
      "epoch": 1.52,
      "grad_norm": 4.074918270111084,
      "learning_rate": 6.111111111111112e-07,
      "loss": 0.1389,
      "step": 4725
    },
    {
      "epoch": 1.53,
      "grad_norm": 4.829873561859131,
      "learning_rate": 5.555555555555555e-07,
      "loss": 0.1273,
      "step": 4750
    },
    {
      "epoch": 1.54,
      "grad_norm": 6.028909683227539,
      "learning_rate": 5.000000000000001e-07,
      "loss": 0.1522,
      "step": 4775
    },
    {
      "epoch": 1.55,
      "grad_norm": 6.762262344360352,
      "learning_rate": 4.444444444444445e-07,
      "loss": 0.1484,
      "step": 4800
    },
    {
      "epoch": 1.55,
      "grad_norm": 6.13216495513916,
      "learning_rate": 3.8888888888888895e-07,
      "loss": 0.1423,
      "step": 4825
    },
    {
      "epoch": 1.56,
      "grad_norm": 8.262957572937012,
      "learning_rate": 3.3333333333333335e-07,
      "loss": 0.1463,
      "step": 4850
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.5338327884674072,
      "learning_rate": 2.7777777777777776e-07,
      "loss": 0.1241,
      "step": 4875
    },
    {
      "epoch": 1.58,
      "grad_norm": 8.249900817871094,
      "learning_rate": 2.2222222222222224e-07,
      "loss": 0.1419,
      "step": 4900
    },
    {
      "epoch": 1.59,
      "grad_norm": 6.7409539222717285,
      "learning_rate": 1.6666666666666668e-07,
      "loss": 0.1308,
      "step": 4925
    },
    {
      "epoch": 1.59,
      "grad_norm": 5.887229919433594,
      "learning_rate": 1.1111111111111112e-07,
      "loss": 0.1581,
      "step": 4950
    },
    {
      "epoch": 1.6,
      "grad_norm": 6.0119099617004395,
      "learning_rate": 5.555555555555556e-08,
      "loss": 0.1407,
      "step": 4975
    },
    {
      "epoch": 1.61,
      "grad_norm": 5.794888019561768,
      "learning_rate": 0.0,
      "loss": 0.1415,
      "step": 5000
    },
    {
      "epoch": 1.61,
      "eval_loss": 0.23594366014003754,
      "eval_runtime": 2284.612,
      "eval_samples_per_second": 3.397,
      "eval_steps_per_second": 0.425,
      "eval_wer": 0.4691739691294871,
      "step": 5000
    }
  ],
  "logging_steps": 25,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 1000,
  "total_flos": 1.15419730857984e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}