{ "best_metric": 75.64947360448383, "best_model_checkpoint": "./whisper-small-finetune_gulf/checkpoint-4000", "epoch": 2.311604253351826, "eval_steps": 1000, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 23.472209930419922, "learning_rate": 5.000000000000001e-07, "loss": 1.8733, "step": 25 }, { "epoch": 0.02, "grad_norm": 25.33260154724121, "learning_rate": 1.0000000000000002e-06, "loss": 1.6853, "step": 50 }, { "epoch": 0.03, "grad_norm": 28.451927185058594, "learning_rate": 1.5e-06, "loss": 1.6101, "step": 75 }, { "epoch": 0.05, "grad_norm": 29.835905075073242, "learning_rate": 2.0000000000000003e-06, "loss": 1.7625, "step": 100 }, { "epoch": 0.06, "grad_norm": 28.589879989624023, "learning_rate": 2.5e-06, "loss": 1.6508, "step": 125 }, { "epoch": 0.07, "grad_norm": 28.195240020751953, "learning_rate": 3e-06, "loss": 1.6032, "step": 150 }, { "epoch": 0.08, "grad_norm": 23.07231903076172, "learning_rate": 3.5e-06, "loss": 1.5002, "step": 175 }, { "epoch": 0.09, "grad_norm": 26.471607208251953, "learning_rate": 4.000000000000001e-06, "loss": 1.3828, "step": 200 }, { "epoch": 0.1, "grad_norm": 20.687870025634766, "learning_rate": 4.5e-06, "loss": 1.52, "step": 225 }, { "epoch": 0.12, "grad_norm": 20.147708892822266, "learning_rate": 5e-06, "loss": 1.5193, "step": 250 }, { "epoch": 0.13, "grad_norm": 21.233428955078125, "learning_rate": 5.500000000000001e-06, "loss": 1.5131, "step": 275 }, { "epoch": 0.14, "grad_norm": 21.05452537536621, "learning_rate": 6e-06, "loss": 1.4526, "step": 300 }, { "epoch": 0.15, "grad_norm": 24.340858459472656, "learning_rate": 6.5000000000000004e-06, "loss": 1.5262, "step": 325 }, { "epoch": 0.16, "grad_norm": 24.064132690429688, "learning_rate": 7e-06, "loss": 1.397, "step": 350 }, { "epoch": 0.17, "grad_norm": 21.471921920776367, "learning_rate": 7.500000000000001e-06, "loss": 1.5244, "step": 375 }, { "epoch": 0.18, "grad_norm": 22.753755569458008, "learning_rate": 8.000000000000001e-06, "loss": 1.3962, "step": 400 }, { "epoch": 0.2, "grad_norm": 18.438940048217773, "learning_rate": 8.5e-06, "loss": 1.3372, "step": 425 }, { "epoch": 0.21, "grad_norm": 19.2254581451416, "learning_rate": 9e-06, "loss": 1.426, "step": 450 }, { "epoch": 0.22, "grad_norm": 18.967300415039062, "learning_rate": 9.5e-06, "loss": 1.3301, "step": 475 }, { "epoch": 0.23, "grad_norm": 20.847196578979492, "learning_rate": 1e-05, "loss": 1.2433, "step": 500 }, { "epoch": 0.24, "grad_norm": 18.459182739257812, "learning_rate": 9.944444444444445e-06, "loss": 1.2761, "step": 525 }, { "epoch": 0.25, "grad_norm": 22.492895126342773, "learning_rate": 9.88888888888889e-06, "loss": 1.4599, "step": 550 }, { "epoch": 0.27, "grad_norm": 25.510766983032227, "learning_rate": 9.833333333333333e-06, "loss": 1.2511, "step": 575 }, { "epoch": 0.28, "grad_norm": 20.4246768951416, "learning_rate": 9.777777777777779e-06, "loss": 1.2121, "step": 600 }, { "epoch": 0.29, "grad_norm": 25.91058349609375, "learning_rate": 9.722222222222223e-06, "loss": 1.4637, "step": 625 }, { "epoch": 0.3, "grad_norm": 25.087705612182617, "learning_rate": 9.666666666666667e-06, "loss": 1.2607, "step": 650 }, { "epoch": 0.31, "grad_norm": 20.08827781677246, "learning_rate": 9.611111111111112e-06, "loss": 1.4525, "step": 675 }, { "epoch": 0.32, "grad_norm": 16.310239791870117, "learning_rate": 9.555555555555556e-06, "loss": 1.2905, "step": 700 }, { "epoch": 0.34, "grad_norm": 18.671165466308594, "learning_rate": 9.5e-06, "loss": 1.3568, "step": 725 }, { "epoch": 0.35, "grad_norm": 25.499460220336914, "learning_rate": 9.444444444444445e-06, "loss": 1.2893, "step": 750 }, { "epoch": 0.36, "grad_norm": 20.31481170654297, "learning_rate": 9.38888888888889e-06, "loss": 1.268, "step": 775 }, { "epoch": 0.37, "grad_norm": 22.260953903198242, "learning_rate": 9.333333333333334e-06, "loss": 1.2924, "step": 800 }, { "epoch": 0.38, "grad_norm": 19.559682846069336, "learning_rate": 9.277777777777778e-06, "loss": 1.3534, "step": 825 }, { "epoch": 0.39, "grad_norm": 21.260534286499023, "learning_rate": 9.222222222222224e-06, "loss": 1.265, "step": 850 }, { "epoch": 0.4, "grad_norm": 22.76539421081543, "learning_rate": 9.166666666666666e-06, "loss": 1.302, "step": 875 }, { "epoch": 0.42, "grad_norm": 23.064783096313477, "learning_rate": 9.111111111111112e-06, "loss": 1.25, "step": 900 }, { "epoch": 0.43, "grad_norm": 19.941423416137695, "learning_rate": 9.055555555555556e-06, "loss": 1.2915, "step": 925 }, { "epoch": 0.44, "grad_norm": 17.240646362304688, "learning_rate": 9e-06, "loss": 1.2868, "step": 950 }, { "epoch": 0.45, "grad_norm": 18.2373046875, "learning_rate": 8.944444444444446e-06, "loss": 1.2259, "step": 975 }, { "epoch": 0.46, "grad_norm": 22.34012794494629, "learning_rate": 8.888888888888888e-06, "loss": 1.3947, "step": 1000 }, { "epoch": 0.46, "eval_cer": 65.38537817965462, "eval_loss": 1.2471961975097656, "eval_runtime": 1151.935, "eval_samples_per_second": 3.755, "eval_steps_per_second": 0.47, "eval_wer": 84.87843671892752, "step": 1000 }, { "epoch": 0.47, "grad_norm": 18.88918685913086, "learning_rate": 8.833333333333334e-06, "loss": 1.2824, "step": 1025 }, { "epoch": 0.49, "grad_norm": 20.2314395904541, "learning_rate": 8.777777777777778e-06, "loss": 1.3135, "step": 1050 }, { "epoch": 0.5, "grad_norm": 21.175655364990234, "learning_rate": 8.722222222222224e-06, "loss": 1.2259, "step": 1075 }, { "epoch": 0.51, "grad_norm": 16.999879837036133, "learning_rate": 8.666666666666668e-06, "loss": 1.2408, "step": 1100 }, { "epoch": 0.52, "grad_norm": 19.178285598754883, "learning_rate": 8.611111111111112e-06, "loss": 1.2721, "step": 1125 }, { "epoch": 0.53, "grad_norm": 20.64499855041504, "learning_rate": 8.555555555555556e-06, "loss": 1.3392, "step": 1150 }, { "epoch": 0.54, "grad_norm": 19.396726608276367, "learning_rate": 8.5e-06, "loss": 1.2863, "step": 1175 }, { "epoch": 0.55, "grad_norm": 19.87407875061035, "learning_rate": 8.444444444444446e-06, "loss": 1.3248, "step": 1200 }, { "epoch": 0.57, "grad_norm": 16.245336532592773, "learning_rate": 8.38888888888889e-06, "loss": 1.3406, "step": 1225 }, { "epoch": 0.58, "grad_norm": 17.97079086303711, "learning_rate": 8.333333333333334e-06, "loss": 1.211, "step": 1250 }, { "epoch": 0.59, "grad_norm": 25.904541015625, "learning_rate": 8.277777777777778e-06, "loss": 1.334, "step": 1275 }, { "epoch": 0.6, "grad_norm": 21.328983306884766, "learning_rate": 8.222222222222222e-06, "loss": 1.2982, "step": 1300 }, { "epoch": 0.61, "grad_norm": 19.949731826782227, "learning_rate": 8.166666666666668e-06, "loss": 1.2365, "step": 1325 }, { "epoch": 0.62, "grad_norm": 15.301751136779785, "learning_rate": 8.111111111111112e-06, "loss": 1.2033, "step": 1350 }, { "epoch": 0.64, "grad_norm": 22.96265411376953, "learning_rate": 8.055555555555557e-06, "loss": 1.2319, "step": 1375 }, { "epoch": 0.65, "grad_norm": 17.26740074157715, "learning_rate": 8.000000000000001e-06, "loss": 1.1544, "step": 1400 }, { "epoch": 0.66, "grad_norm": 27.209585189819336, "learning_rate": 7.944444444444445e-06, "loss": 1.3496, "step": 1425 }, { "epoch": 0.67, "grad_norm": 20.707279205322266, "learning_rate": 7.88888888888889e-06, "loss": 1.2727, "step": 1450 }, { "epoch": 0.68, "grad_norm": 20.74648666381836, "learning_rate": 7.833333333333333e-06, "loss": 1.2371, "step": 1475 }, { "epoch": 0.69, "grad_norm": 20.16001319885254, "learning_rate": 7.77777777777778e-06, "loss": 1.2456, "step": 1500 }, { "epoch": 0.71, "grad_norm": 20.684553146362305, "learning_rate": 7.722222222222223e-06, "loss": 1.1862, "step": 1525 }, { "epoch": 0.72, "grad_norm": 17.660783767700195, "learning_rate": 7.666666666666667e-06, "loss": 1.1802, "step": 1550 }, { "epoch": 0.73, "grad_norm": 27.349889755249023, "learning_rate": 7.611111111111111e-06, "loss": 1.1869, "step": 1575 }, { "epoch": 0.74, "grad_norm": 20.43325424194336, "learning_rate": 7.555555555555556e-06, "loss": 1.2752, "step": 1600 }, { "epoch": 0.75, "grad_norm": 18.21103286743164, "learning_rate": 7.500000000000001e-06, "loss": 1.083, "step": 1625 }, { "epoch": 0.76, "grad_norm": 17.000732421875, "learning_rate": 7.444444444444445e-06, "loss": 1.1785, "step": 1650 }, { "epoch": 0.77, "grad_norm": 16.118410110473633, "learning_rate": 7.38888888888889e-06, "loss": 1.2115, "step": 1675 }, { "epoch": 0.79, "grad_norm": 16.376646041870117, "learning_rate": 7.333333333333333e-06, "loss": 1.1399, "step": 1700 }, { "epoch": 0.8, "grad_norm": 23.872587203979492, "learning_rate": 7.277777777777778e-06, "loss": 1.2943, "step": 1725 }, { "epoch": 0.81, "grad_norm": 17.694063186645508, "learning_rate": 7.222222222222223e-06, "loss": 1.1768, "step": 1750 }, { "epoch": 0.82, "grad_norm": 15.69645881652832, "learning_rate": 7.166666666666667e-06, "loss": 1.1779, "step": 1775 }, { "epoch": 0.83, "grad_norm": 17.946985244750977, "learning_rate": 7.111111111111112e-06, "loss": 1.1575, "step": 1800 }, { "epoch": 0.84, "grad_norm": 24.68529510498047, "learning_rate": 7.055555555555557e-06, "loss": 1.2993, "step": 1825 }, { "epoch": 0.86, "grad_norm": 20.466711044311523, "learning_rate": 7e-06, "loss": 1.1476, "step": 1850 }, { "epoch": 0.87, "grad_norm": 20.040111541748047, "learning_rate": 6.944444444444445e-06, "loss": 1.1855, "step": 1875 }, { "epoch": 0.88, "grad_norm": 20.88412857055664, "learning_rate": 6.88888888888889e-06, "loss": 1.2041, "step": 1900 }, { "epoch": 0.89, "grad_norm": 16.89543342590332, "learning_rate": 6.833333333333334e-06, "loss": 1.2062, "step": 1925 }, { "epoch": 0.9, "grad_norm": 20.681123733520508, "learning_rate": 6.777777777777779e-06, "loss": 1.0671, "step": 1950 }, { "epoch": 0.91, "grad_norm": 20.805986404418945, "learning_rate": 6.7222222222222235e-06, "loss": 1.197, "step": 1975 }, { "epoch": 0.92, "grad_norm": 16.39628791809082, "learning_rate": 6.666666666666667e-06, "loss": 1.333, "step": 2000 }, { "epoch": 0.92, "eval_cer": 72.85421517713513, "eval_loss": 1.1428595781326294, "eval_runtime": 1198.4252, "eval_samples_per_second": 3.61, "eval_steps_per_second": 0.451, "eval_wer": 92.31235325304856, "step": 2000 }, { "epoch": 0.94, "grad_norm": 18.98177719116211, "learning_rate": 6.6111111111111115e-06, "loss": 1.1586, "step": 2025 }, { "epoch": 0.95, "grad_norm": 26.32819366455078, "learning_rate": 6.555555555555556e-06, "loss": 1.2463, "step": 2050 }, { "epoch": 0.96, "grad_norm": 17.621980667114258, "learning_rate": 6.5000000000000004e-06, "loss": 1.1764, "step": 2075 }, { "epoch": 0.97, "grad_norm": 19.834068298339844, "learning_rate": 6.444444444444445e-06, "loss": 1.2383, "step": 2100 }, { "epoch": 0.98, "grad_norm": 18.443336486816406, "learning_rate": 6.3888888888888885e-06, "loss": 1.1966, "step": 2125 }, { "epoch": 0.99, "grad_norm": 23.796865463256836, "learning_rate": 6.333333333333333e-06, "loss": 1.1506, "step": 2150 }, { "epoch": 1.01, "grad_norm": 13.802796363830566, "learning_rate": 6.277777777777778e-06, "loss": 1.004, "step": 2175 }, { "epoch": 1.02, "grad_norm": 11.86549186706543, "learning_rate": 6.222222222222223e-06, "loss": 0.8599, "step": 2200 }, { "epoch": 1.03, "grad_norm": 20.885765075683594, "learning_rate": 6.166666666666667e-06, "loss": 0.9144, "step": 2225 }, { "epoch": 1.04, "grad_norm": 15.317635536193848, "learning_rate": 6.111111111111112e-06, "loss": 0.863, "step": 2250 }, { "epoch": 1.05, "grad_norm": 16.995641708374023, "learning_rate": 6.055555555555555e-06, "loss": 0.8261, "step": 2275 }, { "epoch": 1.06, "grad_norm": 16.974023818969727, "learning_rate": 6e-06, "loss": 0.7936, "step": 2300 }, { "epoch": 1.07, "grad_norm": 15.467360496520996, "learning_rate": 5.944444444444445e-06, "loss": 0.8796, "step": 2325 }, { "epoch": 1.09, "grad_norm": 14.19994068145752, "learning_rate": 5.88888888888889e-06, "loss": 0.9499, "step": 2350 }, { "epoch": 1.1, "grad_norm": 18.083833694458008, "learning_rate": 5.833333333333334e-06, "loss": 0.8659, "step": 2375 }, { "epoch": 1.11, "grad_norm": 13.523449897766113, "learning_rate": 5.777777777777778e-06, "loss": 0.7468, "step": 2400 }, { "epoch": 1.12, "grad_norm": 22.28549575805664, "learning_rate": 5.722222222222222e-06, "loss": 0.9936, "step": 2425 }, { "epoch": 1.13, "grad_norm": 12.301285743713379, "learning_rate": 5.666666666666667e-06, "loss": 0.9272, "step": 2450 }, { "epoch": 1.14, "grad_norm": 15.811965942382812, "learning_rate": 5.611111111111112e-06, "loss": 0.8392, "step": 2475 }, { "epoch": 1.16, "grad_norm": 10.93188762664795, "learning_rate": 5.555555555555557e-06, "loss": 0.7856, "step": 2500 }, { "epoch": 1.17, "grad_norm": 15.368990898132324, "learning_rate": 5.500000000000001e-06, "loss": 0.991, "step": 2525 }, { "epoch": 1.18, "grad_norm": 23.429412841796875, "learning_rate": 5.444444444444445e-06, "loss": 0.956, "step": 2550 }, { "epoch": 1.19, "grad_norm": 15.563962936401367, "learning_rate": 5.388888888888889e-06, "loss": 0.9221, "step": 2575 }, { "epoch": 1.2, "grad_norm": 15.646391868591309, "learning_rate": 5.333333333333334e-06, "loss": 0.8138, "step": 2600 }, { "epoch": 1.21, "grad_norm": 13.61239242553711, "learning_rate": 5.2777777777777785e-06, "loss": 0.9199, "step": 2625 }, { "epoch": 1.23, "grad_norm": 16.09264373779297, "learning_rate": 5.2222222222222226e-06, "loss": 0.9146, "step": 2650 }, { "epoch": 1.24, "grad_norm": 18.509414672851562, "learning_rate": 5.1666666666666675e-06, "loss": 0.8323, "step": 2675 }, { "epoch": 1.25, "grad_norm": 12.82259750366211, "learning_rate": 5.1111111111111115e-06, "loss": 0.8451, "step": 2700 }, { "epoch": 1.26, "grad_norm": 13.008484840393066, "learning_rate": 5.0555555555555555e-06, "loss": 0.8415, "step": 2725 }, { "epoch": 1.27, "grad_norm": 15.046307563781738, "learning_rate": 5e-06, "loss": 0.8555, "step": 2750 }, { "epoch": 1.28, "grad_norm": 16.91812515258789, "learning_rate": 4.944444444444445e-06, "loss": 1.015, "step": 2775 }, { "epoch": 1.29, "grad_norm": 16.76761245727539, "learning_rate": 4.888888888888889e-06, "loss": 0.8488, "step": 2800 }, { "epoch": 1.31, "grad_norm": 20.7960262298584, "learning_rate": 4.833333333333333e-06, "loss": 0.9098, "step": 2825 }, { "epoch": 1.32, "grad_norm": 15.695616722106934, "learning_rate": 4.777777777777778e-06, "loss": 0.8866, "step": 2850 }, { "epoch": 1.33, "grad_norm": 18.41292953491211, "learning_rate": 4.722222222222222e-06, "loss": 0.8872, "step": 2875 }, { "epoch": 1.34, "grad_norm": 22.192108154296875, "learning_rate": 4.666666666666667e-06, "loss": 0.8102, "step": 2900 }, { "epoch": 1.35, "grad_norm": 21.19410514831543, "learning_rate": 4.611111111111112e-06, "loss": 0.9069, "step": 2925 }, { "epoch": 1.36, "grad_norm": 18.010290145874023, "learning_rate": 4.555555555555556e-06, "loss": 0.8646, "step": 2950 }, { "epoch": 1.38, "grad_norm": 18.863052368164062, "learning_rate": 4.5e-06, "loss": 0.9674, "step": 2975 }, { "epoch": 1.39, "grad_norm": 13.40828800201416, "learning_rate": 4.444444444444444e-06, "loss": 0.918, "step": 3000 }, { "epoch": 1.39, "eval_cer": 64.30927214097356, "eval_loss": 1.1143155097961426, "eval_runtime": 1160.055, "eval_samples_per_second": 3.729, "eval_steps_per_second": 0.466, "eval_wer": 82.85238203438612, "step": 3000 }, { "epoch": 1.4, "grad_norm": 16.100902557373047, "learning_rate": 4.388888888888889e-06, "loss": 0.8247, "step": 3025 }, { "epoch": 1.41, "grad_norm": 14.138301849365234, "learning_rate": 4.333333333333334e-06, "loss": 0.8077, "step": 3050 }, { "epoch": 1.42, "grad_norm": 21.747058868408203, "learning_rate": 4.277777777777778e-06, "loss": 0.9274, "step": 3075 }, { "epoch": 1.43, "grad_norm": 12.049752235412598, "learning_rate": 4.222222222222223e-06, "loss": 0.9581, "step": 3100 }, { "epoch": 1.44, "grad_norm": 13.890820503234863, "learning_rate": 4.166666666666667e-06, "loss": 0.8945, "step": 3125 }, { "epoch": 1.46, "grad_norm": 16.04409408569336, "learning_rate": 4.111111111111111e-06, "loss": 0.8455, "step": 3150 }, { "epoch": 1.47, "grad_norm": 14.720856666564941, "learning_rate": 4.055555555555556e-06, "loss": 0.8707, "step": 3175 }, { "epoch": 1.48, "grad_norm": 23.8120174407959, "learning_rate": 4.000000000000001e-06, "loss": 0.9138, "step": 3200 }, { "epoch": 1.49, "grad_norm": 19.527795791625977, "learning_rate": 3.944444444444445e-06, "loss": 0.8293, "step": 3225 }, { "epoch": 1.5, "grad_norm": 16.076868057250977, "learning_rate": 3.88888888888889e-06, "loss": 0.8586, "step": 3250 }, { "epoch": 1.51, "grad_norm": 18.909570693969727, "learning_rate": 3.833333333333334e-06, "loss": 0.9239, "step": 3275 }, { "epoch": 1.53, "grad_norm": 20.29988670349121, "learning_rate": 3.777777777777778e-06, "loss": 0.8682, "step": 3300 }, { "epoch": 1.54, "grad_norm": 15.01752758026123, "learning_rate": 3.7222222222222225e-06, "loss": 0.8631, "step": 3325 }, { "epoch": 1.55, "grad_norm": 14.974254608154297, "learning_rate": 3.6666666666666666e-06, "loss": 0.8882, "step": 3350 }, { "epoch": 1.56, "grad_norm": 15.673378944396973, "learning_rate": 3.6111111111111115e-06, "loss": 0.958, "step": 3375 }, { "epoch": 1.57, "grad_norm": 20.833036422729492, "learning_rate": 3.555555555555556e-06, "loss": 0.8278, "step": 3400 }, { "epoch": 1.58, "grad_norm": 15.56125545501709, "learning_rate": 3.5e-06, "loss": 0.8853, "step": 3425 }, { "epoch": 1.6, "grad_norm": 13.465311050415039, "learning_rate": 3.444444444444445e-06, "loss": 0.8877, "step": 3450 }, { "epoch": 1.61, "grad_norm": 19.25458335876465, "learning_rate": 3.3888888888888893e-06, "loss": 0.8935, "step": 3475 }, { "epoch": 1.62, "grad_norm": 17.35223960876465, "learning_rate": 3.3333333333333333e-06, "loss": 0.8421, "step": 3500 }, { "epoch": 1.63, "grad_norm": 17.846412658691406, "learning_rate": 3.277777777777778e-06, "loss": 0.791, "step": 3525 }, { "epoch": 1.64, "grad_norm": 12.937151908874512, "learning_rate": 3.2222222222222227e-06, "loss": 0.811, "step": 3550 }, { "epoch": 1.65, "grad_norm": 19.468202590942383, "learning_rate": 3.1666666666666667e-06, "loss": 0.8527, "step": 3575 }, { "epoch": 1.66, "grad_norm": 14.417373657226562, "learning_rate": 3.1111111111111116e-06, "loss": 0.8507, "step": 3600 }, { "epoch": 1.68, "grad_norm": 19.41051483154297, "learning_rate": 3.055555555555556e-06, "loss": 0.8945, "step": 3625 }, { "epoch": 1.69, "grad_norm": 17.270503997802734, "learning_rate": 3e-06, "loss": 0.8866, "step": 3650 }, { "epoch": 1.7, "grad_norm": 15.31128215789795, "learning_rate": 2.944444444444445e-06, "loss": 0.8862, "step": 3675 }, { "epoch": 1.71, "grad_norm": 16.625324249267578, "learning_rate": 2.888888888888889e-06, "loss": 0.8744, "step": 3700 }, { "epoch": 1.72, "grad_norm": 23.434606552124023, "learning_rate": 2.8333333333333335e-06, "loss": 0.927, "step": 3725 }, { "epoch": 1.73, "grad_norm": 14.484862327575684, "learning_rate": 2.7777777777777783e-06, "loss": 0.8662, "step": 3750 }, { "epoch": 1.75, "grad_norm": 15.611286163330078, "learning_rate": 2.7222222222222224e-06, "loss": 0.8296, "step": 3775 }, { "epoch": 1.76, "grad_norm": 14.025354385375977, "learning_rate": 2.666666666666667e-06, "loss": 0.8567, "step": 3800 }, { "epoch": 1.77, "grad_norm": 17.058929443359375, "learning_rate": 2.6111111111111113e-06, "loss": 0.8872, "step": 3825 }, { "epoch": 1.78, "grad_norm": 18.206918716430664, "learning_rate": 2.5555555555555557e-06, "loss": 0.9316, "step": 3850 }, { "epoch": 1.79, "grad_norm": 14.466046333312988, "learning_rate": 2.5e-06, "loss": 0.8486, "step": 3875 }, { "epoch": 1.8, "grad_norm": 16.97430992126465, "learning_rate": 2.4444444444444447e-06, "loss": 0.8618, "step": 3900 }, { "epoch": 1.81, "grad_norm": 11.907506942749023, "learning_rate": 2.388888888888889e-06, "loss": 0.7407, "step": 3925 }, { "epoch": 1.83, "grad_norm": 17.779897689819336, "learning_rate": 2.3333333333333336e-06, "loss": 0.762, "step": 3950 }, { "epoch": 1.84, "grad_norm": 14.020506858825684, "learning_rate": 2.277777777777778e-06, "loss": 0.8425, "step": 3975 }, { "epoch": 1.85, "grad_norm": 15.239115715026855, "learning_rate": 2.222222222222222e-06, "loss": 0.8402, "step": 4000 }, { "epoch": 1.85, "eval_cer": 53.700996775354604, "eval_loss": 1.0890547037124634, "eval_runtime": 1101.5884, "eval_samples_per_second": 3.927, "eval_steps_per_second": 0.491, "eval_wer": 75.64947360448383, "step": 4000 }, { "epoch": 1.86, "grad_norm": 16.369224548339844, "learning_rate": 2.166666666666667e-06, "loss": 0.9486, "step": 4025 }, { "epoch": 1.87, "grad_norm": 13.583636283874512, "learning_rate": 2.1111111111111114e-06, "loss": 0.8256, "step": 4050 }, { "epoch": 1.88, "grad_norm": 11.406463623046875, "learning_rate": 2.0555555555555555e-06, "loss": 0.7751, "step": 4075 }, { "epoch": 1.9, "grad_norm": 17.16242790222168, "learning_rate": 2.0000000000000003e-06, "loss": 0.777, "step": 4100 }, { "epoch": 1.91, "grad_norm": 10.725821495056152, "learning_rate": 1.944444444444445e-06, "loss": 0.7842, "step": 4125 }, { "epoch": 1.92, "grad_norm": 18.373470306396484, "learning_rate": 1.888888888888889e-06, "loss": 0.7934, "step": 4150 }, { "epoch": 1.93, "grad_norm": 19.23195457458496, "learning_rate": 1.8333333333333333e-06, "loss": 0.9092, "step": 4175 }, { "epoch": 1.94, "grad_norm": 16.823266983032227, "learning_rate": 1.777777777777778e-06, "loss": 0.8693, "step": 4200 }, { "epoch": 1.95, "grad_norm": 17.290674209594727, "learning_rate": 1.7222222222222224e-06, "loss": 0.8973, "step": 4225 }, { "epoch": 1.96, "grad_norm": 17.71505355834961, "learning_rate": 1.6666666666666667e-06, "loss": 0.8385, "step": 4250 }, { "epoch": 1.98, "grad_norm": 16.774585723876953, "learning_rate": 1.6111111111111113e-06, "loss": 0.7899, "step": 4275 }, { "epoch": 1.99, "grad_norm": 19.144384384155273, "learning_rate": 1.5555555555555558e-06, "loss": 0.9822, "step": 4300 }, { "epoch": 2.0, "grad_norm": 16.863649368286133, "learning_rate": 1.5e-06, "loss": 0.7994, "step": 4325 }, { "epoch": 2.01, "grad_norm": 13.130481719970703, "learning_rate": 1.4444444444444445e-06, "loss": 0.5804, "step": 4350 }, { "epoch": 2.02, "grad_norm": 19.954849243164062, "learning_rate": 1.3888888888888892e-06, "loss": 0.7005, "step": 4375 }, { "epoch": 2.03, "grad_norm": 16.30689811706543, "learning_rate": 1.3333333333333334e-06, "loss": 0.6735, "step": 4400 }, { "epoch": 2.05, "grad_norm": 12.148699760437012, "learning_rate": 1.2777777777777779e-06, "loss": 0.6317, "step": 4425 }, { "epoch": 2.06, "grad_norm": 9.524985313415527, "learning_rate": 1.2222222222222223e-06, "loss": 0.5642, "step": 4450 }, { "epoch": 2.07, "grad_norm": 17.790037155151367, "learning_rate": 1.1666666666666668e-06, "loss": 0.5801, "step": 4475 }, { "epoch": 2.08, "grad_norm": 11.228938102722168, "learning_rate": 1.111111111111111e-06, "loss": 0.7253, "step": 4500 }, { "epoch": 2.09, "grad_norm": 15.2631254196167, "learning_rate": 1.0555555555555557e-06, "loss": 0.6465, "step": 4525 }, { "epoch": 2.1, "grad_norm": 20.890735626220703, "learning_rate": 1.0000000000000002e-06, "loss": 0.6849, "step": 4550 }, { "epoch": 2.12, "grad_norm": 15.065770149230957, "learning_rate": 9.444444444444445e-07, "loss": 0.5892, "step": 4575 }, { "epoch": 2.13, "grad_norm": 10.663036346435547, "learning_rate": 8.88888888888889e-07, "loss": 0.5681, "step": 4600 }, { "epoch": 2.14, "grad_norm": 17.5535831451416, "learning_rate": 8.333333333333333e-07, "loss": 0.6496, "step": 4625 }, { "epoch": 2.15, "grad_norm": 14.98912239074707, "learning_rate": 7.777777777777779e-07, "loss": 0.6773, "step": 4650 }, { "epoch": 2.16, "grad_norm": 14.315783500671387, "learning_rate": 7.222222222222222e-07, "loss": 0.6852, "step": 4675 }, { "epoch": 2.17, "grad_norm": 17.369815826416016, "learning_rate": 6.666666666666667e-07, "loss": 0.6886, "step": 4700 }, { "epoch": 2.18, "grad_norm": 9.153715133666992, "learning_rate": 6.111111111111112e-07, "loss": 0.6393, "step": 4725 }, { "epoch": 2.2, "grad_norm": 7.958740234375, "learning_rate": 5.555555555555555e-07, "loss": 0.6516, "step": 4750 }, { "epoch": 2.21, "grad_norm": 12.058722496032715, "learning_rate": 5.000000000000001e-07, "loss": 0.6594, "step": 4775 }, { "epoch": 2.22, "grad_norm": 10.650951385498047, "learning_rate": 4.444444444444445e-07, "loss": 0.6383, "step": 4800 }, { "epoch": 2.23, "grad_norm": 9.111865043640137, "learning_rate": 3.8888888888888895e-07, "loss": 0.5531, "step": 4825 }, { "epoch": 2.24, "grad_norm": 15.841039657592773, "learning_rate": 3.3333333333333335e-07, "loss": 0.6738, "step": 4850 }, { "epoch": 2.25, "grad_norm": 16.60221290588379, "learning_rate": 2.7777777777777776e-07, "loss": 0.643, "step": 4875 }, { "epoch": 2.27, "grad_norm": 17.007307052612305, "learning_rate": 2.2222222222222224e-07, "loss": 0.6843, "step": 4900 }, { "epoch": 2.28, "grad_norm": 13.0649995803833, "learning_rate": 1.6666666666666668e-07, "loss": 0.6416, "step": 4925 }, { "epoch": 2.29, "grad_norm": 15.913739204406738, "learning_rate": 1.1111111111111112e-07, "loss": 0.6124, "step": 4950 }, { "epoch": 2.3, "grad_norm": 17.32563018798828, "learning_rate": 5.555555555555556e-08, "loss": 0.6627, "step": 4975 }, { "epoch": 2.31, "grad_norm": 16.87995147705078, "learning_rate": 0.0, "loss": 0.6894, "step": 5000 }, { "epoch": 2.31, "eval_cer": 57.09752532338271, "eval_loss": 1.100616455078125, "eval_runtime": 1127.1011, "eval_samples_per_second": 3.838, "eval_steps_per_second": 0.48, "eval_wer": 79.2130576384155, "step": 5000 } ], "logging_steps": 25, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "total_flos": 1.154283884199936e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }