{ "best_metric": 68.70090634441087, "best_model_checkpoint": "./whisper-small-dialect_egyptian/checkpoint-5000", "epoch": 2.8200789622109417, "eval_steps": 1000, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 122.04756164550781, "learning_rate": 5.000000000000001e-07, "loss": 4.3768, "step": 25 }, { "epoch": 0.03, "grad_norm": 40.10433578491211, "learning_rate": 1.0000000000000002e-06, "loss": 3.742, "step": 50 }, { "epoch": 0.04, "grad_norm": 35.64628982543945, "learning_rate": 1.5e-06, "loss": 3.1278, "step": 75 }, { "epoch": 0.06, "grad_norm": 36.580284118652344, "learning_rate": 2.0000000000000003e-06, "loss": 2.4904, "step": 100 }, { "epoch": 0.07, "grad_norm": 23.064411163330078, "learning_rate": 2.5e-06, "loss": 1.9741, "step": 125 }, { "epoch": 0.08, "grad_norm": 25.443105697631836, "learning_rate": 3e-06, "loss": 1.993, "step": 150 }, { "epoch": 0.1, "grad_norm": 25.980615615844727, "learning_rate": 3.5e-06, "loss": 1.8217, "step": 175 }, { "epoch": 0.11, "grad_norm": 20.749223709106445, "learning_rate": 4.000000000000001e-06, "loss": 1.7756, "step": 200 }, { "epoch": 0.13, "grad_norm": 26.173999786376953, "learning_rate": 4.5e-06, "loss": 1.8824, "step": 225 }, { "epoch": 0.14, "grad_norm": 26.512161254882812, "learning_rate": 5e-06, "loss": 1.7371, "step": 250 }, { "epoch": 0.16, "grad_norm": 23.805395126342773, "learning_rate": 5.500000000000001e-06, "loss": 1.5593, "step": 275 }, { "epoch": 0.17, "grad_norm": 33.373779296875, "learning_rate": 6e-06, "loss": 1.4868, "step": 300 }, { "epoch": 0.18, "grad_norm": 27.45977783203125, "learning_rate": 6.5000000000000004e-06, "loss": 1.5585, "step": 325 }, { "epoch": 0.2, "grad_norm": 24.94574737548828, "learning_rate": 7e-06, "loss": 1.3193, "step": 350 }, { "epoch": 0.21, "grad_norm": 18.338777542114258, "learning_rate": 7.500000000000001e-06, "loss": 1.1767, "step": 375 }, { "epoch": 0.23, "grad_norm": 26.012649536132812, "learning_rate": 8.000000000000001e-06, "loss": 1.2317, "step": 400 }, { "epoch": 0.24, "grad_norm": 23.6628360748291, "learning_rate": 8.5e-06, "loss": 1.1083, "step": 425 }, { "epoch": 0.25, "grad_norm": 24.645048141479492, "learning_rate": 9e-06, "loss": 1.1444, "step": 450 }, { "epoch": 0.27, "grad_norm": 15.89114761352539, "learning_rate": 9.5e-06, "loss": 1.1598, "step": 475 }, { "epoch": 0.28, "grad_norm": 20.403839111328125, "learning_rate": 1e-05, "loss": 1.28, "step": 500 }, { "epoch": 0.3, "grad_norm": 23.043336868286133, "learning_rate": 9.944444444444445e-06, "loss": 1.2277, "step": 525 }, { "epoch": 0.31, "grad_norm": 25.324621200561523, "learning_rate": 9.88888888888889e-06, "loss": 1.2154, "step": 550 }, { "epoch": 0.32, "grad_norm": 21.331308364868164, "learning_rate": 9.833333333333333e-06, "loss": 1.2405, "step": 575 }, { "epoch": 0.34, "grad_norm": 17.898706436157227, "learning_rate": 9.777777777777779e-06, "loss": 1.1903, "step": 600 }, { "epoch": 0.35, "grad_norm": 18.209653854370117, "learning_rate": 9.722222222222223e-06, "loss": 1.2725, "step": 625 }, { "epoch": 0.37, "grad_norm": 24.40468406677246, "learning_rate": 9.666666666666667e-06, "loss": 1.226, "step": 650 }, { "epoch": 0.38, "grad_norm": 22.115428924560547, "learning_rate": 9.611111111111112e-06, "loss": 1.1932, "step": 675 }, { "epoch": 0.39, "grad_norm": 18.02147674560547, "learning_rate": 9.555555555555556e-06, "loss": 1.1863, "step": 700 }, { "epoch": 0.41, "grad_norm": 26.181509017944336, "learning_rate": 9.5e-06, "loss": 1.2406, "step": 725 }, { "epoch": 0.42, "grad_norm": 21.838193893432617, "learning_rate": 9.444444444444445e-06, "loss": 1.1168, "step": 750 }, { "epoch": 0.44, "grad_norm": 21.846904754638672, "learning_rate": 9.38888888888889e-06, "loss": 1.1986, "step": 775 }, { "epoch": 0.45, "grad_norm": 20.488187789916992, "learning_rate": 9.333333333333334e-06, "loss": 1.1278, "step": 800 }, { "epoch": 0.47, "grad_norm": 20.44611167907715, "learning_rate": 9.277777777777778e-06, "loss": 1.2049, "step": 825 }, { "epoch": 0.48, "grad_norm": 22.499937057495117, "learning_rate": 9.222222222222224e-06, "loss": 1.1704, "step": 850 }, { "epoch": 0.49, "grad_norm": 26.488370895385742, "learning_rate": 9.166666666666666e-06, "loss": 1.2335, "step": 875 }, { "epoch": 0.51, "grad_norm": 21.434371948242188, "learning_rate": 9.111111111111112e-06, "loss": 1.0156, "step": 900 }, { "epoch": 0.52, "grad_norm": 34.55289840698242, "learning_rate": 9.055555555555556e-06, "loss": 1.3202, "step": 925 }, { "epoch": 0.54, "grad_norm": 20.672531127929688, "learning_rate": 9e-06, "loss": 1.1801, "step": 950 }, { "epoch": 0.55, "grad_norm": 23.088294982910156, "learning_rate": 8.944444444444446e-06, "loss": 1.1447, "step": 975 }, { "epoch": 0.56, "grad_norm": 20.51502799987793, "learning_rate": 8.888888888888888e-06, "loss": 1.1284, "step": 1000 }, { "epoch": 0.56, "eval_cer": 44.417841554934526, "eval_loss": 1.1110167503356934, "eval_runtime": 911.3479, "eval_samples_per_second": 3.891, "eval_steps_per_second": 0.487, "eval_wer": 76.0281973816717, "step": 1000 }, { "epoch": 0.58, "grad_norm": 17.626625061035156, "learning_rate": 8.833333333333334e-06, "loss": 1.0659, "step": 1025 }, { "epoch": 0.59, "grad_norm": 20.585145950317383, "learning_rate": 8.777777777777778e-06, "loss": 1.1016, "step": 1050 }, { "epoch": 0.61, "grad_norm": 21.167909622192383, "learning_rate": 8.722222222222224e-06, "loss": 1.205, "step": 1075 }, { "epoch": 0.62, "grad_norm": 23.734207153320312, "learning_rate": 8.666666666666668e-06, "loss": 1.1123, "step": 1100 }, { "epoch": 0.63, "grad_norm": 26.98702621459961, "learning_rate": 8.611111111111112e-06, "loss": 1.1399, "step": 1125 }, { "epoch": 0.65, "grad_norm": 22.44954490661621, "learning_rate": 8.555555555555556e-06, "loss": 1.1662, "step": 1150 }, { "epoch": 0.66, "grad_norm": 24.248973846435547, "learning_rate": 8.5e-06, "loss": 1.1006, "step": 1175 }, { "epoch": 0.68, "grad_norm": 21.817211151123047, "learning_rate": 8.444444444444446e-06, "loss": 1.2818, "step": 1200 }, { "epoch": 0.69, "grad_norm": 19.826051712036133, "learning_rate": 8.38888888888889e-06, "loss": 1.1063, "step": 1225 }, { "epoch": 0.71, "grad_norm": 22.592252731323242, "learning_rate": 8.333333333333334e-06, "loss": 1.0954, "step": 1250 }, { "epoch": 0.72, "grad_norm": 19.33829689025879, "learning_rate": 8.277777777777778e-06, "loss": 1.0621, "step": 1275 }, { "epoch": 0.73, "grad_norm": 18.287986755371094, "learning_rate": 8.222222222222222e-06, "loss": 1.0543, "step": 1300 }, { "epoch": 0.75, "grad_norm": 18.611087799072266, "learning_rate": 8.166666666666668e-06, "loss": 0.9963, "step": 1325 }, { "epoch": 0.76, "grad_norm": 10.573747634887695, "learning_rate": 8.111111111111112e-06, "loss": 1.1583, "step": 1350 }, { "epoch": 0.78, "grad_norm": 20.298009872436523, "learning_rate": 8.055555555555557e-06, "loss": 1.1522, "step": 1375 }, { "epoch": 0.79, "grad_norm": 15.819123268127441, "learning_rate": 8.000000000000001e-06, "loss": 1.051, "step": 1400 }, { "epoch": 0.8, "grad_norm": 19.158708572387695, "learning_rate": 7.944444444444445e-06, "loss": 1.159, "step": 1425 }, { "epoch": 0.82, "grad_norm": 20.468978881835938, "learning_rate": 7.88888888888889e-06, "loss": 1.1176, "step": 1450 }, { "epoch": 0.83, "grad_norm": 18.97748565673828, "learning_rate": 7.833333333333333e-06, "loss": 1.1303, "step": 1475 }, { "epoch": 0.85, "grad_norm": 20.76470947265625, "learning_rate": 7.77777777777778e-06, "loss": 1.0628, "step": 1500 }, { "epoch": 0.86, "grad_norm": 20.96468162536621, "learning_rate": 7.722222222222223e-06, "loss": 1.116, "step": 1525 }, { "epoch": 0.87, "grad_norm": 16.63492202758789, "learning_rate": 7.666666666666667e-06, "loss": 1.119, "step": 1550 }, { "epoch": 0.89, "grad_norm": 19.015464782714844, "learning_rate": 7.611111111111111e-06, "loss": 1.0257, "step": 1575 }, { "epoch": 0.9, "grad_norm": 26.927936553955078, "learning_rate": 7.555555555555556e-06, "loss": 1.0344, "step": 1600 }, { "epoch": 0.92, "grad_norm": 24.408445358276367, "learning_rate": 7.500000000000001e-06, "loss": 1.0531, "step": 1625 }, { "epoch": 0.93, "grad_norm": 23.072290420532227, "learning_rate": 7.444444444444445e-06, "loss": 1.035, "step": 1650 }, { "epoch": 0.94, "grad_norm": 22.863815307617188, "learning_rate": 7.38888888888889e-06, "loss": 1.1428, "step": 1675 }, { "epoch": 0.96, "grad_norm": 20.69190788269043, "learning_rate": 7.333333333333333e-06, "loss": 1.0685, "step": 1700 }, { "epoch": 0.97, "grad_norm": 22.434356689453125, "learning_rate": 7.277777777777778e-06, "loss": 1.0722, "step": 1725 }, { "epoch": 0.99, "grad_norm": 23.94415283203125, "learning_rate": 7.222222222222223e-06, "loss": 1.045, "step": 1750 }, { "epoch": 1.0, "grad_norm": 13.114087104797363, "learning_rate": 7.166666666666667e-06, "loss": 1.0877, "step": 1775 }, { "epoch": 1.02, "grad_norm": 12.52387523651123, "learning_rate": 7.111111111111112e-06, "loss": 0.9332, "step": 1800 }, { "epoch": 1.03, "grad_norm": 17.861310958862305, "learning_rate": 7.055555555555557e-06, "loss": 0.9197, "step": 1825 }, { "epoch": 1.04, "grad_norm": 15.940573692321777, "learning_rate": 7e-06, "loss": 0.7525, "step": 1850 }, { "epoch": 1.06, "grad_norm": 13.879676818847656, "learning_rate": 6.944444444444445e-06, "loss": 0.8269, "step": 1875 }, { "epoch": 1.07, "grad_norm": 18.230445861816406, "learning_rate": 6.88888888888889e-06, "loss": 0.8411, "step": 1900 }, { "epoch": 1.09, "grad_norm": 15.949620246887207, "learning_rate": 6.833333333333334e-06, "loss": 0.8183, "step": 1925 }, { "epoch": 1.1, "grad_norm": 16.57105827331543, "learning_rate": 6.777777777777779e-06, "loss": 0.7672, "step": 1950 }, { "epoch": 1.11, "grad_norm": 20.67877769470215, "learning_rate": 6.7222222222222235e-06, "loss": 0.8034, "step": 1975 }, { "epoch": 1.13, "grad_norm": 20.689613342285156, "learning_rate": 6.666666666666667e-06, "loss": 0.7338, "step": 2000 }, { "epoch": 1.13, "eval_cer": 45.29547524099601, "eval_loss": 1.0379834175109863, "eval_runtime": 916.1939, "eval_samples_per_second": 3.87, "eval_steps_per_second": 0.485, "eval_wer": 73.40584088620342, "step": 2000 }, { "epoch": 1.14, "grad_norm": 17.04854965209961, "learning_rate": 6.6111111111111115e-06, "loss": 0.7875, "step": 2025 }, { "epoch": 1.16, "grad_norm": 15.2536039352417, "learning_rate": 6.555555555555556e-06, "loss": 0.7838, "step": 2050 }, { "epoch": 1.17, "grad_norm": 19.358837127685547, "learning_rate": 6.5000000000000004e-06, "loss": 0.8523, "step": 2075 }, { "epoch": 1.18, "grad_norm": 16.287504196166992, "learning_rate": 6.444444444444445e-06, "loss": 0.798, "step": 2100 }, { "epoch": 1.2, "grad_norm": 19.646032333374023, "learning_rate": 6.3888888888888885e-06, "loss": 0.8304, "step": 2125 }, { "epoch": 1.21, "grad_norm": 21.761016845703125, "learning_rate": 6.333333333333333e-06, "loss": 0.8689, "step": 2150 }, { "epoch": 1.23, "grad_norm": 20.19859504699707, "learning_rate": 6.277777777777778e-06, "loss": 0.8861, "step": 2175 }, { "epoch": 1.24, "grad_norm": 21.027082443237305, "learning_rate": 6.222222222222223e-06, "loss": 0.7618, "step": 2200 }, { "epoch": 1.25, "grad_norm": 16.476125717163086, "learning_rate": 6.166666666666667e-06, "loss": 0.7588, "step": 2225 }, { "epoch": 1.27, "grad_norm": 19.24110984802246, "learning_rate": 6.111111111111112e-06, "loss": 0.8258, "step": 2250 }, { "epoch": 1.28, "grad_norm": 19.605239868164062, "learning_rate": 6.055555555555555e-06, "loss": 0.7461, "step": 2275 }, { "epoch": 1.3, "grad_norm": 20.91330909729004, "learning_rate": 6e-06, "loss": 0.6665, "step": 2300 }, { "epoch": 1.31, "grad_norm": 16.637998580932617, "learning_rate": 5.944444444444445e-06, "loss": 0.773, "step": 2325 }, { "epoch": 1.33, "grad_norm": 15.381094932556152, "learning_rate": 5.88888888888889e-06, "loss": 0.7393, "step": 2350 }, { "epoch": 1.34, "grad_norm": 15.25363826751709, "learning_rate": 5.833333333333334e-06, "loss": 0.8684, "step": 2375 }, { "epoch": 1.35, "grad_norm": 19.22905921936035, "learning_rate": 5.777777777777778e-06, "loss": 0.7679, "step": 2400 }, { "epoch": 1.37, "grad_norm": 17.750835418701172, "learning_rate": 5.722222222222222e-06, "loss": 0.7728, "step": 2425 }, { "epoch": 1.38, "grad_norm": 14.001325607299805, "learning_rate": 5.666666666666667e-06, "loss": 0.8133, "step": 2450 }, { "epoch": 1.4, "grad_norm": 17.449642181396484, "learning_rate": 5.611111111111112e-06, "loss": 0.7363, "step": 2475 }, { "epoch": 1.41, "grad_norm": 17.825336456298828, "learning_rate": 5.555555555555557e-06, "loss": 0.8418, "step": 2500 }, { "epoch": 1.42, "grad_norm": 17.27322006225586, "learning_rate": 5.500000000000001e-06, "loss": 0.6961, "step": 2525 }, { "epoch": 1.44, "grad_norm": 14.609389305114746, "learning_rate": 5.444444444444445e-06, "loss": 0.8457, "step": 2550 }, { "epoch": 1.45, "grad_norm": 19.48899269104004, "learning_rate": 5.388888888888889e-06, "loss": 0.7922, "step": 2575 }, { "epoch": 1.47, "grad_norm": 16.829326629638672, "learning_rate": 5.333333333333334e-06, "loss": 0.8122, "step": 2600 }, { "epoch": 1.48, "grad_norm": 17.410572052001953, "learning_rate": 5.2777777777777785e-06, "loss": 0.7086, "step": 2625 }, { "epoch": 1.49, "grad_norm": 20.42573356628418, "learning_rate": 5.2222222222222226e-06, "loss": 0.7816, "step": 2650 }, { "epoch": 1.51, "grad_norm": 20.022361755371094, "learning_rate": 5.1666666666666675e-06, "loss": 0.7642, "step": 2675 }, { "epoch": 1.52, "grad_norm": 12.024133682250977, "learning_rate": 5.1111111111111115e-06, "loss": 0.7466, "step": 2700 }, { "epoch": 1.54, "grad_norm": 12.811111450195312, "learning_rate": 5.0555555555555555e-06, "loss": 0.786, "step": 2725 }, { "epoch": 1.55, "grad_norm": 12.076204299926758, "learning_rate": 5e-06, "loss": 0.7625, "step": 2750 }, { "epoch": 1.57, "grad_norm": 12.905007362365723, "learning_rate": 4.944444444444445e-06, "loss": 0.6815, "step": 2775 }, { "epoch": 1.58, "grad_norm": 15.16416072845459, "learning_rate": 4.888888888888889e-06, "loss": 0.7714, "step": 2800 }, { "epoch": 1.59, "grad_norm": 15.578303337097168, "learning_rate": 4.833333333333333e-06, "loss": 0.7348, "step": 2825 }, { "epoch": 1.61, "grad_norm": 9.60951042175293, "learning_rate": 4.777777777777778e-06, "loss": 0.8109, "step": 2850 }, { "epoch": 1.62, "grad_norm": 19.289772033691406, "learning_rate": 4.722222222222222e-06, "loss": 0.8359, "step": 2875 }, { "epoch": 1.64, "grad_norm": 15.233059883117676, "learning_rate": 4.666666666666667e-06, "loss": 0.8516, "step": 2900 }, { "epoch": 1.65, "grad_norm": 18.676319122314453, "learning_rate": 4.611111111111112e-06, "loss": 0.8884, "step": 2925 }, { "epoch": 1.66, "grad_norm": 12.788491249084473, "learning_rate": 4.555555555555556e-06, "loss": 0.7779, "step": 2950 }, { "epoch": 1.68, "grad_norm": 11.790907859802246, "learning_rate": 4.5e-06, "loss": 0.739, "step": 2975 }, { "epoch": 1.69, "grad_norm": 14.475822448730469, "learning_rate": 4.444444444444444e-06, "loss": 0.7393, "step": 3000 }, { "epoch": 1.69, "eval_cer": 41.74961534838446, "eval_loss": 1.0024151802062988, "eval_runtime": 903.7019, "eval_samples_per_second": 3.924, "eval_steps_per_second": 0.491, "eval_wer": 69.41389728096676, "step": 3000 }, { "epoch": 1.71, "grad_norm": 13.670875549316406, "learning_rate": 4.388888888888889e-06, "loss": 0.7552, "step": 3025 }, { "epoch": 1.72, "grad_norm": 17.736740112304688, "learning_rate": 4.333333333333334e-06, "loss": 0.7793, "step": 3050 }, { "epoch": 1.73, "grad_norm": 15.000802040100098, "learning_rate": 4.277777777777778e-06, "loss": 0.7238, "step": 3075 }, { "epoch": 1.75, "grad_norm": 13.509275436401367, "learning_rate": 4.222222222222223e-06, "loss": 0.7613, "step": 3100 }, { "epoch": 1.76, "grad_norm": 19.606420516967773, "learning_rate": 4.166666666666667e-06, "loss": 0.6867, "step": 3125 }, { "epoch": 1.78, "grad_norm": 20.64481544494629, "learning_rate": 4.111111111111111e-06, "loss": 0.8513, "step": 3150 }, { "epoch": 1.79, "grad_norm": 16.68463706970215, "learning_rate": 4.055555555555556e-06, "loss": 0.754, "step": 3175 }, { "epoch": 1.8, "grad_norm": 13.40403938293457, "learning_rate": 4.000000000000001e-06, "loss": 0.7143, "step": 3200 }, { "epoch": 1.82, "grad_norm": 15.898687362670898, "learning_rate": 3.944444444444445e-06, "loss": 0.7647, "step": 3225 }, { "epoch": 1.83, "grad_norm": 18.194766998291016, "learning_rate": 3.88888888888889e-06, "loss": 0.7521, "step": 3250 }, { "epoch": 1.85, "grad_norm": 14.401911735534668, "learning_rate": 3.833333333333334e-06, "loss": 0.7586, "step": 3275 }, { "epoch": 1.86, "grad_norm": 17.611783981323242, "learning_rate": 3.777777777777778e-06, "loss": 0.7286, "step": 3300 }, { "epoch": 1.88, "grad_norm": 22.452648162841797, "learning_rate": 3.7222222222222225e-06, "loss": 0.7609, "step": 3325 }, { "epoch": 1.89, "grad_norm": 19.66781997680664, "learning_rate": 3.6666666666666666e-06, "loss": 0.7042, "step": 3350 }, { "epoch": 1.9, "grad_norm": 17.88923454284668, "learning_rate": 3.6111111111111115e-06, "loss": 0.7208, "step": 3375 }, { "epoch": 1.92, "grad_norm": 14.06191635131836, "learning_rate": 3.555555555555556e-06, "loss": 0.8189, "step": 3400 }, { "epoch": 1.93, "grad_norm": 17.72393798828125, "learning_rate": 3.5e-06, "loss": 0.7533, "step": 3425 }, { "epoch": 1.95, "grad_norm": 15.48258113861084, "learning_rate": 3.444444444444445e-06, "loss": 0.8195, "step": 3450 }, { "epoch": 1.96, "grad_norm": 13.074950218200684, "learning_rate": 3.3888888888888893e-06, "loss": 0.7019, "step": 3475 }, { "epoch": 1.97, "grad_norm": 13.651080131530762, "learning_rate": 3.3333333333333333e-06, "loss": 0.8497, "step": 3500 }, { "epoch": 1.99, "grad_norm": 15.199976921081543, "learning_rate": 3.277777777777778e-06, "loss": 0.7122, "step": 3525 }, { "epoch": 2.0, "grad_norm": 15.230713844299316, "learning_rate": 3.2222222222222227e-06, "loss": 0.6821, "step": 3550 }, { "epoch": 2.02, "grad_norm": 10.71396255493164, "learning_rate": 3.1666666666666667e-06, "loss": 0.5199, "step": 3575 }, { "epoch": 2.03, "grad_norm": 14.009171485900879, "learning_rate": 3.1111111111111116e-06, "loss": 0.5152, "step": 3600 }, { "epoch": 2.04, "grad_norm": 14.985831260681152, "learning_rate": 3.055555555555556e-06, "loss": 0.6146, "step": 3625 }, { "epoch": 2.06, "grad_norm": 11.974893569946289, "learning_rate": 3e-06, "loss": 0.5589, "step": 3650 }, { "epoch": 2.07, "grad_norm": 8.09765625, "learning_rate": 2.944444444444445e-06, "loss": 0.473, "step": 3675 }, { "epoch": 2.09, "grad_norm": 13.75256061553955, "learning_rate": 2.888888888888889e-06, "loss": 0.5609, "step": 3700 }, { "epoch": 2.1, "grad_norm": 17.261474609375, "learning_rate": 2.8333333333333335e-06, "loss": 0.5907, "step": 3725 }, { "epoch": 2.12, "grad_norm": 15.0508451461792, "learning_rate": 2.7777777777777783e-06, "loss": 0.541, "step": 3750 }, { "epoch": 2.13, "grad_norm": 13.755702018737793, "learning_rate": 2.7222222222222224e-06, "loss": 0.6274, "step": 3775 }, { "epoch": 2.14, "grad_norm": 13.00074577331543, "learning_rate": 2.666666666666667e-06, "loss": 0.4982, "step": 3800 }, { "epoch": 2.16, "grad_norm": 14.188249588012695, "learning_rate": 2.6111111111111113e-06, "loss": 0.4688, "step": 3825 }, { "epoch": 2.17, "grad_norm": 17.107295989990234, "learning_rate": 2.5555555555555557e-06, "loss": 0.5059, "step": 3850 }, { "epoch": 2.19, "grad_norm": 11.36642074584961, "learning_rate": 2.5e-06, "loss": 0.563, "step": 3875 }, { "epoch": 2.2, "grad_norm": 10.15700912475586, "learning_rate": 2.4444444444444447e-06, "loss": 0.5302, "step": 3900 }, { "epoch": 2.21, "grad_norm": 13.058419227600098, "learning_rate": 2.388888888888889e-06, "loss": 0.5041, "step": 3925 }, { "epoch": 2.23, "grad_norm": 15.872893333435059, "learning_rate": 2.3333333333333336e-06, "loss": 0.5845, "step": 3950 }, { "epoch": 2.24, "grad_norm": 9.651241302490234, "learning_rate": 2.277777777777778e-06, "loss": 0.5145, "step": 3975 }, { "epoch": 2.26, "grad_norm": 11.108153343200684, "learning_rate": 2.222222222222222e-06, "loss": 0.556, "step": 4000 }, { "epoch": 2.26, "eval_cer": 43.06371086758564, "eval_loss": 1.0082812309265137, "eval_runtime": 913.0221, "eval_samples_per_second": 3.884, "eval_steps_per_second": 0.486, "eval_wer": 69.88922457200403, "step": 4000 }, { "epoch": 2.27, "grad_norm": 14.091168403625488, "learning_rate": 2.166666666666667e-06, "loss": 0.4955, "step": 4025 }, { "epoch": 2.28, "grad_norm": 16.199934005737305, "learning_rate": 2.1111111111111114e-06, "loss": 0.529, "step": 4050 }, { "epoch": 2.3, "grad_norm": 11.609833717346191, "learning_rate": 2.0555555555555555e-06, "loss": 0.4852, "step": 4075 }, { "epoch": 2.31, "grad_norm": 12.764281272888184, "learning_rate": 2.0000000000000003e-06, "loss": 0.5283, "step": 4100 }, { "epoch": 2.33, "grad_norm": 8.958930015563965, "learning_rate": 1.944444444444445e-06, "loss": 0.4375, "step": 4125 }, { "epoch": 2.34, "grad_norm": 12.987662315368652, "learning_rate": 1.888888888888889e-06, "loss": 0.4881, "step": 4150 }, { "epoch": 2.35, "grad_norm": 10.337594032287598, "learning_rate": 1.8333333333333333e-06, "loss": 0.5212, "step": 4175 }, { "epoch": 2.37, "grad_norm": 15.952834129333496, "learning_rate": 1.777777777777778e-06, "loss": 0.5109, "step": 4200 }, { "epoch": 2.38, "grad_norm": 11.717411041259766, "learning_rate": 1.7222222222222224e-06, "loss": 0.5598, "step": 4225 }, { "epoch": 2.4, "grad_norm": 11.100090026855469, "learning_rate": 1.6666666666666667e-06, "loss": 0.5073, "step": 4250 }, { "epoch": 2.41, "grad_norm": 17.794734954833984, "learning_rate": 1.6111111111111113e-06, "loss": 0.522, "step": 4275 }, { "epoch": 2.43, "grad_norm": 16.543949127197266, "learning_rate": 1.5555555555555558e-06, "loss": 0.577, "step": 4300 }, { "epoch": 2.44, "grad_norm": 14.382917404174805, "learning_rate": 1.5e-06, "loss": 0.4889, "step": 4325 }, { "epoch": 2.45, "grad_norm": 6.578629970550537, "learning_rate": 1.4444444444444445e-06, "loss": 0.4715, "step": 4350 }, { "epoch": 2.47, "grad_norm": 14.872430801391602, "learning_rate": 1.3888888888888892e-06, "loss": 0.6008, "step": 4375 }, { "epoch": 2.48, "grad_norm": 16.687318801879883, "learning_rate": 1.3333333333333334e-06, "loss": 0.489, "step": 4400 }, { "epoch": 2.5, "grad_norm": 16.002120971679688, "learning_rate": 1.2777777777777779e-06, "loss": 0.5201, "step": 4425 }, { "epoch": 2.51, "grad_norm": 8.543059349060059, "learning_rate": 1.2222222222222223e-06, "loss": 0.5222, "step": 4450 }, { "epoch": 2.52, "grad_norm": 14.853689193725586, "learning_rate": 1.1666666666666668e-06, "loss": 0.5044, "step": 4475 }, { "epoch": 2.54, "grad_norm": 19.702091217041016, "learning_rate": 1.111111111111111e-06, "loss": 0.5847, "step": 4500 }, { "epoch": 2.55, "grad_norm": 18.463783264160156, "learning_rate": 1.0555555555555557e-06, "loss": 0.6202, "step": 4525 }, { "epoch": 2.57, "grad_norm": 10.704026222229004, "learning_rate": 1.0000000000000002e-06, "loss": 0.5371, "step": 4550 }, { "epoch": 2.58, "grad_norm": 13.03596305847168, "learning_rate": 9.444444444444445e-07, "loss": 0.5034, "step": 4575 }, { "epoch": 2.59, "grad_norm": 8.77291488647461, "learning_rate": 8.88888888888889e-07, "loss": 0.5054, "step": 4600 }, { "epoch": 2.61, "grad_norm": 12.519658088684082, "learning_rate": 8.333333333333333e-07, "loss": 0.4917, "step": 4625 }, { "epoch": 2.62, "grad_norm": 13.293140411376953, "learning_rate": 7.777777777777779e-07, "loss": 0.5367, "step": 4650 }, { "epoch": 2.64, "grad_norm": 10.27344799041748, "learning_rate": 7.222222222222222e-07, "loss": 0.5368, "step": 4675 }, { "epoch": 2.65, "grad_norm": 17.37248992919922, "learning_rate": 6.666666666666667e-07, "loss": 0.5907, "step": 4700 }, { "epoch": 2.66, "grad_norm": 11.495616912841797, "learning_rate": 6.111111111111112e-07, "loss": 0.5692, "step": 4725 }, { "epoch": 2.68, "grad_norm": 13.74099349975586, "learning_rate": 5.555555555555555e-07, "loss": 0.4925, "step": 4750 }, { "epoch": 2.69, "grad_norm": 14.522932052612305, "learning_rate": 5.000000000000001e-07, "loss": 0.5133, "step": 4775 }, { "epoch": 2.71, "grad_norm": 14.491698265075684, "learning_rate": 4.444444444444445e-07, "loss": 0.5781, "step": 4800 }, { "epoch": 2.72, "grad_norm": 7.774211406707764, "learning_rate": 3.8888888888888895e-07, "loss": 0.4805, "step": 4825 }, { "epoch": 2.74, "grad_norm": 15.983479499816895, "learning_rate": 3.3333333333333335e-07, "loss": 0.582, "step": 4850 }, { "epoch": 2.75, "grad_norm": 14.96191692352295, "learning_rate": 2.7777777777777776e-07, "loss": 0.5464, "step": 4875 }, { "epoch": 2.76, "grad_norm": 11.486920356750488, "learning_rate": 2.2222222222222224e-07, "loss": 0.5766, "step": 4900 }, { "epoch": 2.78, "grad_norm": 9.038966178894043, "learning_rate": 1.6666666666666668e-07, "loss": 0.4745, "step": 4925 }, { "epoch": 2.79, "grad_norm": 13.27407455444336, "learning_rate": 1.1111111111111112e-07, "loss": 0.55, "step": 4950 }, { "epoch": 2.81, "grad_norm": 19.504352569580078, "learning_rate": 5.555555555555556e-08, "loss": 0.5034, "step": 4975 }, { "epoch": 2.82, "grad_norm": 12.238847732543945, "learning_rate": 0.0, "loss": 0.4653, "step": 5000 }, { "epoch": 2.82, "eval_cer": 41.698590134078565, "eval_loss": 1.0031510591506958, "eval_runtime": 912.0081, "eval_samples_per_second": 3.888, "eval_steps_per_second": 0.487, "eval_wer": 68.70090634441087, "step": 5000 } ], "logging_steps": 25, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "total_flos": 1.154110732959744e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }