{ "best_metric": null, "best_model_checkpoint": null, "epoch": 32.30769230769231, "global_step": 2100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15, "learning_rate": 9e-07, "loss": 18.8043, "step": 10 }, { "epoch": 0.31, "learning_rate": 1.8e-06, "loss": 23.3938, "step": 20 }, { "epoch": 0.46, "learning_rate": 2.8000000000000003e-06, "loss": 20.8738, "step": 30 }, { "epoch": 0.62, "learning_rate": 3.8e-06, "loss": 19.9257, "step": 40 }, { "epoch": 0.77, "learning_rate": 4.800000000000001e-06, "loss": 22.7793, "step": 50 }, { "epoch": 0.92, "learning_rate": 5.8e-06, "loss": 18.2911, "step": 60 }, { "epoch": 1.08, "learning_rate": 6.800000000000001e-06, "loss": 20.4742, "step": 70 }, { "epoch": 1.23, "learning_rate": 7.7e-06, "loss": 19.7014, "step": 80 }, { "epoch": 1.38, "learning_rate": 8.7e-06, "loss": 15.6078, "step": 90 }, { "epoch": 1.54, "learning_rate": 9.7e-06, "loss": 15.1693, "step": 100 }, { "epoch": 1.54, "eval_cer": 0.9953039832285115, "eval_loss": 12.956756591796875, "eval_runtime": 111.4724, "eval_samples_per_second": 3.059, "eval_steps_per_second": 0.386, "eval_wer": 1.0, "step": 100 }, { "epoch": 1.69, "learning_rate": 1.0700000000000001e-05, "loss": 12.7027, "step": 110 }, { "epoch": 1.85, "learning_rate": 1.1700000000000001e-05, "loss": 10.9656, "step": 120 }, { "epoch": 2.0, "learning_rate": 1.27e-05, "loss": 10.6914, "step": 130 }, { "epoch": 2.15, "learning_rate": 1.3700000000000001e-05, "loss": 7.6051, "step": 140 }, { "epoch": 2.31, "learning_rate": 1.47e-05, "loss": 8.7814, "step": 150 }, { "epoch": 2.46, "learning_rate": 1.5700000000000002e-05, "loss": 7.5898, "step": 160 }, { "epoch": 2.62, "learning_rate": 1.6700000000000003e-05, "loss": 7.0232, "step": 170 }, { "epoch": 2.77, "learning_rate": 1.77e-05, "loss": 6.8873, "step": 180 }, { "epoch": 2.92, "learning_rate": 1.87e-05, "loss": 5.7658, "step": 190 }, { "epoch": 3.08, "learning_rate": 1.97e-05, "loss": 6.434, "step": 200 }, { "epoch": 3.08, "eval_cer": 0.9953039832285115, "eval_loss": 5.401930809020996, "eval_runtime": 111.63, "eval_samples_per_second": 3.055, "eval_steps_per_second": 0.385, "eval_wer": 1.0, "step": 200 }, { "epoch": 3.23, "learning_rate": 2.07e-05, "loss": 5.5544, "step": 210 }, { "epoch": 3.38, "learning_rate": 2.1700000000000002e-05, "loss": 5.2728, "step": 220 }, { "epoch": 3.54, "learning_rate": 2.2700000000000003e-05, "loss": 5.3269, "step": 230 }, { "epoch": 3.69, "learning_rate": 2.37e-05, "loss": 4.8495, "step": 240 }, { "epoch": 3.85, "learning_rate": 2.47e-05, "loss": 4.7196, "step": 250 }, { "epoch": 4.0, "learning_rate": 2.57e-05, "loss": 4.7143, "step": 260 }, { "epoch": 4.15, "learning_rate": 2.6700000000000002e-05, "loss": 4.0896, "step": 270 }, { "epoch": 4.31, "learning_rate": 2.7700000000000002e-05, "loss": 4.342, "step": 280 }, { "epoch": 4.46, "learning_rate": 2.87e-05, "loss": 3.9603, "step": 290 }, { "epoch": 4.62, "learning_rate": 2.97e-05, "loss": 3.9273, "step": 300 }, { "epoch": 4.62, "eval_cer": 0.9953039832285115, "eval_loss": 3.848849058151245, "eval_runtime": 111.5668, "eval_samples_per_second": 3.056, "eval_steps_per_second": 0.385, "eval_wer": 1.0, "step": 300 }, { "epoch": 4.77, "learning_rate": 3.07e-05, "loss": 3.9211, "step": 310 }, { "epoch": 4.92, "learning_rate": 3.1700000000000005e-05, "loss": 3.6815, "step": 320 }, { "epoch": 5.08, "learning_rate": 3.27e-05, "loss": 3.7151, "step": 330 }, { "epoch": 5.23, "learning_rate": 3.3700000000000006e-05, "loss": 3.6599, "step": 340 }, { "epoch": 5.38, "learning_rate": 3.4699999999999996e-05, "loss": 3.4484, "step": 350 }, { "epoch": 5.54, "learning_rate": 3.57e-05, "loss": 3.4587, "step": 360 }, { "epoch": 5.69, "learning_rate": 3.6700000000000004e-05, "loss": 3.3518, "step": 370 }, { "epoch": 5.85, "learning_rate": 3.77e-05, "loss": 3.3416, "step": 380 }, { "epoch": 6.0, "learning_rate": 3.8700000000000006e-05, "loss": 3.3674, "step": 390 }, { "epoch": 6.15, "learning_rate": 3.97e-05, "loss": 3.1977, "step": 400 }, { "epoch": 6.15, "eval_cer": 0.9953039832285115, "eval_loss": 3.2796881198883057, "eval_runtime": 111.0722, "eval_samples_per_second": 3.07, "eval_steps_per_second": 0.387, "eval_wer": 1.0, "step": 400 }, { "epoch": 6.31, "learning_rate": 4.07e-05, "loss": 3.3328, "step": 410 }, { "epoch": 6.46, "learning_rate": 4.17e-05, "loss": 3.2257, "step": 420 }, { "epoch": 6.62, "learning_rate": 4.27e-05, "loss": 3.2563, "step": 430 }, { "epoch": 6.77, "learning_rate": 4.3700000000000005e-05, "loss": 3.264, "step": 440 }, { "epoch": 6.92, "learning_rate": 4.47e-05, "loss": 3.166, "step": 450 }, { "epoch": 7.08, "learning_rate": 4.5700000000000006e-05, "loss": 3.223, "step": 460 }, { "epoch": 7.23, "learning_rate": 4.6700000000000003e-05, "loss": 3.1885, "step": 470 }, { "epoch": 7.38, "learning_rate": 4.77e-05, "loss": 3.182, "step": 480 }, { "epoch": 7.54, "learning_rate": 4.87e-05, "loss": 3.2332, "step": 490 }, { "epoch": 7.69, "learning_rate": 4.97e-05, "loss": 3.1592, "step": 500 }, { "epoch": 7.69, "eval_cer": 0.9953039832285115, "eval_loss": 3.201068162918091, "eval_runtime": 111.8171, "eval_samples_per_second": 3.05, "eval_steps_per_second": 0.385, "eval_wer": 1.0, "step": 500 }, { "epoch": 7.85, "learning_rate": 5.0700000000000006e-05, "loss": 3.1996, "step": 510 }, { "epoch": 8.0, "learning_rate": 5.17e-05, "loss": 3.2977, "step": 520 }, { "epoch": 8.15, "learning_rate": 5.270000000000001e-05, "loss": 3.1425, "step": 530 }, { "epoch": 8.31, "learning_rate": 5.3700000000000004e-05, "loss": 3.2105, "step": 540 }, { "epoch": 8.46, "learning_rate": 5.470000000000001e-05, "loss": 3.1593, "step": 550 }, { "epoch": 8.62, "learning_rate": 5.5700000000000005e-05, "loss": 3.1665, "step": 560 }, { "epoch": 8.77, "learning_rate": 5.6699999999999996e-05, "loss": 3.2244, "step": 570 }, { "epoch": 8.92, "learning_rate": 5.77e-05, "loss": 3.1578, "step": 580 }, { "epoch": 9.08, "learning_rate": 5.87e-05, "loss": 3.1924, "step": 590 }, { "epoch": 9.23, "learning_rate": 5.97e-05, "loss": 3.1667, "step": 600 }, { "epoch": 9.23, "eval_cer": 0.9953039832285115, "eval_loss": 3.139559030532837, "eval_runtime": 109.7588, "eval_samples_per_second": 3.107, "eval_steps_per_second": 0.392, "eval_wer": 1.0, "step": 600 }, { "epoch": 9.38, "learning_rate": 6.07e-05, "loss": 3.1127, "step": 610 }, { "epoch": 9.54, "learning_rate": 6.170000000000001e-05, "loss": 3.1775, "step": 620 }, { "epoch": 9.69, "learning_rate": 6.27e-05, "loss": 3.1132, "step": 630 }, { "epoch": 9.85, "learning_rate": 6.37e-05, "loss": 3.1704, "step": 640 }, { "epoch": 10.0, "learning_rate": 6.47e-05, "loss": 3.1306, "step": 650 }, { "epoch": 10.15, "learning_rate": 6.570000000000001e-05, "loss": 3.0967, "step": 660 }, { "epoch": 10.31, "learning_rate": 6.670000000000001e-05, "loss": 3.1375, "step": 670 }, { "epoch": 10.46, "learning_rate": 6.77e-05, "loss": 3.1139, "step": 680 }, { "epoch": 10.62, "learning_rate": 6.87e-05, "loss": 3.098, "step": 690 }, { "epoch": 10.77, "learning_rate": 6.97e-05, "loss": 3.1111, "step": 700 }, { "epoch": 10.77, "eval_cer": 0.9953039832285115, "eval_loss": 3.0903170108795166, "eval_runtime": 109.1662, "eval_samples_per_second": 3.124, "eval_steps_per_second": 0.394, "eval_wer": 1.0, "step": 700 }, { "epoch": 10.92, "learning_rate": 7.07e-05, "loss": 3.0775, "step": 710 }, { "epoch": 11.08, "learning_rate": 7.17e-05, "loss": 3.1294, "step": 720 }, { "epoch": 11.23, "learning_rate": 7.27e-05, "loss": 3.0943, "step": 730 }, { "epoch": 11.38, "learning_rate": 7.37e-05, "loss": 3.0812, "step": 740 }, { "epoch": 11.54, "learning_rate": 7.47e-05, "loss": 3.0843, "step": 750 }, { "epoch": 11.69, "learning_rate": 7.570000000000001e-05, "loss": 3.0668, "step": 760 }, { "epoch": 11.85, "learning_rate": 7.670000000000001e-05, "loss": 3.0617, "step": 770 }, { "epoch": 12.0, "learning_rate": 7.77e-05, "loss": 3.0469, "step": 780 }, { "epoch": 12.15, "learning_rate": 7.87e-05, "loss": 2.9867, "step": 790 }, { "epoch": 12.31, "learning_rate": 7.970000000000001e-05, "loss": 2.9955, "step": 800 }, { "epoch": 12.31, "eval_cer": 0.9953039832285115, "eval_loss": 2.895956039428711, "eval_runtime": 109.2356, "eval_samples_per_second": 3.122, "eval_steps_per_second": 0.394, "eval_wer": 1.0, "step": 800 }, { "epoch": 12.46, "learning_rate": 8.070000000000001e-05, "loss": 2.9201, "step": 810 }, { "epoch": 12.62, "learning_rate": 8.17e-05, "loss": 2.8683, "step": 820 }, { "epoch": 12.77, "learning_rate": 8.27e-05, "loss": 2.8432, "step": 830 }, { "epoch": 12.92, "learning_rate": 8.37e-05, "loss": 2.6776, "step": 840 }, { "epoch": 13.08, "learning_rate": 8.47e-05, "loss": 2.5611, "step": 850 }, { "epoch": 13.23, "learning_rate": 8.57e-05, "loss": 2.4031, "step": 860 }, { "epoch": 13.38, "learning_rate": 8.67e-05, "loss": 2.2939, "step": 870 }, { "epoch": 13.54, "learning_rate": 8.77e-05, "loss": 2.0835, "step": 880 }, { "epoch": 13.69, "learning_rate": 8.87e-05, "loss": 1.8785, "step": 890 }, { "epoch": 13.85, "learning_rate": 8.970000000000001e-05, "loss": 1.784, "step": 900 }, { "epoch": 13.85, "eval_cer": 0.33366876310272536, "eval_loss": 1.4985727071762085, "eval_runtime": 109.5182, "eval_samples_per_second": 3.114, "eval_steps_per_second": 0.393, "eval_wer": 0.8279052553663953, "step": 900 }, { "epoch": 14.0, "learning_rate": 9.070000000000001e-05, "loss": 1.6083, "step": 910 }, { "epoch": 14.15, "learning_rate": 9.17e-05, "loss": 1.5164, "step": 920 }, { "epoch": 14.31, "learning_rate": 9.27e-05, "loss": 1.4019, "step": 930 }, { "epoch": 14.46, "learning_rate": 9.370000000000001e-05, "loss": 1.3325, "step": 940 }, { "epoch": 14.62, "learning_rate": 9.47e-05, "loss": 1.3251, "step": 950 }, { "epoch": 14.77, "learning_rate": 9.57e-05, "loss": 1.2811, "step": 960 }, { "epoch": 14.92, "learning_rate": 9.67e-05, "loss": 1.1927, "step": 970 }, { "epoch": 15.08, "learning_rate": 9.77e-05, "loss": 1.2017, "step": 980 }, { "epoch": 15.23, "learning_rate": 9.87e-05, "loss": 1.0452, "step": 990 }, { "epoch": 15.38, "learning_rate": 9.970000000000001e-05, "loss": 1.1511, "step": 1000 }, { "epoch": 15.38, "eval_cer": 0.21970649895178196, "eval_loss": 0.8053019046783447, "eval_runtime": 109.46, "eval_samples_per_second": 3.115, "eval_steps_per_second": 0.393, "eval_wer": 0.6402664692820134, "step": 1000 }, { "epoch": 15.54, "learning_rate": 9.994166666666667e-05, "loss": 5.2926, "step": 1010 }, { "epoch": 15.69, "learning_rate": 9.985833333333334e-05, "loss": 3.4895, "step": 1020 }, { "epoch": 15.85, "learning_rate": 9.977500000000001e-05, "loss": 3.3029, "step": 1030 }, { "epoch": 16.0, "learning_rate": 9.969166666666667e-05, "loss": 3.2695, "step": 1040 }, { "epoch": 16.15, "learning_rate": 9.960833333333333e-05, "loss": 3.1054, "step": 1050 }, { "epoch": 16.31, "learning_rate": 9.952500000000001e-05, "loss": 3.0923, "step": 1060 }, { "epoch": 16.46, "learning_rate": 9.944166666666667e-05, "loss": 2.9955, "step": 1070 }, { "epoch": 16.62, "learning_rate": 9.935833333333334e-05, "loss": 2.9343, "step": 1080 }, { "epoch": 16.77, "learning_rate": 9.9275e-05, "loss": 2.7871, "step": 1090 }, { "epoch": 16.92, "learning_rate": 9.919166666666667e-05, "loss": 2.3674, "step": 1100 }, { "epoch": 16.92, "eval_cer": 0.4807547169811321, "eval_loss": 1.9613749980926514, "eval_runtime": 26.9012, "eval_samples_per_second": 12.676, "eval_steps_per_second": 1.598, "eval_wer": 0.9925980754996299, "step": 1100 }, { "epoch": 17.08, "learning_rate": 9.910833333333333e-05, "loss": 2.1319, "step": 1110 }, { "epoch": 17.23, "learning_rate": 9.9025e-05, "loss": 1.6758, "step": 1120 }, { "epoch": 17.38, "learning_rate": 9.894166666666668e-05, "loss": 1.5306, "step": 1130 }, { "epoch": 17.54, "learning_rate": 9.885833333333334e-05, "loss": 1.3654, "step": 1140 }, { "epoch": 17.69, "learning_rate": 9.8775e-05, "loss": 1.2197, "step": 1150 }, { "epoch": 17.85, "learning_rate": 9.869166666666668e-05, "loss": 1.1739, "step": 1160 }, { "epoch": 18.0, "learning_rate": 9.860833333333334e-05, "loss": 1.0703, "step": 1170 }, { "epoch": 18.15, "learning_rate": 9.8525e-05, "loss": 1.064, "step": 1180 }, { "epoch": 18.31, "learning_rate": 9.844166666666667e-05, "loss": 1.0071, "step": 1190 }, { "epoch": 18.46, "learning_rate": 9.835833333333334e-05, "loss": 0.8716, "step": 1200 }, { "epoch": 18.46, "eval_cer": 0.20771488469601676, "eval_loss": 0.7716531753540039, "eval_runtime": 26.7363, "eval_samples_per_second": 12.754, "eval_steps_per_second": 1.608, "eval_wer": 0.609178386380459, "step": 1200 }, { "epoch": 18.62, "learning_rate": 9.8275e-05, "loss": 0.9567, "step": 1210 }, { "epoch": 18.77, "learning_rate": 9.819166666666668e-05, "loss": 0.9223, "step": 1220 }, { "epoch": 18.92, "learning_rate": 9.810833333333334e-05, "loss": 0.8467, "step": 1230 }, { "epoch": 19.08, "learning_rate": 9.8025e-05, "loss": 0.8997, "step": 1240 }, { "epoch": 19.23, "learning_rate": 9.794166666666667e-05, "loss": 0.7379, "step": 1250 }, { "epoch": 19.38, "learning_rate": 9.785833333333334e-05, "loss": 0.7997, "step": 1260 }, { "epoch": 19.54, "learning_rate": 9.7775e-05, "loss": 0.8364, "step": 1270 }, { "epoch": 19.69, "learning_rate": 9.769166666666667e-05, "loss": 0.6978, "step": 1280 }, { "epoch": 19.85, "learning_rate": 9.760833333333334e-05, "loss": 0.7572, "step": 1290 }, { "epoch": 20.0, "learning_rate": 9.7525e-05, "loss": 0.7108, "step": 1300 }, { "epoch": 20.0, "eval_cer": 0.16545073375262054, "eval_loss": 0.5405445098876953, "eval_runtime": 26.8237, "eval_samples_per_second": 12.713, "eval_steps_per_second": 1.603, "eval_wer": 0.5159141376757957, "step": 1300 }, { "epoch": 20.15, "learning_rate": 9.744166666666667e-05, "loss": 0.7364, "step": 1310 }, { "epoch": 20.31, "learning_rate": 9.735833333333335e-05, "loss": 0.6907, "step": 1320 }, { "epoch": 20.46, "learning_rate": 9.7275e-05, "loss": 0.6201, "step": 1330 }, { "epoch": 20.62, "learning_rate": 9.719166666666667e-05, "loss": 0.6772, "step": 1340 }, { "epoch": 20.77, "learning_rate": 9.710833333333334e-05, "loss": 0.6928, "step": 1350 }, { "epoch": 20.92, "learning_rate": 9.7025e-05, "loss": 0.6312, "step": 1360 }, { "epoch": 21.08, "learning_rate": 9.694166666666667e-05, "loss": 0.6452, "step": 1370 }, { "epoch": 21.23, "learning_rate": 9.685833333333333e-05, "loss": 0.551, "step": 1380 }, { "epoch": 21.38, "learning_rate": 9.677500000000001e-05, "loss": 0.6457, "step": 1390 }, { "epoch": 21.54, "learning_rate": 9.669166666666667e-05, "loss": 0.612, "step": 1400 }, { "epoch": 21.54, "eval_cer": 0.140041928721174, "eval_loss": 0.42258089780807495, "eval_runtime": 27.0022, "eval_samples_per_second": 12.629, "eval_steps_per_second": 1.592, "eval_wer": 0.463360473723168, "step": 1400 }, { "epoch": 21.69, "learning_rate": 9.660833333333333e-05, "loss": 0.5377, "step": 1410 }, { "epoch": 21.85, "learning_rate": 9.652500000000002e-05, "loss": 0.625, "step": 1420 }, { "epoch": 22.0, "learning_rate": 9.644166666666668e-05, "loss": 0.5206, "step": 1430 }, { "epoch": 22.15, "learning_rate": 9.635833333333334e-05, "loss": 0.6004, "step": 1440 }, { "epoch": 22.31, "learning_rate": 9.627500000000001e-05, "loss": 0.5728, "step": 1450 }, { "epoch": 22.46, "learning_rate": 9.619166666666667e-05, "loss": 0.5696, "step": 1460 }, { "epoch": 22.62, "learning_rate": 9.610833333333333e-05, "loss": 0.5712, "step": 1470 }, { "epoch": 22.77, "learning_rate": 9.6025e-05, "loss": 0.5303, "step": 1480 }, { "epoch": 22.92, "learning_rate": 9.594166666666668e-05, "loss": 0.4954, "step": 1490 }, { "epoch": 23.08, "learning_rate": 9.585833333333334e-05, "loss": 0.4919, "step": 1500 }, { "epoch": 23.08, "eval_cer": 0.11941299790356394, "eval_loss": 0.3299271762371063, "eval_runtime": 27.0051, "eval_samples_per_second": 12.627, "eval_steps_per_second": 1.592, "eval_wer": 0.4052553663952628, "step": 1500 }, { "epoch": 23.23, "learning_rate": 9.5775e-05, "loss": 0.391, "step": 1510 }, { "epoch": 23.38, "learning_rate": 9.569166666666667e-05, "loss": 0.5205, "step": 1520 }, { "epoch": 23.54, "learning_rate": 9.560833333333333e-05, "loss": 0.5302, "step": 1530 }, { "epoch": 23.69, "learning_rate": 9.5525e-05, "loss": 0.4806, "step": 1540 }, { "epoch": 23.85, "learning_rate": 9.544166666666668e-05, "loss": 0.5882, "step": 1550 }, { "epoch": 24.0, "learning_rate": 9.535833333333334e-05, "loss": 0.4541, "step": 1560 }, { "epoch": 24.15, "learning_rate": 9.5275e-05, "loss": 0.457, "step": 1570 }, { "epoch": 24.31, "learning_rate": 9.519166666666667e-05, "loss": 0.4797, "step": 1580 }, { "epoch": 24.46, "learning_rate": 9.510833333333333e-05, "loss": 0.3973, "step": 1590 }, { "epoch": 24.62, "learning_rate": 9.5025e-05, "loss": 0.4999, "step": 1600 }, { "epoch": 24.62, "eval_cer": 0.1020545073375262, "eval_loss": 0.2569698691368103, "eval_runtime": 30.0585, "eval_samples_per_second": 11.345, "eval_steps_per_second": 1.431, "eval_wer": 0.3604737231680237, "step": 1600 }, { "epoch": 24.77, "learning_rate": 9.494166666666668e-05, "loss": 0.452, "step": 1610 }, { "epoch": 24.92, "learning_rate": 9.485833333333334e-05, "loss": 0.4148, "step": 1620 }, { "epoch": 25.08, "learning_rate": 9.4775e-05, "loss": 0.4316, "step": 1630 }, { "epoch": 25.23, "learning_rate": 9.469166666666667e-05, "loss": 0.3673, "step": 1640 }, { "epoch": 25.38, "learning_rate": 9.460833333333335e-05, "loss": 0.4365, "step": 1650 }, { "epoch": 25.54, "learning_rate": 9.452500000000001e-05, "loss": 0.4191, "step": 1660 }, { "epoch": 25.69, "learning_rate": 9.444166666666667e-05, "loss": 0.3857, "step": 1670 }, { "epoch": 25.85, "learning_rate": 9.435833333333334e-05, "loss": 0.4627, "step": 1680 }, { "epoch": 26.0, "learning_rate": 9.4275e-05, "loss": 0.3845, "step": 1690 }, { "epoch": 26.15, "learning_rate": 9.419166666666666e-05, "loss": 0.4349, "step": 1700 }, { "epoch": 26.15, "eval_cer": 0.09207547169811321, "eval_loss": 0.21595901250839233, "eval_runtime": 27.0141, "eval_samples_per_second": 12.623, "eval_steps_per_second": 1.592, "eval_wer": 0.33826794966691337, "step": 1700 }, { "epoch": 26.31, "learning_rate": 9.410833333333335e-05, "loss": 0.359, "step": 1710 }, { "epoch": 26.46, "learning_rate": 9.402500000000001e-05, "loss": 0.3897, "step": 1720 }, { "epoch": 26.62, "learning_rate": 9.394166666666667e-05, "loss": 0.3857, "step": 1730 }, { "epoch": 26.77, "learning_rate": 9.385833333333334e-05, "loss": 0.361, "step": 1740 }, { "epoch": 26.92, "learning_rate": 9.3775e-05, "loss": 0.4116, "step": 1750 }, { "epoch": 27.08, "learning_rate": 9.369166666666666e-05, "loss": 0.427, "step": 1760 }, { "epoch": 27.23, "learning_rate": 9.360833333333334e-05, "loss": 0.2724, "step": 1770 }, { "epoch": 27.38, "learning_rate": 9.352500000000001e-05, "loss": 0.416, "step": 1780 }, { "epoch": 27.54, "learning_rate": 9.344166666666667e-05, "loss": 0.3837, "step": 1790 }, { "epoch": 27.69, "learning_rate": 9.335833333333333e-05, "loss": 0.3205, "step": 1800 }, { "epoch": 27.69, "eval_cer": 0.07907756813417191, "eval_loss": 0.17836497724056244, "eval_runtime": 26.9274, "eval_samples_per_second": 12.664, "eval_steps_per_second": 1.597, "eval_wer": 0.29533678756476683, "step": 1800 }, { "epoch": 27.85, "learning_rate": 9.3275e-05, "loss": 0.3578, "step": 1810 }, { "epoch": 28.0, "learning_rate": 9.319166666666666e-05, "loss": 0.3527, "step": 1820 }, { "epoch": 28.15, "learning_rate": 9.310833333333334e-05, "loss": 0.3458, "step": 1830 }, { "epoch": 28.31, "learning_rate": 9.302500000000001e-05, "loss": 0.3527, "step": 1840 }, { "epoch": 28.46, "learning_rate": 9.294166666666667e-05, "loss": 0.3244, "step": 1850 }, { "epoch": 28.62, "learning_rate": 9.285833333333333e-05, "loss": 0.393, "step": 1860 }, { "epoch": 28.77, "learning_rate": 9.2775e-05, "loss": 0.3255, "step": 1870 }, { "epoch": 28.92, "learning_rate": 9.269166666666668e-05, "loss": 0.2884, "step": 1880 }, { "epoch": 29.08, "learning_rate": 9.260833333333334e-05, "loss": 0.3382, "step": 1890 }, { "epoch": 29.23, "learning_rate": 9.252500000000001e-05, "loss": 0.2717, "step": 1900 }, { "epoch": 29.23, "eval_cer": 0.07127882599580712, "eval_loss": 0.14557726681232452, "eval_runtime": 26.923, "eval_samples_per_second": 12.666, "eval_steps_per_second": 1.597, "eval_wer": 0.2701702442635085, "step": 1900 }, { "epoch": 29.38, "learning_rate": 9.244166666666667e-05, "loss": 0.3271, "step": 1910 }, { "epoch": 29.54, "learning_rate": 9.235833333333333e-05, "loss": 0.3439, "step": 1920 }, { "epoch": 29.69, "learning_rate": 9.2275e-05, "loss": 0.3184, "step": 1930 }, { "epoch": 29.85, "learning_rate": 9.219166666666668e-05, "loss": 0.3458, "step": 1940 }, { "epoch": 30.0, "learning_rate": 9.210833333333334e-05, "loss": 0.2775, "step": 1950 }, { "epoch": 30.15, "learning_rate": 9.2025e-05, "loss": 0.2959, "step": 1960 }, { "epoch": 30.31, "learning_rate": 9.194166666666667e-05, "loss": 0.2861, "step": 1970 }, { "epoch": 30.46, "learning_rate": 9.185833333333333e-05, "loss": 0.3047, "step": 1980 }, { "epoch": 30.62, "learning_rate": 9.1775e-05, "loss": 0.3244, "step": 1990 }, { "epoch": 30.77, "learning_rate": 9.169166666666668e-05, "loss": 0.2903, "step": 2000 }, { "epoch": 30.77, "eval_cer": 0.06532494758909853, "eval_loss": 0.1265391856431961, "eval_runtime": 29.4587, "eval_samples_per_second": 11.576, "eval_steps_per_second": 1.46, "eval_wer": 0.2527757216876388, "step": 2000 }, { "epoch": 30.92, "learning_rate": 9.160833333333334e-05, "loss": 0.2849, "step": 2010 }, { "epoch": 31.08, "learning_rate": 9.1525e-05, "loss": 0.3266, "step": 2020 }, { "epoch": 31.23, "learning_rate": 9.144166666666668e-05, "loss": 0.2915, "step": 2030 }, { "epoch": 31.38, "learning_rate": 9.135833333333334e-05, "loss": 0.3066, "step": 2040 }, { "epoch": 31.54, "learning_rate": 9.1275e-05, "loss": 0.3167, "step": 2050 }, { "epoch": 31.69, "learning_rate": 9.119166666666667e-05, "loss": 0.2572, "step": 2060 }, { "epoch": 31.85, "learning_rate": 9.110833333333334e-05, "loss": 0.3269, "step": 2070 }, { "epoch": 32.0, "learning_rate": 9.1025e-05, "loss": 0.2422, "step": 2080 }, { "epoch": 32.15, "learning_rate": 9.094166666666666e-05, "loss": 0.3152, "step": 2090 }, { "epoch": 32.31, "learning_rate": 9.085833333333334e-05, "loss": 0.2703, "step": 2100 }, { "epoch": 32.31, "eval_cer": 0.060964360587002095, "eval_loss": 0.11160185188055038, "eval_runtime": 26.8072, "eval_samples_per_second": 12.72, "eval_steps_per_second": 1.604, "eval_wer": 0.24426350851221318, "step": 2100 } ], "max_steps": 13000, "num_train_epochs": 200, "total_flos": 8.493815951541043e+18, "trial_name": null, "trial_params": null }