{ "best_metric": 0.5028899908065796, "best_model_checkpoint": "ai-light-dance_drums_ft_pretrain_wav2vec2-base-new_onset-idmt-2_8k/checkpoint-459", "epoch": 100.0, "global_step": 900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 101.80458068847656, "eval_runtime": 2.5283, "eval_samples_per_second": 6.724, "eval_steps_per_second": 1.978, "eval_wer": 0.98, "step": 9 }, { "epoch": 1.11, "learning_rate": 7e-05, "loss": 17.4958, "step": 10 }, { "epoch": 2.0, "eval_loss": 82.49200439453125, "eval_runtime": 2.6724, "eval_samples_per_second": 6.361, "eval_steps_per_second": 1.871, "eval_wer": 1.0, "step": 18 }, { "epoch": 2.22, "learning_rate": 0.00015999999999999999, "loss": 16.2087, "step": 20 }, { "epoch": 3.0, "eval_loss": 36.138763427734375, "eval_runtime": 2.6141, "eval_samples_per_second": 6.503, "eval_steps_per_second": 1.913, "eval_wer": 1.0, "step": 27 }, { "epoch": 3.33, "learning_rate": 0.00026, "loss": 6.2942, "step": 30 }, { "epoch": 4.0, "eval_loss": 8.326688766479492, "eval_runtime": 2.6808, "eval_samples_per_second": 6.341, "eval_steps_per_second": 1.865, "eval_wer": 1.0, "step": 36 }, { "epoch": 4.44, "learning_rate": 0.0002979310344827586, "loss": 2.0411, "step": 40 }, { "epoch": 5.0, "eval_loss": 6.8215179443359375, "eval_runtime": 2.696, "eval_samples_per_second": 6.306, "eval_steps_per_second": 1.855, "eval_wer": 1.0, "step": 45 }, { "epoch": 5.56, "learning_rate": 0.00029448275862068964, "loss": 1.554, "step": 50 }, { "epoch": 6.0, "eval_loss": 5.384669780731201, "eval_runtime": 2.7298, "eval_samples_per_second": 6.228, "eval_steps_per_second": 1.832, "eval_wer": 1.0, "step": 54 }, { "epoch": 6.67, "learning_rate": 0.00029103448275862064, "loss": 1.6215, "step": 60 }, { "epoch": 7.0, "eval_loss": 4.4644598960876465, "eval_runtime": 2.6233, "eval_samples_per_second": 6.48, "eval_steps_per_second": 1.906, "eval_wer": 1.0, "step": 63 }, { "epoch": 7.78, "learning_rate": 0.0002875862068965517, "loss": 1.4962, "step": 70 }, { "epoch": 8.0, "eval_loss": 3.221142053604126, "eval_runtime": 2.7171, "eval_samples_per_second": 6.257, "eval_steps_per_second": 1.84, "eval_wer": 1.0, "step": 72 }, { "epoch": 8.89, "learning_rate": 0.00028413793103448275, "loss": 1.3825, "step": 80 }, { "epoch": 9.0, "eval_loss": 2.5513429641723633, "eval_runtime": 2.6858, "eval_samples_per_second": 6.329, "eval_steps_per_second": 1.862, "eval_wer": 1.0, "step": 81 }, { "epoch": 10.0, "learning_rate": 0.00028068965517241375, "loss": 1.3443, "step": 90 }, { "epoch": 10.0, "eval_loss": 2.8581652641296387, "eval_runtime": 2.7157, "eval_samples_per_second": 6.26, "eval_steps_per_second": 1.841, "eval_wer": 1.0, "step": 90 }, { "epoch": 11.0, "eval_loss": 2.544593572616577, "eval_runtime": 2.6215, "eval_samples_per_second": 6.485, "eval_steps_per_second": 1.907, "eval_wer": 1.0, "step": 99 }, { "epoch": 11.11, "learning_rate": 0.0002772413793103448, "loss": 1.3096, "step": 100 }, { "epoch": 12.0, "eval_loss": 2.021075963973999, "eval_runtime": 2.6505, "eval_samples_per_second": 6.414, "eval_steps_per_second": 1.886, "eval_wer": 0.9955555555555555, "step": 108 }, { "epoch": 12.22, "learning_rate": 0.0002737931034482758, "loss": 1.3361, "step": 110 }, { "epoch": 13.0, "eval_loss": 1.8110406398773193, "eval_runtime": 2.6015, "eval_samples_per_second": 6.535, "eval_steps_per_second": 1.922, "eval_wer": 0.9944444444444445, "step": 117 }, { "epoch": 13.33, "learning_rate": 0.00027034482758620687, "loss": 1.2862, "step": 120 }, { "epoch": 14.0, "eval_loss": 1.7796473503112793, "eval_runtime": 2.6116, "eval_samples_per_second": 6.509, "eval_steps_per_second": 1.915, "eval_wer": 0.9933333333333333, "step": 126 }, { "epoch": 14.44, "learning_rate": 0.0002668965517241379, "loss": 1.2556, "step": 130 }, { "epoch": 15.0, "eval_loss": 1.730082631111145, "eval_runtime": 2.5934, "eval_samples_per_second": 6.555, "eval_steps_per_second": 1.928, "eval_wer": 0.9922222222222222, "step": 135 }, { "epoch": 15.56, "learning_rate": 0.0002634482758620689, "loss": 1.1959, "step": 140 }, { "epoch": 16.0, "eval_loss": 1.4244635105133057, "eval_runtime": 2.6511, "eval_samples_per_second": 6.412, "eval_steps_per_second": 1.886, "eval_wer": 0.9988888888888889, "step": 144 }, { "epoch": 16.67, "learning_rate": 0.00026, "loss": 1.1161, "step": 150 }, { "epoch": 17.0, "eval_loss": 1.1931955814361572, "eval_runtime": 2.6143, "eval_samples_per_second": 6.503, "eval_steps_per_second": 1.913, "eval_wer": 0.5677777777777778, "step": 153 }, { "epoch": 17.78, "learning_rate": 0.00025655172413793103, "loss": 0.8853, "step": 160 }, { "epoch": 18.0, "eval_loss": 1.2725539207458496, "eval_runtime": 2.6031, "eval_samples_per_second": 6.531, "eval_steps_per_second": 1.921, "eval_wer": 0.4922222222222222, "step": 162 }, { "epoch": 18.89, "learning_rate": 0.00025310344827586203, "loss": 0.7996, "step": 170 }, { "epoch": 19.0, "eval_loss": 1.0841138362884521, "eval_runtime": 2.6115, "eval_samples_per_second": 6.51, "eval_steps_per_second": 1.915, "eval_wer": 0.5511111111111111, "step": 171 }, { "epoch": 20.0, "learning_rate": 0.0002496551724137931, "loss": 0.8165, "step": 180 }, { "epoch": 20.0, "eval_loss": 1.406151294708252, "eval_runtime": 2.6236, "eval_samples_per_second": 6.48, "eval_steps_per_second": 1.906, "eval_wer": 0.4411111111111111, "step": 180 }, { "epoch": 21.0, "eval_loss": 1.4218931198120117, "eval_runtime": 2.6036, "eval_samples_per_second": 6.529, "eval_steps_per_second": 1.92, "eval_wer": 0.33666666666666667, "step": 189 }, { "epoch": 21.11, "learning_rate": 0.0002462068965517241, "loss": 0.6807, "step": 190 }, { "epoch": 22.0, "eval_loss": 1.2106943130493164, "eval_runtime": 2.6208, "eval_samples_per_second": 6.486, "eval_steps_per_second": 1.908, "eval_wer": 0.33444444444444443, "step": 198 }, { "epoch": 22.22, "learning_rate": 0.00024275862068965515, "loss": 0.7315, "step": 200 }, { "epoch": 23.0, "eval_loss": 1.1419602632522583, "eval_runtime": 2.6272, "eval_samples_per_second": 6.471, "eval_steps_per_second": 1.903, "eval_wer": 0.3188888888888889, "step": 207 }, { "epoch": 23.33, "learning_rate": 0.00023931034482758617, "loss": 0.6203, "step": 210 }, { "epoch": 24.0, "eval_loss": 1.0770384073257446, "eval_runtime": 2.6173, "eval_samples_per_second": 6.495, "eval_steps_per_second": 1.91, "eval_wer": 0.37777777777777777, "step": 216 }, { "epoch": 24.44, "learning_rate": 0.00023586206896551723, "loss": 0.6552, "step": 220 }, { "epoch": 25.0, "eval_loss": 1.1095293760299683, "eval_runtime": 2.6206, "eval_samples_per_second": 6.487, "eval_steps_per_second": 1.908, "eval_wer": 0.3788888888888889, "step": 225 }, { "epoch": 25.56, "learning_rate": 0.00023241379310344826, "loss": 0.5618, "step": 230 }, { "epoch": 26.0, "eval_loss": 1.0003583431243896, "eval_runtime": 2.6097, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.916, "eval_wer": 0.3477777777777778, "step": 234 }, { "epoch": 26.67, "learning_rate": 0.0002289655172413793, "loss": 0.5311, "step": 240 }, { "epoch": 27.0, "eval_loss": 0.8810951113700867, "eval_runtime": 2.705, "eval_samples_per_second": 6.285, "eval_steps_per_second": 1.848, "eval_wer": 0.33111111111111113, "step": 243 }, { "epoch": 27.78, "learning_rate": 0.00022551724137931031, "loss": 0.5391, "step": 250 }, { "epoch": 28.0, "eval_loss": 0.8162650465965271, "eval_runtime": 2.6062, "eval_samples_per_second": 6.523, "eval_steps_per_second": 1.919, "eval_wer": 0.36777777777777776, "step": 252 }, { "epoch": 28.89, "learning_rate": 0.00022206896551724134, "loss": 0.5275, "step": 260 }, { "epoch": 29.0, "eval_loss": 0.9999949336051941, "eval_runtime": 2.6225, "eval_samples_per_second": 6.482, "eval_steps_per_second": 1.907, "eval_wer": 0.33111111111111113, "step": 261 }, { "epoch": 30.0, "learning_rate": 0.0002186206896551724, "loss": 0.4965, "step": 270 }, { "epoch": 30.0, "eval_loss": 0.7320018410682678, "eval_runtime": 2.6343, "eval_samples_per_second": 6.453, "eval_steps_per_second": 1.898, "eval_wer": 0.37, "step": 270 }, { "epoch": 31.0, "eval_loss": 0.9642557501792908, "eval_runtime": 2.6199, "eval_samples_per_second": 6.489, "eval_steps_per_second": 1.909, "eval_wer": 0.3388888888888889, "step": 279 }, { "epoch": 31.11, "learning_rate": 0.00021517241379310343, "loss": 0.4909, "step": 280 }, { "epoch": 32.0, "eval_loss": 0.7662584185600281, "eval_runtime": 2.6092, "eval_samples_per_second": 6.515, "eval_steps_per_second": 1.916, "eval_wer": 0.35888888888888887, "step": 288 }, { "epoch": 32.22, "learning_rate": 0.00021172413793103445, "loss": 0.5218, "step": 290 }, { "epoch": 33.0, "eval_loss": 0.9004315733909607, "eval_runtime": 2.6009, "eval_samples_per_second": 6.536, "eval_steps_per_second": 1.922, "eval_wer": 0.3488888888888889, "step": 297 }, { "epoch": 33.33, "learning_rate": 0.0002082758620689655, "loss": 0.4991, "step": 300 }, { "epoch": 34.0, "eval_loss": 0.7342296242713928, "eval_runtime": 2.6253, "eval_samples_per_second": 6.475, "eval_steps_per_second": 1.905, "eval_wer": 0.38, "step": 306 }, { "epoch": 34.44, "learning_rate": 0.00020482758620689654, "loss": 0.4883, "step": 310 }, { "epoch": 35.0, "eval_loss": 0.7959361672401428, "eval_runtime": 2.6169, "eval_samples_per_second": 6.496, "eval_steps_per_second": 1.911, "eval_wer": 0.3388888888888889, "step": 315 }, { "epoch": 35.56, "learning_rate": 0.0002013793103448276, "loss": 0.4902, "step": 320 }, { "epoch": 36.0, "eval_loss": 0.6891648173332214, "eval_runtime": 2.6322, "eval_samples_per_second": 6.458, "eval_steps_per_second": 1.9, "eval_wer": 0.3377777777777778, "step": 324 }, { "epoch": 36.67, "learning_rate": 0.0001979310344827586, "loss": 0.4447, "step": 330 }, { "epoch": 37.0, "eval_loss": 0.6479607224464417, "eval_runtime": 2.5996, "eval_samples_per_second": 6.539, "eval_steps_per_second": 1.923, "eval_wer": 0.3333333333333333, "step": 333 }, { "epoch": 37.78, "learning_rate": 0.00019448275862068962, "loss": 0.4458, "step": 340 }, { "epoch": 38.0, "eval_loss": 0.6198386549949646, "eval_runtime": 2.6161, "eval_samples_per_second": 6.498, "eval_steps_per_second": 1.911, "eval_wer": 0.3333333333333333, "step": 342 }, { "epoch": 38.89, "learning_rate": 0.00019103448275862068, "loss": 0.4607, "step": 350 }, { "epoch": 39.0, "eval_loss": 0.6080996990203857, "eval_runtime": 2.6354, "eval_samples_per_second": 6.451, "eval_steps_per_second": 1.897, "eval_wer": 0.3111111111111111, "step": 351 }, { "epoch": 40.0, "learning_rate": 0.0001875862068965517, "loss": 0.4352, "step": 360 }, { "epoch": 40.0, "eval_loss": 0.6748089790344238, "eval_runtime": 2.6402, "eval_samples_per_second": 6.439, "eval_steps_per_second": 1.894, "eval_wer": 0.31555555555555553, "step": 360 }, { "epoch": 41.0, "eval_loss": 0.6885289549827576, "eval_runtime": 2.6179, "eval_samples_per_second": 6.494, "eval_steps_per_second": 1.91, "eval_wer": 0.32555555555555554, "step": 369 }, { "epoch": 41.11, "learning_rate": 0.00018413793103448273, "loss": 0.4286, "step": 370 }, { "epoch": 42.0, "eval_loss": 0.6806181073188782, "eval_runtime": 2.614, "eval_samples_per_second": 6.504, "eval_steps_per_second": 1.913, "eval_wer": 0.3333333333333333, "step": 378 }, { "epoch": 42.22, "learning_rate": 0.0001806896551724138, "loss": 0.4314, "step": 380 }, { "epoch": 43.0, "eval_loss": 0.7854954600334167, "eval_runtime": 2.5985, "eval_samples_per_second": 6.542, "eval_steps_per_second": 1.924, "eval_wer": 0.32222222222222224, "step": 387 }, { "epoch": 43.33, "learning_rate": 0.00017724137931034482, "loss": 0.4476, "step": 390 }, { "epoch": 44.0, "eval_loss": 0.6569249629974365, "eval_runtime": 2.5902, "eval_samples_per_second": 6.563, "eval_steps_per_second": 1.93, "eval_wer": 0.31444444444444447, "step": 396 }, { "epoch": 44.44, "learning_rate": 0.00017379310344827587, "loss": 0.4815, "step": 400 }, { "epoch": 45.0, "eval_loss": 0.5388519167900085, "eval_runtime": 2.6157, "eval_samples_per_second": 6.499, "eval_steps_per_second": 1.912, "eval_wer": 0.30333333333333334, "step": 405 }, { "epoch": 45.56, "learning_rate": 0.00017034482758620687, "loss": 0.36, "step": 410 }, { "epoch": 46.0, "eval_loss": 0.5549790263175964, "eval_runtime": 2.5979, "eval_samples_per_second": 6.544, "eval_steps_per_second": 1.925, "eval_wer": 0.3011111111111111, "step": 414 }, { "epoch": 46.67, "learning_rate": 0.0001668965517241379, "loss": 0.4516, "step": 420 }, { "epoch": 47.0, "eval_loss": 0.5923512578010559, "eval_runtime": 2.6204, "eval_samples_per_second": 6.488, "eval_steps_per_second": 1.908, "eval_wer": 0.31444444444444447, "step": 423 }, { "epoch": 47.78, "learning_rate": 0.00016344827586206896, "loss": 0.3682, "step": 430 }, { "epoch": 48.0, "eval_loss": 0.727486789226532, "eval_runtime": 2.6112, "eval_samples_per_second": 6.51, "eval_steps_per_second": 1.915, "eval_wer": 0.3055555555555556, "step": 432 }, { "epoch": 48.89, "learning_rate": 0.00015999999999999999, "loss": 0.4371, "step": 440 }, { "epoch": 49.0, "eval_loss": 0.7050825357437134, "eval_runtime": 2.6034, "eval_samples_per_second": 6.53, "eval_steps_per_second": 1.921, "eval_wer": 0.3088888888888889, "step": 441 }, { "epoch": 50.0, "learning_rate": 0.00015655172413793101, "loss": 0.4004, "step": 450 }, { "epoch": 50.0, "eval_loss": 0.5668944716453552, "eval_runtime": 2.6536, "eval_samples_per_second": 6.406, "eval_steps_per_second": 1.884, "eval_wer": 0.30777777777777776, "step": 450 }, { "epoch": 51.0, "eval_loss": 0.5028899908065796, "eval_runtime": 2.6202, "eval_samples_per_second": 6.488, "eval_steps_per_second": 1.908, "eval_wer": 0.31777777777777777, "step": 459 }, { "epoch": 51.11, "learning_rate": 0.00015310344827586207, "loss": 0.3298, "step": 460 }, { "epoch": 52.0, "eval_loss": 0.6150208711624146, "eval_runtime": 2.602, "eval_samples_per_second": 6.534, "eval_steps_per_second": 1.922, "eval_wer": 0.32, "step": 468 }, { "epoch": 52.22, "learning_rate": 0.00014965517241379307, "loss": 0.4083, "step": 470 }, { "epoch": 53.0, "eval_loss": 0.5881750583648682, "eval_runtime": 2.6209, "eval_samples_per_second": 6.486, "eval_steps_per_second": 1.908, "eval_wer": 0.33, "step": 477 }, { "epoch": 53.33, "learning_rate": 0.00014620689655172413, "loss": 0.4022, "step": 480 }, { "epoch": 54.0, "eval_loss": 0.7253018021583557, "eval_runtime": 2.6223, "eval_samples_per_second": 6.483, "eval_steps_per_second": 1.907, "eval_wer": 0.31444444444444447, "step": 486 }, { "epoch": 54.44, "learning_rate": 0.00014275862068965515, "loss": 0.4465, "step": 490 }, { "epoch": 55.0, "eval_loss": 0.6807547807693481, "eval_runtime": 2.6014, "eval_samples_per_second": 6.535, "eval_steps_per_second": 1.922, "eval_wer": 0.3111111111111111, "step": 495 }, { "epoch": 55.56, "learning_rate": 0.0001393103448275862, "loss": 0.3955, "step": 500 }, { "epoch": 56.0, "eval_loss": 0.6001608967781067, "eval_runtime": 2.629, "eval_samples_per_second": 6.466, "eval_steps_per_second": 1.902, "eval_wer": 0.31333333333333335, "step": 504 }, { "epoch": 56.67, "learning_rate": 0.0001358620689655172, "loss": 0.3877, "step": 510 }, { "epoch": 57.0, "eval_loss": 0.7593300938606262, "eval_runtime": 2.6173, "eval_samples_per_second": 6.495, "eval_steps_per_second": 1.91, "eval_wer": 0.3055555555555556, "step": 513 }, { "epoch": 57.78, "learning_rate": 0.00013241379310344827, "loss": 0.3486, "step": 520 }, { "epoch": 58.0, "eval_loss": 0.6764330863952637, "eval_runtime": 2.646, "eval_samples_per_second": 6.425, "eval_steps_per_second": 1.89, "eval_wer": 0.3188888888888889, "step": 522 }, { "epoch": 58.89, "learning_rate": 0.0001289655172413793, "loss": 0.3782, "step": 530 }, { "epoch": 59.0, "eval_loss": 0.6771883368492126, "eval_runtime": 2.6301, "eval_samples_per_second": 6.464, "eval_steps_per_second": 1.901, "eval_wer": 0.31333333333333335, "step": 531 }, { "epoch": 60.0, "learning_rate": 0.00012551724137931035, "loss": 0.3599, "step": 540 }, { "epoch": 60.0, "eval_loss": 0.8846335411071777, "eval_runtime": 2.615, "eval_samples_per_second": 6.501, "eval_steps_per_second": 1.912, "eval_wer": 0.3111111111111111, "step": 540 }, { "epoch": 61.0, "eval_loss": 0.945833683013916, "eval_runtime": 2.5924, "eval_samples_per_second": 6.558, "eval_steps_per_second": 1.929, "eval_wer": 0.3233333333333333, "step": 549 }, { "epoch": 61.11, "learning_rate": 0.00012206896551724136, "loss": 0.3424, "step": 550 }, { "epoch": 62.0, "eval_loss": 0.8398585319519043, "eval_runtime": 2.6035, "eval_samples_per_second": 6.53, "eval_steps_per_second": 1.921, "eval_wer": 0.3233333333333333, "step": 558 }, { "epoch": 62.22, "learning_rate": 0.0001186206896551724, "loss": 0.3652, "step": 560 }, { "epoch": 63.0, "eval_loss": 0.8266441822052002, "eval_runtime": 2.6064, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.918, "eval_wer": 0.31333333333333335, "step": 567 }, { "epoch": 63.33, "learning_rate": 0.00011517241379310343, "loss": 0.3327, "step": 570 }, { "epoch": 64.0, "eval_loss": 0.7813167572021484, "eval_runtime": 2.6355, "eval_samples_per_second": 6.45, "eval_steps_per_second": 1.897, "eval_wer": 0.30777777777777776, "step": 576 }, { "epoch": 64.44, "learning_rate": 0.00011172413793103448, "loss": 0.3603, "step": 580 }, { "epoch": 65.0, "eval_loss": 0.8066079616546631, "eval_runtime": 2.5997, "eval_samples_per_second": 6.539, "eval_steps_per_second": 1.923, "eval_wer": 0.31555555555555553, "step": 585 }, { "epoch": 65.56, "learning_rate": 0.0001082758620689655, "loss": 0.3401, "step": 590 }, { "epoch": 66.0, "eval_loss": 0.7959613800048828, "eval_runtime": 2.6015, "eval_samples_per_second": 6.535, "eval_steps_per_second": 1.922, "eval_wer": 0.30666666666666664, "step": 594 }, { "epoch": 66.67, "learning_rate": 0.00010482758620689655, "loss": 0.3797, "step": 600 }, { "epoch": 67.0, "eval_loss": 0.8513309955596924, "eval_runtime": 2.6257, "eval_samples_per_second": 6.474, "eval_steps_per_second": 1.904, "eval_wer": 0.29888888888888887, "step": 603 }, { "epoch": 67.78, "learning_rate": 0.00010137931034482757, "loss": 0.3353, "step": 610 }, { "epoch": 68.0, "eval_loss": 0.8319157361984253, "eval_runtime": 2.5993, "eval_samples_per_second": 6.54, "eval_steps_per_second": 1.924, "eval_wer": 0.2722222222222222, "step": 612 }, { "epoch": 68.89, "learning_rate": 9.793103448275862e-05, "loss": 0.3909, "step": 620 }, { "epoch": 69.0, "eval_loss": 0.8244246244430542, "eval_runtime": 2.6313, "eval_samples_per_second": 6.461, "eval_steps_per_second": 1.9, "eval_wer": 0.2877777777777778, "step": 621 }, { "epoch": 70.0, "learning_rate": 9.448275862068964e-05, "loss": 0.3263, "step": 630 }, { "epoch": 70.0, "eval_loss": 0.9538501501083374, "eval_runtime": 2.6379, "eval_samples_per_second": 6.445, "eval_steps_per_second": 1.895, "eval_wer": 0.3022222222222222, "step": 630 }, { "epoch": 71.0, "eval_loss": 1.002958059310913, "eval_runtime": 2.6816, "eval_samples_per_second": 6.34, "eval_steps_per_second": 1.865, "eval_wer": 0.2922222222222222, "step": 639 }, { "epoch": 71.11, "learning_rate": 9.103448275862069e-05, "loss": 0.3102, "step": 640 }, { "epoch": 72.0, "eval_loss": 0.9874680638313293, "eval_runtime": 2.5977, "eval_samples_per_second": 6.544, "eval_steps_per_second": 1.925, "eval_wer": 0.30444444444444446, "step": 648 }, { "epoch": 72.22, "learning_rate": 8.758620689655171e-05, "loss": 0.3577, "step": 650 }, { "epoch": 73.0, "eval_loss": 0.9030035138130188, "eval_runtime": 2.6088, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.917, "eval_wer": 0.29777777777777775, "step": 657 }, { "epoch": 73.33, "learning_rate": 8.413793103448276e-05, "loss": 0.2953, "step": 660 }, { "epoch": 74.0, "eval_loss": 0.9392061233520508, "eval_runtime": 2.5966, "eval_samples_per_second": 6.547, "eval_steps_per_second": 1.926, "eval_wer": 0.28888888888888886, "step": 666 }, { "epoch": 74.44, "learning_rate": 8.068965517241378e-05, "loss": 0.3644, "step": 670 }, { "epoch": 75.0, "eval_loss": 0.9088665843009949, "eval_runtime": 2.6416, "eval_samples_per_second": 6.436, "eval_steps_per_second": 1.893, "eval_wer": 0.2877777777777778, "step": 675 }, { "epoch": 75.56, "learning_rate": 7.724137931034483e-05, "loss": 0.3231, "step": 680 }, { "epoch": 76.0, "eval_loss": 0.926447868347168, "eval_runtime": 2.61, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.916, "eval_wer": 0.28444444444444444, "step": 684 }, { "epoch": 76.67, "learning_rate": 7.379310344827585e-05, "loss": 0.3078, "step": 690 }, { "epoch": 77.0, "eval_loss": 1.0535814762115479, "eval_runtime": 2.597, "eval_samples_per_second": 6.546, "eval_steps_per_second": 1.925, "eval_wer": 0.2911111111111111, "step": 693 }, { "epoch": 77.78, "learning_rate": 7.034482758620688e-05, "loss": 0.4503, "step": 700 }, { "epoch": 78.0, "eval_loss": 0.9473001956939697, "eval_runtime": 2.6482, "eval_samples_per_second": 6.42, "eval_steps_per_second": 1.888, "eval_wer": 0.2966666666666667, "step": 702 }, { "epoch": 78.89, "learning_rate": 6.689655172413792e-05, "loss": 0.3492, "step": 710 }, { "epoch": 79.0, "eval_loss": 0.8909215331077576, "eval_runtime": 2.6344, "eval_samples_per_second": 6.453, "eval_steps_per_second": 1.898, "eval_wer": 0.3088888888888889, "step": 711 }, { "epoch": 80.0, "learning_rate": 6.344827586206895e-05, "loss": 0.347, "step": 720 }, { "epoch": 80.0, "eval_loss": 0.8532315492630005, "eval_runtime": 2.6434, "eval_samples_per_second": 6.431, "eval_steps_per_second": 1.891, "eval_wer": 0.30666666666666664, "step": 720 }, { "epoch": 81.0, "eval_loss": 0.9552628993988037, "eval_runtime": 2.651, "eval_samples_per_second": 6.413, "eval_steps_per_second": 1.886, "eval_wer": 0.2833333333333333, "step": 729 }, { "epoch": 81.11, "learning_rate": 5.9999999999999995e-05, "loss": 0.2949, "step": 730 }, { "epoch": 82.0, "eval_loss": 1.0111045837402344, "eval_runtime": 2.6252, "eval_samples_per_second": 6.476, "eval_steps_per_second": 1.905, "eval_wer": 0.2866666666666667, "step": 738 }, { "epoch": 82.22, "learning_rate": 5.655172413793103e-05, "loss": 0.3447, "step": 740 }, { "epoch": 83.0, "eval_loss": 0.9160183072090149, "eval_runtime": 2.6434, "eval_samples_per_second": 6.431, "eval_steps_per_second": 1.892, "eval_wer": 0.3011111111111111, "step": 747 }, { "epoch": 83.33, "learning_rate": 5.3103448275862065e-05, "loss": 0.2878, "step": 750 }, { "epoch": 84.0, "eval_loss": 0.8401439785957336, "eval_runtime": 2.7557, "eval_samples_per_second": 6.169, "eval_steps_per_second": 1.814, "eval_wer": 0.29888888888888887, "step": 756 }, { "epoch": 84.44, "learning_rate": 4.96551724137931e-05, "loss": 0.3229, "step": 760 }, { "epoch": 85.0, "eval_loss": 0.8815315961837769, "eval_runtime": 2.6105, "eval_samples_per_second": 6.512, "eval_steps_per_second": 1.915, "eval_wer": 0.2911111111111111, "step": 765 }, { "epoch": 85.56, "learning_rate": 4.6206896551724135e-05, "loss": 0.276, "step": 770 }, { "epoch": 86.0, "eval_loss": 0.8801678419113159, "eval_runtime": 2.6513, "eval_samples_per_second": 6.412, "eval_steps_per_second": 1.886, "eval_wer": 0.2911111111111111, "step": 774 }, { "epoch": 86.67, "learning_rate": 4.275862068965517e-05, "loss": 0.3469, "step": 780 }, { "epoch": 87.0, "eval_loss": 0.9121254682540894, "eval_runtime": 2.6394, "eval_samples_per_second": 6.441, "eval_steps_per_second": 1.894, "eval_wer": 0.29, "step": 783 }, { "epoch": 87.78, "learning_rate": 3.9310344827586205e-05, "loss": 0.3044, "step": 790 }, { "epoch": 88.0, "eval_loss": 0.8933728337287903, "eval_runtime": 2.6251, "eval_samples_per_second": 6.476, "eval_steps_per_second": 1.905, "eval_wer": 0.29333333333333333, "step": 792 }, { "epoch": 88.89, "learning_rate": 3.586206896551724e-05, "loss": 0.2885, "step": 800 }, { "epoch": 89.0, "eval_loss": 0.8806433081626892, "eval_runtime": 2.618, "eval_samples_per_second": 6.494, "eval_steps_per_second": 1.91, "eval_wer": 0.2966666666666667, "step": 801 }, { "epoch": 90.0, "learning_rate": 3.2413793103448275e-05, "loss": 0.3365, "step": 810 }, { "epoch": 90.0, "eval_loss": 0.9036632180213928, "eval_runtime": 2.654, "eval_samples_per_second": 6.406, "eval_steps_per_second": 1.884, "eval_wer": 0.28444444444444444, "step": 810 }, { "epoch": 91.0, "eval_loss": 0.9217983484268188, "eval_runtime": 2.636, "eval_samples_per_second": 6.449, "eval_steps_per_second": 1.897, "eval_wer": 0.2866666666666667, "step": 819 }, { "epoch": 91.11, "learning_rate": 2.8965517241379307e-05, "loss": 0.3239, "step": 820 }, { "epoch": 92.0, "eval_loss": 0.9227918386459351, "eval_runtime": 2.6278, "eval_samples_per_second": 6.469, "eval_steps_per_second": 1.903, "eval_wer": 0.28444444444444444, "step": 828 }, { "epoch": 92.22, "learning_rate": 2.551724137931034e-05, "loss": 0.3219, "step": 830 }, { "epoch": 93.0, "eval_loss": 0.9166781902313232, "eval_runtime": 2.652, "eval_samples_per_second": 6.41, "eval_steps_per_second": 1.885, "eval_wer": 0.28444444444444444, "step": 837 }, { "epoch": 93.33, "learning_rate": 2.2068965517241377e-05, "loss": 0.2736, "step": 840 }, { "epoch": 94.0, "eval_loss": 0.9494596123695374, "eval_runtime": 2.6316, "eval_samples_per_second": 6.46, "eval_steps_per_second": 1.9, "eval_wer": 0.2877777777777778, "step": 846 }, { "epoch": 94.44, "learning_rate": 1.862068965517241e-05, "loss": 0.3587, "step": 850 }, { "epoch": 95.0, "eval_loss": 0.9997347593307495, "eval_runtime": 2.657, "eval_samples_per_second": 6.398, "eval_steps_per_second": 1.882, "eval_wer": 0.28444444444444444, "step": 855 }, { "epoch": 95.56, "learning_rate": 1.5172413793103447e-05, "loss": 0.3386, "step": 860 }, { "epoch": 96.0, "eval_loss": 0.9977498650550842, "eval_runtime": 2.674, "eval_samples_per_second": 6.358, "eval_steps_per_second": 1.87, "eval_wer": 0.28555555555555556, "step": 864 }, { "epoch": 96.67, "learning_rate": 1.1724137931034482e-05, "loss": 0.2895, "step": 870 }, { "epoch": 97.0, "eval_loss": 0.9963868260383606, "eval_runtime": 2.6883, "eval_samples_per_second": 6.324, "eval_steps_per_second": 1.86, "eval_wer": 0.28888888888888886, "step": 873 }, { "epoch": 97.78, "learning_rate": 8.275862068965517e-06, "loss": 0.3496, "step": 880 }, { "epoch": 98.0, "eval_loss": 0.9764975309371948, "eval_runtime": 2.6679, "eval_samples_per_second": 6.372, "eval_steps_per_second": 1.874, "eval_wer": 0.28888888888888886, "step": 882 }, { "epoch": 98.89, "learning_rate": 4.827586206896552e-06, "loss": 0.2789, "step": 890 }, { "epoch": 99.0, "eval_loss": 0.9712777733802795, "eval_runtime": 2.6523, "eval_samples_per_second": 6.41, "eval_steps_per_second": 1.885, "eval_wer": 0.2877777777777778, "step": 891 }, { "epoch": 100.0, "learning_rate": 1.379310344827586e-06, "loss": 0.3284, "step": 900 }, { "epoch": 100.0, "eval_loss": 0.9687079191207886, "eval_runtime": 2.6458, "eval_samples_per_second": 6.425, "eval_steps_per_second": 1.89, "eval_wer": 0.28888888888888886, "step": 900 }, { "epoch": 100.0, "step": 900, "total_flos": 1.389987159899058e+18, "train_loss": 0.9807351756095887, "train_runtime": 3338.9131, "train_samples_per_second": 4.223, "train_steps_per_second": 0.27 } ], "max_steps": 900, "num_train_epochs": 100, "total_flos": 1.389987159899058e+18, "trial_name": null, "trial_params": null }