{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.99916317991632, "global_step": 11940, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "learning_rate": 0.000194, "loss": 4.6647, "step": 100 }, { "epoch": 0.33, "learning_rate": 0.00039400000000000004, "loss": 3.2495, "step": 200 }, { "epoch": 0.5, "learning_rate": 0.000594, "loss": 2.8044, "step": 300 }, { "epoch": 0.67, "learning_rate": 0.0007940000000000001, "loss": 2.6636, "step": 400 }, { "epoch": 0.84, "learning_rate": 0.000994, "loss": 2.6638, "step": 500 }, { "epoch": 0.84, "eval_loss": 2.3851921558380127, "eval_runtime": 415.3375, "eval_samples_per_second": 25.011, "eval_steps_per_second": 3.128, "eval_wer": 0.9974286401391124, "step": 500 }, { "epoch": 1.01, "learning_rate": 0.0009822669104204754, "loss": 2.6437, "step": 600 }, { "epoch": 1.17, "learning_rate": 0.0009639853747714809, "loss": 2.6394, "step": 700 }, { "epoch": 1.34, "learning_rate": 0.0009457038391224862, "loss": 2.6303, "step": 800 }, { "epoch": 1.51, "learning_rate": 0.0009274223034734918, "loss": 2.6351, "step": 900 }, { "epoch": 1.67, "learning_rate": 0.0009091407678244972, "loss": 2.6578, "step": 1000 }, { "epoch": 1.67, "eval_loss": 2.2796220779418945, "eval_runtime": 416.0072, "eval_samples_per_second": 24.971, "eval_steps_per_second": 3.123, "eval_wer": 0.9970586603030135, "step": 1000 }, { "epoch": 1.84, "learning_rate": 0.0008908592321755028, "loss": 2.6462, "step": 1100 }, { "epoch": 2.01, "learning_rate": 0.0008725776965265082, "loss": 2.6498, "step": 1200 }, { "epoch": 2.18, "learning_rate": 0.0008542961608775137, "loss": 2.614, "step": 1300 }, { "epoch": 2.34, "learning_rate": 0.0008360146252285192, "loss": 2.6209, "step": 1400 }, { "epoch": 2.51, "learning_rate": 0.0008177330895795247, "loss": 2.6016, "step": 1500 }, { "epoch": 2.51, "eval_loss": 2.004575252532959, "eval_runtime": 417.037, "eval_samples_per_second": 24.909, "eval_steps_per_second": 3.115, "eval_wer": 0.9960782137373513, "step": 1500 }, { "epoch": 2.68, "learning_rate": 0.0007994515539305302, "loss": 2.5937, "step": 1600 }, { "epoch": 2.85, "learning_rate": 0.0007811700182815357, "loss": 2.5909, "step": 1700 }, { "epoch": 3.02, "learning_rate": 0.0007628884826325412, "loss": 2.5913, "step": 1800 }, { "epoch": 3.18, "learning_rate": 0.0007446069469835466, "loss": 2.5828, "step": 1900 }, { "epoch": 3.35, "learning_rate": 0.0007263254113345521, "loss": 2.5752, "step": 2000 }, { "epoch": 3.35, "eval_loss": 1.960595726966858, "eval_runtime": 416.9162, "eval_samples_per_second": 24.916, "eval_steps_per_second": 3.116, "eval_wer": 0.9961152117209612, "step": 2000 }, { "epoch": 3.52, "learning_rate": 0.0007080438756855576, "loss": 2.5663, "step": 2100 }, { "epoch": 3.68, "learning_rate": 0.0006897623400365632, "loss": 2.5729, "step": 2200 }, { "epoch": 3.85, "learning_rate": 0.0006714808043875685, "loss": 2.5767, "step": 2300 }, { "epoch": 4.02, "learning_rate": 0.0006531992687385741, "loss": 2.5661, "step": 2400 }, { "epoch": 4.19, "learning_rate": 0.0006349177330895795, "loss": 2.539, "step": 2500 }, { "epoch": 4.19, "eval_loss": 1.8835679292678833, "eval_runtime": 417.6452, "eval_samples_per_second": 24.873, "eval_steps_per_second": 3.11, "eval_wer": 0.9939693286715874, "step": 2500 }, { "epoch": 4.35, "learning_rate": 0.000616636197440585, "loss": 2.5237, "step": 2600 }, { "epoch": 4.52, "learning_rate": 0.0005983546617915904, "loss": 2.5464, "step": 2700 }, { "epoch": 4.69, "learning_rate": 0.000580073126142596, "loss": 2.5135, "step": 2800 }, { "epoch": 4.86, "learning_rate": 0.0005617915904936015, "loss": 2.5058, "step": 2900 }, { "epoch": 5.03, "learning_rate": 0.0005435100548446069, "loss": 2.5214, "step": 3000 }, { "epoch": 5.03, "eval_loss": 1.859293818473816, "eval_runtime": 418.88, "eval_samples_per_second": 24.799, "eval_steps_per_second": 3.101, "eval_wer": 0.9933033649666093, "step": 3000 }, { "epoch": 5.19, "learning_rate": 0.0005252285191956125, "loss": 2.4984, "step": 3100 }, { "epoch": 5.36, "learning_rate": 0.0005069469835466179, "loss": 2.4812, "step": 3200 }, { "epoch": 5.53, "learning_rate": 0.0004886654478976234, "loss": 2.4626, "step": 3300 }, { "epoch": 5.69, "learning_rate": 0.0004703839122486289, "loss": 2.476, "step": 3400 }, { "epoch": 5.86, "learning_rate": 0.00045210237659963436, "loss": 2.4684, "step": 3500 }, { "epoch": 5.86, "eval_loss": 1.781636357307434, "eval_runtime": 415.6525, "eval_samples_per_second": 24.992, "eval_steps_per_second": 3.125, "eval_wer": 0.9884566291137133, "step": 3500 }, { "epoch": 6.03, "learning_rate": 0.0004338208409506398, "loss": 2.4739, "step": 3600 }, { "epoch": 6.2, "learning_rate": 0.0004155393053016453, "loss": 2.4494, "step": 3700 }, { "epoch": 6.36, "learning_rate": 0.00039725776965265084, "loss": 2.4263, "step": 3800 }, { "epoch": 6.53, "learning_rate": 0.00037897623400365635, "loss": 2.4187, "step": 3900 }, { "epoch": 6.7, "learning_rate": 0.0003606946983546618, "loss": 2.4134, "step": 4000 }, { "epoch": 6.7, "eval_loss": 1.7167690992355347, "eval_runtime": 416.8699, "eval_samples_per_second": 24.919, "eval_steps_per_second": 3.116, "eval_wer": 0.9808165454982704, "step": 4000 }, { "epoch": 6.87, "learning_rate": 0.0003424131627056673, "loss": 2.4008, "step": 4100 }, { "epoch": 7.04, "learning_rate": 0.00032413162705667277, "loss": 2.4048, "step": 4200 }, { "epoch": 7.2, "learning_rate": 0.0003058500914076783, "loss": 2.3795, "step": 4300 }, { "epoch": 7.37, "learning_rate": 0.00028756855575868374, "loss": 2.3803, "step": 4400 }, { "epoch": 7.54, "learning_rate": 0.0002692870201096892, "loss": 2.3732, "step": 4500 }, { "epoch": 7.54, "eval_loss": 1.6406092643737793, "eval_runtime": 415.1084, "eval_samples_per_second": 25.025, "eval_steps_per_second": 3.129, "eval_wer": 0.976432284440498, "step": 4500 }, { "epoch": 7.7, "learning_rate": 0.0002510054844606947, "loss": 2.3657, "step": 4600 }, { "epoch": 7.87, "learning_rate": 0.0002327239488117002, "loss": 2.3565, "step": 4700 }, { "epoch": 8.04, "learning_rate": 0.00021462522851919562, "loss": 2.3679, "step": 4800 }, { "epoch": 8.21, "learning_rate": 0.0001963436928702011, "loss": 2.34, "step": 4900 }, { "epoch": 8.37, "learning_rate": 0.00017806215722120658, "loss": 2.3371, "step": 5000 }, { "epoch": 8.37, "eval_loss": 1.6087424755096436, "eval_runtime": 417.7716, "eval_samples_per_second": 24.865, "eval_steps_per_second": 3.109, "eval_wer": 0.9739349205468302, "step": 5000 }, { "epoch": 8.54, "learning_rate": 0.00015978062157221207, "loss": 2.3216, "step": 5100 }, { "epoch": 8.71, "learning_rate": 0.00014149908592321755, "loss": 2.3004, "step": 5200 }, { "epoch": 8.88, "learning_rate": 0.00012321755027422303, "loss": 2.3028, "step": 5300 }, { "epoch": 9.05, "learning_rate": 0.00010493601462522852, "loss": 2.3099, "step": 5400 }, { "epoch": 9.21, "learning_rate": 8.6654478976234e-05, "loss": 2.2824, "step": 5500 }, { "epoch": 9.21, "eval_loss": 1.5476473569869995, "eval_runtime": 417.8751, "eval_samples_per_second": 24.859, "eval_steps_per_second": 3.109, "eval_wer": 0.9695691584808628, "step": 5500 }, { "epoch": 9.38, "learning_rate": 0.0005545454545454546, "loss": 2.3577, "step": 5600 }, { "epoch": 9.55, "learning_rate": 0.0005458041958041959, "loss": 2.3723, "step": 5700 }, { "epoch": 9.71, "learning_rate": 0.000537062937062937, "loss": 2.3758, "step": 5800 }, { "epoch": 9.88, "learning_rate": 0.0005283216783216783, "loss": 2.3833, "step": 5900 }, { "epoch": 10.05, "learning_rate": 0.0005195804195804196, "loss": 2.3771, "step": 6000 }, { "epoch": 10.05, "eval_loss": 1.6468309164047241, "eval_runtime": 414.4359, "eval_samples_per_second": 25.065, "eval_steps_per_second": 3.134, "eval_wer": 0.9773017370553305, "step": 6000 }, { "epoch": 10.22, "learning_rate": 0.0005108391608391608, "loss": 2.3673, "step": 6100 }, { "epoch": 10.38, "learning_rate": 0.0005020979020979021, "loss": 2.3555, "step": 6200 }, { "epoch": 10.55, "learning_rate": 0.0004933566433566434, "loss": 2.3645, "step": 6300 }, { "epoch": 10.72, "learning_rate": 0.0004846153846153846, "loss": 2.3617, "step": 6400 }, { "epoch": 10.89, "learning_rate": 0.0004758741258741259, "loss": 2.3499, "step": 6500 }, { "epoch": 10.89, "eval_loss": 1.6116454601287842, "eval_runtime": 413.4109, "eval_samples_per_second": 25.128, "eval_steps_per_second": 3.142, "eval_wer": 0.9737314316369757, "step": 6500 }, { "epoch": 11.06, "learning_rate": 0.0004671328671328671, "loss": 2.3634, "step": 6600 }, { "epoch": 11.22, "learning_rate": 0.0004583916083916084, "loss": 2.3573, "step": 6700 }, { "epoch": 11.39, "learning_rate": 0.0004496503496503497, "loss": 2.355, "step": 6800 }, { "epoch": 11.56, "learning_rate": 0.00044090909090909093, "loss": 2.3543, "step": 6900 }, { "epoch": 11.72, "learning_rate": 0.00043216783216783216, "loss": 2.3283, "step": 7000 }, { "epoch": 11.72, "eval_loss": 1.6059322357177734, "eval_runtime": 409.6633, "eval_samples_per_second": 25.357, "eval_steps_per_second": 3.171, "eval_wer": 0.9743973953419539, "step": 7000 }, { "epoch": 11.89, "learning_rate": 0.00042342657342657344, "loss": 2.3277, "step": 7100 }, { "epoch": 12.06, "learning_rate": 0.0004146853146853147, "loss": 2.3361, "step": 7200 }, { "epoch": 12.23, "learning_rate": 0.00040594405594405596, "loss": 2.3199, "step": 7300 }, { "epoch": 12.39, "learning_rate": 0.00039720279720279725, "loss": 2.3216, "step": 7400 }, { "epoch": 12.56, "learning_rate": 0.0003884615384615385, "loss": 2.3153, "step": 7500 }, { "epoch": 12.56, "eval_loss": 1.5888867378234863, "eval_runtime": 416.4655, "eval_samples_per_second": 24.943, "eval_steps_per_second": 3.119, "eval_wer": 0.9758218177109348, "step": 7500 }, { "epoch": 12.73, "learning_rate": 0.0003797202797202797, "loss": 2.3192, "step": 7600 }, { "epoch": 12.9, "learning_rate": 0.000370979020979021, "loss": 2.3053, "step": 7700 }, { "epoch": 13.07, "learning_rate": 0.00036223776223776223, "loss": 2.3185, "step": 7800 }, { "epoch": 13.23, "learning_rate": 0.0003534965034965035, "loss": 2.3101, "step": 7900 }, { "epoch": 13.4, "learning_rate": 0.0003447552447552448, "loss": 2.3016, "step": 8000 }, { "epoch": 13.4, "eval_loss": 1.5663487911224365, "eval_runtime": 415.4876, "eval_samples_per_second": 25.002, "eval_steps_per_second": 3.126, "eval_wer": 0.9727509850713136, "step": 8000 }, { "epoch": 13.57, "learning_rate": 0.00033601398601398603, "loss": 2.2916, "step": 8100 }, { "epoch": 13.73, "learning_rate": 0.00032727272727272726, "loss": 2.2904, "step": 8200 }, { "epoch": 13.9, "learning_rate": 0.00031853146853146855, "loss": 2.2708, "step": 8300 }, { "epoch": 14.07, "learning_rate": 0.0003097902097902098, "loss": 2.2876, "step": 8400 }, { "epoch": 14.24, "learning_rate": 0.00030104895104895107, "loss": 2.2731, "step": 8500 }, { "epoch": 14.24, "eval_loss": 1.567448377609253, "eval_runtime": 407.9046, "eval_samples_per_second": 25.467, "eval_steps_per_second": 3.185, "eval_wer": 0.9626135375622029, "step": 8500 }, { "epoch": 14.41, "learning_rate": 0.00029230769230769235, "loss": 2.2482, "step": 8600 }, { "epoch": 14.57, "learning_rate": 0.0002835664335664336, "loss": 2.2559, "step": 8700 }, { "epoch": 14.74, "learning_rate": 0.0002748251748251748, "loss": 2.2726, "step": 8800 }, { "epoch": 14.91, "learning_rate": 0.0002660839160839161, "loss": 2.2508, "step": 8900 }, { "epoch": 15.08, "learning_rate": 0.00025743006993006993, "loss": 2.2617, "step": 9000 }, { "epoch": 15.08, "eval_loss": 1.5032401084899902, "eval_runtime": 409.6274, "eval_samples_per_second": 25.36, "eval_steps_per_second": 3.171, "eval_wer": 0.9583402704552602, "step": 9000 }, { "epoch": 15.24, "learning_rate": 0.0002486888111888112, "loss": 2.2396, "step": 9100 }, { "epoch": 15.41, "learning_rate": 0.00023994755244755245, "loss": 2.2448, "step": 9200 }, { "epoch": 15.58, "learning_rate": 0.0002312062937062937, "loss": 2.225, "step": 9300 }, { "epoch": 15.74, "learning_rate": 0.00022246503496503497, "loss": 2.2319, "step": 9400 }, { "epoch": 15.91, "learning_rate": 0.00021372377622377623, "loss": 2.2252, "step": 9500 }, { "epoch": 15.91, "eval_loss": 1.466213345527649, "eval_runtime": 409.5049, "eval_samples_per_second": 25.367, "eval_steps_per_second": 3.172, "eval_wer": 0.9516436354218695, "step": 9500 }, { "epoch": 16.08, "learning_rate": 0.00020498251748251749, "loss": 2.2324, "step": 9600 }, { "epoch": 16.25, "learning_rate": 0.00019624125874125875, "loss": 2.2197, "step": 9700 }, { "epoch": 16.42, "learning_rate": 0.0001875, "loss": 2.2061, "step": 9800 }, { "epoch": 16.58, "learning_rate": 0.00017875874125874126, "loss": 2.2062, "step": 9900 }, { "epoch": 16.75, "learning_rate": 0.00017001748251748252, "loss": 2.2048, "step": 10000 }, { "epoch": 16.75, "eval_loss": 1.4410929679870605, "eval_runtime": 408.38, "eval_samples_per_second": 25.437, "eval_steps_per_second": 3.181, "eval_wer": 0.9561018924468616, "step": 10000 }, { "epoch": 16.92, "learning_rate": 0.00016127622377622378, "loss": 2.1942, "step": 10100 }, { "epoch": 17.09, "learning_rate": 0.00015253496503496504, "loss": 2.2158, "step": 10200 }, { "epoch": 17.25, "learning_rate": 0.0001437937062937063, "loss": 2.1851, "step": 10300 }, { "epoch": 17.42, "learning_rate": 0.00013505244755244756, "loss": 2.1798, "step": 10400 }, { "epoch": 17.59, "learning_rate": 0.00012631118881118882, "loss": 2.1731, "step": 10500 }, { "epoch": 17.59, "eval_loss": 1.422843337059021, "eval_runtime": 412.7138, "eval_samples_per_second": 25.17, "eval_steps_per_second": 3.147, "eval_wer": 0.9521061102169932, "step": 10500 }, { "epoch": 17.75, "learning_rate": 0.00011756993006993007, "loss": 2.1736, "step": 10600 }, { "epoch": 17.92, "learning_rate": 0.00010882867132867133, "loss": 2.182, "step": 10700 }, { "epoch": 18.09, "learning_rate": 0.00010008741258741259, "loss": 2.1741, "step": 10800 }, { "epoch": 18.26, "learning_rate": 9.134615384615384e-05, "loss": 2.1636, "step": 10900 }, { "epoch": 18.43, "learning_rate": 8.260489510489511e-05, "loss": 2.1732, "step": 11000 }, { "epoch": 18.43, "eval_loss": 1.4052633047103882, "eval_runtime": 410.158, "eval_samples_per_second": 25.327, "eval_steps_per_second": 3.167, "eval_wer": 0.9428566143145198, "step": 11000 }, { "epoch": 18.59, "learning_rate": 7.386363636363637e-05, "loss": 2.1666, "step": 11100 }, { "epoch": 18.76, "learning_rate": 6.512237762237761e-05, "loss": 2.1612, "step": 11200 }, { "epoch": 18.93, "learning_rate": 5.638111888111888e-05, "loss": 2.1616, "step": 11300 }, { "epoch": 19.1, "learning_rate": 4.763986013986014e-05, "loss": 2.1752, "step": 11400 }, { "epoch": 19.26, "learning_rate": 3.88986013986014e-05, "loss": 2.1502, "step": 11500 }, { "epoch": 19.26, "eval_loss": 1.3827834129333496, "eval_runtime": 410.5205, "eval_samples_per_second": 25.304, "eval_steps_per_second": 3.164, "eval_wer": 0.9399522726011432, "step": 11500 }, { "epoch": 19.43, "learning_rate": 3.0157342657342658e-05, "loss": 2.1506, "step": 11600 }, { "epoch": 19.6, "learning_rate": 2.1416083916083917e-05, "loss": 2.1489, "step": 11700 }, { "epoch": 19.76, "learning_rate": 1.2674825174825174e-05, "loss": 2.1472, "step": 11800 }, { "epoch": 19.93, "learning_rate": 3.933566433566434e-06, "loss": 2.1453, "step": 11900 }, { "epoch": 20.0, "step": 11940, "total_flos": 2.027398301943103e+20, "train_loss": 1.2195568717304786, "train_runtime": 26512.2441, "train_samples_per_second": 28.824, "train_steps_per_second": 0.45 } ], "max_steps": 11940, "num_train_epochs": 20, "total_flos": 2.027398301943103e+20, "trial_name": null, "trial_params": null }