{ "best_metric": 25.731181077193543, "best_model_checkpoint": "/workspace/whisper/pretrain_base/checkpoint-20000", "epoch": 9.391727493917275, "global_step": 32880, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 2.425e-05, "loss": 1.3775, "step": 100 }, { "epoch": 0.06, "learning_rate": 4.9250000000000004e-05, "loss": 0.7466, "step": 200 }, { "epoch": 0.09, "learning_rate": 7.425e-05, "loss": 0.7189, "step": 300 }, { "epoch": 0.12, "learning_rate": 9.925000000000001e-05, "loss": 0.7172, "step": 400 }, { "epoch": 0.15, "learning_rate": 0.00012425, "loss": 0.73, "step": 500 }, { "epoch": 0.18, "learning_rate": 0.00014925, "loss": 0.7847, "step": 600 }, { "epoch": 0.21, "learning_rate": 0.00017424999999999998, "loss": 0.7678, "step": 700 }, { "epoch": 0.24, "learning_rate": 0.00019925000000000002, "loss": 0.794, "step": 800 }, { "epoch": 0.27, "learning_rate": 0.00022425, "loss": 0.8324, "step": 900 }, { "epoch": 0.3, "learning_rate": 0.00024925, "loss": 0.8312, "step": 1000 }, { "epoch": 0.33, "learning_rate": 0.00027425, "loss": 0.8452, "step": 1100 }, { "epoch": 0.36, "learning_rate": 0.00029925000000000004, "loss": 0.8678, "step": 1200 }, { "epoch": 0.4, "learning_rate": 0.00032425, "loss": 0.8898, "step": 1300 }, { "epoch": 0.43, "learning_rate": 0.00034925, "loss": 0.8944, "step": 1400 }, { "epoch": 0.46, "learning_rate": 0.00037425, "loss": 0.9126, "step": 1500 }, { "epoch": 0.49, "learning_rate": 0.00039925000000000003, "loss": 0.9184, "step": 1600 }, { "epoch": 0.52, "learning_rate": 0.00042425000000000004, "loss": 0.9372, "step": 1700 }, { "epoch": 0.55, "learning_rate": 0.00044925, "loss": 0.9781, "step": 1800 }, { "epoch": 0.58, "learning_rate": 0.00047425, "loss": 0.9808, "step": 1900 }, { "epoch": 0.61, "learning_rate": 0.00049925, "loss": 1.0286, "step": 2000 }, { "epoch": 0.03, "learning_rate": 0.0004984617875647668, "loss": 0.9408, "step": 2100 }, { "epoch": 0.06, "learning_rate": 0.0004968588082901555, "loss": 0.917, "step": 2200 }, { "epoch": 0.09, "learning_rate": 0.0004952396373056995, "loss": 0.9306, "step": 2300 }, { "epoch": 0.12, "learning_rate": 0.0004936204663212435, "loss": 0.9656, "step": 2400 }, { "epoch": 0.15, "learning_rate": 0.0004920012953367875, "loss": 0.9078, "step": 2500 }, { "epoch": 0.18, "learning_rate": 0.0004903821243523316, "loss": 0.8968, "step": 2600 }, { "epoch": 0.21, "learning_rate": 0.0004887629533678756, "loss": 0.8644, "step": 2700 }, { "epoch": 0.24, "learning_rate": 0.0004871437823834197, "loss": 0.9126, "step": 2800 }, { "epoch": 0.27, "learning_rate": 0.0004855246113989638, "loss": 0.8631, "step": 2900 }, { "epoch": 0.3, "learning_rate": 0.0004839054404145078, "loss": 0.8724, "step": 3000 }, { "epoch": 0.33, "learning_rate": 0.00048228626943005185, "loss": 0.8416, "step": 3100 }, { "epoch": 0.36, "learning_rate": 0.00048066709844559586, "loss": 0.8013, "step": 3200 }, { "epoch": 0.4, "learning_rate": 0.0004790479274611399, "loss": 0.7938, "step": 3300 }, { "epoch": 0.43, "learning_rate": 0.00047742875647668393, "loss": 0.8173, "step": 3400 }, { "epoch": 0.46, "learning_rate": 0.000475809585492228, "loss": 0.8158, "step": 3500 }, { "epoch": 0.49, "learning_rate": 0.000474190414507772, "loss": 0.7718, "step": 3600 }, { "epoch": 0.52, "learning_rate": 0.0004725712435233161, "loss": 0.7731, "step": 3700 }, { "epoch": 0.55, "learning_rate": 0.00047095207253886013, "loss": 0.7525, "step": 3800 }, { "epoch": 0.58, "learning_rate": 0.00046933290155440414, "loss": 0.7354, "step": 3900 }, { "epoch": 0.61, "learning_rate": 0.0004677137305699482, "loss": 0.7298, "step": 4000 }, { "epoch": 0.61, "eval_loss": 0.7294211983680725, "eval_runtime": 497.9031, "eval_samples_per_second": 0.532, "eval_steps_per_second": 0.034, "eval_wer": 62.55393958766182, "step": 4000 }, { "epoch": 0.64, "learning_rate": 0.0004660945595854922, "loss": 0.7686, "step": 4100 }, { "epoch": 0.67, "learning_rate": 0.0004644753886010363, "loss": 0.7231, "step": 4200 }, { "epoch": 0.7, "learning_rate": 0.0004628562176165803, "loss": 0.7244, "step": 4300 }, { "epoch": 0.73, "learning_rate": 0.00046123704663212435, "loss": 0.7147, "step": 4400 }, { "epoch": 0.76, "learning_rate": 0.00045963406735751296, "loss": 0.7183, "step": 4500 }, { "epoch": 0.79, "learning_rate": 0.00045801489637305697, "loss": 0.6829, "step": 4600 }, { "epoch": 0.82, "learning_rate": 0.00045641191709844557, "loss": 0.6598, "step": 4700 }, { "epoch": 0.85, "learning_rate": 0.00045479274611398963, "loss": 0.6828, "step": 4800 }, { "epoch": 0.88, "learning_rate": 0.00045317357512953364, "loss": 0.682, "step": 4900 }, { "epoch": 0.91, "learning_rate": 0.00045155440414507776, "loss": 0.6589, "step": 5000 }, { "epoch": 0.94, "learning_rate": 0.00044993523316062177, "loss": 0.6531, "step": 5100 }, { "epoch": 0.97, "learning_rate": 0.00044831606217616584, "loss": 0.6384, "step": 5200 }, { "epoch": 1.0, "learning_rate": 0.00044669689119170985, "loss": 0.6346, "step": 5300 }, { "epoch": 1.03, "learning_rate": 0.0004450777202072539, "loss": 0.558, "step": 5400 }, { "epoch": 1.06, "learning_rate": 0.0004434585492227979, "loss": 0.5664, "step": 5500 }, { "epoch": 1.09, "learning_rate": 0.000441839378238342, "loss": 0.5653, "step": 5600 }, { "epoch": 1.13, "learning_rate": 0.000440220207253886, "loss": 0.5293, "step": 5700 }, { "epoch": 1.16, "learning_rate": 0.00043860103626943006, "loss": 0.5908, "step": 5800 }, { "epoch": 1.19, "learning_rate": 0.00043698186528497407, "loss": 0.5657, "step": 5900 }, { "epoch": 1.22, "learning_rate": 0.0004353626943005182, "loss": 0.5672, "step": 6000 }, { "epoch": 1.25, "learning_rate": 0.0004337435233160622, "loss": 0.5844, "step": 6100 }, { "epoch": 1.28, "learning_rate": 0.00043212435233160626, "loss": 0.549, "step": 6200 }, { "epoch": 1.31, "learning_rate": 0.00043050518134715027, "loss": 0.556, "step": 6300 }, { "epoch": 1.34, "learning_rate": 0.00042888601036269433, "loss": 0.5631, "step": 6400 }, { "epoch": 1.37, "learning_rate": 0.00042726683937823834, "loss": 0.5604, "step": 6500 }, { "epoch": 1.4, "learning_rate": 0.0004256476683937824, "loss": 0.5169, "step": 6600 }, { "epoch": 1.43, "learning_rate": 0.0004240284974093264, "loss": 0.5497, "step": 6700 }, { "epoch": 1.46, "learning_rate": 0.0004224093264248705, "loss": 0.5315, "step": 6800 }, { "epoch": 1.49, "learning_rate": 0.0004207901554404145, "loss": 0.5453, "step": 6900 }, { "epoch": 1.52, "learning_rate": 0.0004191709844559586, "loss": 0.5372, "step": 7000 }, { "epoch": 1.55, "learning_rate": 0.0004175518134715026, "loss": 0.5304, "step": 7100 }, { "epoch": 1.58, "learning_rate": 0.0004159326424870467, "loss": 0.5226, "step": 7200 }, { "epoch": 1.61, "learning_rate": 0.0004143134715025907, "loss": 0.5347, "step": 7300 }, { "epoch": 1.64, "learning_rate": 0.0004126943005181347, "loss": 0.5393, "step": 7400 }, { "epoch": 1.67, "learning_rate": 0.00041107512953367876, "loss": 0.505, "step": 7500 }, { "epoch": 1.7, "learning_rate": 0.00040945595854922277, "loss": 0.5213, "step": 7600 }, { "epoch": 1.73, "learning_rate": 0.00040783678756476684, "loss": 0.5188, "step": 7700 }, { "epoch": 1.76, "learning_rate": 0.00040621761658031085, "loss": 0.5056, "step": 7800 }, { "epoch": 1.79, "learning_rate": 0.00040459844559585496, "loss": 0.5106, "step": 7900 }, { "epoch": 1.82, "learning_rate": 0.000402979274611399, "loss": 0.5348, "step": 8000 }, { "epoch": 1.82, "eval_loss": 0.5500301718711853, "eval_runtime": 455.7092, "eval_samples_per_second": 0.582, "eval_steps_per_second": 0.037, "eval_wer": 46.73166054019498, "step": 8000 }, { "epoch": 1.86, "learning_rate": 0.00040136010362694304, "loss": 0.5169, "step": 8100 }, { "epoch": 1.89, "learning_rate": 0.00039974093264248705, "loss": 0.5017, "step": 8200 }, { "epoch": 1.92, "learning_rate": 0.0003981217616580311, "loss": 0.5438, "step": 8300 }, { "epoch": 1.95, "learning_rate": 0.0003965025906735751, "loss": 0.4846, "step": 8400 }, { "epoch": 1.98, "learning_rate": 0.0003948834196891192, "loss": 0.5027, "step": 8500 }, { "epoch": 2.01, "learning_rate": 0.0003932642487046632, "loss": 0.4913, "step": 8600 }, { "epoch": 2.04, "learning_rate": 0.0003916612694300518, "loss": 0.393, "step": 8700 }, { "epoch": 2.07, "learning_rate": 0.00039004209844559586, "loss": 0.3817, "step": 8800 }, { "epoch": 2.1, "learning_rate": 0.00038842292746113987, "loss": 0.3933, "step": 8900 }, { "epoch": 2.13, "learning_rate": 0.00038680375647668394, "loss": 0.4169, "step": 9000 }, { "epoch": 2.16, "learning_rate": 0.000385184585492228, "loss": 0.4097, "step": 9100 }, { "epoch": 2.19, "learning_rate": 0.00038356541450777206, "loss": 0.4028, "step": 9200 }, { "epoch": 2.22, "learning_rate": 0.0003819462435233161, "loss": 0.409, "step": 9300 }, { "epoch": 2.25, "learning_rate": 0.00038032707253886014, "loss": 0.4108, "step": 9400 }, { "epoch": 2.28, "learning_rate": 0.00037870790155440415, "loss": 0.4117, "step": 9500 }, { "epoch": 2.31, "learning_rate": 0.0003770887305699482, "loss": 0.3998, "step": 9600 }, { "epoch": 2.34, "learning_rate": 0.0003754695595854922, "loss": 0.4065, "step": 9700 }, { "epoch": 2.37, "learning_rate": 0.0003738503886010363, "loss": 0.4243, "step": 9800 }, { "epoch": 2.4, "learning_rate": 0.0003722312176165803, "loss": 0.4074, "step": 9900 }, { "epoch": 2.43, "learning_rate": 0.00037061204663212436, "loss": 0.4109, "step": 10000 }, { "epoch": 2.46, "learning_rate": 0.0003689928756476684, "loss": 0.4029, "step": 10100 }, { "epoch": 2.49, "learning_rate": 0.0003673737046632125, "loss": 0.388, "step": 10200 }, { "epoch": 2.52, "learning_rate": 0.0003657545336787565, "loss": 0.4309, "step": 10300 }, { "epoch": 2.55, "learning_rate": 0.0003641353626943005, "loss": 0.4171, "step": 10400 }, { "epoch": 2.59, "learning_rate": 0.00036251619170984457, "loss": 0.4115, "step": 10500 }, { "epoch": 2.62, "learning_rate": 0.0003608970207253886, "loss": 0.4077, "step": 10600 }, { "epoch": 2.65, "learning_rate": 0.00035927784974093264, "loss": 0.3923, "step": 10700 }, { "epoch": 2.68, "learning_rate": 0.00035765867875647665, "loss": 0.4002, "step": 10800 }, { "epoch": 2.71, "learning_rate": 0.0003560395077720207, "loss": 0.4078, "step": 10900 }, { "epoch": 2.74, "learning_rate": 0.0003544203367875648, "loss": 0.4, "step": 11000 }, { "epoch": 2.77, "learning_rate": 0.00035280116580310884, "loss": 0.401, "step": 11100 }, { "epoch": 2.8, "learning_rate": 0.00035118199481865285, "loss": 0.3729, "step": 11200 }, { "epoch": 2.83, "learning_rate": 0.0003495628238341969, "loss": 0.402, "step": 11300 }, { "epoch": 2.86, "learning_rate": 0.00034794365284974093, "loss": 0.3933, "step": 11400 }, { "epoch": 2.89, "learning_rate": 0.000346324481865285, "loss": 0.397, "step": 11500 }, { "epoch": 2.92, "learning_rate": 0.000344705310880829, "loss": 0.3834, "step": 11600 }, { "epoch": 2.95, "learning_rate": 0.00034308613989637307, "loss": 0.3744, "step": 11700 }, { "epoch": 2.98, "learning_rate": 0.0003414669689119171, "loss": 0.3905, "step": 11800 }, { "epoch": 3.01, "learning_rate": 0.00033984779792746114, "loss": 0.3626, "step": 11900 }, { "epoch": 3.04, "learning_rate": 0.0003382286269430052, "loss": 0.2828, "step": 12000 }, { "epoch": 3.04, "eval_loss": 0.5047640800476074, "eval_runtime": 460.4394, "eval_samples_per_second": 0.576, "eval_steps_per_second": 0.037, "eval_wer": 34.3455330030366, "step": 12000 }, { "epoch": 3.07, "learning_rate": 0.00033660945595854927, "loss": 0.2933, "step": 12100 }, { "epoch": 3.1, "learning_rate": 0.0003349902849740933, "loss": 0.2943, "step": 12200 }, { "epoch": 3.13, "learning_rate": 0.00033337111398963734, "loss": 0.2911, "step": 12300 }, { "epoch": 3.16, "learning_rate": 0.00033175194300518135, "loss": 0.2928, "step": 12400 }, { "epoch": 3.19, "learning_rate": 0.0003301327720207254, "loss": 0.2983, "step": 12500 }, { "epoch": 3.22, "learning_rate": 0.0003285136010362694, "loss": 0.2919, "step": 12600 }, { "epoch": 3.25, "learning_rate": 0.0003268944300518135, "loss": 0.3078, "step": 12700 }, { "epoch": 3.28, "learning_rate": 0.0003252752590673575, "loss": 0.3201, "step": 12800 }, { "epoch": 3.32, "learning_rate": 0.00032365608808290156, "loss": 0.3012, "step": 12900 }, { "epoch": 3.35, "learning_rate": 0.0003220369170984456, "loss": 0.312, "step": 13000 }, { "epoch": 3.38, "learning_rate": 0.00032041774611398963, "loss": 0.3063, "step": 13100 }, { "epoch": 3.41, "learning_rate": 0.0003187985751295337, "loss": 0.3139, "step": 13200 }, { "epoch": 3.44, "learning_rate": 0.0003171794041450777, "loss": 0.2951, "step": 13300 }, { "epoch": 3.47, "learning_rate": 0.00031556023316062177, "loss": 0.2936, "step": 13400 }, { "epoch": 3.5, "learning_rate": 0.0003139410621761658, "loss": 0.3005, "step": 13500 }, { "epoch": 3.53, "learning_rate": 0.00031232189119170985, "loss": 0.3074, "step": 13600 }, { "epoch": 3.56, "learning_rate": 0.00031070272020725386, "loss": 0.3124, "step": 13700 }, { "epoch": 3.59, "learning_rate": 0.0003090835492227979, "loss": 0.3213, "step": 13800 }, { "epoch": 3.62, "learning_rate": 0.0003074805699481865, "loss": 0.305, "step": 13900 }, { "epoch": 3.65, "learning_rate": 0.00030586139896373053, "loss": 0.3053, "step": 14000 }, { "epoch": 3.68, "learning_rate": 0.00030424222797927465, "loss": 0.2989, "step": 14100 }, { "epoch": 3.71, "learning_rate": 0.00030262305699481866, "loss": 0.3046, "step": 14200 }, { "epoch": 3.74, "learning_rate": 0.0003010038860103627, "loss": 0.2977, "step": 14300 }, { "epoch": 3.77, "learning_rate": 0.00029938471502590673, "loss": 0.3002, "step": 14400 }, { "epoch": 3.8, "learning_rate": 0.0002977655440414508, "loss": 0.318, "step": 14500 }, { "epoch": 3.83, "learning_rate": 0.0002961463730569948, "loss": 0.3133, "step": 14600 }, { "epoch": 3.86, "learning_rate": 0.00029452720207253887, "loss": 0.3196, "step": 14700 }, { "epoch": 3.89, "learning_rate": 0.0002929080310880829, "loss": 0.3131, "step": 14800 }, { "epoch": 3.92, "learning_rate": 0.00029128886010362695, "loss": 0.3102, "step": 14900 }, { "epoch": 3.95, "learning_rate": 0.00028966968911917095, "loss": 0.3007, "step": 15000 }, { "epoch": 3.98, "learning_rate": 0.0002880505181347151, "loss": 0.2975, "step": 15100 }, { "epoch": 4.01, "learning_rate": 0.0002864313471502591, "loss": 0.2461, "step": 15200 }, { "epoch": 4.05, "learning_rate": 0.00028481217616580315, "loss": 0.1983, "step": 15300 }, { "epoch": 4.08, "learning_rate": 0.00028319300518134716, "loss": 0.2076, "step": 15400 }, { "epoch": 4.11, "learning_rate": 0.0002815738341968912, "loss": 0.2054, "step": 15500 }, { "epoch": 4.14, "learning_rate": 0.00027995466321243523, "loss": 0.214, "step": 15600 }, { "epoch": 4.17, "learning_rate": 0.0002783354922279793, "loss": 0.2259, "step": 15700 }, { "epoch": 4.2, "learning_rate": 0.0002767163212435233, "loss": 0.2121, "step": 15800 }, { "epoch": 4.23, "learning_rate": 0.00027509715025906737, "loss": 0.2172, "step": 15900 }, { "epoch": 4.26, "learning_rate": 0.0002734779792746114, "loss": 0.2182, "step": 16000 }, { "epoch": 4.26, "eval_loss": 0.47591984272003174, "eval_runtime": 430.927, "eval_samples_per_second": 0.615, "eval_steps_per_second": 0.039, "eval_wer": 27.457247882371743, "step": 16000 }, { "epoch": 4.29, "learning_rate": 0.0002718588082901555, "loss": 0.213, "step": 16100 }, { "epoch": 4.32, "learning_rate": 0.0002702396373056995, "loss": 0.2187, "step": 16200 }, { "epoch": 4.35, "learning_rate": 0.0002686366580310881, "loss": 0.2357, "step": 16300 }, { "epoch": 4.38, "learning_rate": 0.0002670174870466322, "loss": 0.2168, "step": 16400 }, { "epoch": 4.41, "learning_rate": 0.0002653983160621762, "loss": 0.2099, "step": 16500 }, { "epoch": 4.44, "learning_rate": 0.0002637791450777202, "loss": 0.2252, "step": 16600 }, { "epoch": 4.47, "learning_rate": 0.00026215997409326426, "loss": 0.2218, "step": 16700 }, { "epoch": 4.5, "learning_rate": 0.00026054080310880827, "loss": 0.2186, "step": 16800 }, { "epoch": 4.53, "learning_rate": 0.00025892163212435233, "loss": 0.2165, "step": 16900 }, { "epoch": 4.56, "learning_rate": 0.00025730246113989634, "loss": 0.2275, "step": 17000 }, { "epoch": 4.59, "learning_rate": 0.0002556832901554404, "loss": 0.2414, "step": 17100 }, { "epoch": 4.62, "learning_rate": 0.00025406411917098447, "loss": 0.2358, "step": 17200 }, { "epoch": 4.65, "learning_rate": 0.00025244494818652853, "loss": 0.226, "step": 17300 }, { "epoch": 4.68, "learning_rate": 0.00025082577720207254, "loss": 0.2181, "step": 17400 }, { "epoch": 4.71, "learning_rate": 0.0002492066062176166, "loss": 0.226, "step": 17500 }, { "epoch": 4.74, "learning_rate": 0.0002475874352331606, "loss": 0.2275, "step": 17600 }, { "epoch": 4.77, "learning_rate": 0.0002459682642487047, "loss": 0.2238, "step": 17700 }, { "epoch": 4.81, "learning_rate": 0.0002443490932642487, "loss": 0.2139, "step": 17800 }, { "epoch": 4.84, "learning_rate": 0.00024272992227979275, "loss": 0.2268, "step": 17900 }, { "epoch": 4.87, "learning_rate": 0.0002411107512953368, "loss": 0.2272, "step": 18000 }, { "epoch": 4.9, "learning_rate": 0.00023949158031088083, "loss": 0.2203, "step": 18100 }, { "epoch": 4.93, "learning_rate": 0.00023787240932642486, "loss": 0.2316, "step": 18200 }, { "epoch": 4.96, "learning_rate": 0.0002362532383419689, "loss": 0.2238, "step": 18300 }, { "epoch": 4.99, "learning_rate": 0.00023463406735751296, "loss": 0.2134, "step": 18400 }, { "epoch": 5.02, "learning_rate": 0.000233014896373057, "loss": 0.1554, "step": 18500 }, { "epoch": 5.05, "learning_rate": 0.00023139572538860104, "loss": 0.135, "step": 18600 }, { "epoch": 5.08, "learning_rate": 0.00022979274611398964, "loss": 0.1328, "step": 18700 }, { "epoch": 5.11, "learning_rate": 0.00022817357512953368, "loss": 0.145, "step": 18800 }, { "epoch": 5.14, "learning_rate": 0.00022655440414507771, "loss": 0.1353, "step": 18900 }, { "epoch": 5.17, "learning_rate": 0.00022493523316062175, "loss": 0.146, "step": 19000 }, { "epoch": 5.2, "learning_rate": 0.00022331606217616581, "loss": 0.1508, "step": 19100 }, { "epoch": 5.23, "learning_rate": 0.00022169689119170985, "loss": 0.1587, "step": 19200 }, { "epoch": 5.26, "learning_rate": 0.0002200777202072539, "loss": 0.1504, "step": 19300 }, { "epoch": 5.29, "learning_rate": 0.00021845854922279793, "loss": 0.15, "step": 19400 }, { "epoch": 5.32, "learning_rate": 0.00021683937823834196, "loss": 0.1537, "step": 19500 }, { "epoch": 5.35, "learning_rate": 0.00021522020725388603, "loss": 0.1467, "step": 19600 }, { "epoch": 5.38, "learning_rate": 0.00021360103626943006, "loss": 0.155, "step": 19700 }, { "epoch": 5.41, "learning_rate": 0.0002119818652849741, "loss": 0.148, "step": 19800 }, { "epoch": 5.44, "learning_rate": 0.00021036269430051814, "loss": 0.1419, "step": 19900 }, { "epoch": 5.47, "learning_rate": 0.00020874352331606217, "loss": 0.154, "step": 20000 }, { "epoch": 5.47, "eval_loss": 0.5057598948478699, "eval_runtime": 487.4254, "eval_samples_per_second": 0.544, "eval_steps_per_second": 0.035, "eval_wer": 25.731181077193543, "step": 20000 }, { "epoch": 5.5, "learning_rate": 0.00020712435233160624, "loss": 0.1565, "step": 20100 }, { "epoch": 5.54, "learning_rate": 0.00020550518134715027, "loss": 0.1496, "step": 20200 }, { "epoch": 5.57, "learning_rate": 0.0002038860103626943, "loss": 0.1639, "step": 20300 }, { "epoch": 5.6, "learning_rate": 0.00020226683937823835, "loss": 0.1572, "step": 20400 }, { "epoch": 5.63, "learning_rate": 0.0002006476683937824, "loss": 0.1423, "step": 20500 }, { "epoch": 5.66, "learning_rate": 0.00019902849740932645, "loss": 0.1494, "step": 20600 }, { "epoch": 5.69, "learning_rate": 0.00019740932642487048, "loss": 0.1487, "step": 20700 }, { "epoch": 5.72, "learning_rate": 0.00019579015544041452, "loss": 0.1569, "step": 20800 }, { "epoch": 5.75, "learning_rate": 0.00019417098445595853, "loss": 0.1472, "step": 20900 }, { "epoch": 5.78, "learning_rate": 0.0001925518134715026, "loss": 0.1432, "step": 21000 }, { "epoch": 5.81, "learning_rate": 0.00019093264248704663, "loss": 0.1551, "step": 21100 }, { "epoch": 5.84, "learning_rate": 0.00018931347150259067, "loss": 0.1505, "step": 21200 }, { "epoch": 5.87, "learning_rate": 0.0001876943005181347, "loss": 0.1487, "step": 21300 }, { "epoch": 5.9, "learning_rate": 0.00018607512953367874, "loss": 0.1602, "step": 21400 }, { "epoch": 5.93, "learning_rate": 0.0001844559585492228, "loss": 0.147, "step": 21500 }, { "epoch": 5.96, "learning_rate": 0.00018283678756476684, "loss": 0.1437, "step": 21600 }, { "epoch": 5.99, "learning_rate": 0.00018121761658031088, "loss": 0.1611, "step": 21700 }, { "epoch": 6.02, "learning_rate": 0.00017959844559585492, "loss": 0.0933, "step": 21800 }, { "epoch": 6.05, "learning_rate": 0.00017797927461139895, "loss": 0.0719, "step": 21900 }, { "epoch": 6.08, "learning_rate": 0.00017636010362694302, "loss": 0.0872, "step": 22000 }, { "epoch": 6.11, "learning_rate": 0.0001747571243523316, "loss": 0.0816, "step": 22100 }, { "epoch": 6.14, "learning_rate": 0.00017313795336787566, "loss": 0.0808, "step": 22200 }, { "epoch": 6.17, "learning_rate": 0.0001715187823834197, "loss": 0.0817, "step": 22300 }, { "epoch": 6.2, "learning_rate": 0.00016989961139896373, "loss": 0.0832, "step": 22400 }, { "epoch": 6.23, "learning_rate": 0.00016828044041450777, "loss": 0.0941, "step": 22500 }, { "epoch": 6.27, "learning_rate": 0.0001666612694300518, "loss": 0.0931, "step": 22600 }, { "epoch": 6.3, "learning_rate": 0.00016504209844559587, "loss": 0.0843, "step": 22700 }, { "epoch": 6.33, "learning_rate": 0.00016343911917098445, "loss": 0.0846, "step": 22800 }, { "epoch": 6.36, "learning_rate": 0.0001618199481865285, "loss": 0.0942, "step": 22900 }, { "epoch": 6.39, "learning_rate": 0.00016020077720207255, "loss": 0.0901, "step": 23000 }, { "epoch": 6.42, "learning_rate": 0.00015858160621761658, "loss": 0.0882, "step": 23100 }, { "epoch": 6.45, "learning_rate": 0.00015696243523316062, "loss": 0.0854, "step": 23200 }, { "epoch": 6.48, "learning_rate": 0.00015534326424870468, "loss": 0.0908, "step": 23300 }, { "epoch": 6.51, "learning_rate": 0.00015372409326424872, "loss": 0.0853, "step": 23400 }, { "epoch": 6.54, "learning_rate": 0.00015210492227979276, "loss": 0.088, "step": 23500 }, { "epoch": 6.57, "learning_rate": 0.0001504857512953368, "loss": 0.0928, "step": 23600 }, { "epoch": 6.6, "learning_rate": 0.00014886658031088083, "loss": 0.0987, "step": 23700 }, { "epoch": 6.63, "learning_rate": 0.0001472474093264249, "loss": 0.0913, "step": 23800 }, { "epoch": 6.66, "learning_rate": 0.00014562823834196893, "loss": 0.0932, "step": 23900 }, { "epoch": 6.69, "learning_rate": 0.00014400906735751297, "loss": 0.0941, "step": 24000 }, { "epoch": 6.69, "eval_loss": 0.5185205340385437, "eval_runtime": 511.8024, "eval_samples_per_second": 0.518, "eval_steps_per_second": 0.033, "eval_wer": 29.47099248841298, "step": 24000 }, { "epoch": 6.72, "learning_rate": 0.000142389896373057, "loss": 0.0883, "step": 24100 }, { "epoch": 6.75, "learning_rate": 0.00014077072538860102, "loss": 0.0963, "step": 24200 }, { "epoch": 6.78, "learning_rate": 0.00013915155440414508, "loss": 0.0915, "step": 24300 }, { "epoch": 6.81, "learning_rate": 0.00013753238341968912, "loss": 0.098, "step": 24400 }, { "epoch": 6.84, "learning_rate": 0.00013591321243523315, "loss": 0.0911, "step": 24500 }, { "epoch": 6.87, "learning_rate": 0.0001342940414507772, "loss": 0.0931, "step": 24600 }, { "epoch": 6.9, "learning_rate": 0.00013267487046632123, "loss": 0.0873, "step": 24700 }, { "epoch": 6.93, "learning_rate": 0.0001310556994818653, "loss": 0.0932, "step": 24800 }, { "epoch": 6.96, "learning_rate": 0.00012943652849740933, "loss": 0.0887, "step": 24900 }, { "epoch": 7.0, "learning_rate": 0.00012781735751295336, "loss": 0.0974, "step": 25000 }, { "epoch": 7.03, "learning_rate": 0.0001261981865284974, "loss": 0.0446, "step": 25100 }, { "epoch": 7.06, "learning_rate": 0.00012457901554404146, "loss": 0.0377, "step": 25200 }, { "epoch": 7.09, "learning_rate": 0.0001229598445595855, "loss": 0.038, "step": 25300 }, { "epoch": 7.12, "learning_rate": 0.00012134067357512954, "loss": 0.0402, "step": 25400 }, { "epoch": 7.15, "learning_rate": 0.00011972150259067357, "loss": 0.0425, "step": 25500 }, { "epoch": 7.18, "learning_rate": 0.00011810233160621763, "loss": 0.0398, "step": 25600 }, { "epoch": 7.21, "learning_rate": 0.00011648316062176166, "loss": 0.0441, "step": 25700 }, { "epoch": 7.24, "learning_rate": 0.0001148639896373057, "loss": 0.0439, "step": 25800 }, { "epoch": 7.27, "learning_rate": 0.00011324481865284975, "loss": 0.0412, "step": 25900 }, { "epoch": 7.3, "learning_rate": 0.00011162564766839379, "loss": 0.0435, "step": 26000 }, { "epoch": 7.33, "learning_rate": 0.00011000647668393784, "loss": 0.0462, "step": 26100 }, { "epoch": 7.36, "learning_rate": 0.00010838730569948187, "loss": 0.0414, "step": 26200 }, { "epoch": 7.39, "learning_rate": 0.0001067681347150259, "loss": 0.0432, "step": 26300 }, { "epoch": 7.42, "learning_rate": 0.00010514896373056995, "loss": 0.0428, "step": 26400 }, { "epoch": 7.45, "learning_rate": 0.00010352979274611398, "loss": 0.0456, "step": 26500 }, { "epoch": 7.48, "learning_rate": 0.00010191062176165803, "loss": 0.046, "step": 26600 }, { "epoch": 7.51, "learning_rate": 0.00010029145077720207, "loss": 0.0408, "step": 26700 }, { "epoch": 7.54, "learning_rate": 9.867227979274612e-05, "loss": 0.0437, "step": 26800 }, { "epoch": 7.57, "learning_rate": 9.705310880829016e-05, "loss": 0.0468, "step": 26900 }, { "epoch": 7.6, "learning_rate": 9.54339378238342e-05, "loss": 0.0402, "step": 27000 }, { "epoch": 7.63, "learning_rate": 9.381476683937824e-05, "loss": 0.0416, "step": 27100 }, { "epoch": 7.66, "learning_rate": 9.219559585492228e-05, "loss": 0.0431, "step": 27200 }, { "epoch": 7.69, "learning_rate": 9.057642487046633e-05, "loss": 0.0426, "step": 27300 }, { "epoch": 7.73, "learning_rate": 8.895725388601037e-05, "loss": 0.0459, "step": 27400 }, { "epoch": 7.76, "learning_rate": 8.73380829015544e-05, "loss": 0.0454, "step": 27500 }, { "epoch": 7.79, "learning_rate": 8.571891191709846e-05, "loss": 0.0436, "step": 27600 }, { "epoch": 7.82, "learning_rate": 8.409974093264248e-05, "loss": 0.043, "step": 27700 }, { "epoch": 7.85, "learning_rate": 8.248056994818653e-05, "loss": 0.0464, "step": 27800 }, { "epoch": 7.88, "learning_rate": 8.086139896373057e-05, "loss": 0.0426, "step": 27900 }, { "epoch": 7.91, "learning_rate": 7.924222797927462e-05, "loss": 0.0379, "step": 28000 }, { "epoch": 7.91, "eval_loss": 0.6318183541297913, "eval_runtime": 486.5478, "eval_samples_per_second": 0.545, "eval_steps_per_second": 0.035, "eval_wer": 26.897874380693622, "step": 28000 }, { "epoch": 7.94, "learning_rate": 7.762305699481865e-05, "loss": 0.038, "step": 28100 }, { "epoch": 7.97, "learning_rate": 7.600388601036269e-05, "loss": 0.0413, "step": 28200 }, { "epoch": 8.0, "learning_rate": 7.438471502590674e-05, "loss": 0.043, "step": 28300 }, { "epoch": 8.03, "learning_rate": 7.276554404145078e-05, "loss": 0.0169, "step": 28400 }, { "epoch": 8.06, "learning_rate": 7.114637305699483e-05, "loss": 0.0178, "step": 28500 }, { "epoch": 8.09, "learning_rate": 6.952720207253886e-05, "loss": 0.0138, "step": 28600 }, { "epoch": 8.12, "learning_rate": 6.79080310880829e-05, "loss": 0.0134, "step": 28700 }, { "epoch": 8.15, "learning_rate": 6.628886010362695e-05, "loss": 0.0131, "step": 28800 }, { "epoch": 8.18, "learning_rate": 6.468588082901554e-05, "loss": 0.0165, "step": 28900 }, { "epoch": 8.21, "learning_rate": 6.306670984455959e-05, "loss": 0.0159, "step": 29000 }, { "epoch": 8.24, "learning_rate": 6.144753886010363e-05, "loss": 0.0135, "step": 29100 }, { "epoch": 8.27, "learning_rate": 5.9828367875647666e-05, "loss": 0.0164, "step": 29200 }, { "epoch": 8.3, "learning_rate": 5.820919689119171e-05, "loss": 0.0138, "step": 29300 }, { "epoch": 8.33, "learning_rate": 5.659002590673575e-05, "loss": 0.0147, "step": 29400 }, { "epoch": 8.36, "learning_rate": 5.498704663212435e-05, "loss": 0.015, "step": 29500 }, { "epoch": 8.39, "learning_rate": 5.3367875647668394e-05, "loss": 0.0144, "step": 29600 }, { "epoch": 8.42, "learning_rate": 5.174870466321244e-05, "loss": 0.0171, "step": 29700 }, { "epoch": 8.45, "learning_rate": 5.012953367875648e-05, "loss": 0.014, "step": 29800 }, { "epoch": 8.49, "learning_rate": 4.8510362694300525e-05, "loss": 0.0128, "step": 29900 }, { "epoch": 8.52, "learning_rate": 4.6891191709844555e-05, "loss": 0.017, "step": 30000 }, { "epoch": 8.55, "learning_rate": 4.52720207253886e-05, "loss": 0.014, "step": 30100 }, { "epoch": 8.58, "learning_rate": 4.365284974093264e-05, "loss": 0.0147, "step": 30200 }, { "epoch": 8.61, "learning_rate": 4.2033678756476686e-05, "loss": 0.0143, "step": 30300 }, { "epoch": 8.64, "learning_rate": 4.041450777202073e-05, "loss": 0.016, "step": 30400 }, { "epoch": 8.67, "learning_rate": 3.879533678756477e-05, "loss": 0.0146, "step": 30500 }, { "epoch": 8.7, "learning_rate": 3.717616580310881e-05, "loss": 0.0152, "step": 30600 }, { "epoch": 8.73, "learning_rate": 3.5556994818652846e-05, "loss": 0.013, "step": 30700 }, { "epoch": 8.76, "learning_rate": 3.393782383419689e-05, "loss": 0.013, "step": 30800 }, { "epoch": 8.79, "learning_rate": 3.2318652849740933e-05, "loss": 0.0106, "step": 30900 }, { "epoch": 8.82, "learning_rate": 3.069948186528497e-05, "loss": 0.015, "step": 31000 }, { "epoch": 8.85, "learning_rate": 2.9080310880829017e-05, "loss": 0.0116, "step": 31100 }, { "epoch": 8.88, "learning_rate": 2.7461139896373057e-05, "loss": 0.0136, "step": 31200 }, { "epoch": 8.91, "learning_rate": 2.5841968911917097e-05, "loss": 0.0121, "step": 31300 }, { "epoch": 8.94, "learning_rate": 2.422279792746114e-05, "loss": 0.0131, "step": 31400 }, { "epoch": 8.97, "learning_rate": 2.2603626943005185e-05, "loss": 0.0155, "step": 31500 }, { "epoch": 9.0, "learning_rate": 2.098445595854922e-05, "loss": 0.0111, "step": 31600 }, { "epoch": 9.03, "learning_rate": 1.9365284974093265e-05, "loss": 0.004, "step": 31700 }, { "epoch": 9.06, "learning_rate": 1.774611398963731e-05, "loss": 0.0034, "step": 31800 }, { "epoch": 9.09, "learning_rate": 1.6126943005181345e-05, "loss": 0.0041, "step": 31900 }, { "epoch": 9.12, "learning_rate": 1.4507772020725389e-05, "loss": 0.0039, "step": 32000 }, { "epoch": 9.12, "eval_loss": 0.7823485732078552, "eval_runtime": 510.0267, "eval_samples_per_second": 0.52, "eval_steps_per_second": 0.033, "eval_wer": 26.306536678919613, "step": 32000 }, { "epoch": 9.15, "learning_rate": 1.288860103626943e-05, "loss": 0.003, "step": 32100 }, { "epoch": 9.18, "learning_rate": 1.1269430051813473e-05, "loss": 0.0036, "step": 32200 }, { "epoch": 9.22, "learning_rate": 9.650259067357513e-06, "loss": 0.0043, "step": 32300 }, { "epoch": 9.25, "learning_rate": 8.031088082901555e-06, "loss": 0.0044, "step": 32400 }, { "epoch": 9.28, "learning_rate": 6.4119170984455965e-06, "loss": 0.003, "step": 32500 }, { "epoch": 9.31, "learning_rate": 4.7927461139896375e-06, "loss": 0.0032, "step": 32600 }, { "epoch": 9.34, "learning_rate": 3.1735751295336785e-06, "loss": 0.0032, "step": 32700 }, { "epoch": 9.37, "learning_rate": 1.5544041450777201e-06, "loss": 0.003, "step": 32800 }, { "epoch": 9.39, "step": 32880, "total_flos": 3.411337442033664e+19, "train_loss": 0.25426562409440095, "train_runtime": 60905.8756, "train_samples_per_second": 8.635, "train_steps_per_second": 0.54 } ], "max_steps": 32880, "num_train_epochs": 10, "total_flos": 3.411337442033664e+19, "trial_name": null, "trial_params": null }