{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "global_step": 3870, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.78, "eval_loss": 4.564502716064453, "eval_runtime": 179.128, "eval_samples_per_second": 20.142, "eval_steps_per_second": 2.518, "eval_wer": 1.0, "step": 100 }, { "epoch": 1.55, "eval_loss": 2.9015955924987793, "eval_runtime": 175.3683, "eval_samples_per_second": 20.574, "eval_steps_per_second": 2.572, "eval_wer": 1.0, "step": 200 }, { "epoch": 2.33, "eval_loss": 2.266641616821289, "eval_runtime": 174.1906, "eval_samples_per_second": 20.713, "eval_steps_per_second": 2.589, "eval_wer": 1.098215533709174, "step": 300 }, { "epoch": 3.1, "eval_loss": 0.6078956127166748, "eval_runtime": 181.241, "eval_samples_per_second": 19.907, "eval_steps_per_second": 2.488, "eval_wer": 0.6375623165447515, "step": 400 }, { "epoch": 3.88, "learning_rate": 0.0002988, "loss": 3.2188, "step": 500 }, { "epoch": 3.88, "eval_loss": 0.49847397208213806, "eval_runtime": 175.9858, "eval_samples_per_second": 20.502, "eval_steps_per_second": 2.563, "eval_wer": 0.5007687648511392, "step": 500 }, { "epoch": 4.65, "eval_loss": 0.447698175907135, "eval_runtime": 175.6036, "eval_samples_per_second": 20.546, "eval_steps_per_second": 2.568, "eval_wer": 0.44690863346223736, "step": 600 }, { "epoch": 5.43, "eval_loss": 0.39529213309288025, "eval_runtime": 175.4478, "eval_samples_per_second": 20.565, "eval_steps_per_second": 2.571, "eval_wer": 0.3914643805618972, "step": 700 }, { "epoch": 6.2, "eval_loss": 0.43193477392196655, "eval_runtime": 176.2224, "eval_samples_per_second": 20.474, "eval_steps_per_second": 2.559, "eval_wer": 0.3921166658901365, "step": 800 }, { "epoch": 6.98, "eval_loss": 0.4170827865600586, "eval_runtime": 175.9073, "eval_samples_per_second": 20.511, "eval_steps_per_second": 2.564, "eval_wer": 0.3698457811116806, "step": 900 }, { "epoch": 7.75, "learning_rate": 0.00025566765578635014, "loss": 0.2193, "step": 1000 }, { "epoch": 7.75, "eval_loss": 0.3956995904445648, "eval_runtime": 178.52, "eval_samples_per_second": 20.211, "eval_steps_per_second": 2.526, "eval_wer": 0.36001490937893116, "step": 1000 }, { "epoch": 8.53, "eval_loss": 0.37300992012023926, "eval_runtime": 175.8463, "eval_samples_per_second": 20.518, "eval_steps_per_second": 2.565, "eval_wer": 0.34929879327214275, "step": 1100 }, { "epoch": 9.3, "eval_loss": 0.3779752850532532, "eval_runtime": 176.2153, "eval_samples_per_second": 20.475, "eval_steps_per_second": 2.559, "eval_wer": 0.3348087406233984, "step": 1200 }, { "epoch": 10.08, "eval_loss": 0.41326919198036194, "eval_runtime": 173.7883, "eval_samples_per_second": 20.761, "eval_steps_per_second": 2.595, "eval_wer": 0.35680007454689466, "step": 1300 }, { "epoch": 10.85, "eval_loss": 0.3984449505805969, "eval_runtime": 175.4199, "eval_samples_per_second": 20.568, "eval_steps_per_second": 2.571, "eval_wer": 0.31929366817313515, "step": 1400 }, { "epoch": 11.63, "learning_rate": 0.00021115727002967357, "loss": 0.1129, "step": 1500 }, { "epoch": 11.63, "eval_loss": 0.38447898626327515, "eval_runtime": 175.2875, "eval_samples_per_second": 20.583, "eval_steps_per_second": 2.573, "eval_wer": 0.3174299958067372, "step": 1500 }, { "epoch": 12.4, "eval_loss": 0.3882218301296234, "eval_runtime": 175.3702, "eval_samples_per_second": 20.574, "eval_steps_per_second": 2.572, "eval_wer": 0.3162186087685785, "step": 1600 }, { "epoch": 13.18, "eval_loss": 0.39824405312538147, "eval_runtime": 175.3906, "eval_samples_per_second": 20.571, "eval_steps_per_second": 2.571, "eval_wer": 0.3008433117457951, "step": 1700 }, { "epoch": 13.95, "eval_loss": 0.3901589810848236, "eval_runtime": 177.0061, "eval_samples_per_second": 20.383, "eval_steps_per_second": 2.548, "eval_wer": 0.3198061780738946, "step": 1800 }, { "epoch": 14.73, "eval_loss": 0.4082184135913849, "eval_runtime": 175.0779, "eval_samples_per_second": 20.608, "eval_steps_per_second": 2.576, "eval_wer": 0.3237198900433304, "step": 1900 }, { "epoch": 15.5, "learning_rate": 0.000166646884272997, "loss": 0.0765, "step": 2000 }, { "epoch": 15.5, "eval_loss": 0.3732178211212158, "eval_runtime": 175.4721, "eval_samples_per_second": 20.562, "eval_steps_per_second": 2.57, "eval_wer": 0.3125844476541024, "step": 2000 }, { "epoch": 16.28, "eval_loss": 0.3892667293548584, "eval_runtime": 178.0744, "eval_samples_per_second": 20.261, "eval_steps_per_second": 2.533, "eval_wer": 0.3000978427992359, "step": 2100 }, { "epoch": 17.05, "eval_loss": 0.4168277978897095, "eval_runtime": 176.2583, "eval_samples_per_second": 20.47, "eval_steps_per_second": 2.559, "eval_wer": 0.308344593020547, "step": 2200 }, { "epoch": 17.83, "eval_loss": 0.4192778170108795, "eval_runtime": 172.3953, "eval_samples_per_second": 20.929, "eval_steps_per_second": 2.616, "eval_wer": 0.3044308810511112, "step": 2300 }, { "epoch": 18.6, "eval_loss": 0.40058156847953796, "eval_runtime": 174.3893, "eval_samples_per_second": 20.689, "eval_steps_per_second": 2.586, "eval_wer": 0.3013092298373946, "step": 2400 }, { "epoch": 19.38, "learning_rate": 0.00012213649851632047, "loss": 0.0588, "step": 2500 }, { "epoch": 19.38, "eval_loss": 0.38357821106910706, "eval_runtime": 180.4646, "eval_samples_per_second": 19.993, "eval_steps_per_second": 2.499, "eval_wer": 0.2892419512649676, "step": 2500 }, { "epoch": 20.16, "eval_loss": 0.3760845959186554, "eval_runtime": 191.865, "eval_samples_per_second": 18.805, "eval_steps_per_second": 2.351, "eval_wer": 0.2902669710664865, "step": 2600 }, { "epoch": 20.93, "eval_loss": 0.38948509097099304, "eval_runtime": 181.0859, "eval_samples_per_second": 19.924, "eval_steps_per_second": 2.491, "eval_wer": 0.29301588780692356, "step": 2700 }, { "epoch": 21.71, "eval_loss": 0.3884966969490051, "eval_runtime": 179.119, "eval_samples_per_second": 20.143, "eval_steps_per_second": 2.518, "eval_wer": 0.27913152867725854, "step": 2800 }, { "epoch": 22.48, "eval_loss": 0.3901614248752594, "eval_runtime": 182.8151, "eval_samples_per_second": 19.736, "eval_steps_per_second": 2.467, "eval_wer": 0.2891487676466477, "step": 2900 }, { "epoch": 23.26, "learning_rate": 7.762611275964391e-05, "loss": 0.0448, "step": 3000 }, { "epoch": 23.26, "eval_loss": 0.42003825306892395, "eval_runtime": 180.6554, "eval_samples_per_second": 19.972, "eval_steps_per_second": 2.496, "eval_wer": 0.2849089130130923, "step": 3000 }, { "epoch": 24.03, "eval_loss": 0.40127792954444885, "eval_runtime": 180.093, "eval_samples_per_second": 20.034, "eval_steps_per_second": 2.504, "eval_wer": 0.27987699762381774, "step": 3100 }, { "epoch": 24.81, "eval_loss": 0.4039434492588043, "eval_runtime": 182.9792, "eval_samples_per_second": 19.718, "eval_steps_per_second": 2.465, "eval_wer": 0.273121185295625, "step": 3200 }, { "epoch": 25.58, "eval_loss": 0.397048681974411, "eval_runtime": 186.4145, "eval_samples_per_second": 19.355, "eval_steps_per_second": 2.419, "eval_wer": 0.26473465964683407, "step": 3300 }, { "epoch": 26.36, "eval_loss": 0.4080738127231598, "eval_runtime": 183.3845, "eval_samples_per_second": 19.675, "eval_steps_per_second": 2.459, "eval_wer": 0.26902110608954943, "step": 3400 }, { "epoch": 27.13, "learning_rate": 3.311572700296736e-05, "loss": 0.0351, "step": 3500 }, { "epoch": 27.13, "eval_loss": 0.4090190827846527, "eval_runtime": 188.4549, "eval_samples_per_second": 19.145, "eval_steps_per_second": 2.393, "eval_wer": 0.26743698457811116, "step": 3500 }, { "epoch": 27.91, "eval_loss": 0.3952561318874359, "eval_runtime": 183.4934, "eval_samples_per_second": 19.663, "eval_steps_per_second": 2.458, "eval_wer": 0.26627218934911245, "step": 3600 }, { "epoch": 28.68, "eval_loss": 0.40437063574790955, "eval_runtime": 188.3196, "eval_samples_per_second": 19.159, "eval_steps_per_second": 2.395, "eval_wer": 0.26496761869263386, "step": 3700 }, { "epoch": 29.46, "eval_loss": 0.3968600630760193, "eval_runtime": 182.2466, "eval_samples_per_second": 19.797, "eval_steps_per_second": 2.475, "eval_wer": 0.26459488421935423, "step": 3800 }, { "epoch": 30.0, "step": 3870, "total_flos": 4.209827274605221e+19, "train_loss": 0.4894287838800317, "train_runtime": 22523.7187, "train_samples_per_second": 10.974, "train_steps_per_second": 0.172 } ], "max_steps": 3870, "num_train_epochs": 30, "total_flos": 4.209827274605221e+19, "trial_name": null, "trial_params": null }