{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6412722842118763, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03206361421059382, "grad_norm": 11.533838272094727, "learning_rate": 0.0002465, "loss": 4.6618, "step": 500 }, { "epoch": 0.03206361421059382, "eval_loss": 1.5996026992797852, "eval_runtime": 185.1067, "eval_samples_per_second": 37.935, "eval_steps_per_second": 0.594, "eval_wer": 0.9161163448889834, "step": 500 }, { "epoch": 0.06412722842118763, "grad_norm": 4.801280975341797, "learning_rate": 0.0002874574468085106, "loss": 1.0278, "step": 1000 }, { "epoch": 0.06412722842118763, "eval_loss": 1.1463252305984497, "eval_runtime": 184.8935, "eval_samples_per_second": 37.979, "eval_steps_per_second": 0.595, "eval_wer": 0.7792157748574922, "step": 1000 }, { "epoch": 0.09619084263178146, "grad_norm": 4.746730327606201, "learning_rate": 0.0002715, "loss": 0.8164, "step": 1500 }, { "epoch": 0.09619084263178146, "eval_loss": 1.0590689182281494, "eval_runtime": 185.3865, "eval_samples_per_second": 37.878, "eval_steps_per_second": 0.593, "eval_wer": 0.7363238948165668, "step": 1500 }, { "epoch": 0.12825445684237527, "grad_norm": 21.847057342529297, "learning_rate": 0.00025554255319148935, "loss": 0.7124, "step": 2000 }, { "epoch": 0.12825445684237527, "eval_loss": 0.9373884797096252, "eval_runtime": 186.5741, "eval_samples_per_second": 37.637, "eval_steps_per_second": 0.59, "eval_wer": 0.6622596632960842, "step": 2000 }, { "epoch": 0.16031807105296908, "grad_norm": 4.238761901855469, "learning_rate": 0.0002395851063829787, "loss": 0.6566, "step": 2500 }, { "epoch": 0.16031807105296908, "eval_loss": 0.8721805810928345, "eval_runtime": 187.0392, "eval_samples_per_second": 37.543, "eval_steps_per_second": 0.588, "eval_wer": 0.615168949893036, "step": 2500 }, { "epoch": 0.19238168526356292, "grad_norm": 5.65878963470459, "learning_rate": 0.00022362765957446805, "loss": 0.6101, "step": 3000 }, { "epoch": 0.19238168526356292, "eval_loss": 0.8194323182106018, "eval_runtime": 187.1888, "eval_samples_per_second": 37.513, "eval_steps_per_second": 0.588, "eval_wer": 0.5927264513214366, "step": 3000 }, { "epoch": 0.22444529947415673, "grad_norm": 6.294471740722656, "learning_rate": 0.00020767021276595744, "loss": 0.5777, "step": 3500 }, { "epoch": 0.22444529947415673, "eval_loss": 0.7799355387687683, "eval_runtime": 187.6349, "eval_samples_per_second": 37.424, "eval_steps_per_second": 0.586, "eval_wer": 0.5707490134070343, "step": 3500 }, { "epoch": 0.25650891368475054, "grad_norm": 3.848400115966797, "learning_rate": 0.0001917446808510638, "loss": 0.5431, "step": 4000 }, { "epoch": 0.25650891368475054, "eval_loss": 0.7453157901763916, "eval_runtime": 187.5991, "eval_samples_per_second": 37.431, "eval_steps_per_second": 0.586, "eval_wer": 0.550233194700966, "step": 4000 }, { "epoch": 0.2885725278953444, "grad_norm": 9.198270797729492, "learning_rate": 0.00017585106382978722, "loss": 0.512, "step": 4500 }, { "epoch": 0.2885725278953444, "eval_loss": 0.7209311127662659, "eval_runtime": 188.2502, "eval_samples_per_second": 37.301, "eval_steps_per_second": 0.584, "eval_wer": 0.5346735938558843, "step": 4500 }, { "epoch": 0.32063614210593816, "grad_norm": 7.854990482330322, "learning_rate": 0.00015989361702127658, "loss": 0.4953, "step": 5000 }, { "epoch": 0.32063614210593816, "eval_loss": 0.6942155957221985, "eval_runtime": 187.3133, "eval_samples_per_second": 37.488, "eval_steps_per_second": 0.587, "eval_wer": 0.5225022920846676, "step": 5000 }, { "epoch": 0.352699756316532, "grad_norm": 10.654298782348633, "learning_rate": 0.00014393617021276595, "loss": 0.4746, "step": 5500 }, { "epoch": 0.352699756316532, "eval_loss": 0.6680377721786499, "eval_runtime": 188.6546, "eval_samples_per_second": 37.221, "eval_steps_per_second": 0.583, "eval_wer": 0.495714798230112, "step": 5500 }, { "epoch": 0.38476337052712584, "grad_norm": 19.936861038208008, "learning_rate": 0.00012801063829787234, "loss": 0.4535, "step": 6000 }, { "epoch": 0.38476337052712584, "eval_loss": 0.6434958577156067, "eval_runtime": 188.0277, "eval_samples_per_second": 37.346, "eval_steps_per_second": 0.585, "eval_wer": 0.47308627539563375, "step": 6000 }, { "epoch": 0.4168269847377196, "grad_norm": 6.950649261474609, "learning_rate": 0.0001120531914893617, "loss": 0.4249, "step": 6500 }, { "epoch": 0.4168269847377196, "eval_loss": 0.6293027400970459, "eval_runtime": 187.8772, "eval_samples_per_second": 37.375, "eval_steps_per_second": 0.585, "eval_wer": 0.4816832538301067, "step": 6500 }, { "epoch": 0.44889059894831346, "grad_norm": 5.574142932891846, "learning_rate": 9.612765957446806e-05, "loss": 0.4065, "step": 7000 }, { "epoch": 0.44889059894831346, "eval_loss": 0.5997486710548401, "eval_runtime": 188.7633, "eval_samples_per_second": 37.2, "eval_steps_per_second": 0.583, "eval_wer": 0.44947448145736724, "step": 7000 }, { "epoch": 0.4809542131589073, "grad_norm": 5.602737903594971, "learning_rate": 8.017021276595744e-05, "loss": 0.393, "step": 7500 }, { "epoch": 0.4809542131589073, "eval_loss": 0.5802670121192932, "eval_runtime": 189.0166, "eval_samples_per_second": 37.15, "eval_steps_per_second": 0.582, "eval_wer": 0.44260487117819797, "step": 7500 }, { "epoch": 0.5130178273695011, "grad_norm": 15.019088745117188, "learning_rate": 6.424468085106383e-05, "loss": 0.3808, "step": 8000 }, { "epoch": 0.5130178273695011, "eval_loss": 0.5661880970001221, "eval_runtime": 189.1322, "eval_samples_per_second": 37.127, "eval_steps_per_second": 0.582, "eval_wer": 0.4299153589603901, "step": 8000 }, { "epoch": 0.5450814415800949, "grad_norm": 3.79109525680542, "learning_rate": 4.8287234042553194e-05, "loss": 0.3722, "step": 8500 }, { "epoch": 0.5450814415800949, "eval_loss": 0.553141176700592, "eval_runtime": 188.8506, "eval_samples_per_second": 37.183, "eval_steps_per_second": 0.582, "eval_wer": 0.4194182755550831, "step": 8500 }, { "epoch": 0.5771450557906888, "grad_norm": 12.16059398651123, "learning_rate": 3.232978723404255e-05, "loss": 0.3622, "step": 9000 }, { "epoch": 0.5771450557906888, "eval_loss": 0.5399113297462463, "eval_runtime": 188.6804, "eval_samples_per_second": 37.216, "eval_steps_per_second": 0.583, "eval_wer": 0.40727354867856336, "step": 9000 }, { "epoch": 0.6092086700012825, "grad_norm": 7.863190650939941, "learning_rate": 1.6372340425531912e-05, "loss": 0.3526, "step": 9500 }, { "epoch": 0.6092086700012825, "eval_loss": 0.5277913808822632, "eval_runtime": 188.1946, "eval_samples_per_second": 37.312, "eval_steps_per_second": 0.585, "eval_wer": 0.40279567892212226, "step": 9500 }, { "epoch": 0.6412722842118763, "grad_norm": 5.124056339263916, "learning_rate": 4.1489361702127654e-07, "loss": 0.3337, "step": 10000 }, { "epoch": 0.6412722842118763, "eval_loss": 0.5226185917854309, "eval_runtime": 188.4518, "eval_samples_per_second": 37.262, "eval_steps_per_second": 0.584, "eval_wer": 0.40009832711037885, "step": 10000 }, { "epoch": 0.6412722842118763, "step": 10000, "total_flos": 1.1393778193380235e+19, "train_loss": 0.7283544036865235, "train_runtime": 7737.7643, "train_samples_per_second": 10.339, "train_steps_per_second": 1.292 } ], "logging_steps": 500, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1393778193380235e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }