|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6412722842118763, |
|
"eval_steps": 500, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03206361421059382, |
|
"grad_norm": 11.533838272094727, |
|
"learning_rate": 0.0002465, |
|
"loss": 4.6618, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03206361421059382, |
|
"eval_loss": 1.5996026992797852, |
|
"eval_runtime": 185.1067, |
|
"eval_samples_per_second": 37.935, |
|
"eval_steps_per_second": 0.594, |
|
"eval_wer": 0.9161163448889834, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06412722842118763, |
|
"grad_norm": 4.801280975341797, |
|
"learning_rate": 0.0002874574468085106, |
|
"loss": 1.0278, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06412722842118763, |
|
"eval_loss": 1.1463252305984497, |
|
"eval_runtime": 184.8935, |
|
"eval_samples_per_second": 37.979, |
|
"eval_steps_per_second": 0.595, |
|
"eval_wer": 0.7792157748574922, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.09619084263178146, |
|
"grad_norm": 4.746730327606201, |
|
"learning_rate": 0.0002715, |
|
"loss": 0.8164, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09619084263178146, |
|
"eval_loss": 1.0590689182281494, |
|
"eval_runtime": 185.3865, |
|
"eval_samples_per_second": 37.878, |
|
"eval_steps_per_second": 0.593, |
|
"eval_wer": 0.7363238948165668, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.12825445684237527, |
|
"grad_norm": 21.847057342529297, |
|
"learning_rate": 0.00025554255319148935, |
|
"loss": 0.7124, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12825445684237527, |
|
"eval_loss": 0.9373884797096252, |
|
"eval_runtime": 186.5741, |
|
"eval_samples_per_second": 37.637, |
|
"eval_steps_per_second": 0.59, |
|
"eval_wer": 0.6622596632960842, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16031807105296908, |
|
"grad_norm": 4.238761901855469, |
|
"learning_rate": 0.0002395851063829787, |
|
"loss": 0.6566, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.16031807105296908, |
|
"eval_loss": 0.8721805810928345, |
|
"eval_runtime": 187.0392, |
|
"eval_samples_per_second": 37.543, |
|
"eval_steps_per_second": 0.588, |
|
"eval_wer": 0.615168949893036, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.19238168526356292, |
|
"grad_norm": 5.65878963470459, |
|
"learning_rate": 0.00022362765957446805, |
|
"loss": 0.6101, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.19238168526356292, |
|
"eval_loss": 0.8194323182106018, |
|
"eval_runtime": 187.1888, |
|
"eval_samples_per_second": 37.513, |
|
"eval_steps_per_second": 0.588, |
|
"eval_wer": 0.5927264513214366, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.22444529947415673, |
|
"grad_norm": 6.294471740722656, |
|
"learning_rate": 0.00020767021276595744, |
|
"loss": 0.5777, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.22444529947415673, |
|
"eval_loss": 0.7799355387687683, |
|
"eval_runtime": 187.6349, |
|
"eval_samples_per_second": 37.424, |
|
"eval_steps_per_second": 0.586, |
|
"eval_wer": 0.5707490134070343, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.25650891368475054, |
|
"grad_norm": 3.848400115966797, |
|
"learning_rate": 0.0001917446808510638, |
|
"loss": 0.5431, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.25650891368475054, |
|
"eval_loss": 0.7453157901763916, |
|
"eval_runtime": 187.5991, |
|
"eval_samples_per_second": 37.431, |
|
"eval_steps_per_second": 0.586, |
|
"eval_wer": 0.550233194700966, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.2885725278953444, |
|
"grad_norm": 9.198270797729492, |
|
"learning_rate": 0.00017585106382978722, |
|
"loss": 0.512, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.2885725278953444, |
|
"eval_loss": 0.7209311127662659, |
|
"eval_runtime": 188.2502, |
|
"eval_samples_per_second": 37.301, |
|
"eval_steps_per_second": 0.584, |
|
"eval_wer": 0.5346735938558843, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.32063614210593816, |
|
"grad_norm": 7.854990482330322, |
|
"learning_rate": 0.00015989361702127658, |
|
"loss": 0.4953, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.32063614210593816, |
|
"eval_loss": 0.6942155957221985, |
|
"eval_runtime": 187.3133, |
|
"eval_samples_per_second": 37.488, |
|
"eval_steps_per_second": 0.587, |
|
"eval_wer": 0.5225022920846676, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.352699756316532, |
|
"grad_norm": 10.654298782348633, |
|
"learning_rate": 0.00014393617021276595, |
|
"loss": 0.4746, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.352699756316532, |
|
"eval_loss": 0.6680377721786499, |
|
"eval_runtime": 188.6546, |
|
"eval_samples_per_second": 37.221, |
|
"eval_steps_per_second": 0.583, |
|
"eval_wer": 0.495714798230112, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.38476337052712584, |
|
"grad_norm": 19.936861038208008, |
|
"learning_rate": 0.00012801063829787234, |
|
"loss": 0.4535, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.38476337052712584, |
|
"eval_loss": 0.6434958577156067, |
|
"eval_runtime": 188.0277, |
|
"eval_samples_per_second": 37.346, |
|
"eval_steps_per_second": 0.585, |
|
"eval_wer": 0.47308627539563375, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.4168269847377196, |
|
"grad_norm": 6.950649261474609, |
|
"learning_rate": 0.0001120531914893617, |
|
"loss": 0.4249, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.4168269847377196, |
|
"eval_loss": 0.6293027400970459, |
|
"eval_runtime": 187.8772, |
|
"eval_samples_per_second": 37.375, |
|
"eval_steps_per_second": 0.585, |
|
"eval_wer": 0.4816832538301067, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.44889059894831346, |
|
"grad_norm": 5.574142932891846, |
|
"learning_rate": 9.612765957446806e-05, |
|
"loss": 0.4065, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.44889059894831346, |
|
"eval_loss": 0.5997486710548401, |
|
"eval_runtime": 188.7633, |
|
"eval_samples_per_second": 37.2, |
|
"eval_steps_per_second": 0.583, |
|
"eval_wer": 0.44947448145736724, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.4809542131589073, |
|
"grad_norm": 5.602737903594971, |
|
"learning_rate": 8.017021276595744e-05, |
|
"loss": 0.393, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.4809542131589073, |
|
"eval_loss": 0.5802670121192932, |
|
"eval_runtime": 189.0166, |
|
"eval_samples_per_second": 37.15, |
|
"eval_steps_per_second": 0.582, |
|
"eval_wer": 0.44260487117819797, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.5130178273695011, |
|
"grad_norm": 15.019088745117188, |
|
"learning_rate": 6.424468085106383e-05, |
|
"loss": 0.3808, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.5130178273695011, |
|
"eval_loss": 0.5661880970001221, |
|
"eval_runtime": 189.1322, |
|
"eval_samples_per_second": 37.127, |
|
"eval_steps_per_second": 0.582, |
|
"eval_wer": 0.4299153589603901, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.5450814415800949, |
|
"grad_norm": 3.79109525680542, |
|
"learning_rate": 4.8287234042553194e-05, |
|
"loss": 0.3722, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.5450814415800949, |
|
"eval_loss": 0.553141176700592, |
|
"eval_runtime": 188.8506, |
|
"eval_samples_per_second": 37.183, |
|
"eval_steps_per_second": 0.582, |
|
"eval_wer": 0.4194182755550831, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.5771450557906888, |
|
"grad_norm": 12.16059398651123, |
|
"learning_rate": 3.232978723404255e-05, |
|
"loss": 0.3622, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.5771450557906888, |
|
"eval_loss": 0.5399113297462463, |
|
"eval_runtime": 188.6804, |
|
"eval_samples_per_second": 37.216, |
|
"eval_steps_per_second": 0.583, |
|
"eval_wer": 0.40727354867856336, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.6092086700012825, |
|
"grad_norm": 7.863190650939941, |
|
"learning_rate": 1.6372340425531912e-05, |
|
"loss": 0.3526, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.6092086700012825, |
|
"eval_loss": 0.5277913808822632, |
|
"eval_runtime": 188.1946, |
|
"eval_samples_per_second": 37.312, |
|
"eval_steps_per_second": 0.585, |
|
"eval_wer": 0.40279567892212226, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.6412722842118763, |
|
"grad_norm": 5.124056339263916, |
|
"learning_rate": 4.1489361702127654e-07, |
|
"loss": 0.3337, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.6412722842118763, |
|
"eval_loss": 0.5226185917854309, |
|
"eval_runtime": 188.4518, |
|
"eval_samples_per_second": 37.262, |
|
"eval_steps_per_second": 0.584, |
|
"eval_wer": 0.40009832711037885, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.6412722842118763, |
|
"step": 10000, |
|
"total_flos": 1.1393778193380235e+19, |
|
"train_loss": 0.7283544036865235, |
|
"train_runtime": 7737.7643, |
|
"train_samples_per_second": 10.339, |
|
"train_steps_per_second": 1.292 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1393778193380235e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|