{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 200,
  "global_step": 8312,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1,
      "eval_loss": 3.2670648097991943,
      "eval_runtime": 692.0326,
      "eval_samples_per_second": 5.263,
      "eval_steps_per_second": 0.659,
      "eval_wer": 1.0,
      "step": 200
    },
    {
      "epoch": 0.19,
      "eval_loss": 2.8740603923797607,
      "eval_runtime": 683.8046,
      "eval_samples_per_second": 5.326,
      "eval_steps_per_second": 0.667,
      "eval_wer": 1.0006963141769567,
      "step": 400
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0009517778860204579,
      "loss": 3.8381,
      "step": 500
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.761221170425415,
      "eval_runtime": 683.642,
      "eval_samples_per_second": 5.327,
      "eval_steps_per_second": 0.667,
      "eval_wer": 0.9954507473772166,
      "step": 600
    },
    {
      "epoch": 0.38,
      "eval_loss": 2.633348226547241,
      "eval_runtime": 684.1815,
      "eval_samples_per_second": 5.323,
      "eval_steps_per_second": 0.666,
      "eval_wer": 0.9981431621947823,
      "step": 800
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.000890891378470531,
      "loss": 2.6996,
      "step": 1000
    },
    {
      "epoch": 0.48,
      "eval_loss": 2.3073549270629883,
      "eval_runtime": 686.3923,
      "eval_samples_per_second": 5.306,
      "eval_steps_per_second": 0.664,
      "eval_wer": 0.9770680531055612,
      "step": 1000
    },
    {
      "epoch": 0.58,
      "eval_loss": 2.0154612064361572,
      "eval_runtime": 686.5478,
      "eval_samples_per_second": 5.305,
      "eval_steps_per_second": 0.664,
      "eval_wer": 0.9286045863893789,
      "step": 1200
    },
    {
      "epoch": 0.67,
      "eval_loss": 1.9155136346817017,
      "eval_runtime": 689.547,
      "eval_samples_per_second": 5.282,
      "eval_steps_per_second": 0.661,
      "eval_wer": 0.8947172964441557,
      "step": 1400
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.000830004870920604,
      "loss": 2.2919,
      "step": 1500
    },
    {
      "epoch": 0.77,
      "eval_loss": 1.641204595565796,
      "eval_runtime": 685.3748,
      "eval_samples_per_second": 5.314,
      "eval_steps_per_second": 0.665,
      "eval_wer": 0.8813944851917185,
      "step": 1600
    },
    {
      "epoch": 0.87,
      "eval_loss": 1.4531193971633911,
      "eval_runtime": 689.2035,
      "eval_samples_per_second": 5.284,
      "eval_steps_per_second": 0.662,
      "eval_wer": 0.8285210286881441,
      "step": 1800
    },
    {
      "epoch": 0.96,
      "learning_rate": 0.0007691183633706771,
      "loss": 1.5872,
      "step": 2000
    },
    {
      "epoch": 0.96,
      "eval_loss": 0.1812867820262909,
      "eval_runtime": 685.9058,
      "eval_samples_per_second": 5.31,
      "eval_steps_per_second": 0.665,
      "eval_wer": 0.2060161544889054,
      "step": 2000
    },
    {
      "epoch": 1.06,
      "eval_loss": 0.1635832041501999,
      "eval_runtime": 687.9409,
      "eval_samples_per_second": 5.294,
      "eval_steps_per_second": 0.663,
      "eval_wer": 0.18062389750255314,
      "step": 2200
    },
    {
      "epoch": 1.15,
      "eval_loss": 0.155806764960289,
      "eval_runtime": 692.1735,
      "eval_samples_per_second": 5.262,
      "eval_steps_per_second": 0.659,
      "eval_wer": 0.17444991180020425,
      "step": 2400
    },
    {
      "epoch": 1.2,
      "learning_rate": 0.0007084754018509498,
      "loss": 0.2659,
      "step": 2500
    },
    {
      "epoch": 1.25,
      "eval_loss": 0.152183398604393,
      "eval_runtime": 688.556,
      "eval_samples_per_second": 5.289,
      "eval_steps_per_second": 0.662,
      "eval_wer": 0.1646550923776808,
      "step": 2600
    },
    {
      "epoch": 1.35,
      "eval_loss": 0.15532232820987701,
      "eval_runtime": 688.1144,
      "eval_samples_per_second": 5.293,
      "eval_steps_per_second": 0.663,
      "eval_wer": 0.16641908829263763,
      "step": 2800
    },
    {
      "epoch": 1.44,
      "learning_rate": 0.0006475888943010228,
      "loss": 0.2436,
      "step": 3000
    },
    {
      "epoch": 1.44,
      "eval_loss": 0.1840931922197342,
      "eval_runtime": 692.811,
      "eval_samples_per_second": 5.257,
      "eval_steps_per_second": 0.658,
      "eval_wer": 0.1960820722309906,
      "step": 3000
    },
    {
      "epoch": 1.54,
      "eval_loss": 0.14190182089805603,
      "eval_runtime": 690.3365,
      "eval_samples_per_second": 5.276,
      "eval_steps_per_second": 0.661,
      "eval_wer": 0.1640051991458546,
      "step": 3200
    },
    {
      "epoch": 1.64,
      "eval_loss": 0.14559713006019592,
      "eval_runtime": 685.0999,
      "eval_samples_per_second": 5.316,
      "eval_steps_per_second": 0.666,
      "eval_wer": 0.17143255036672547,
      "step": 3400
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.000586702386751096,
      "loss": 0.2464,
      "step": 3500
    },
    {
      "epoch": 1.73,
      "eval_loss": 0.14024095237255096,
      "eval_runtime": 692.5402,
      "eval_samples_per_second": 5.259,
      "eval_steps_per_second": 0.658,
      "eval_wer": 0.16070931204159317,
      "step": 3600
    },
    {
      "epoch": 1.83,
      "eval_loss": 0.1345185786485672,
      "eval_runtime": 694.4502,
      "eval_samples_per_second": 5.244,
      "eval_steps_per_second": 0.657,
      "eval_wer": 0.1528177513694179,
      "step": 3800
    },
    {
      "epoch": 1.92,
      "learning_rate": 0.000525815879201169,
      "loss": 0.2292,
      "step": 4000
    },
    {
      "epoch": 1.92,
      "eval_loss": 0.134234219789505,
      "eval_runtime": 693.8578,
      "eval_samples_per_second": 5.249,
      "eval_steps_per_second": 0.657,
      "eval_wer": 0.155556587132114,
      "step": 4000
    },
    {
      "epoch": 2.02,
      "eval_loss": 0.13340923190116882,
      "eval_runtime": 684.0209,
      "eval_samples_per_second": 5.324,
      "eval_steps_per_second": 0.667,
      "eval_wer": 0.15518521957107045,
      "step": 4200
    },
    {
      "epoch": 2.12,
      "eval_loss": 0.13518257439136505,
      "eval_runtime": 687.2622,
      "eval_samples_per_second": 5.299,
      "eval_steps_per_second": 0.664,
      "eval_wer": 0.1543496425587225,
      "step": 4400
    },
    {
      "epoch": 2.17,
      "learning_rate": 0.0004649293716512421,
      "loss": 0.2209,
      "step": 4500
    },
    {
      "epoch": 2.21,
      "eval_loss": 0.13499902188777924,
      "eval_runtime": 691.8679,
      "eval_samples_per_second": 5.264,
      "eval_steps_per_second": 0.659,
      "eval_wer": 0.1537925912171572,
      "step": 4600
    },
    {
      "epoch": 2.31,
      "eval_loss": 0.13418444991111755,
      "eval_runtime": 685.3615,
      "eval_samples_per_second": 5.314,
      "eval_steps_per_second": 0.665,
      "eval_wer": 0.1530498560950701,
      "step": 4800
    },
    {
      "epoch": 2.41,
      "learning_rate": 0.00040404286410131515,
      "loss": 0.2136,
      "step": 5000
    },
    {
      "epoch": 2.41,
      "eval_loss": 0.1319747269153595,
      "eval_runtime": 688.5799,
      "eval_samples_per_second": 5.289,
      "eval_steps_per_second": 0.662,
      "eval_wer": 0.1540246959428094,
      "step": 5000
    },
    {
      "epoch": 2.5,
      "eval_loss": 0.13689081370830536,
      "eval_runtime": 691.8314,
      "eval_samples_per_second": 5.264,
      "eval_steps_per_second": 0.659,
      "eval_wer": 0.15690279454089684,
      "step": 5200
    },
    {
      "epoch": 2.6,
      "eval_loss": 0.13139554858207703,
      "eval_runtime": 689.159,
      "eval_samples_per_second": 5.285,
      "eval_steps_per_second": 0.662,
      "eval_wer": 0.1516572277411568,
      "step": 5400
    },
    {
      "epoch": 2.65,
      "learning_rate": 0.0003431563565513882,
      "loss": 0.2154,
      "step": 5500
    },
    {
      "epoch": 2.69,
      "eval_loss": 0.1303856372833252,
      "eval_runtime": 694.7157,
      "eval_samples_per_second": 5.242,
      "eval_steps_per_second": 0.656,
      "eval_wer": 0.15063596694828707,
      "step": 5600
    },
    {
      "epoch": 2.79,
      "eval_loss": 0.13201411068439484,
      "eval_runtime": 691.5101,
      "eval_samples_per_second": 5.267,
      "eval_steps_per_second": 0.659,
      "eval_wer": 0.15072880883854795,
      "step": 5800
    },
    {
      "epoch": 2.89,
      "learning_rate": 0.0002822698490014613,
      "loss": 0.2123,
      "step": 6000
    },
    {
      "epoch": 2.89,
      "eval_loss": 0.13187964260578156,
      "eval_runtime": 687.2712,
      "eval_samples_per_second": 5.299,
      "eval_steps_per_second": 0.663,
      "eval_wer": 0.1523999628632439,
      "step": 6000
    },
    {
      "epoch": 2.98,
      "eval_loss": 0.12917861342430115,
      "eval_runtime": 691.2948,
      "eval_samples_per_second": 5.268,
      "eval_steps_per_second": 0.66,
      "eval_wer": 0.1523999628632439,
      "step": 6200
    },
    {
      "epoch": 3.08,
      "eval_loss": 0.12825024127960205,
      "eval_runtime": 689.7813,
      "eval_samples_per_second": 5.28,
      "eval_steps_per_second": 0.661,
      "eval_wer": 0.1488255500881998,
      "step": 6400
    },
    {
      "epoch": 3.13,
      "learning_rate": 0.00022138334145153436,
      "loss": 0.2109,
      "step": 6500
    },
    {
      "epoch": 3.18,
      "eval_loss": 0.1257564276456833,
      "eval_runtime": 687.694,
      "eval_samples_per_second": 5.296,
      "eval_steps_per_second": 0.663,
      "eval_wer": 0.14919691764924334,
      "step": 6600
    },
    {
      "epoch": 3.27,
      "eval_loss": 0.12906372547149658,
      "eval_runtime": 687.8093,
      "eval_samples_per_second": 5.295,
      "eval_steps_per_second": 0.663,
      "eval_wer": 0.1488255500881998,
      "step": 6800
    },
    {
      "epoch": 3.37,
      "learning_rate": 0.0001604968339016074,
      "loss": 0.2103,
      "step": 7000
    },
    {
      "epoch": 3.37,
      "eval_loss": 0.12778830528259277,
      "eval_runtime": 694.3257,
      "eval_samples_per_second": 5.245,
      "eval_steps_per_second": 0.657,
      "eval_wer": 0.14840776158202582,
      "step": 7000
    },
    {
      "epoch": 3.46,
      "eval_loss": 0.12501177191734314,
      "eval_runtime": 679.1124,
      "eval_samples_per_second": 5.363,
      "eval_steps_per_second": 0.671,
      "eval_wer": 0.14780428929533004,
      "step": 7200
    },
    {
      "epoch": 3.56,
      "eval_loss": 0.12769711017608643,
      "eval_runtime": 683.2755,
      "eval_samples_per_second": 5.33,
      "eval_steps_per_second": 0.667,
      "eval_wer": 0.14822207780150404,
      "step": 7400
    },
    {
      "epoch": 3.61,
      "learning_rate": 9.961032635168047e-05,
      "loss": 0.1986,
      "step": 7500
    },
    {
      "epoch": 3.66,
      "eval_loss": 0.1256353259086609,
      "eval_runtime": 680.6384,
      "eval_samples_per_second": 5.351,
      "eval_steps_per_second": 0.67,
      "eval_wer": 0.14757218456967783,
      "step": 7600
    },
    {
      "epoch": 3.75,
      "eval_loss": 0.12579868733882904,
      "eval_runtime": 683.2757,
      "eval_samples_per_second": 5.33,
      "eval_steps_per_second": 0.667,
      "eval_wer": 0.14682944944759074,
      "step": 7800
    },
    {
      "epoch": 3.85,
      "learning_rate": 3.884559181685338e-05,
      "loss": 0.1954,
      "step": 8000
    },
    {
      "epoch": 3.85,
      "eval_loss": 0.12557055056095123,
      "eval_runtime": 690.701,
      "eval_samples_per_second": 5.273,
      "eval_steps_per_second": 0.66,
      "eval_wer": 0.14645808188654721,
      "step": 8000
    },
    {
      "epoch": 3.95,
      "eval_loss": 0.12530682981014252,
      "eval_runtime": 692.3328,
      "eval_samples_per_second": 5.26,
      "eval_steps_per_second": 0.659,
      "eval_wer": 0.1455760839290688,
      "step": 8200
    },
    {
      "epoch": 4.0,
      "step": 8312,
      "total_flos": 1.5580571693960135e+19,
      "train_loss": 0.7944976037459608,
      "train_runtime": 46739.0877,
      "train_samples_per_second": 0.711,
      "train_steps_per_second": 0.178
    }
  ],
  "logging_steps": 500,
  "max_steps": 8312,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 200,
  "total_flos": 1.5580571693960135e+19,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}