|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.4367816091954024, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.7599999999999997e-05, |
|
"loss": 10.9066, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.6399999999999995e-05, |
|
"loss": 7.3263, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_cer": 0.9998702550415184, |
|
"eval_loss": 5.000309467315674, |
|
"eval_runtime": 31.7393, |
|
"eval_samples_per_second": 14.399, |
|
"eval_steps_per_second": 0.914, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 8.639999999999999e-05, |
|
"loss": 3.4967, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001164, |
|
"loss": 3.1464, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_cer": 0.9998702550415184, |
|
"eval_loss": 3.5314910411834717, |
|
"eval_runtime": 31.3912, |
|
"eval_samples_per_second": 14.558, |
|
"eval_steps_per_second": 0.924, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00014639999999999998, |
|
"loss": 3.1238, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017639999999999998, |
|
"loss": 3.1152, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_cer": 0.9998702550415184, |
|
"eval_loss": 3.6040682792663574, |
|
"eval_runtime": 31.267, |
|
"eval_samples_per_second": 14.616, |
|
"eval_steps_per_second": 0.927, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00020639999999999998, |
|
"loss": 3.088, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002364, |
|
"loss": 3.0626, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_cer": 0.9743290332147093, |
|
"eval_loss": 3.385751247406006, |
|
"eval_runtime": 31.1188, |
|
"eval_samples_per_second": 14.686, |
|
"eval_steps_per_second": 0.932, |
|
"eval_wer": 0.9992471013401596, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00026639999999999997, |
|
"loss": 3.032, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0002964, |
|
"loss": 2.3387, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_cer": 0.3497553380782918, |
|
"eval_loss": 1.502172589302063, |
|
"eval_runtime": 31.3947, |
|
"eval_samples_per_second": 14.557, |
|
"eval_steps_per_second": 0.924, |
|
"eval_wer": 1.0904984189128144, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00029168765743073046, |
|
"loss": 1.1185, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0002822418136020151, |
|
"loss": 0.7737, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_cer": 0.21517274614472123, |
|
"eval_loss": 0.6938613057136536, |
|
"eval_runtime": 31.1722, |
|
"eval_samples_per_second": 14.661, |
|
"eval_steps_per_second": 0.93, |
|
"eval_wer": 0.872910706218943, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00027279596977329974, |
|
"loss": 0.647, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00026335012594458433, |
|
"loss": 0.5643, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_cer": 0.15467452550415184, |
|
"eval_loss": 0.4780799448490143, |
|
"eval_runtime": 30.8612, |
|
"eval_samples_per_second": 14.808, |
|
"eval_steps_per_second": 0.94, |
|
"eval_wer": 0.7375395271796417, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00025390428211586897, |
|
"loss": 0.5188, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0002444584382871536, |
|
"loss": 0.4762, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_cer": 0.1396055753262159, |
|
"eval_loss": 0.41990911960601807, |
|
"eval_runtime": 31.306, |
|
"eval_samples_per_second": 14.598, |
|
"eval_steps_per_second": 0.926, |
|
"eval_wer": 0.6905586508056015, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00023501259445843828, |
|
"loss": 0.4614, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0002255667506297229, |
|
"loss": 0.4371, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_cer": 0.12713152431791222, |
|
"eval_loss": 0.382140189409256, |
|
"eval_runtime": 31.4276, |
|
"eval_samples_per_second": 14.541, |
|
"eval_steps_per_second": 0.923, |
|
"eval_wer": 0.6390603824725192, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00021612090680100753, |
|
"loss": 0.4259, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00020667506297229217, |
|
"loss": 0.4138, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_cer": 0.12171930604982206, |
|
"eval_loss": 0.36281564831733704, |
|
"eval_runtime": 31.2629, |
|
"eval_samples_per_second": 14.618, |
|
"eval_steps_per_second": 0.928, |
|
"eval_wer": 0.6143653064297545, |
|
"step": 1000 |
|
} |
|
], |
|
"max_steps": 2088, |
|
"num_train_epochs": 3, |
|
"total_flos": 8.198176189032398e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|