{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 49.9937106918239,
  "global_step": 3950,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.26,
      "learning_rate": 4.849999999999999e-06,
      "loss": 18.5969,
      "step": 100
    },
    {
      "epoch": 2.53,
      "learning_rate": 9.849999999999999e-06,
      "loss": 4.2811,
      "step": 200
    },
    {
      "epoch": 3.79,
      "learning_rate": 1.485e-05,
      "loss": 3.9192,
      "step": 300
    },
    {
      "epoch": 5.06,
      "learning_rate": 1.9849999999999998e-05,
      "loss": 2.7895,
      "step": 400
    },
    {
      "epoch": 6.33,
      "learning_rate": 2.4849999999999998e-05,
      "loss": 2.142,
      "step": 500
    },
    {
      "epoch": 7.59,
      "learning_rate": 2.985e-05,
      "loss": 1.9161,
      "step": 600
    },
    {
      "epoch": 8.86,
      "learning_rate": 3.485e-05,
      "loss": 1.7888,
      "step": 700
    },
    {
      "epoch": 10.13,
      "learning_rate": 3.984999999999999e-05,
      "loss": 1.7494,
      "step": 800
    },
    {
      "epoch": 11.39,
      "learning_rate": 4.484999999999999e-05,
      "loss": 1.7129,
      "step": 900
    },
    {
      "epoch": 12.65,
      "learning_rate": 4.984999999999999e-05,
      "loss": 1.7019,
      "step": 1000
    },
    {
      "epoch": 12.65,
      "eval_cer": 0.2589387701435627,
      "eval_loss": 1.051009178161621,
      "eval_runtime": 302.233,
      "eval_samples_per_second": 14.777,
      "eval_steps_per_second": 1.85,
      "eval_wer": 0.9832177220854778,
      "step": 1000
    },
    {
      "epoch": 13.92,
      "learning_rate": 5.484999999999999e-05,
      "loss": 1.6797,
      "step": 1100
    },
    {
      "epoch": 15.19,
      "learning_rate": 5.985e-05,
      "loss": 1.6992,
      "step": 1200
    },
    {
      "epoch": 16.45,
      "learning_rate": 6.484999999999999e-05,
      "loss": 1.703,
      "step": 1300
    },
    {
      "epoch": 17.72,
      "learning_rate": 6.984999999999999e-05,
      "loss": 1.7304,
      "step": 1400
    },
    {
      "epoch": 18.98,
      "learning_rate": 7.484999999999999e-05,
      "loss": 1.7436,
      "step": 1500
    },
    {
      "epoch": 20.25,
      "learning_rate": 7.203061224489795e-05,
      "loss": 1.7564,
      "step": 1600
    },
    {
      "epoch": 21.52,
      "learning_rate": 6.896938775510203e-05,
      "loss": 1.6982,
      "step": 1700
    },
    {
      "epoch": 22.78,
      "learning_rate": 6.590816326530612e-05,
      "loss": 1.7184,
      "step": 1800
    },
    {
      "epoch": 24.05,
      "learning_rate": 6.28469387755102e-05,
      "loss": 1.692,
      "step": 1900
    },
    {
      "epoch": 25.31,
      "learning_rate": 5.978571428571428e-05,
      "loss": 1.6385,
      "step": 2000
    },
    {
      "epoch": 25.31,
      "eval_cer": 0.18507013781031975,
      "eval_loss": 0.6670215129852295,
      "eval_runtime": 300.7027,
      "eval_samples_per_second": 14.852,
      "eval_steps_per_second": 1.859,
      "eval_wer": 0.9914969791899754,
      "step": 2000
    },
    {
      "epoch": 26.58,
      "learning_rate": 5.6724489795918356e-05,
      "loss": 1.6092,
      "step": 2100
    },
    {
      "epoch": 27.84,
      "learning_rate": 5.3663265306122446e-05,
      "loss": 1.6209,
      "step": 2200
    },
    {
      "epoch": 29.11,
      "learning_rate": 5.060204081632652e-05,
      "loss": 1.5933,
      "step": 2300
    },
    {
      "epoch": 30.38,
      "learning_rate": 4.754081632653061e-05,
      "loss": 1.5673,
      "step": 2400
    },
    {
      "epoch": 31.64,
      "learning_rate": 4.447959183673469e-05,
      "loss": 1.5518,
      "step": 2500
    },
    {
      "epoch": 32.91,
      "learning_rate": 4.141836734693877e-05,
      "loss": 1.5398,
      "step": 2600
    },
    {
      "epoch": 34.18,
      "learning_rate": 3.8357142857142855e-05,
      "loss": 1.5095,
      "step": 2700
    },
    {
      "epoch": 35.44,
      "learning_rate": 3.529591836734693e-05,
      "loss": 1.508,
      "step": 2800
    },
    {
      "epoch": 36.7,
      "learning_rate": 3.2234693877551015e-05,
      "loss": 1.4769,
      "step": 2900
    },
    {
      "epoch": 37.97,
      "learning_rate": 2.9173469387755098e-05,
      "loss": 1.4344,
      "step": 3000
    },
    {
      "epoch": 37.97,
      "eval_cer": 0.17974508788797672,
      "eval_loss": 0.6182843446731567,
      "eval_runtime": 301.8085,
      "eval_samples_per_second": 14.797,
      "eval_steps_per_second": 1.852,
      "eval_wer": 1.0212575520250615,
      "step": 3000
    },
    {
      "epoch": 39.24,
      "learning_rate": 2.611224489795918e-05,
      "loss": 1.4331,
      "step": 3100
    },
    {
      "epoch": 40.5,
      "learning_rate": 2.3051020408163264e-05,
      "loss": 1.4042,
      "step": 3200
    },
    {
      "epoch": 41.77,
      "learning_rate": 2.0020408163265303e-05,
      "loss": 1.3822,
      "step": 3300
    },
    {
      "epoch": 43.04,
      "learning_rate": 1.6959183673469386e-05,
      "loss": 1.3768,
      "step": 3400
    },
    {
      "epoch": 44.3,
      "learning_rate": 1.3897959183673468e-05,
      "loss": 1.3447,
      "step": 3500
    },
    {
      "epoch": 45.57,
      "learning_rate": 1.0836734693877551e-05,
      "loss": 1.3359,
      "step": 3600
    },
    {
      "epoch": 46.83,
      "learning_rate": 7.77551020408163e-06,
      "loss": 1.3113,
      "step": 3700
    },
    {
      "epoch": 48.1,
      "learning_rate": 4.714285714285714e-06,
      "loss": 1.3053,
      "step": 3800
    },
    {
      "epoch": 49.36,
      "learning_rate": 1.6530612244897958e-06,
      "loss": 1.2802,
      "step": 3900
    },
    {
      "epoch": 49.99,
      "step": 3950,
      "total_flos": 2.372869475210465e+20,
      "train_loss": 2.1743111564539657,
      "train_runtime": 44274.9554,
      "train_samples_per_second": 11.465,
      "train_steps_per_second": 0.089
    }
  ],
  "max_steps": 3950,
  "num_train_epochs": 50,
  "total_flos": 2.372869475210465e+20,
  "trial_name": null,
  "trial_params": null
}