|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.3640661938534278, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.892434988179669e-05, |
|
"loss": 9.0256, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.774231678486998e-05, |
|
"loss": 5.1429, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.656028368794326e-05, |
|
"loss": 4.6719, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.537825059101655e-05, |
|
"loss": 4.3761, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.419621749408984e-05, |
|
"loss": 4.1596, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.301418439716312e-05, |
|
"loss": 3.9129, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1832151300236414e-05, |
|
"loss": 3.7629, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.0650118203309695e-05, |
|
"loss": 3.5985, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.946808510638298e-05, |
|
"loss": 3.3403, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.828605200945627e-05, |
|
"loss": 3.103, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_cer": 11.597362920083615, |
|
"eval_loss": 8.085211753845215, |
|
"eval_runtime": 146.0243, |
|
"eval_samples_per_second": 5.143, |
|
"eval_steps_per_second": 0.322, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.710401891252955e-05, |
|
"loss": 2.9059, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.592198581560284e-05, |
|
"loss": 2.7383, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 3.473995271867612e-05, |
|
"loss": 2.5918, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.3557919621749415e-05, |
|
"loss": 2.4015, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.2375886524822696e-05, |
|
"loss": 2.2971, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.119385342789598e-05, |
|
"loss": 2.0906, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.0011820330969272e-05, |
|
"loss": 1.7917, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.8829787234042553e-05, |
|
"loss": 1.4973, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.7647754137115837e-05, |
|
"loss": 1.3962, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.646572104018913e-05, |
|
"loss": 1.2535, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_cer": 0.49043254542530956, |
|
"eval_loss": 2.039987325668335, |
|
"eval_runtime": 35.6188, |
|
"eval_samples_per_second": 21.084, |
|
"eval_steps_per_second": 1.32, |
|
"step": 2000 |
|
} |
|
], |
|
"max_steps": 4230, |
|
"num_train_epochs": 5, |
|
"total_flos": 5.790864009434628e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|