|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 8.208955223880597, |
|
"global_step": 2200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.92749244712991e-05, |
|
"loss": 0.5968, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 0.5482025742530823, |
|
"eval_matthews_correlation": 0.4044551659076911, |
|
"eval_runtime": 7.0014, |
|
"eval_samples_per_second": 148.97, |
|
"eval_steps_per_second": 4.713, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.8066465256797584e-05, |
|
"loss": 0.4748, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_loss": 0.7374169826507568, |
|
"eval_matthews_correlation": 0.295093127326412, |
|
"eval_runtime": 4.195, |
|
"eval_samples_per_second": 248.628, |
|
"eval_steps_per_second": 7.866, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.685800604229608e-05, |
|
"loss": 0.4165, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_loss": 0.7154484391212463, |
|
"eval_matthews_correlation": 0.4656950548188858, |
|
"eval_runtime": 4.0204, |
|
"eval_samples_per_second": 259.424, |
|
"eval_steps_per_second": 8.208, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 3.564954682779456e-05, |
|
"loss": 0.3461, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 0.5880667567253113, |
|
"eval_matthews_correlation": 0.4966579188966687, |
|
"eval_runtime": 4.1325, |
|
"eval_samples_per_second": 252.389, |
|
"eval_steps_per_second": 7.985, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 3.4441087613293056e-05, |
|
"loss": 0.2666, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"eval_loss": 0.6024960875511169, |
|
"eval_matthews_correlation": 0.5243351914860497, |
|
"eval_runtime": 4.2714, |
|
"eval_samples_per_second": 244.185, |
|
"eval_steps_per_second": 7.726, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 3.323262839879154e-05, |
|
"loss": 0.233, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"eval_loss": 0.7757831811904907, |
|
"eval_matthews_correlation": 0.5180330680945964, |
|
"eval_runtime": 5.4027, |
|
"eval_samples_per_second": 193.052, |
|
"eval_steps_per_second": 6.108, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 3.2024169184290035e-05, |
|
"loss": 0.2067, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_loss": 0.8261585831642151, |
|
"eval_matthews_correlation": 0.4812305179480984, |
|
"eval_runtime": 4.5778, |
|
"eval_samples_per_second": 227.84, |
|
"eval_steps_per_second": 7.209, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 3.081570996978852e-05, |
|
"loss": 0.174, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"eval_loss": 1.0030035972595215, |
|
"eval_matthews_correlation": 0.4690871215881841, |
|
"eval_runtime": 4.1699, |
|
"eval_samples_per_second": 250.124, |
|
"eval_steps_per_second": 7.914, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 2.9607250755287014e-05, |
|
"loss": 0.1504, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"eval_loss": 1.007808804512024, |
|
"eval_matthews_correlation": 0.505928717066083, |
|
"eval_runtime": 5.5718, |
|
"eval_samples_per_second": 187.193, |
|
"eval_steps_per_second": 5.923, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 2.83987915407855e-05, |
|
"loss": 0.1261, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"eval_loss": 1.0862607955932617, |
|
"eval_matthews_correlation": 0.4929950873963667, |
|
"eval_runtime": 4.1938, |
|
"eval_samples_per_second": 248.698, |
|
"eval_steps_per_second": 7.869, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 2.719033232628399e-05, |
|
"loss": 0.1141, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"eval_loss": 0.9651995301246643, |
|
"eval_matthews_correlation": 0.5632084517291658, |
|
"eval_runtime": 4.2997, |
|
"eval_samples_per_second": 242.575, |
|
"eval_steps_per_second": 7.675, |
|
"step": 2200 |
|
} |
|
], |
|
"max_steps": 6700, |
|
"num_train_epochs": 25, |
|
"total_flos": 5382361281024000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|