|
{ |
|
"best_metric": 0.4374756335282651, |
|
"best_model_checkpoint": "./results/checkpoint-2724", |
|
"epoch": 6.0, |
|
"eval_steps": 500, |
|
"global_step": 2724, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_auc_pr": 0.3741202821573497, |
|
"eval_auc_roc": 0.5344017094017094, |
|
"eval_loss": 0.6598178148269653, |
|
"eval_runtime": 64.1274, |
|
"eval_samples_per_second": 1.575, |
|
"eval_steps_per_second": 0.795, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 2.729569673538208, |
|
"learning_rate": 3.2701908957415565e-05, |
|
"loss": 0.7137, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_auc_pr": 0.3336610256770358, |
|
"eval_auc_roc": 0.43162393162393164, |
|
"eval_loss": 0.6951384544372559, |
|
"eval_runtime": 63.851, |
|
"eval_samples_per_second": 1.582, |
|
"eval_steps_per_second": 0.799, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 4.023996829986572, |
|
"learning_rate": 2.535976505139501e-05, |
|
"loss": 0.6916, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_auc_pr": 0.3415869066362487, |
|
"eval_auc_roc": 0.4920940170940171, |
|
"eval_loss": 0.653586208820343, |
|
"eval_runtime": 63.8711, |
|
"eval_samples_per_second": 1.581, |
|
"eval_steps_per_second": 0.798, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"grad_norm": 3.374420642852783, |
|
"learning_rate": 1.801762114537445e-05, |
|
"loss": 0.7041, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_auc_pr": 0.3571268522434573, |
|
"eval_auc_roc": 0.5004273504273504, |
|
"eval_loss": 0.6515412330627441, |
|
"eval_runtime": 63.7749, |
|
"eval_samples_per_second": 1.584, |
|
"eval_steps_per_second": 0.8, |
|
"step": 1816 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"grad_norm": 2.160428285598755, |
|
"learning_rate": 1.0675477239353893e-05, |
|
"loss": 0.6616, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_auc_pr": 0.39159998056113526, |
|
"eval_auc_roc": 0.5651709401709402, |
|
"eval_loss": 0.6515726447105408, |
|
"eval_runtime": 63.9137, |
|
"eval_samples_per_second": 1.58, |
|
"eval_steps_per_second": 0.798, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"grad_norm": 1.967381477355957, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.6673, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_auc_pr": 0.4374756335282651, |
|
"eval_auc_roc": 0.6269230769230769, |
|
"eval_loss": 0.6512559652328491, |
|
"eval_runtime": 63.8923, |
|
"eval_samples_per_second": 1.581, |
|
"eval_steps_per_second": 0.798, |
|
"step": 2724 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2724, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 500, |
|
"total_flos": 1.4298345882353664e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|