|
{ |
|
"best_metric": 1.0906689167022705, |
|
"best_model_checkpoint": "./results/checkpoint-2699", |
|
"epoch": 4.0, |
|
"eval_steps": 500, |
|
"global_step": 10796, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.6147003173828125, |
|
"learning_rate": 4.672437244961049e-05, |
|
"loss": 0.327, |
|
"step": 2699 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7347354138398915, |
|
"eval_conf_mat": [ |
|
[ |
|
6123, |
|
717 |
|
], |
|
[ |
|
2802, |
|
3624 |
|
] |
|
], |
|
"eval_f1": 0.673168013374199, |
|
"eval_loss": 1.0906689167022705, |
|
"eval_precision": 0.8348306841741534, |
|
"eval_recall": 0.5639589169000934, |
|
"eval_runtime": 50.9369, |
|
"eval_samples_per_second": 260.44, |
|
"eval_steps_per_second": 8.147, |
|
"step": 2699 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.085843563079834, |
|
"learning_rate": 4.338691727463831e-05, |
|
"loss": 0.14, |
|
"step": 5398 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7100105532941354, |
|
"eval_conf_mat": [ |
|
[ |
|
6405, |
|
435 |
|
], |
|
[ |
|
3412, |
|
3014 |
|
] |
|
], |
|
"eval_f1": 0.6104303797468354, |
|
"eval_loss": 1.5652275085449219, |
|
"eval_precision": 0.8738764859379531, |
|
"eval_recall": 0.46903205726735137, |
|
"eval_runtime": 51.0274, |
|
"eval_samples_per_second": 259.978, |
|
"eval_steps_per_second": 8.133, |
|
"step": 5398 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 7.21763277053833, |
|
"learning_rate": 4.004946209966614e-05, |
|
"loss": 0.0959, |
|
"step": 8097 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7397105382180009, |
|
"eval_conf_mat": [ |
|
[ |
|
6322, |
|
518 |
|
], |
|
[ |
|
2935, |
|
3491 |
|
] |
|
], |
|
"eval_f1": 0.6690943938667945, |
|
"eval_loss": 1.292863130569458, |
|
"eval_precision": 0.8707907208780244, |
|
"eval_recall": 0.5432617491441021, |
|
"eval_runtime": 49.9344, |
|
"eval_samples_per_second": 265.669, |
|
"eval_steps_per_second": 8.311, |
|
"step": 8097 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 18.75651741027832, |
|
"learning_rate": 3.6712006924693956e-05, |
|
"loss": 0.0676, |
|
"step": 10796 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7308156188753203, |
|
"eval_conf_mat": [ |
|
[ |
|
6358, |
|
482 |
|
], |
|
[ |
|
3089, |
|
3337 |
|
] |
|
], |
|
"eval_f1": 0.6514397266959493, |
|
"eval_loss": 1.5299923419952393, |
|
"eval_precision": 0.8737889499869076, |
|
"eval_recall": 0.5192966075319017, |
|
"eval_runtime": 51.18, |
|
"eval_samples_per_second": 259.203, |
|
"eval_steps_per_second": 8.109, |
|
"step": 10796 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 40485, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"total_flos": 4.192333290845046e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|