nstrn-mo's picture
Training in progress, epoch 1
d7703e8 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 747,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.9472099821124733,
"eval_f1": 0.5949084412684235,
"eval_loss": 0.25175559520721436,
"eval_precision": 0.5244094488188976,
"eval_recall": 0.6873065015479877,
"eval_runtime": 2.6615,
"eval_samples_per_second": 318.997,
"eval_steps_per_second": 40.203,
"step": 249
},
{
"epoch": 2.0,
"eval_accuracy": 0.9473844945682998,
"eval_f1": 0.5956873315363881,
"eval_loss": 0.2594073712825775,
"eval_precision": 0.5274463007159904,
"eval_recall": 0.6842105263157895,
"eval_runtime": 2.6621,
"eval_samples_per_second": 318.925,
"eval_steps_per_second": 40.194,
"step": 498
},
{
"epoch": 2.0080321285140563,
"grad_norm": 0.15671595931053162,
"learning_rate": 5.926980687011047e-07,
"loss": 0.0134,
"step": 500
},
{
"epoch": 3.0,
"eval_accuracy": 0.9474281226822564,
"eval_f1": 0.5947888589398024,
"eval_loss": 0.2625073194503784,
"eval_precision": 0.5266507557677009,
"eval_recall": 0.6831785345717234,
"eval_runtime": 3.0901,
"eval_samples_per_second": 274.748,
"eval_steps_per_second": 34.627,
"step": 747
}
],
"logging_steps": 500,
"max_steps": 747,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 134021308141506.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": {
"learning_rate": 1.7924917300393731e-06,
"weight_decay": 7.731086067996299e-05
}
}