|
{ |
|
"best_metric": 1.0985817909240723, |
|
"best_model_checkpoint": "add_BERT_48_mnli/checkpoint-18408", |
|
"epoch": 11.0, |
|
"global_step": 33748, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.9200000000000004e-05, |
|
"loss": 1.1006, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.33010697911360165, |
|
"eval_loss": 1.0990883111953735, |
|
"eval_runtime": 18.8354, |
|
"eval_samples_per_second": 521.094, |
|
"eval_steps_per_second": 4.088, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.8400000000000005e-05, |
|
"loss": 1.0989, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.31818644931227713, |
|
"eval_loss": 1.0987157821655273, |
|
"eval_runtime": 18.8101, |
|
"eval_samples_per_second": 521.794, |
|
"eval_steps_per_second": 4.094, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.76e-05, |
|
"loss": 1.0988, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.3273560876209883, |
|
"eval_loss": 1.098842978477478, |
|
"eval_runtime": 18.8979, |
|
"eval_samples_per_second": 519.37, |
|
"eval_steps_per_second": 4.075, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.680000000000001e-05, |
|
"loss": 1.0986, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.3544574630667346, |
|
"eval_loss": 1.0985958576202393, |
|
"eval_runtime": 18.9455, |
|
"eval_samples_per_second": 518.064, |
|
"eval_steps_per_second": 4.064, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 3.6e-05, |
|
"loss": 1.0987, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.31818644931227713, |
|
"eval_loss": 1.0986104011535645, |
|
"eval_runtime": 18.85, |
|
"eval_samples_per_second": 520.689, |
|
"eval_steps_per_second": 4.085, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 3.52e-05, |
|
"loss": 1.0994, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.3273560876209883, |
|
"eval_loss": 1.0985817909240723, |
|
"eval_runtime": 18.8392, |
|
"eval_samples_per_second": 520.989, |
|
"eval_steps_per_second": 4.087, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 3.44e-05, |
|
"loss": 1.0986, |
|
"step": 21476 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.3273560876209883, |
|
"eval_loss": 1.0985937118530273, |
|
"eval_runtime": 18.8521, |
|
"eval_samples_per_second": 520.632, |
|
"eval_steps_per_second": 4.084, |
|
"step": 21476 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 3.3600000000000004e-05, |
|
"loss": 1.0986, |
|
"step": 24544 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.31818644931227713, |
|
"eval_loss": 1.098621129989624, |
|
"eval_runtime": 18.8177, |
|
"eval_samples_per_second": 521.584, |
|
"eval_steps_per_second": 4.092, |
|
"step": 24544 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 3.28e-05, |
|
"loss": 1.0986, |
|
"step": 27612 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.3544574630667346, |
|
"eval_loss": 1.0986088514328003, |
|
"eval_runtime": 18.8232, |
|
"eval_samples_per_second": 521.43, |
|
"eval_steps_per_second": 4.091, |
|
"step": 27612 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 1.0986, |
|
"step": 30680 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.3544574630667346, |
|
"eval_loss": 1.0986006259918213, |
|
"eval_runtime": 18.8354, |
|
"eval_samples_per_second": 521.094, |
|
"eval_steps_per_second": 4.088, |
|
"step": 30680 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 3.1200000000000006e-05, |
|
"loss": 1.0986, |
|
"step": 33748 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.31818644931227713, |
|
"eval_loss": 1.0986229181289673, |
|
"eval_runtime": 18.817, |
|
"eval_samples_per_second": 521.602, |
|
"eval_steps_per_second": 4.092, |
|
"step": 33748 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"step": 33748, |
|
"total_flos": 6.169649224246886e+17, |
|
"train_loss": 1.0989104475867089, |
|
"train_runtime": 25503.8517, |
|
"train_samples_per_second": 769.888, |
|
"train_steps_per_second": 6.015 |
|
} |
|
], |
|
"max_steps": 153400, |
|
"num_train_epochs": 50, |
|
"total_flos": 6.169649224246886e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|