roberta-large-ca-paraphrase / trainer_state.json
Blanca's picture
model
31e0fb1
{
"best_metric": 0.8438365477338476,
"best_model_checkpoint": "output/roberta-large-ca-v2/parafraseja.py_8_0.00005_date_22-10-14_time_16-03-07/checkpoint-4995",
"epoch": 5.0,
"global_step": 4995,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 4e-05,
"loss": 0.5335,
"step": 999
},
{
"epoch": 1.0,
"eval_accuracy": 0.836,
"eval_combined_score": 0.8360819180819181,
"eval_f1": 0.8361638361638362,
"eval_loss": 0.39040425419807434,
"eval_runtime": 58.4758,
"eval_samples_per_second": 34.202,
"eval_steps_per_second": 2.138,
"step": 999
},
{
"epoch": 2.0,
"learning_rate": 3e-05,
"loss": 0.3403,
"step": 1998
},
{
"epoch": 2.0,
"eval_accuracy": 0.832,
"eval_combined_score": 0.8342083739045765,
"eval_f1": 0.8364167478091529,
"eval_loss": 0.4153313934803009,
"eval_runtime": 58.4308,
"eval_samples_per_second": 34.229,
"eval_steps_per_second": 2.139,
"step": 1998
},
{
"epoch": 3.0,
"learning_rate": 2e-05,
"loss": 0.2345,
"step": 2997
},
{
"epoch": 3.0,
"eval_accuracy": 0.8405,
"eval_combined_score": 0.8427098930481283,
"eval_f1": 0.8449197860962566,
"eval_loss": 0.4383120834827423,
"eval_runtime": 58.4693,
"eval_samples_per_second": 34.206,
"eval_steps_per_second": 2.138,
"step": 2997
},
{
"epoch": 4.0,
"learning_rate": 1e-05,
"loss": 0.1644,
"step": 3996
},
{
"epoch": 4.0,
"eval_accuracy": 0.836,
"eval_combined_score": 0.8401272554605888,
"eval_f1": 0.8442545109211776,
"eval_loss": 0.6573591828346252,
"eval_runtime": 58.3674,
"eval_samples_per_second": 34.266,
"eval_steps_per_second": 2.142,
"step": 3996
},
{
"epoch": 5.0,
"learning_rate": 0.0,
"loss": 0.1097,
"step": 4995
},
{
"epoch": 5.0,
"eval_accuracy": 0.841,
"eval_combined_score": 0.8438365477338476,
"eval_f1": 0.8466730954676953,
"eval_loss": 0.7086848616600037,
"eval_runtime": 58.3516,
"eval_samples_per_second": 34.275,
"eval_steps_per_second": 2.142,
"step": 4995
},
{
"epoch": 5.0,
"step": 4995,
"total_flos": 7.447995455717376e+16,
"train_loss": 0.27648499787629427,
"train_runtime": 7079.06,
"train_samples_per_second": 11.29,
"train_steps_per_second": 0.706
}
],
"max_steps": 4995,
"num_train_epochs": 5,
"total_flos": 7.447995455717376e+16,
"trial_name": null,
"trial_params": null
}