|
{ |
|
"best_metric": 0.7072979807853699, |
|
"best_model_checkpoint": "sa_BERT_48_mnli/checkpoint-16364", |
|
"epoch": 9.0, |
|
"global_step": 36819, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.9200000000000004e-05, |
|
"loss": 0.9145, |
|
"step": 4091 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6535914416709119, |
|
"eval_loss": 0.8005583882331848, |
|
"eval_runtime": 20.7416, |
|
"eval_samples_per_second": 473.203, |
|
"eval_steps_per_second": 4.966, |
|
"step": 4091 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.8400000000000005e-05, |
|
"loss": 0.7442, |
|
"step": 8182 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6902699949057565, |
|
"eval_loss": 0.7245084047317505, |
|
"eval_runtime": 20.6753, |
|
"eval_samples_per_second": 474.721, |
|
"eval_steps_per_second": 4.982, |
|
"step": 8182 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.76e-05, |
|
"loss": 0.6631, |
|
"step": 12273 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6979113601630158, |
|
"eval_loss": 0.7323198914527893, |
|
"eval_runtime": 20.6762, |
|
"eval_samples_per_second": 474.702, |
|
"eval_steps_per_second": 4.982, |
|
"step": 12273 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.680000000000001e-05, |
|
"loss": 0.5942, |
|
"step": 16364 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7075904228222109, |
|
"eval_loss": 0.7072979807853699, |
|
"eval_runtime": 20.589, |
|
"eval_samples_per_second": 476.712, |
|
"eval_steps_per_second": 5.003, |
|
"step": 16364 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.5241, |
|
"step": 20455 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7015792154865003, |
|
"eval_loss": 0.7474916577339172, |
|
"eval_runtime": 20.6918, |
|
"eval_samples_per_second": 474.342, |
|
"eval_steps_per_second": 4.978, |
|
"step": 20455 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 3.52e-05, |
|
"loss": 0.4526, |
|
"step": 24546 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7088130412633724, |
|
"eval_loss": 0.8377429842948914, |
|
"eval_runtime": 20.7171, |
|
"eval_samples_per_second": 473.764, |
|
"eval_steps_per_second": 4.972, |
|
"step": 24546 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 3.44e-05, |
|
"loss": 0.3842, |
|
"step": 28637 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6955680081507896, |
|
"eval_loss": 0.8736042380332947, |
|
"eval_runtime": 20.7409, |
|
"eval_samples_per_second": 473.219, |
|
"eval_steps_per_second": 4.966, |
|
"step": 28637 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 3.3600000000000004e-05, |
|
"loss": 0.3213, |
|
"step": 32728 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6945491594498217, |
|
"eval_loss": 0.9334085583686829, |
|
"eval_runtime": 20.6303, |
|
"eval_samples_per_second": 475.756, |
|
"eval_steps_per_second": 4.993, |
|
"step": 32728 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 3.28e-05, |
|
"loss": 0.2669, |
|
"step": 36819 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7026999490575649, |
|
"eval_loss": 1.0196205377578735, |
|
"eval_runtime": 20.722, |
|
"eval_samples_per_second": 473.652, |
|
"eval_steps_per_second": 4.971, |
|
"step": 36819 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 36819, |
|
"total_flos": 4.6496322950057165e+17, |
|
"train_loss": 0.5405747011563323, |
|
"train_runtime": 22664.7689, |
|
"train_samples_per_second": 866.327, |
|
"train_steps_per_second": 9.025 |
|
} |
|
], |
|
"max_steps": 204550, |
|
"num_train_epochs": 50, |
|
"total_flos": 4.6496322950057165e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|