|
{ |
|
"best_metric": 0.6487034559249878, |
|
"best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_qnli/checkpoint-1638", |
|
"epoch": 7.0, |
|
"global_step": 5733, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.6754, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6177924217462932, |
|
"eval_loss": 0.6491163372993469, |
|
"eval_runtime": 8.8255, |
|
"eval_samples_per_second": 619.001, |
|
"eval_steps_per_second": 4.872, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.6369, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6093721398498994, |
|
"eval_loss": 0.6487034559249878, |
|
"eval_runtime": 8.8229, |
|
"eval_samples_per_second": 619.186, |
|
"eval_steps_per_second": 4.874, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7001221001221e-05, |
|
"loss": 0.6125, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6088229910305692, |
|
"eval_loss": 0.6555132269859314, |
|
"eval_runtime": 8.8047, |
|
"eval_samples_per_second": 620.464, |
|
"eval_steps_per_second": 4.884, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.6001221001221e-05, |
|
"loss": 0.5942, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6027823540179389, |
|
"eval_loss": 0.6647323369979858, |
|
"eval_runtime": 8.7921, |
|
"eval_samples_per_second": 621.356, |
|
"eval_steps_per_second": 4.891, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5001221001221004e-05, |
|
"loss": 0.5805, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5934468240893283, |
|
"eval_loss": 0.6735221147537231, |
|
"eval_runtime": 8.6724, |
|
"eval_samples_per_second": 629.928, |
|
"eval_steps_per_second": 4.958, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.400244200244201e-05, |
|
"loss": 0.5689, |
|
"step": 4914 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5978400146439685, |
|
"eval_loss": 0.6893225312232971, |
|
"eval_runtime": 8.8418, |
|
"eval_samples_per_second": 617.861, |
|
"eval_steps_per_second": 4.863, |
|
"step": 4914 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3002442002442004e-05, |
|
"loss": 0.5587, |
|
"step": 5733 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.589602782354018, |
|
"eval_loss": 0.7054734230041504, |
|
"eval_runtime": 8.8696, |
|
"eval_samples_per_second": 615.924, |
|
"eval_steps_per_second": 4.848, |
|
"step": 5733 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 5733, |
|
"total_flos": 2.2988974586855424e+16, |
|
"train_loss": 0.603873611681752, |
|
"train_runtime": 4016.5926, |
|
"train_samples_per_second": 1303.879, |
|
"train_steps_per_second": 10.195 |
|
} |
|
], |
|
"max_steps": 40950, |
|
"num_train_epochs": 50, |
|
"total_flos": 2.2988974586855424e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|