|
{ |
|
"best_metric": 0.7282186150550842, |
|
"best_model_checkpoint": "mobilebert_add_GLUE_Experiment_logit_kd_sst2_128/checkpoint-2635", |
|
"epoch": 10.0, |
|
"global_step": 5270, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 1.5487, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5779816513761468, |
|
"eval_loss": 1.3928688764572144, |
|
"eval_runtime": 1.4162, |
|
"eval_samples_per_second": 615.727, |
|
"eval_steps_per_second": 4.943, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 1.3629, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5504587155963303, |
|
"eval_loss": 1.4979432821273804, |
|
"eval_runtime": 1.4117, |
|
"eval_samples_per_second": 617.704, |
|
"eval_steps_per_second": 4.959, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 1.1397, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6754587155963303, |
|
"eval_loss": 1.3926728963851929, |
|
"eval_runtime": 1.4106, |
|
"eval_samples_per_second": 618.195, |
|
"eval_steps_per_second": 4.963, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.5649, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8073394495412844, |
|
"eval_loss": 0.7289367318153381, |
|
"eval_runtime": 1.4117, |
|
"eval_samples_per_second": 617.688, |
|
"eval_steps_per_second": 4.959, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4112, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8073394495412844, |
|
"eval_loss": 0.7282186150550842, |
|
"eval_runtime": 1.5054, |
|
"eval_samples_per_second": 579.245, |
|
"eval_steps_per_second": 4.65, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.3462, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.805045871559633, |
|
"eval_loss": 0.7653937339782715, |
|
"eval_runtime": 1.4197, |
|
"eval_samples_per_second": 614.218, |
|
"eval_steps_per_second": 4.931, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.3069, |
|
"step": 3689 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7970183486238532, |
|
"eval_loss": 0.8302631974220276, |
|
"eval_runtime": 1.4135, |
|
"eval_samples_per_second": 616.903, |
|
"eval_steps_per_second": 4.952, |
|
"step": 3689 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.2833, |
|
"step": 4216 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7924311926605505, |
|
"eval_loss": 0.880594789981842, |
|
"eval_runtime": 1.4122, |
|
"eval_samples_per_second": 617.467, |
|
"eval_steps_per_second": 4.957, |
|
"step": 4216 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.2662, |
|
"step": 4743 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7958715596330275, |
|
"eval_loss": 0.9296879768371582, |
|
"eval_runtime": 1.4088, |
|
"eval_samples_per_second": 618.966, |
|
"eval_steps_per_second": 4.969, |
|
"step": 4743 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2521, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7717889908256881, |
|
"eval_loss": 1.097933292388916, |
|
"eval_runtime": 1.4221, |
|
"eval_samples_per_second": 613.19, |
|
"eval_steps_per_second": 4.922, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 5270, |
|
"total_flos": 1.607798324461568e+16, |
|
"train_loss": 0.6482070778081268, |
|
"train_runtime": 2637.7784, |
|
"train_samples_per_second": 1276.624, |
|
"train_steps_per_second": 9.989 |
|
} |
|
], |
|
"max_steps": 26350, |
|
"num_train_epochs": 50, |
|
"total_flos": 1.607798324461568e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|