|
{ |
|
"best_metric": 0.3441176414489746, |
|
"best_model_checkpoint": "distilbert_sa_GLUE_Experiment_logit_kd_wnli_96/checkpoint-15", |
|
"epoch": 10.0, |
|
"global_step": 30, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.348, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.345082551240921, |
|
"eval_runtime": 0.113, |
|
"eval_samples_per_second": 628.265, |
|
"eval_steps_per_second": 8.849, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.3477, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.3447287976741791, |
|
"eval_runtime": 0.1177, |
|
"eval_samples_per_second": 603.052, |
|
"eval_steps_per_second": 8.494, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.3467, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.3444860577583313, |
|
"eval_runtime": 0.1121, |
|
"eval_samples_per_second": 633.302, |
|
"eval_steps_per_second": 8.92, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.3473, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.34416645765304565, |
|
"eval_runtime": 0.1108, |
|
"eval_samples_per_second": 641.052, |
|
"eval_steps_per_second": 9.029, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.3474, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.3441176414489746, |
|
"eval_runtime": 0.1102, |
|
"eval_samples_per_second": 644.292, |
|
"eval_steps_per_second": 9.075, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.3476, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.34430181980133057, |
|
"eval_runtime": 0.1109, |
|
"eval_samples_per_second": 640.012, |
|
"eval_steps_per_second": 9.014, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.3477, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.3445564806461334, |
|
"eval_runtime": 0.1111, |
|
"eval_samples_per_second": 638.78, |
|
"eval_steps_per_second": 8.997, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.347, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.3449000120162964, |
|
"eval_runtime": 0.111, |
|
"eval_samples_per_second": 639.483, |
|
"eval_steps_per_second": 9.007, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.3477, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.34510910511016846, |
|
"eval_runtime": 0.1348, |
|
"eval_samples_per_second": 526.598, |
|
"eval_steps_per_second": 7.417, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3472, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.3452550768852234, |
|
"eval_runtime": 0.1099, |
|
"eval_samples_per_second": 645.899, |
|
"eval_steps_per_second": 9.097, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 30, |
|
"total_flos": 36999717191680.0, |
|
"train_loss": 0.34743192195892336, |
|
"train_runtime": 34.4199, |
|
"train_samples_per_second": 922.431, |
|
"train_steps_per_second": 4.358 |
|
} |
|
], |
|
"max_steps": 150, |
|
"num_train_epochs": 50, |
|
"total_flos": 36999717191680.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|