|
{ |
|
"best_metric": 0.8307240704500979, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-9/checkpoint-5362", |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 5362, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.658814907073975, |
|
"learning_rate": 5.312164802258909e-05, |
|
"loss": 0.5144, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7847358121330724, |
|
"eval_f1": 0.8053097345132744, |
|
"eval_loss": 0.4449388384819031, |
|
"eval_precision": 0.7350565428109854, |
|
"eval_recall": 0.8904109589041096, |
|
"eval_runtime": 131.9263, |
|
"eval_samples_per_second": 7.747, |
|
"eval_steps_per_second": 1.94, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 21.609981536865234, |
|
"learning_rate": 4.648144201976546e-05, |
|
"loss": 0.4571, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.815068493150685, |
|
"eval_f1": 0.8298829882988299, |
|
"eval_loss": 0.4288274347782135, |
|
"eval_precision": 0.7683333333333333, |
|
"eval_recall": 0.9021526418786693, |
|
"eval_runtime": 132.7828, |
|
"eval_samples_per_second": 7.697, |
|
"eval_steps_per_second": 1.928, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.596930027008057, |
|
"learning_rate": 3.984123601694182e-05, |
|
"loss": 0.444, |
|
"step": 2298 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.812133072407045, |
|
"eval_f1": 0.8218923933209648, |
|
"eval_loss": 0.4297088086605072, |
|
"eval_precision": 0.781305114638448, |
|
"eval_recall": 0.8669275929549902, |
|
"eval_runtime": 133.7052, |
|
"eval_samples_per_second": 7.644, |
|
"eval_steps_per_second": 1.915, |
|
"step": 2298 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 4.766616344451904, |
|
"learning_rate": 3.320103001411819e-05, |
|
"loss": 0.4335, |
|
"step": 3064 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8160469667318982, |
|
"eval_f1": 0.8278388278388279, |
|
"eval_loss": 0.43032753467559814, |
|
"eval_precision": 0.7779690189328744, |
|
"eval_recall": 0.8845401174168297, |
|
"eval_runtime": 132.4524, |
|
"eval_samples_per_second": 7.716, |
|
"eval_steps_per_second": 1.933, |
|
"step": 3064 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 18.048282623291016, |
|
"learning_rate": 2.6560824011294545e-05, |
|
"loss": 0.4137, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.824853228962818, |
|
"eval_f1": 0.8350230414746544, |
|
"eval_loss": 0.4249579608440399, |
|
"eval_precision": 0.789198606271777, |
|
"eval_recall": 0.8864970645792564, |
|
"eval_runtime": 130.8699, |
|
"eval_samples_per_second": 7.809, |
|
"eval_steps_per_second": 1.956, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 19.333023071289062, |
|
"learning_rate": 1.992061800847091e-05, |
|
"loss": 0.4157, |
|
"step": 4596 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8180039138943248, |
|
"eval_f1": 0.8351063829787234, |
|
"eval_loss": 0.43369239568710327, |
|
"eval_precision": 0.7633711507293355, |
|
"eval_recall": 0.9217221135029354, |
|
"eval_runtime": 131.2119, |
|
"eval_samples_per_second": 7.789, |
|
"eval_steps_per_second": 1.951, |
|
"step": 4596 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 2.5701804161071777, |
|
"learning_rate": 1.3280412005647272e-05, |
|
"loss": 0.4096, |
|
"step": 5362 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8307240704500979, |
|
"eval_f1": 0.8464951197870452, |
|
"eval_loss": 0.4228073060512543, |
|
"eval_precision": 0.7743506493506493, |
|
"eval_recall": 0.9334637964774951, |
|
"eval_runtime": 131.6336, |
|
"eval_samples_per_second": 7.764, |
|
"eval_steps_per_second": 1.945, |
|
"step": 5362 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6894, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9, |
|
"save_steps": 500, |
|
"total_flos": 1649866380960.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.7939954008608587, |
|
"learning_rate": 5.976185402541273e-05, |
|
"num_train_epochs": 9, |
|
"temperature": 18 |
|
} |
|
} |
|
|