|
{ |
|
"best_metric": 0.7544031311154599, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-6/checkpoint-744", |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 744, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.4286664128303528, |
|
"learning_rate": 1.945409831472016e-05, |
|
"loss": 0.4806, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.4703535735607147, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 28.42, |
|
"eval_samples_per_second": 35.961, |
|
"eval_steps_per_second": 1.126, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.94158935546875, |
|
"learning_rate": 1.7022336025380143e-05, |
|
"loss": 0.4648, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5176125244618396, |
|
"eval_f1": 0.10849909584086799, |
|
"eval_loss": 0.4538751244544983, |
|
"eval_precision": 0.7142857142857143, |
|
"eval_recall": 0.05870841487279843, |
|
"eval_runtime": 28.3351, |
|
"eval_samples_per_second": 36.068, |
|
"eval_steps_per_second": 1.129, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.0048439502716064, |
|
"learning_rate": 1.459057373604012e-05, |
|
"loss": 0.4406, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5929549902152642, |
|
"eval_f1": 0.3677811550151976, |
|
"eval_loss": 0.4149659276008606, |
|
"eval_precision": 0.8231292517006803, |
|
"eval_recall": 0.23679060665362034, |
|
"eval_runtime": 28.3925, |
|
"eval_samples_per_second": 35.995, |
|
"eval_steps_per_second": 1.127, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.1001150608062744, |
|
"learning_rate": 1.2158811446700102e-05, |
|
"loss": 0.4126, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7025440313111546, |
|
"eval_f1": 0.6456876456876457, |
|
"eval_loss": 0.39196181297302246, |
|
"eval_precision": 0.7982708933717579, |
|
"eval_recall": 0.5420743639921722, |
|
"eval_runtime": 28.4943, |
|
"eval_samples_per_second": 35.867, |
|
"eval_steps_per_second": 1.123, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.2996047735214233, |
|
"learning_rate": 9.72704915736008e-06, |
|
"loss": 0.4021, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.735812133072407, |
|
"eval_f1": 0.7133757961783439, |
|
"eval_loss": 0.3851335346698761, |
|
"eval_precision": 0.7795823665893271, |
|
"eval_recall": 0.6575342465753424, |
|
"eval_runtime": 28.8344, |
|
"eval_samples_per_second": 35.444, |
|
"eval_steps_per_second": 1.11, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.8318911790847778, |
|
"learning_rate": 7.29528686802006e-06, |
|
"loss": 0.3976, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7524461839530333, |
|
"eval_f1": 0.7394438722966015, |
|
"eval_loss": 0.3816056251525879, |
|
"eval_precision": 0.7804347826086957, |
|
"eval_recall": 0.7025440313111546, |
|
"eval_runtime": 28.629, |
|
"eval_samples_per_second": 35.698, |
|
"eval_steps_per_second": 1.118, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.5164391994476318, |
|
"learning_rate": 4.86352457868004e-06, |
|
"loss": 0.3934, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7504892367906066, |
|
"eval_f1": 0.7357512953367875, |
|
"eval_loss": 0.37981584668159485, |
|
"eval_precision": 0.7819383259911894, |
|
"eval_recall": 0.6947162426614482, |
|
"eval_runtime": 29.4404, |
|
"eval_samples_per_second": 34.714, |
|
"eval_steps_per_second": 1.087, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.8273214101791382, |
|
"learning_rate": 2.43176228934002e-06, |
|
"loss": 0.3903, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7544031311154599, |
|
"eval_f1": 0.7462082912032356, |
|
"eval_loss": 0.3790663480758667, |
|
"eval_precision": 0.7719665271966527, |
|
"eval_recall": 0.7221135029354208, |
|
"eval_runtime": 28.4026, |
|
"eval_samples_per_second": 35.983, |
|
"eval_steps_per_second": 1.127, |
|
"step": 744 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 837, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9, |
|
"save_steps": 500, |
|
"total_flos": 1885561578240.0, |
|
"train_batch_size": 33, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.6122687021783514, |
|
"learning_rate": 2.188586060406018e-05, |
|
"num_train_epochs": 9, |
|
"per_device_train_batch_size": 33, |
|
"temperature": 14 |
|
} |
|
} |
|
|