|
{ |
|
"best_metric": 0.6773178026129387, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-14292", |
|
"epoch": 9.0, |
|
"eval_steps": 500, |
|
"global_step": 14292, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.5482791066169739, |
|
"learning_rate": 9.642076934385773e-06, |
|
"loss": 0.5031, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6666929009916575, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.49551922082901, |
|
"eval_mcc": 0.0, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 23.2895, |
|
"eval_samples_per_second": 545.567, |
|
"eval_steps_per_second": 17.089, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.0544217824935913, |
|
"learning_rate": 8.436817317587551e-06, |
|
"loss": 0.4941, |
|
"step": 3176 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6666929009916575, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.48784464597702026, |
|
"eval_mcc": 0.0, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 23.5003, |
|
"eval_samples_per_second": 540.673, |
|
"eval_steps_per_second": 16.936, |
|
"step": 3176 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.3678545951843262, |
|
"learning_rate": 7.23155770078933e-06, |
|
"loss": 0.4903, |
|
"step": 4764 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6667716039666299, |
|
"eval_f1": 0.003295668549905838, |
|
"eval_loss": 0.48430460691452026, |
|
"eval_mcc": 0.013927887759262332, |
|
"eval_precision": 0.5384615384615384, |
|
"eval_recall": 0.001652892561983471, |
|
"eval_runtime": 23.4354, |
|
"eval_samples_per_second": 542.171, |
|
"eval_steps_per_second": 16.983, |
|
"step": 4764 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.989233136177063, |
|
"learning_rate": 6.026298083991109e-06, |
|
"loss": 0.4879, |
|
"step": 6352 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6692900991657484, |
|
"eval_f1": 0.05742485419470615, |
|
"eval_loss": 0.4830881953239441, |
|
"eval_mcc": 0.0682424873004379, |
|
"eval_precision": 0.5739910313901345, |
|
"eval_recall": 0.030224321133412044, |
|
"eval_runtime": 23.418, |
|
"eval_samples_per_second": 542.574, |
|
"eval_steps_per_second": 16.995, |
|
"step": 6352 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.5889469385147095, |
|
"learning_rate": 4.821038467192886e-06, |
|
"loss": 0.4867, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.669211396190776, |
|
"eval_f1": 0.049310110834652796, |
|
"eval_loss": 0.48194119334220886, |
|
"eval_mcc": 0.0653430061639141, |
|
"eval_precision": 0.5860215053763441, |
|
"eval_recall": 0.025737898465171192, |
|
"eval_runtime": 23.4842, |
|
"eval_samples_per_second": 541.044, |
|
"eval_steps_per_second": 16.948, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.164549708366394, |
|
"learning_rate": 3.615778850394665e-06, |
|
"loss": 0.4851, |
|
"step": 9528 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6764520698882418, |
|
"eval_f1": 0.1451445206903722, |
|
"eval_loss": 0.48082929849624634, |
|
"eval_mcc": 0.12675760316839801, |
|
"eval_precision": 0.60801393728223, |
|
"eval_recall": 0.08240850059031878, |
|
"eval_runtime": 23.4779, |
|
"eval_samples_per_second": 541.19, |
|
"eval_steps_per_second": 16.952, |
|
"step": 9528 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.5342378616333008, |
|
"learning_rate": 2.410519233596443e-06, |
|
"loss": 0.4833, |
|
"step": 11116 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6767668817881316, |
|
"eval_f1": 0.1523219814241486, |
|
"eval_loss": 0.4799216091632843, |
|
"eval_mcc": 0.12939159899915856, |
|
"eval_precision": 0.6049180327868853, |
|
"eval_recall": 0.0871310507674144, |
|
"eval_runtime": 23.2444, |
|
"eval_samples_per_second": 546.625, |
|
"eval_steps_per_second": 17.122, |
|
"step": 11116 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.1848862171173096, |
|
"learning_rate": 1.2052596167982216e-06, |
|
"loss": 0.4837, |
|
"step": 12704 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6771603966629939, |
|
"eval_f1": 0.15527182866556835, |
|
"eval_loss": 0.47950050234794617, |
|
"eval_mcc": 0.13165465344587687, |
|
"eval_precision": 0.607085346215781, |
|
"eval_recall": 0.08902007083825265, |
|
"eval_runtime": 23.1542, |
|
"eval_samples_per_second": 548.757, |
|
"eval_steps_per_second": 17.189, |
|
"step": 12704 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.0717182159423828, |
|
"learning_rate": 0.0, |
|
"loss": 0.4838, |
|
"step": 14292 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6773178026129387, |
|
"eval_f1": 0.15707236842105263, |
|
"eval_loss": 0.4793970286846161, |
|
"eval_mcc": 0.13265414298730402, |
|
"eval_precision": 0.6073131955484896, |
|
"eval_recall": 0.09020070838252657, |
|
"eval_runtime": 23.3667, |
|
"eval_samples_per_second": 543.764, |
|
"eval_steps_per_second": 17.033, |
|
"step": 14292 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 14292, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9, |
|
"save_steps": 500, |
|
"total_flos": 35180046686700.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.7681424055105933, |
|
"learning_rate": 1.0847336551183995e-05, |
|
"num_train_epochs": 9, |
|
"temperature": 16 |
|
} |
|
} |
|
|