|
{ |
|
"best_metric": 0.8038990825688074, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-4/checkpoint-2635", |
|
"epoch": 6.0, |
|
"eval_steps": 500, |
|
"global_step": 3162, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 6.212067127227783, |
|
"learning_rate": 1.7783082729779372e-05, |
|
"loss": 1.8496, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7717889908256881, |
|
"eval_loss": 1.3957021236419678, |
|
"eval_runtime": 2.8127, |
|
"eval_samples_per_second": 310.024, |
|
"eval_steps_per_second": 2.489, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": null,
|
"learning_rate": 1.4233214981823416e-05, |
|
"loss": 1.2959, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7924311926605505, |
|
"eval_loss": 1.2182828187942505, |
|
"eval_runtime": 2.8191, |
|
"eval_samples_per_second": 309.323, |
|
"eval_steps_per_second": 2.483, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 32.567195892333984, |
|
"learning_rate": 1.067659843586754e-05, |
|
"loss": 1.0858, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7958715596330275, |
|
"eval_loss": 1.1549702882766724, |
|
"eval_runtime": 2.8186, |
|
"eval_samples_per_second": 309.375, |
|
"eval_steps_per_second": 2.484, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 19.192340850830078, |
|
"learning_rate": 7.119981889911666e-06, |
|
"loss": 0.9641, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.801605504587156, |
|
"eval_loss": 1.103049874305725, |
|
"eval_runtime": 2.81, |
|
"eval_samples_per_second": 310.323, |
|
"eval_steps_per_second": 2.491, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 11.936226844787598, |
|
"learning_rate": 3.563365343955791e-06, |
|
"loss": 0.9032, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8038990825688074, |
|
"eval_loss": 1.0780220031738281, |
|
"eval_runtime": 2.8278, |
|
"eval_samples_per_second": 308.366, |
|
"eval_steps_per_second": 2.475, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 28.104488372802734, |
|
"learning_rate": 6.748797999916271e-09, |
|
"loss": 0.8596, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8038990825688074, |
|
"eval_loss": 1.0743507146835327, |
|
"eval_runtime": 2.8183, |
|
"eval_samples_per_second": 309.402, |
|
"eval_steps_per_second": 2.484, |
|
"step": 3162 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3162, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 500, |
|
"total_flos": 48527917525620.0, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.6704103438400755, |
|
"learning_rate": 2.1339699275735247e-05, |
|
"num_train_epochs": 6, |
|
"temperature": 15 |
|
} |
|
} |
|
|