|
{ |
|
"best_metric": 0.8944954128440367, |
|
"best_model_checkpoint": "distilbert-base-uncased-finetuned-sst2/run-1/checkpoint-4210", |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 4210, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1187648456057007, |
|
"grad_norm": 10.556523323059082, |
|
"learning_rate": 3.3640213484135133e-06, |
|
"loss": 0.4628, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2375296912114014, |
|
"grad_norm": 27.427276611328125, |
|
"learning_rate": 3.261083118657898e-06, |
|
"loss": 0.3038, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.35629453681710216, |
|
"grad_norm": 10.17439079284668, |
|
"learning_rate": 3.1581448889022824e-06, |
|
"loss": 0.2837, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4750593824228028, |
|
"grad_norm": 13.1768159866333, |
|
"learning_rate": 3.0552066591466673e-06, |
|
"loss": 0.2588, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5938242280285035, |
|
"grad_norm": 7.38680362701416, |
|
"learning_rate": 2.952268429391051e-06, |
|
"loss": 0.2554, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.7125890736342043, |
|
"grad_norm": 13.285799980163574, |
|
"learning_rate": 2.849330199635436e-06, |
|
"loss": 0.2433, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.831353919239905, |
|
"grad_norm": 10.867403984069824, |
|
"learning_rate": 2.7463919698798204e-06, |
|
"loss": 0.2491, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9501187648456056, |
|
"grad_norm": 8.815483093261719, |
|
"learning_rate": 2.643453740124205e-06, |
|
"loss": 0.232, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8944954128440367, |
|
"eval_loss": 0.2890743613243103, |
|
"eval_runtime": 1.2151, |
|
"eval_samples_per_second": 717.616, |
|
"eval_steps_per_second": 45.262, |
|
"step": 4210 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 16840, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 582173519062080.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": { |
|
"learning_rate": 3.466959578169129e-06, |
|
"num_train_epochs": 4, |
|
"per_device_train_batch_size": 16, |
|
"seed": 22 |
|
} |
|
} |
|
|