|
{ |
|
"best_metric": 0.9128113879003559, |
|
"best_model_checkpoint": "./SocalDisner-strong/run-1/checkpoint-4430", |
|
"epoch": 7.122186495176849, |
|
"global_step": 4430, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.3346599605377065e-05, |
|
"loss": 0.0902, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_f1": 0.9015075376884422, |
|
"eval_loss": 0.04874825105071068, |
|
"eval_runtime": 13.1719, |
|
"eval_samples_per_second": 189.797, |
|
"eval_steps_per_second": 23.763, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.963685146642776e-05, |
|
"loss": 0.0494, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.5909883223667728e-05, |
|
"loss": 0.0284, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_f1": 0.9087016748520338, |
|
"eval_loss": 0.049770649522542953, |
|
"eval_runtime": 13.2365, |
|
"eval_samples_per_second": 188.871, |
|
"eval_steps_per_second": 23.647, |
|
"step": 1772 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 2.21829149809077e-05, |
|
"loss": 0.0141, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 1.845594673814767e-05, |
|
"loss": 0.0075, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"eval_f1": 0.9092276830491474, |
|
"eval_loss": 0.0691143274307251, |
|
"eval_runtime": 13.1876, |
|
"eval_samples_per_second": 189.573, |
|
"eval_steps_per_second": 23.734, |
|
"step": 2658 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 1.472897849538764e-05, |
|
"loss": 0.0029, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 1.1002010252627608e-05, |
|
"loss": 0.0024, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"eval_f1": 0.9093625498007968, |
|
"eval_loss": 0.06731252372264862, |
|
"eval_runtime": 13.216, |
|
"eval_samples_per_second": 189.165, |
|
"eval_steps_per_second": 23.683, |
|
"step": 3544 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 7.275042009867579e-06, |
|
"loss": 0.0017, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"eval_f1": 0.9128113879003559, |
|
"eval_loss": 0.07908429950475693, |
|
"eval_runtime": 13.1476, |
|
"eval_samples_per_second": 190.149, |
|
"eval_steps_per_second": 23.807, |
|
"step": 4430 |
|
} |
|
], |
|
"max_steps": 4976, |
|
"num_train_epochs": 8, |
|
"total_flos": 1557691318801926.0, |
|
"trial_name": null, |
|
"trial_params": { |
|
"adam_epsilon": 1.0695999090544144e-10, |
|
"learning_rate": 3.198484145936658e-05, |
|
"num_train_epochs": 8, |
|
"per_device_eval_batch_size": 8, |
|
"per_device_train_batch_size": 32, |
|
"seed": 326, |
|
"warmup_steps": 685, |
|
"weight_decay": 7.355540557094741e-05 |
|
} |
|
} |
|
|