|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.361930294906166, |
|
"eval_steps": 500, |
|
"global_step": 2000, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.758632242679596, |
|
"eval_loss": 0.4715859889984131, |
|
"eval_runtime": 9.5006, |
|
"eval_samples_per_second": 313.981, |
|
"eval_steps_per_second": 19.683, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.3404825737265416, |
|
"grad_norm": 9.62618637084961, |
|
"learning_rate": 1.4776902224208135e-05, |
|
"loss": 0.5101, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7727120518684387, |
|
"eval_loss": 0.47308608889579773, |
|
"eval_runtime": 9.4715, |
|
"eval_samples_per_second": 314.944, |
|
"eval_steps_per_second": 19.743, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 2.680965147453083, |
|
"grad_norm": 17.83710289001465, |
|
"learning_rate": 1.2489456059470035e-05, |
|
"loss": 0.2485, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7743881940841675, |
|
"eval_loss": 0.7006397843360901, |
|
"eval_runtime": 9.4464, |
|
"eval_samples_per_second": 315.781, |
|
"eval_steps_per_second": 19.796, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7750586867332458, |
|
"eval_loss": 0.8257986903190613, |
|
"eval_runtime": 9.4942, |
|
"eval_samples_per_second": 314.193, |
|
"eval_steps_per_second": 19.696, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 4.021447721179625, |
|
"grad_norm": 0.4933696985244751, |
|
"learning_rate": 1.0202009894731932e-05, |
|
"loss": 0.0969, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.782433807849884, |
|
"eval_loss": 0.9035575985908508, |
|
"eval_runtime": 9.4688, |
|
"eval_samples_per_second": 315.034, |
|
"eval_steps_per_second": 19.749, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 5.361930294906166, |
|
"grad_norm": 15.212503433227539, |
|
"learning_rate": 7.91456372999383e-06, |
|
"loss": 0.0396, |
|
"step": 2000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3730, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 1.1737297309933476e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"learning_rate": 1.7064348388946237e-05, |
|
"per_device_train_batch_size": 32 |
|
} |
|
} |
|
|