|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 22.0, |
|
"eval_steps": 500, |
|
"global_step": 2354, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5887850467289719, |
|
"eval_loss": 1.2691198587417603, |
|
"eval_runtime": 4.8417, |
|
"eval_samples_per_second": 44.199, |
|
"eval_steps_per_second": 5.577, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6214953271028038, |
|
"eval_loss": 1.0963133573532104, |
|
"eval_runtime": 5.0021, |
|
"eval_samples_per_second": 42.782, |
|
"eval_steps_per_second": 5.398, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6308411214953271, |
|
"eval_loss": 0.860569179058075, |
|
"eval_runtime": 4.6891, |
|
"eval_samples_per_second": 45.638, |
|
"eval_steps_per_second": 5.758, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7990654205607477, |
|
"eval_loss": 0.672334611415863, |
|
"eval_runtime": 4.717, |
|
"eval_samples_per_second": 45.368, |
|
"eval_steps_per_second": 5.724, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 4.0654205607476636e-05, |
|
"loss": 1.1331, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8457943925233645, |
|
"eval_loss": 0.48274144530296326, |
|
"eval_runtime": 4.6309, |
|
"eval_samples_per_second": 46.212, |
|
"eval_steps_per_second": 5.83, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8551401869158879, |
|
"eval_loss": 0.35962656140327454, |
|
"eval_runtime": 4.7112, |
|
"eval_samples_per_second": 45.424, |
|
"eval_steps_per_second": 5.731, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8925233644859814, |
|
"eval_loss": 0.26292115449905396, |
|
"eval_runtime": 5.4422, |
|
"eval_samples_per_second": 39.322, |
|
"eval_steps_per_second": 4.961, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9158878504672897, |
|
"eval_loss": 0.22579917311668396, |
|
"eval_runtime": 4.8554, |
|
"eval_samples_per_second": 44.074, |
|
"eval_steps_per_second": 5.561, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9158878504672897, |
|
"eval_loss": 0.19785191118717194, |
|
"eval_runtime": 4.9901, |
|
"eval_samples_per_second": 42.885, |
|
"eval_steps_per_second": 5.411, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 3.130841121495327e-05, |
|
"loss": 0.6031, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9719626168224299, |
|
"eval_loss": 0.16760671138763428, |
|
"eval_runtime": 4.8143, |
|
"eval_samples_per_second": 44.451, |
|
"eval_steps_per_second": 5.608, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.1423913985490799, |
|
"eval_runtime": 4.7195, |
|
"eval_samples_per_second": 45.343, |
|
"eval_steps_per_second": 5.721, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.985981308411215, |
|
"eval_loss": 0.12256418913602829, |
|
"eval_runtime": 5.1046, |
|
"eval_samples_per_second": 41.923, |
|
"eval_steps_per_second": 5.289, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9953271028037384, |
|
"eval_loss": 0.11292136460542679, |
|
"eval_runtime": 4.8426, |
|
"eval_samples_per_second": 44.191, |
|
"eval_steps_per_second": 5.576, |
|
"step": 1391 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9906542056074766, |
|
"eval_loss": 0.106930673122406, |
|
"eval_runtime": 4.9097, |
|
"eval_samples_per_second": 43.587, |
|
"eval_steps_per_second": 5.499, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 2.196261682242991e-05, |
|
"loss": 0.4317, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9953271028037384, |
|
"eval_loss": 0.09224073588848114, |
|
"eval_runtime": 4.854, |
|
"eval_samples_per_second": 44.087, |
|
"eval_steps_per_second": 5.562, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9953271028037384, |
|
"eval_loss": 0.08621260523796082, |
|
"eval_runtime": 4.7695, |
|
"eval_samples_per_second": 44.868, |
|
"eval_steps_per_second": 5.661, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.07688089460134506, |
|
"eval_runtime": 4.8878, |
|
"eval_samples_per_second": 43.782, |
|
"eval_steps_per_second": 5.524, |
|
"step": 1819 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.061450209468603134, |
|
"eval_runtime": 4.7171, |
|
"eval_samples_per_second": 45.367, |
|
"eval_steps_per_second": 5.724, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 18.69, |
|
"learning_rate": 1.2616822429906542e-05, |
|
"loss": 0.3584, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0667119175195694, |
|
"eval_runtime": 4.8093, |
|
"eval_samples_per_second": 44.498, |
|
"eval_steps_per_second": 5.614, |
|
"step": 2033 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9953271028037384, |
|
"eval_loss": 0.05547282472252846, |
|
"eval_runtime": 4.7518, |
|
"eval_samples_per_second": 45.036, |
|
"eval_steps_per_second": 5.682, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.054027605801820755, |
|
"eval_runtime": 4.7251, |
|
"eval_samples_per_second": 45.29, |
|
"eval_steps_per_second": 5.714, |
|
"step": 2247 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.04496881738305092, |
|
"eval_runtime": 4.6636, |
|
"eval_samples_per_second": 45.887, |
|
"eval_steps_per_second": 5.79, |
|
"step": 2354 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2675, |
|
"num_train_epochs": 25, |
|
"save_steps": 200, |
|
"total_flos": 3.9904029741839155e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|