|
{ |
|
"best_metric": 0.4778192639350891, |
|
"best_model_checkpoint": "./vit-lr-0.0001/checkpoint-642", |
|
"epoch": 12.0, |
|
"eval_steps": 500, |
|
"global_step": 3852, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.167571067810059, |
|
"learning_rate": 9.997548148581541e-05, |
|
"loss": 0.6718, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8242024965325936, |
|
"eval_f1": 0.8162630829948523, |
|
"eval_loss": 0.4934074282646179, |
|
"eval_precision": 0.8254417232097437, |
|
"eval_recall": 0.8242024965325936, |
|
"eval_runtime": 37.6292, |
|
"eval_samples_per_second": 76.643, |
|
"eval_steps_per_second": 9.594, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 6.953319072723389, |
|
"learning_rate": 9.990164344447551e-05, |
|
"loss": 0.4045, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8311373092926491, |
|
"eval_f1": 0.8268544179848872, |
|
"eval_loss": 0.4778192639350891, |
|
"eval_precision": 0.8433264041291242, |
|
"eval_recall": 0.8311373092926491, |
|
"eval_runtime": 37.5244, |
|
"eval_samples_per_second": 76.857, |
|
"eval_steps_per_second": 9.62, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 7.354413986206055, |
|
"learning_rate": 9.977855850575433e-05, |
|
"loss": 0.2419, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.840499306518724, |
|
"eval_f1": 0.8409562025706393, |
|
"eval_loss": 0.5132780075073242, |
|
"eval_precision": 0.8524873378932092, |
|
"eval_recall": 0.840499306518724, |
|
"eval_runtime": 36.8758, |
|
"eval_samples_per_second": 78.209, |
|
"eval_steps_per_second": 9.79, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.17630255222320557, |
|
"learning_rate": 9.960634813962617e-05, |
|
"loss": 0.1267, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8495145631067961, |
|
"eval_f1": 0.8447806876150558, |
|
"eval_loss": 0.6153807044029236, |
|
"eval_precision": 0.8490665425851771, |
|
"eval_recall": 0.8495145631067961, |
|
"eval_runtime": 37.1161, |
|
"eval_samples_per_second": 77.702, |
|
"eval_steps_per_second": 9.726, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.5389394760131836, |
|
"learning_rate": 9.938518229693118e-05, |
|
"loss": 0.0733, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8422330097087378, |
|
"eval_f1": 0.8361023278787632, |
|
"eval_loss": 0.7844527959823608, |
|
"eval_precision": 0.8420613182520028, |
|
"eval_recall": 0.8422330097087378, |
|
"eval_runtime": 37.3092, |
|
"eval_samples_per_second": 77.3, |
|
"eval_steps_per_second": 9.676, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 24.989282608032227, |
|
"learning_rate": 9.911527924165445e-05, |
|
"loss": 0.0446, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8470873786407767, |
|
"eval_f1": 0.840751584936853, |
|
"eval_loss": 0.8766722679138184, |
|
"eval_precision": 0.8407202080357687, |
|
"eval_recall": 0.8470873786407767, |
|
"eval_runtime": 37.9466, |
|
"eval_samples_per_second": 76.002, |
|
"eval_steps_per_second": 9.513, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.16091497242450714, |
|
"learning_rate": 9.879690533552573e-05, |
|
"loss": 0.0523, |
|
"step": 2247 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.84500693481276, |
|
"eval_f1": 0.8406651419661996, |
|
"eval_loss": 0.8674203753471375, |
|
"eval_precision": 0.8491745633887275, |
|
"eval_recall": 0.84500693481276, |
|
"eval_runtime": 37.6505, |
|
"eval_samples_per_second": 76.599, |
|
"eval_steps_per_second": 9.588, |
|
"step": 2247 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.0028496491722762585, |
|
"learning_rate": 9.843159102999166e-05, |
|
"loss": 0.0388, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8398058252427184, |
|
"eval_f1": 0.8387317635385871, |
|
"eval_loss": 0.9753792881965637, |
|
"eval_precision": 0.8565844319804308, |
|
"eval_recall": 0.8398058252427184, |
|
"eval_runtime": 37.8044, |
|
"eval_samples_per_second": 76.287, |
|
"eval_steps_per_second": 9.549, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.0019187598954886198, |
|
"learning_rate": 9.801741382013225e-05, |
|
"loss": 0.0402, |
|
"step": 2889 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8491678224687933, |
|
"eval_f1": 0.8461066193854649, |
|
"eval_loss": 0.9369620084762573, |
|
"eval_precision": 0.8547133286785668, |
|
"eval_recall": 0.8491678224687933, |
|
"eval_runtime": 37.5745, |
|
"eval_samples_per_second": 76.754, |
|
"eval_steps_per_second": 9.608, |
|
"step": 2889 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.00408038217574358, |
|
"learning_rate": 9.755584922004499e-05, |
|
"loss": 0.0283, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8509015256588072, |
|
"eval_f1": 0.8482666902922076, |
|
"eval_loss": 0.9217829704284668, |
|
"eval_precision": 0.8495520865912046, |
|
"eval_recall": 0.8509015256588072, |
|
"eval_runtime": 37.6867, |
|
"eval_samples_per_second": 76.526, |
|
"eval_steps_per_second": 9.579, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 2.3583085536956787, |
|
"learning_rate": 9.70473527382648e-05, |
|
"loss": 0.0451, |
|
"step": 3531 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8474341192787794, |
|
"eval_f1": 0.8401062715209746, |
|
"eval_loss": 0.987198531627655, |
|
"eval_precision": 0.8400167652295873, |
|
"eval_recall": 0.8474341192787794, |
|
"eval_runtime": 37.5611, |
|
"eval_samples_per_second": 76.782, |
|
"eval_steps_per_second": 9.611, |
|
"step": 3531 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 11.217573165893555, |
|
"learning_rate": 9.649242619942769e-05, |
|
"loss": 0.0549, |
|
"step": 3852 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8457004160887656, |
|
"eval_f1": 0.8415779150905001, |
|
"eval_loss": 1.0202850103378296, |
|
"eval_precision": 0.8487681253829549, |
|
"eval_recall": 0.8457004160887656, |
|
"eval_runtime": 37.4542, |
|
"eval_samples_per_second": 77.001, |
|
"eval_steps_per_second": 9.638, |
|
"step": 3852 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"step": 3852, |
|
"total_flos": 4.768760767819088e+18, |
|
"train_loss": 0.15186076372211968, |
|
"train_runtime": 1747.1919, |
|
"train_samples_per_second": 293.5, |
|
"train_steps_per_second": 18.372 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 32100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 4.768760767819088e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|