|
{ |
|
"best_metric": 0.4735751152038574, |
|
"best_model_checkpoint": "./vit-lr-cosine-warmup/checkpoint-963", |
|
"epoch": 13.0, |
|
"eval_steps": 500, |
|
"global_step": 4173, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 7.814958572387695, |
|
"learning_rate": 4.1428571428571437e-05, |
|
"loss": 0.86, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8141470180305131, |
|
"eval_f1": 0.8010598763076213, |
|
"eval_loss": 0.5250416994094849, |
|
"eval_precision": 0.8100096575743447, |
|
"eval_recall": 0.8141470180305131, |
|
"eval_runtime": 36.0914, |
|
"eval_samples_per_second": 79.908, |
|
"eval_steps_per_second": 10.002, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 6.845722198486328, |
|
"learning_rate": 8.311688311688312e-05, |
|
"loss": 0.4517, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8221220527045769, |
|
"eval_f1": 0.8099830282273331, |
|
"eval_loss": 0.5117006897926331, |
|
"eval_precision": 0.8347375649374938, |
|
"eval_recall": 0.8221220527045769, |
|
"eval_runtime": 37.2073, |
|
"eval_samples_per_second": 77.512, |
|
"eval_steps_per_second": 9.702, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.5204238891601562, |
|
"learning_rate": 9.985334621908699e-05, |
|
"loss": 0.3512, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8394590846047156, |
|
"eval_f1": 0.830758624078281, |
|
"eval_loss": 0.4735751152038574, |
|
"eval_precision": 0.8318317467279469, |
|
"eval_recall": 0.8394590846047156, |
|
"eval_runtime": 35.8524, |
|
"eval_samples_per_second": 80.441, |
|
"eval_steps_per_second": 10.069, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 5.594597816467285, |
|
"learning_rate": 9.894936461151184e-05, |
|
"loss": 0.2184, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8567961165048543, |
|
"eval_f1": 0.85051019948, |
|
"eval_loss": 0.4796653389930725, |
|
"eval_precision": 0.8536361493542505, |
|
"eval_recall": 0.8567961165048543, |
|
"eval_runtime": 35.3028, |
|
"eval_samples_per_second": 81.693, |
|
"eval_steps_per_second": 10.226, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 9.165299415588379, |
|
"learning_rate": 9.723506398349735e-05, |
|
"loss": 0.1264, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8547156726768377, |
|
"eval_f1": 0.8530254035056796, |
|
"eval_loss": 0.6211732029914856, |
|
"eval_precision": 0.8551837556766221, |
|
"eval_recall": 0.8547156726768377, |
|
"eval_runtime": 36.6331, |
|
"eval_samples_per_second": 78.727, |
|
"eval_steps_per_second": 9.854, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.9397739768028259, |
|
"learning_rate": 9.473882326123909e-05, |
|
"loss": 0.0687, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8463938973647711, |
|
"eval_f1": 0.840249522586874, |
|
"eval_loss": 0.7659199237823486, |
|
"eval_precision": 0.8475689441425499, |
|
"eval_recall": 0.8463938973647711, |
|
"eval_runtime": 35.8316, |
|
"eval_samples_per_second": 80.488, |
|
"eval_steps_per_second": 10.075, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 11.412993431091309, |
|
"learning_rate": 9.15019657867844e-05, |
|
"loss": 0.0463, |
|
"step": 2247 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8519417475728155, |
|
"eval_f1": 0.84690540461018, |
|
"eval_loss": 0.8237490057945251, |
|
"eval_precision": 0.8546320390871954, |
|
"eval_recall": 0.8519417475728155, |
|
"eval_runtime": 36.3237, |
|
"eval_samples_per_second": 79.397, |
|
"eval_steps_per_second": 9.938, |
|
"step": 2247 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.15119314193725586, |
|
"learning_rate": 8.759130166350091e-05, |
|
"loss": 0.0373, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8377253814147018, |
|
"eval_f1": 0.8414971604167042, |
|
"eval_loss": 0.871150553226471, |
|
"eval_precision": 0.8492780112281874, |
|
"eval_recall": 0.8377253814147018, |
|
"eval_runtime": 37.0971, |
|
"eval_samples_per_second": 77.742, |
|
"eval_steps_per_second": 9.731, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.9838098883628845, |
|
"learning_rate": 8.304716115113689e-05, |
|
"loss": 0.0347, |
|
"step": 2889 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8567961165048543, |
|
"eval_f1": 0.8533749018674412, |
|
"eval_loss": 0.8180708885192871, |
|
"eval_precision": 0.8549859977362129, |
|
"eval_recall": 0.8567961165048543, |
|
"eval_runtime": 36.43, |
|
"eval_samples_per_second": 79.166, |
|
"eval_steps_per_second": 9.909, |
|
"step": 2889 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 2.4911880493164062, |
|
"learning_rate": 7.795595034552552e-05, |
|
"loss": 0.0263, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8446601941747572, |
|
"eval_f1": 0.8308350673322552, |
|
"eval_loss": 1.0705382823944092, |
|
"eval_precision": 0.8388632159592988, |
|
"eval_recall": 0.8446601941747572, |
|
"eval_runtime": 36.2711, |
|
"eval_samples_per_second": 79.512, |
|
"eval_steps_per_second": 9.953, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.003689270233735442, |
|
"learning_rate": 7.240195031927308e-05, |
|
"loss": 0.0289, |
|
"step": 3531 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.858876560332871, |
|
"eval_f1": 0.855018220816544, |
|
"eval_loss": 0.9376017451286316, |
|
"eval_precision": 0.8605983316828895, |
|
"eval_recall": 0.858876560332871, |
|
"eval_runtime": 36.7783, |
|
"eval_samples_per_second": 78.416, |
|
"eval_steps_per_second": 9.816, |
|
"step": 3531 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.08966358751058578, |
|
"learning_rate": 6.647710326399964e-05, |
|
"loss": 0.0164, |
|
"step": 3852 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.863384188626907, |
|
"eval_f1": 0.8610588511525862, |
|
"eval_loss": 0.9714025259017944, |
|
"eval_precision": 0.8611342448885915, |
|
"eval_recall": 0.863384188626907, |
|
"eval_runtime": 36.4831, |
|
"eval_samples_per_second": 79.05, |
|
"eval_steps_per_second": 9.895, |
|
"step": 3852 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.05049363151192665, |
|
"learning_rate": 6.027949045818934e-05, |
|
"loss": 0.0077, |
|
"step": 4173 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8398058252427184, |
|
"eval_f1": 0.8242916893123671, |
|
"eval_loss": 1.2992373704910278, |
|
"eval_precision": 0.8395816522197255, |
|
"eval_recall": 0.8398058252427184, |
|
"eval_runtime": 37.0798, |
|
"eval_samples_per_second": 77.778, |
|
"eval_steps_per_second": 9.736, |
|
"step": 4173 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"step": 4173, |
|
"total_flos": 5.166157498470679e+18, |
|
"train_loss": 0.1749291451406399, |
|
"train_runtime": 1863.5469, |
|
"train_samples_per_second": 275.174, |
|
"train_steps_per_second": 17.225 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 32100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 5.166157498470679e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|