|
{ |
|
"best_metric": 0.49287769198417664, |
|
"best_model_checkpoint": "./vit-lr-cosine-restarts/checkpoint-642", |
|
"epoch": 12.0, |
|
"eval_steps": 500, |
|
"global_step": 3852, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 6.734107971191406, |
|
"learning_rate": 4.1428571428571437e-05, |
|
"loss": 0.8832, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7999306518723994, |
|
"eval_f1": 0.7874950822007427, |
|
"eval_loss": 0.5542528629302979, |
|
"eval_precision": 0.8092011027858591, |
|
"eval_recall": 0.7999306518723994, |
|
"eval_runtime": 37.6681, |
|
"eval_samples_per_second": 76.563, |
|
"eval_steps_per_second": 9.584, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 6.56775426864624, |
|
"learning_rate": 8.311688311688312e-05, |
|
"loss": 0.4647, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8262829403606102, |
|
"eval_f1": 0.8158028786365703, |
|
"eval_loss": 0.49287769198417664, |
|
"eval_precision": 0.8255125137698959, |
|
"eval_recall": 0.8262829403606102, |
|
"eval_runtime": 37.8104, |
|
"eval_samples_per_second": 76.275, |
|
"eval_steps_per_second": 9.548, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.852621078491211, |
|
"learning_rate": 9.171100544010777e-05, |
|
"loss": 0.3465, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8214285714285714, |
|
"eval_f1": 0.8025208154787117, |
|
"eval_loss": 0.6518540978431702, |
|
"eval_precision": 0.8133669627117774, |
|
"eval_recall": 0.8214285714285714, |
|
"eval_runtime": 37.046, |
|
"eval_samples_per_second": 77.849, |
|
"eval_steps_per_second": 9.745, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 9.967479705810547, |
|
"learning_rate": 5.025483282385923e-05, |
|
"loss": 0.1493, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8401525658807212, |
|
"eval_f1": 0.8265360989719667, |
|
"eval_loss": 0.6805647611618042, |
|
"eval_precision": 0.8385311261178104, |
|
"eval_recall": 0.8401525658807212, |
|
"eval_runtime": 37.4024, |
|
"eval_samples_per_second": 77.107, |
|
"eval_steps_per_second": 9.652, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.2563266158103943, |
|
"learning_rate": 8.572203615936175e-06, |
|
"loss": 0.0211, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8762135922330098, |
|
"eval_f1": 0.8742202382058613, |
|
"eval_loss": 0.5605077743530273, |
|
"eval_precision": 0.8749370661972383, |
|
"eval_recall": 0.8762135922330098, |
|
"eval_runtime": 37.3835, |
|
"eval_samples_per_second": 77.146, |
|
"eval_steps_per_second": 9.657, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.11774127185344696, |
|
"learning_rate": 9.629571142901386e-05, |
|
"loss": 0.0084, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8283633841886269, |
|
"eval_f1": 0.8089134462154245, |
|
"eval_loss": 0.9771777391433716, |
|
"eval_precision": 0.8382740264211974, |
|
"eval_recall": 0.8283633841886269, |
|
"eval_runtime": 36.767, |
|
"eval_samples_per_second": 78.44, |
|
"eval_steps_per_second": 9.819, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 8.762543678283691, |
|
"learning_rate": 6.002305101515785e-05, |
|
"loss": 0.1058, |
|
"step": 2247 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8581830790568654, |
|
"eval_f1": 0.8533407001043691, |
|
"eval_loss": 0.6204692721366882, |
|
"eval_precision": 0.8542132893520025, |
|
"eval_recall": 0.8581830790568654, |
|
"eval_runtime": 37.6784, |
|
"eval_samples_per_second": 76.543, |
|
"eval_steps_per_second": 9.581, |
|
"step": 2247 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.16941487789154053, |
|
"learning_rate": 1.484342980756105e-05, |
|
"loss": 0.0171, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8671983356449375, |
|
"eval_f1": 0.8617834655772643, |
|
"eval_loss": 0.7407661080360413, |
|
"eval_precision": 0.8622466117266067, |
|
"eval_recall": 0.8671983356449375, |
|
"eval_runtime": 37.6949, |
|
"eval_samples_per_second": 76.509, |
|
"eval_steps_per_second": 9.577, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.5445165634155273, |
|
"learning_rate": 9.909438769446831e-05, |
|
"loss": 0.0029, |
|
"step": 2889 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8564493758668515, |
|
"eval_f1": 0.8473856298855159, |
|
"eval_loss": 0.9039825797080994, |
|
"eval_precision": 0.8486485221307185, |
|
"eval_recall": 0.8564493758668515, |
|
"eval_runtime": 37.879, |
|
"eval_samples_per_second": 76.137, |
|
"eval_steps_per_second": 9.53, |
|
"step": 2889 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.17326785624027252, |
|
"learning_rate": 6.94045926336125e-05, |
|
"loss": 0.1232, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.829750346740638, |
|
"eval_f1": 0.8157566455477707, |
|
"eval_loss": 0.9558870792388916, |
|
"eval_precision": 0.84362582840677, |
|
"eval_recall": 0.829750346740638, |
|
"eval_runtime": 37.6559, |
|
"eval_samples_per_second": 76.588, |
|
"eval_steps_per_second": 9.587, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.0039984057657420635, |
|
"learning_rate": 2.2470951807159123e-05, |
|
"loss": 0.0233, |
|
"step": 3531 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8564493758668515, |
|
"eval_f1": 0.857178695277239, |
|
"eval_loss": 0.7810959219932556, |
|
"eval_precision": 0.8607102524458344, |
|
"eval_recall": 0.8564493758668515, |
|
"eval_runtime": 37.2622, |
|
"eval_samples_per_second": 77.397, |
|
"eval_steps_per_second": 9.688, |
|
"step": 3531 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.003946017008274794, |
|
"learning_rate": 9.351378430000336e-10, |
|
"loss": 0.0009, |
|
"step": 3852 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8626907073509015, |
|
"eval_f1": 0.859450727771903, |
|
"eval_loss": 0.799753725528717, |
|
"eval_precision": 0.8599141248108649, |
|
"eval_recall": 0.8626907073509015, |
|
"eval_runtime": 37.2156, |
|
"eval_samples_per_second": 77.494, |
|
"eval_steps_per_second": 9.7, |
|
"step": 3852 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"step": 3852, |
|
"total_flos": 4.768760767819088e+18, |
|
"train_loss": 0.17886438471413105, |
|
"train_runtime": 1744.4117, |
|
"train_samples_per_second": 35.276, |
|
"train_steps_per_second": 2.208 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3852, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 12, |
|
"save_steps": 500, |
|
"total_flos": 4.768760767819088e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|