{
  "best_metric": 0.511895477771759,
  "best_model_checkpoint": "./vit-beta2-0.995/checkpoint-2889",
  "epoch": 19.0,
  "eval_steps": 500,
  "global_step": 6099,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 30.705322265625,
      "learning_rate": 1.8234275822273514e-05,
      "loss": 1.7709,
      "step": 321
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7038834951456311,
      "eval_f1": 0.6317206539589807,
      "eval_loss": 0.9409339427947998,
      "eval_precision": 0.6600572679415466,
      "eval_recall": 0.7038834951456311,
      "eval_runtime": 22.6099,
      "eval_samples_per_second": 127.555,
      "eval_steps_per_second": 15.966,
      "step": 321
    },
    {
      "epoch": 2.0,
      "grad_norm": 38.1316032409668,
      "learning_rate": 3.675706866705136e-05,
      "loss": 1.1633,
      "step": 642
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7371705963938974,
      "eval_f1": 0.6969962032458188,
      "eval_loss": 0.7317262887954712,
      "eval_precision": 0.7193055769194622,
      "eval_recall": 0.7371705963938974,
      "eval_runtime": 22.4171,
      "eval_samples_per_second": 128.652,
      "eval_steps_per_second": 16.104,
      "step": 642
    },
    {
      "epoch": 3.0,
      "grad_norm": 10.181236267089844,
      "learning_rate": 5.52798615118292e-05,
      "loss": 1.0429,
      "step": 963
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7624826629680999,
      "eval_f1": 0.7239854360368981,
      "eval_loss": 0.6350400447845459,
      "eval_precision": 0.7357005373068777,
      "eval_recall": 0.7624826629680999,
      "eval_runtime": 22.621,
      "eval_samples_per_second": 127.492,
      "eval_steps_per_second": 15.959,
      "step": 963
    },
    {
      "epoch": 4.0,
      "grad_norm": 36.46014404296875,
      "learning_rate": 7.380265435660705e-05,
      "loss": 0.9649,
      "step": 1284
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7694174757281553,
      "eval_f1": 0.78081186335,
      "eval_loss": 0.5759614706039429,
      "eval_precision": 0.803793715931661,
      "eval_recall": 0.7694174757281553,
      "eval_runtime": 22.7229,
      "eval_samples_per_second": 126.92,
      "eval_steps_per_second": 15.887,
      "step": 1284
    },
    {
      "epoch": 5.0,
      "grad_norm": 8.231016159057617,
      "learning_rate": 9.232544720138489e-05,
      "loss": 0.9051,
      "step": 1605
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7669902912621359,
      "eval_f1": 0.77317554088632,
      "eval_loss": 0.6440545320510864,
      "eval_precision": 0.7941009873640001,
      "eval_recall": 0.7669902912621359,
      "eval_runtime": 22.7099,
      "eval_samples_per_second": 126.993,
      "eval_steps_per_second": 15.896,
      "step": 1605
    },
    {
      "epoch": 6.0,
      "grad_norm": 7.757928848266602,
      "learning_rate": 9.984876489938473e-05,
      "loss": 0.9826,
      "step": 1926
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7850208044382802,
      "eval_f1": 0.7892025324025785,
      "eval_loss": 0.5661589503288269,
      "eval_precision": 0.7956162433005612,
      "eval_recall": 0.7850208044382802,
      "eval_runtime": 22.6328,
      "eval_samples_per_second": 127.425,
      "eval_steps_per_second": 15.95,
      "step": 1926
    },
    {
      "epoch": 7.0,
      "grad_norm": 11.347550392150879,
      "learning_rate": 9.889494151200358e-05,
      "loss": 0.8855,
      "step": 2247
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7264216366158114,
      "eval_f1": 0.7458569130340702,
      "eval_loss": 0.6881958246231079,
      "eval_precision": 0.7937071962372872,
      "eval_recall": 0.7264216366158114,
      "eval_runtime": 22.7206,
      "eval_samples_per_second": 126.933,
      "eval_steps_per_second": 15.889,
      "step": 2247
    },
    {
      "epoch": 8.0,
      "grad_norm": 2.9686567783355713,
      "learning_rate": 9.707962612088379e-05,
      "loss": 0.789,
      "step": 2568
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.7364771151178918,
      "eval_f1": 0.7563577225180272,
      "eval_loss": 0.6491453051567078,
      "eval_precision": 0.8089248672404397,
      "eval_recall": 0.7364771151178918,
      "eval_runtime": 22.3088,
      "eval_samples_per_second": 129.277,
      "eval_steps_per_second": 16.182,
      "step": 2568
    },
    {
      "epoch": 9.0,
      "grad_norm": 6.504317760467529,
      "learning_rate": 9.443480321450928e-05,
      "loss": 0.7192,
      "step": 2889
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.8075589459084604,
      "eval_f1": 0.8098127959154342,
      "eval_loss": 0.511895477771759,
      "eval_precision": 0.8207082348032128,
      "eval_recall": 0.8075589459084604,
      "eval_runtime": 22.6645,
      "eval_samples_per_second": 127.247,
      "eval_steps_per_second": 15.928,
      "step": 2889
    },
    {
      "epoch": 10.0,
      "grad_norm": 12.737798690795898,
      "learning_rate": 9.100707257835249e-05,
      "loss": 0.7012,
      "step": 3210
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.79750346740638,
      "eval_f1": 0.807690598198484,
      "eval_loss": 0.5413523316383362,
      "eval_precision": 0.8340905013524049,
      "eval_recall": 0.79750346740638,
      "eval_runtime": 22.8925,
      "eval_samples_per_second": 125.98,
      "eval_steps_per_second": 15.769,
      "step": 3210
    },
    {
      "epoch": 11.0,
      "grad_norm": 33.7946891784668,
      "learning_rate": 8.685682824178951e-05,
      "loss": 0.6376,
      "step": 3531
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.7947295423023578,
      "eval_f1": 0.8066140477733247,
      "eval_loss": 0.5712208151817322,
      "eval_precision": 0.8331612477677219,
      "eval_recall": 0.7947295423023578,
      "eval_runtime": 22.3907,
      "eval_samples_per_second": 128.804,
      "eval_steps_per_second": 16.123,
      "step": 3531
    },
    {
      "epoch": 12.0,
      "grad_norm": 3.9330718517303467,
      "learning_rate": 8.205719438083829e-05,
      "loss": 0.5412,
      "step": 3852
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.8058252427184466,
      "eval_f1": 0.8145494176055466,
      "eval_loss": 0.5660970211029053,
      "eval_precision": 0.8327663750154051,
      "eval_recall": 0.8058252427184466,
      "eval_runtime": 23.2238,
      "eval_samples_per_second": 124.183,
      "eval_steps_per_second": 15.544,
      "step": 3852
    },
    {
      "epoch": 13.0,
      "grad_norm": 15.180066108703613,
      "learning_rate": 7.669273692531118e-05,
      "loss": 0.4667,
      "step": 4173
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.819001386962552,
      "eval_f1": 0.8179067223293143,
      "eval_loss": 0.6374972462654114,
      "eval_precision": 0.8409999751324982,
      "eval_recall": 0.819001386962552,
      "eval_runtime": 22.8695,
      "eval_samples_per_second": 126.107,
      "eval_steps_per_second": 15.785,
      "step": 4173
    },
    {
      "epoch": 14.0,
      "grad_norm": 2.8769447803497314,
      "learning_rate": 7.085797357089247e-05,
      "loss": 0.4766,
      "step": 4494
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.8252427184466019,
      "eval_f1": 0.83131443421793,
      "eval_loss": 0.5736179351806641,
      "eval_precision": 0.850771343089076,
      "eval_recall": 0.8252427184466019,
      "eval_runtime": 22.5348,
      "eval_samples_per_second": 127.98,
      "eval_steps_per_second": 16.02,
      "step": 4494
    },
    {
      "epoch": 15.0,
      "grad_norm": 3.6918177604675293,
      "learning_rate": 6.46557084486047e-05,
      "loss": 0.384,
      "step": 4815
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.8356449375866851,
      "eval_f1": 0.8371244710047189,
      "eval_loss": 0.5305333137512207,
      "eval_precision": 0.8414602896223483,
      "eval_recall": 0.8356449375866851,
      "eval_runtime": 22.1675,
      "eval_samples_per_second": 130.1,
      "eval_steps_per_second": 16.285,
      "step": 4815
    },
    {
      "epoch": 16.0,
      "grad_norm": 7.413918495178223,
      "learning_rate": 5.8195220793532045e-05,
      "loss": 0.37,
      "step": 5136
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.8314840499306518,
      "eval_f1": 0.8379252493969135,
      "eval_loss": 0.5530928373336792,
      "eval_precision": 0.8498567684865045,
      "eval_recall": 0.8314840499306518,
      "eval_runtime": 22.2316,
      "eval_samples_per_second": 129.725,
      "eval_steps_per_second": 16.238,
      "step": 5136
    },
    {
      "epoch": 17.0,
      "grad_norm": 8.914341926574707,
      "learning_rate": 5.15903395270923e-05,
      "loss": 0.2809,
      "step": 5457
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.8637309292649098,
      "eval_f1": 0.860814136748839,
      "eval_loss": 0.5173911452293396,
      "eval_precision": 0.8629791065379673,
      "eval_recall": 0.8637309292649098,
      "eval_runtime": 22.2757,
      "eval_samples_per_second": 129.468,
      "eval_steps_per_second": 16.206,
      "step": 5457
    },
    {
      "epoch": 18.0,
      "grad_norm": 18.052043914794922,
      "learning_rate": 4.495743767726598e-05,
      "loss": 0.2681,
      "step": 5778
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.8477808599167822,
      "eval_f1": 0.8504360089943235,
      "eval_loss": 0.5555988550186157,
      "eval_precision": 0.8554685811338774,
      "eval_recall": 0.8477808599167822,
      "eval_runtime": 22.3572,
      "eval_samples_per_second": 128.997,
      "eval_steps_per_second": 16.147,
      "step": 5778
    },
    {
      "epoch": 19.0,
      "grad_norm": 0.04609627276659012,
      "learning_rate": 3.841338197358591e-05,
      "loss": 0.2139,
      "step": 6099
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.8255894590846047,
      "eval_f1": 0.8335016397329735,
      "eval_loss": 0.6290740966796875,
      "eval_precision": 0.8517909910468258,
      "eval_recall": 0.8255894590846047,
      "eval_runtime": 22.1886,
      "eval_samples_per_second": 129.977,
      "eval_steps_per_second": 16.27,
      "step": 6099
    },
    {
      "epoch": 19.0,
      "step": 6099,
      "total_flos": 7.550537882380222e+18,
      "train_loss": 0.713874793126947,
      "train_runtime": 2273.3603,
      "train_samples_per_second": 225.569,
      "train_steps_per_second": 14.12
    }
  ],
  "logging_steps": 500,
  "max_steps": 32100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 100,
  "save_steps": 500,
  "total_flos": 7.550537882380222e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}