|
{ |
|
"best_metric": 0.49241259694099426, |
|
"best_model_checkpoint": "./vit-dropout-0.4/checkpoint-2889", |
|
"epoch": 19.0, |
|
"eval_steps": 500, |
|
"global_step": 6099, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 31.014915466308594, |
|
"learning_rate": 2.6004922067268256e-05, |
|
"loss": 1.6459, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7291955617198336, |
|
"eval_f1": 0.6910099536320229, |
|
"eval_loss": 0.8160464763641357, |
|
"eval_precision": 0.7119244517951253, |
|
"eval_recall": 0.7291955617198336, |
|
"eval_runtime": 23.8108, |
|
"eval_samples_per_second": 121.122, |
|
"eval_steps_per_second": 15.161, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 51.539710998535156, |
|
"learning_rate": 5.233798195242002e-05, |
|
"loss": 1.0644, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7513869625520111, |
|
"eval_f1": 0.7575064499227677, |
|
"eval_loss": 0.6323224306106567, |
|
"eval_precision": 0.7687292978490674, |
|
"eval_recall": 0.7513869625520111, |
|
"eval_runtime": 22.9246, |
|
"eval_samples_per_second": 125.804, |
|
"eval_steps_per_second": 15.747, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 6.284310340881348, |
|
"learning_rate": 7.867104183757178e-05, |
|
"loss": 1.0213, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7635228848821082, |
|
"eval_f1": 0.7421686424281113, |
|
"eval_loss": 0.6378183960914612, |
|
"eval_precision": 0.771790199319593, |
|
"eval_recall": 0.7635228848821082, |
|
"eval_runtime": 23.1763, |
|
"eval_samples_per_second": 124.438, |
|
"eval_steps_per_second": 15.576, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 26.305831909179688, |
|
"learning_rate": 9.99845966779335e-05, |
|
"loss": 0.9727, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7437586685159501, |
|
"eval_f1": 0.7593875900140131, |
|
"eval_loss": 0.604518473148346, |
|
"eval_precision": 0.7972091673281284, |
|
"eval_recall": 0.7437586685159501, |
|
"eval_runtime": 23.2491, |
|
"eval_samples_per_second": 124.048, |
|
"eval_steps_per_second": 15.527, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 7.570508003234863, |
|
"learning_rate": 9.93971225198763e-05, |
|
"loss": 0.9062, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7902219140083218, |
|
"eval_f1": 0.7801714004286384, |
|
"eval_loss": 0.5953397750854492, |
|
"eval_precision": 0.7964554700693868, |
|
"eval_recall": 0.7902219140083218, |
|
"eval_runtime": 23.2969, |
|
"eval_samples_per_second": 123.793, |
|
"eval_steps_per_second": 15.496, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 14.241063117980957, |
|
"learning_rate": 9.796799913911281e-05, |
|
"loss": 0.8719, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7742718446601942, |
|
"eval_f1": 0.783910785094317, |
|
"eval_loss": 0.6095318794250488, |
|
"eval_precision": 0.8083582615215125, |
|
"eval_recall": 0.7742718446601942, |
|
"eval_runtime": 23.8601, |
|
"eval_samples_per_second": 120.871, |
|
"eval_steps_per_second": 15.13, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 7.0408935546875, |
|
"learning_rate": 9.572157654878572e-05, |
|
"loss": 0.7537, |
|
"step": 2247 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.763869625520111, |
|
"eval_f1": 0.7777621987888312, |
|
"eval_loss": 0.5970358848571777, |
|
"eval_precision": 0.8124970718532031, |
|
"eval_recall": 0.763869625520111, |
|
"eval_runtime": 23.585, |
|
"eval_samples_per_second": 122.281, |
|
"eval_steps_per_second": 15.306, |
|
"step": 2247 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.94745135307312, |
|
"learning_rate": 9.26961302542397e-05, |
|
"loss": 0.677, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7073509015256588, |
|
"eval_f1": 0.7301082277170373, |
|
"eval_loss": 0.7107765078544617, |
|
"eval_precision": 0.8147970005525182, |
|
"eval_recall": 0.7073509015256588, |
|
"eval_runtime": 22.7748, |
|
"eval_samples_per_second": 126.631, |
|
"eval_steps_per_second": 15.851, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 11.812759399414062, |
|
"learning_rate": 8.89432090986511e-05, |
|
"loss": 0.6638, |
|
"step": 2889 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8290568654646324, |
|
"eval_f1": 0.8244010809843568, |
|
"eval_loss": 0.49241259694099426, |
|
"eval_precision": 0.8249534082413604, |
|
"eval_recall": 0.8290568654646324, |
|
"eval_runtime": 23.6529, |
|
"eval_samples_per_second": 121.93, |
|
"eval_steps_per_second": 15.262, |
|
"step": 2889 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 8.607577323913574, |
|
"learning_rate": 8.45267569518721e-05, |
|
"loss": 0.5787, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8162274618585298, |
|
"eval_f1": 0.82222451204188, |
|
"eval_loss": 0.54153972864151, |
|
"eval_precision": 0.8405647026780846, |
|
"eval_recall": 0.8162274618585298, |
|
"eval_runtime": 23.1538, |
|
"eval_samples_per_second": 124.559, |
|
"eval_steps_per_second": 15.591, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 22.480600357055664, |
|
"learning_rate": 7.952202320752798e-05, |
|
"loss": 0.5373, |
|
"step": 3531 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8103328710124826, |
|
"eval_f1": 0.8188747354879483, |
|
"eval_loss": 0.5297914147377014, |
|
"eval_precision": 0.8409211427101868, |
|
"eval_recall": 0.8103328710124826, |
|
"eval_runtime": 23.2794, |
|
"eval_samples_per_second": 123.886, |
|
"eval_steps_per_second": 15.507, |
|
"step": 3531 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 37.149688720703125, |
|
"learning_rate": 7.401428065178325e-05, |
|
"loss": 0.4923, |
|
"step": 3852 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8117198335644937, |
|
"eval_f1": 0.8212533704879323, |
|
"eval_loss": 0.5427994728088379, |
|
"eval_precision": 0.8444195978517209, |
|
"eval_recall": 0.8117198335644937, |
|
"eval_runtime": 23.1273, |
|
"eval_samples_per_second": 124.701, |
|
"eval_steps_per_second": 15.609, |
|
"step": 3852 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 7.792755126953125, |
|
"learning_rate": 6.80973725492743e-05, |
|
"loss": 0.3798, |
|
"step": 4173 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8498613037447988, |
|
"eval_f1": 0.8466618158413786, |
|
"eval_loss": 0.49677935242652893, |
|
"eval_precision": 0.8470322647118588, |
|
"eval_recall": 0.8498613037447988, |
|
"eval_runtime": 22.6968, |
|
"eval_samples_per_second": 127.066, |
|
"eval_steps_per_second": 15.905, |
|
"step": 4173 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 12.735553741455078, |
|
"learning_rate": 6.187211370157784e-05, |
|
"loss": 0.3912, |
|
"step": 4494 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8443134535367545, |
|
"eval_f1": 0.8460191446346724, |
|
"eval_loss": 0.5338780879974365, |
|
"eval_precision": 0.8530926029478479, |
|
"eval_recall": 0.8443134535367545, |
|
"eval_runtime": 22.4911, |
|
"eval_samples_per_second": 128.229, |
|
"eval_steps_per_second": 16.051, |
|
"step": 4494 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.7020573616027832, |
|
"learning_rate": 5.544457272166217e-05, |
|
"loss": 0.3002, |
|
"step": 4815 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.84500693481276, |
|
"eval_f1": 0.8481459887384893, |
|
"eval_loss": 0.521908164024353, |
|
"eval_precision": 0.8547788879749323, |
|
"eval_recall": 0.84500693481276, |
|
"eval_runtime": 22.516, |
|
"eval_samples_per_second": 128.087, |
|
"eval_steps_per_second": 16.033, |
|
"step": 4815 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 10.823068618774414, |
|
"learning_rate": 4.894460661440583e-05, |
|
"loss": 0.2744, |
|
"step": 5136 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8203883495145631, |
|
"eval_f1": 0.827968123001256, |
|
"eval_loss": 0.636923611164093, |
|
"eval_precision": 0.8481543275037586, |
|
"eval_recall": 0.8203883495145631, |
|
"eval_runtime": 22.8235, |
|
"eval_samples_per_second": 126.361, |
|
"eval_steps_per_second": 15.817, |
|
"step": 5136 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 1.4310046434402466, |
|
"learning_rate": 4.244239774409037e-05, |
|
"loss": 0.2251, |
|
"step": 5457 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8555949794732268, |
|
"eval_loss": 0.515616238117218, |
|
"eval_precision": 0.8560689662280283, |
|
"eval_recall": 0.8571428571428571, |
|
"eval_runtime": 22.9434, |
|
"eval_samples_per_second": 125.701, |
|
"eval_steps_per_second": 15.734, |
|
"step": 5457 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 13.193678855895996, |
|
"learning_rate": 3.606895852147351e-05, |
|
"loss": 0.2187, |
|
"step": 5778 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8457004160887656, |
|
"eval_f1": 0.849063653899852, |
|
"eval_loss": 0.5825002789497375, |
|
"eval_precision": 0.8549951611862869, |
|
"eval_recall": 0.8457004160887656, |
|
"eval_runtime": 23.1083, |
|
"eval_samples_per_second": 124.804, |
|
"eval_steps_per_second": 15.622, |
|
"step": 5778 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.2162574678659439, |
|
"learning_rate": 2.9932882319894417e-05, |
|
"loss": 0.1767, |
|
"step": 6099 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.852635228848821, |
|
"eval_f1": 0.8550909208177824, |
|
"eval_loss": 0.5693491697311401, |
|
"eval_precision": 0.8604835069555719, |
|
"eval_recall": 0.852635228848821, |
|
"eval_runtime": 22.8714, |
|
"eval_samples_per_second": 126.097, |
|
"eval_steps_per_second": 15.784, |
|
"step": 6099 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"step": 6099, |
|
"total_flos": 7.550537882380222e+18, |
|
"train_loss": 0.6395374234923966, |
|
"train_runtime": 2241.3643, |
|
"train_samples_per_second": 228.789, |
|
"train_steps_per_second": 14.322 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 32100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 7.550537882380222e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|