{ "best_metric": 0.49241259694099426, "best_model_checkpoint": "./vit-dropout-0.4/checkpoint-2889", "epoch": 19.0, "eval_steps": 500, "global_step": 6099, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 31.014915466308594, "learning_rate": 2.6004922067268256e-05, "loss": 1.6459, "step": 321 }, { "epoch": 1.0, "eval_accuracy": 0.7291955617198336, "eval_f1": 0.6910099536320229, "eval_loss": 0.8160464763641357, "eval_precision": 0.7119244517951253, "eval_recall": 0.7291955617198336, "eval_runtime": 23.8108, "eval_samples_per_second": 121.122, "eval_steps_per_second": 15.161, "step": 321 }, { "epoch": 2.0, "grad_norm": 51.539710998535156, "learning_rate": 5.233798195242002e-05, "loss": 1.0644, "step": 642 }, { "epoch": 2.0, "eval_accuracy": 0.7513869625520111, "eval_f1": 0.7575064499227677, "eval_loss": 0.6323224306106567, "eval_precision": 0.7687292978490674, "eval_recall": 0.7513869625520111, "eval_runtime": 22.9246, "eval_samples_per_second": 125.804, "eval_steps_per_second": 15.747, "step": 642 }, { "epoch": 3.0, "grad_norm": 6.284310340881348, "learning_rate": 7.867104183757178e-05, "loss": 1.0213, "step": 963 }, { "epoch": 3.0, "eval_accuracy": 0.7635228848821082, "eval_f1": 0.7421686424281113, "eval_loss": 0.6378183960914612, "eval_precision": 0.771790199319593, "eval_recall": 0.7635228848821082, "eval_runtime": 23.1763, "eval_samples_per_second": 124.438, "eval_steps_per_second": 15.576, "step": 963 }, { "epoch": 4.0, "grad_norm": 26.305831909179688, "learning_rate": 9.99845966779335e-05, "loss": 0.9727, "step": 1284 }, { "epoch": 4.0, "eval_accuracy": 0.7437586685159501, "eval_f1": 0.7593875900140131, "eval_loss": 0.604518473148346, "eval_precision": 0.7972091673281284, "eval_recall": 0.7437586685159501, "eval_runtime": 23.2491, "eval_samples_per_second": 124.048, "eval_steps_per_second": 15.527, "step": 1284 }, { "epoch": 5.0, "grad_norm": 7.570508003234863, "learning_rate": 9.93971225198763e-05, "loss": 0.9062, "step": 1605 }, { "epoch": 5.0, "eval_accuracy": 0.7902219140083218, "eval_f1": 0.7801714004286384, "eval_loss": 0.5953397750854492, "eval_precision": 0.7964554700693868, "eval_recall": 0.7902219140083218, "eval_runtime": 23.2969, "eval_samples_per_second": 123.793, "eval_steps_per_second": 15.496, "step": 1605 }, { "epoch": 6.0, "grad_norm": 14.241063117980957, "learning_rate": 9.796799913911281e-05, "loss": 0.8719, "step": 1926 }, { "epoch": 6.0, "eval_accuracy": 0.7742718446601942, "eval_f1": 0.783910785094317, "eval_loss": 0.6095318794250488, "eval_precision": 0.8083582615215125, "eval_recall": 0.7742718446601942, "eval_runtime": 23.8601, "eval_samples_per_second": 120.871, "eval_steps_per_second": 15.13, "step": 1926 }, { "epoch": 7.0, "grad_norm": 7.0408935546875, "learning_rate": 9.572157654878572e-05, "loss": 0.7537, "step": 2247 }, { "epoch": 7.0, "eval_accuracy": 0.763869625520111, "eval_f1": 0.7777621987888312, "eval_loss": 0.5970358848571777, "eval_precision": 0.8124970718532031, "eval_recall": 0.763869625520111, "eval_runtime": 23.585, "eval_samples_per_second": 122.281, "eval_steps_per_second": 15.306, "step": 2247 }, { "epoch": 8.0, "grad_norm": 2.94745135307312, "learning_rate": 9.26961302542397e-05, "loss": 0.677, "step": 2568 }, { "epoch": 8.0, "eval_accuracy": 0.7073509015256588, "eval_f1": 0.7301082277170373, "eval_loss": 0.7107765078544617, "eval_precision": 0.8147970005525182, "eval_recall": 0.7073509015256588, "eval_runtime": 22.7748, "eval_samples_per_second": 126.631, "eval_steps_per_second": 15.851, "step": 2568 }, { "epoch": 9.0, "grad_norm": 11.812759399414062, "learning_rate": 8.89432090986511e-05, "loss": 0.6638, "step": 2889 }, { "epoch": 9.0, "eval_accuracy": 0.8290568654646324, "eval_f1": 0.8244010809843568, "eval_loss": 0.49241259694099426, "eval_precision": 0.8249534082413604, "eval_recall": 0.8290568654646324, "eval_runtime": 23.6529, "eval_samples_per_second": 121.93, "eval_steps_per_second": 15.262, "step": 2889 }, { "epoch": 10.0, "grad_norm": 8.607577323913574, "learning_rate": 8.45267569518721e-05, "loss": 0.5787, "step": 3210 }, { "epoch": 10.0, "eval_accuracy": 0.8162274618585298, "eval_f1": 0.82222451204188, "eval_loss": 0.54153972864151, "eval_precision": 0.8405647026780846, "eval_recall": 0.8162274618585298, "eval_runtime": 23.1538, "eval_samples_per_second": 124.559, "eval_steps_per_second": 15.591, "step": 3210 }, { "epoch": 11.0, "grad_norm": 22.480600357055664, "learning_rate": 7.952202320752798e-05, "loss": 0.5373, "step": 3531 }, { "epoch": 11.0, "eval_accuracy": 0.8103328710124826, "eval_f1": 0.8188747354879483, "eval_loss": 0.5297914147377014, "eval_precision": 0.8409211427101868, "eval_recall": 0.8103328710124826, "eval_runtime": 23.2794, "eval_samples_per_second": 123.886, "eval_steps_per_second": 15.507, "step": 3531 }, { "epoch": 12.0, "grad_norm": 37.149688720703125, "learning_rate": 7.401428065178325e-05, "loss": 0.4923, "step": 3852 }, { "epoch": 12.0, "eval_accuracy": 0.8117198335644937, "eval_f1": 0.8212533704879323, "eval_loss": 0.5427994728088379, "eval_precision": 0.8444195978517209, "eval_recall": 0.8117198335644937, "eval_runtime": 23.1273, "eval_samples_per_second": 124.701, "eval_steps_per_second": 15.609, "step": 3852 }, { "epoch": 13.0, "grad_norm": 7.792755126953125, "learning_rate": 6.80973725492743e-05, "loss": 0.3798, "step": 4173 }, { "epoch": 13.0, "eval_accuracy": 0.8498613037447988, "eval_f1": 0.8466618158413786, "eval_loss": 0.49677935242652893, "eval_precision": 0.8470322647118588, "eval_recall": 0.8498613037447988, "eval_runtime": 22.6968, "eval_samples_per_second": 127.066, "eval_steps_per_second": 15.905, "step": 4173 }, { "epoch": 14.0, "grad_norm": 12.735553741455078, "learning_rate": 6.187211370157784e-05, "loss": 0.3912, "step": 4494 }, { "epoch": 14.0, "eval_accuracy": 0.8443134535367545, "eval_f1": 0.8460191446346724, "eval_loss": 0.5338780879974365, "eval_precision": 0.8530926029478479, "eval_recall": 0.8443134535367545, "eval_runtime": 22.4911, "eval_samples_per_second": 128.229, "eval_steps_per_second": 16.051, "step": 4494 }, { "epoch": 15.0, "grad_norm": 1.7020573616027832, "learning_rate": 5.544457272166217e-05, "loss": 0.3002, "step": 4815 }, { "epoch": 15.0, "eval_accuracy": 0.84500693481276, "eval_f1": 0.8481459887384893, "eval_loss": 0.521908164024353, "eval_precision": 0.8547788879749323, "eval_recall": 0.84500693481276, "eval_runtime": 22.516, "eval_samples_per_second": 128.087, "eval_steps_per_second": 16.033, "step": 4815 }, { "epoch": 16.0, "grad_norm": 10.823068618774414, "learning_rate": 4.894460661440583e-05, "loss": 0.2744, "step": 5136 }, { "epoch": 16.0, "eval_accuracy": 0.8203883495145631, "eval_f1": 0.827968123001256, "eval_loss": 0.636923611164093, "eval_precision": 0.8481543275037586, "eval_recall": 0.8203883495145631, "eval_runtime": 22.8235, "eval_samples_per_second": 126.361, "eval_steps_per_second": 15.817, "step": 5136 }, { "epoch": 17.0, "grad_norm": 1.4310046434402466, "learning_rate": 4.244239774409037e-05, "loss": 0.2251, "step": 5457 }, { "epoch": 17.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8555949794732268, "eval_loss": 0.515616238117218, "eval_precision": 0.8560689662280283, "eval_recall": 0.8571428571428571, "eval_runtime": 22.9434, "eval_samples_per_second": 125.701, "eval_steps_per_second": 15.734, "step": 5457 }, { "epoch": 18.0, "grad_norm": 13.193678855895996, "learning_rate": 3.606895852147351e-05, "loss": 0.2187, "step": 5778 }, { "epoch": 18.0, "eval_accuracy": 0.8457004160887656, "eval_f1": 0.849063653899852, "eval_loss": 0.5825002789497375, "eval_precision": 0.8549951611862869, "eval_recall": 0.8457004160887656, "eval_runtime": 23.1083, "eval_samples_per_second": 124.804, "eval_steps_per_second": 15.622, "step": 5778 }, { "epoch": 19.0, "grad_norm": 0.2162574678659439, "learning_rate": 2.9932882319894417e-05, "loss": 0.1767, "step": 6099 }, { "epoch": 19.0, "eval_accuracy": 0.852635228848821, "eval_f1": 0.8550909208177824, "eval_loss": 0.5693491697311401, "eval_precision": 0.8604835069555719, "eval_recall": 0.852635228848821, "eval_runtime": 22.8714, "eval_samples_per_second": 126.097, "eval_steps_per_second": 15.784, "step": 6099 }, { "epoch": 19.0, "step": 6099, "total_flos": 7.550537882380222e+18, "train_loss": 0.6395374234923966, "train_runtime": 2241.3643, "train_samples_per_second": 228.789, "train_steps_per_second": 14.322 } ], "logging_steps": 500, "max_steps": 32100, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 7.550537882380222e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }