vit-dropout-0.4 / trainer_state.json
sharren's picture
🍻 cheers
36fc753 verified
raw
history blame contribute delete
No virus
10.8 kB
{
"best_metric": 0.49241259694099426,
"best_model_checkpoint": "./vit-dropout-0.4/checkpoint-2889",
"epoch": 19.0,
"eval_steps": 500,
"global_step": 6099,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 31.014915466308594,
"learning_rate": 2.6004922067268256e-05,
"loss": 1.6459,
"step": 321
},
{
"epoch": 1.0,
"eval_accuracy": 0.7291955617198336,
"eval_f1": 0.6910099536320229,
"eval_loss": 0.8160464763641357,
"eval_precision": 0.7119244517951253,
"eval_recall": 0.7291955617198336,
"eval_runtime": 23.8108,
"eval_samples_per_second": 121.122,
"eval_steps_per_second": 15.161,
"step": 321
},
{
"epoch": 2.0,
"grad_norm": 51.539710998535156,
"learning_rate": 5.233798195242002e-05,
"loss": 1.0644,
"step": 642
},
{
"epoch": 2.0,
"eval_accuracy": 0.7513869625520111,
"eval_f1": 0.7575064499227677,
"eval_loss": 0.6323224306106567,
"eval_precision": 0.7687292978490674,
"eval_recall": 0.7513869625520111,
"eval_runtime": 22.9246,
"eval_samples_per_second": 125.804,
"eval_steps_per_second": 15.747,
"step": 642
},
{
"epoch": 3.0,
"grad_norm": 6.284310340881348,
"learning_rate": 7.867104183757178e-05,
"loss": 1.0213,
"step": 963
},
{
"epoch": 3.0,
"eval_accuracy": 0.7635228848821082,
"eval_f1": 0.7421686424281113,
"eval_loss": 0.6378183960914612,
"eval_precision": 0.771790199319593,
"eval_recall": 0.7635228848821082,
"eval_runtime": 23.1763,
"eval_samples_per_second": 124.438,
"eval_steps_per_second": 15.576,
"step": 963
},
{
"epoch": 4.0,
"grad_norm": 26.305831909179688,
"learning_rate": 9.99845966779335e-05,
"loss": 0.9727,
"step": 1284
},
{
"epoch": 4.0,
"eval_accuracy": 0.7437586685159501,
"eval_f1": 0.7593875900140131,
"eval_loss": 0.604518473148346,
"eval_precision": 0.7972091673281284,
"eval_recall": 0.7437586685159501,
"eval_runtime": 23.2491,
"eval_samples_per_second": 124.048,
"eval_steps_per_second": 15.527,
"step": 1284
},
{
"epoch": 5.0,
"grad_norm": 7.570508003234863,
"learning_rate": 9.93971225198763e-05,
"loss": 0.9062,
"step": 1605
},
{
"epoch": 5.0,
"eval_accuracy": 0.7902219140083218,
"eval_f1": 0.7801714004286384,
"eval_loss": 0.5953397750854492,
"eval_precision": 0.7964554700693868,
"eval_recall": 0.7902219140083218,
"eval_runtime": 23.2969,
"eval_samples_per_second": 123.793,
"eval_steps_per_second": 15.496,
"step": 1605
},
{
"epoch": 6.0,
"grad_norm": 14.241063117980957,
"learning_rate": 9.796799913911281e-05,
"loss": 0.8719,
"step": 1926
},
{
"epoch": 6.0,
"eval_accuracy": 0.7742718446601942,
"eval_f1": 0.783910785094317,
"eval_loss": 0.6095318794250488,
"eval_precision": 0.8083582615215125,
"eval_recall": 0.7742718446601942,
"eval_runtime": 23.8601,
"eval_samples_per_second": 120.871,
"eval_steps_per_second": 15.13,
"step": 1926
},
{
"epoch": 7.0,
"grad_norm": 7.0408935546875,
"learning_rate": 9.572157654878572e-05,
"loss": 0.7537,
"step": 2247
},
{
"epoch": 7.0,
"eval_accuracy": 0.763869625520111,
"eval_f1": 0.7777621987888312,
"eval_loss": 0.5970358848571777,
"eval_precision": 0.8124970718532031,
"eval_recall": 0.763869625520111,
"eval_runtime": 23.585,
"eval_samples_per_second": 122.281,
"eval_steps_per_second": 15.306,
"step": 2247
},
{
"epoch": 8.0,
"grad_norm": 2.94745135307312,
"learning_rate": 9.26961302542397e-05,
"loss": 0.677,
"step": 2568
},
{
"epoch": 8.0,
"eval_accuracy": 0.7073509015256588,
"eval_f1": 0.7301082277170373,
"eval_loss": 0.7107765078544617,
"eval_precision": 0.8147970005525182,
"eval_recall": 0.7073509015256588,
"eval_runtime": 22.7748,
"eval_samples_per_second": 126.631,
"eval_steps_per_second": 15.851,
"step": 2568
},
{
"epoch": 9.0,
"grad_norm": 11.812759399414062,
"learning_rate": 8.89432090986511e-05,
"loss": 0.6638,
"step": 2889
},
{
"epoch": 9.0,
"eval_accuracy": 0.8290568654646324,
"eval_f1": 0.8244010809843568,
"eval_loss": 0.49241259694099426,
"eval_precision": 0.8249534082413604,
"eval_recall": 0.8290568654646324,
"eval_runtime": 23.6529,
"eval_samples_per_second": 121.93,
"eval_steps_per_second": 15.262,
"step": 2889
},
{
"epoch": 10.0,
"grad_norm": 8.607577323913574,
"learning_rate": 8.45267569518721e-05,
"loss": 0.5787,
"step": 3210
},
{
"epoch": 10.0,
"eval_accuracy": 0.8162274618585298,
"eval_f1": 0.82222451204188,
"eval_loss": 0.54153972864151,
"eval_precision": 0.8405647026780846,
"eval_recall": 0.8162274618585298,
"eval_runtime": 23.1538,
"eval_samples_per_second": 124.559,
"eval_steps_per_second": 15.591,
"step": 3210
},
{
"epoch": 11.0,
"grad_norm": 22.480600357055664,
"learning_rate": 7.952202320752798e-05,
"loss": 0.5373,
"step": 3531
},
{
"epoch": 11.0,
"eval_accuracy": 0.8103328710124826,
"eval_f1": 0.8188747354879483,
"eval_loss": 0.5297914147377014,
"eval_precision": 0.8409211427101868,
"eval_recall": 0.8103328710124826,
"eval_runtime": 23.2794,
"eval_samples_per_second": 123.886,
"eval_steps_per_second": 15.507,
"step": 3531
},
{
"epoch": 12.0,
"grad_norm": 37.149688720703125,
"learning_rate": 7.401428065178325e-05,
"loss": 0.4923,
"step": 3852
},
{
"epoch": 12.0,
"eval_accuracy": 0.8117198335644937,
"eval_f1": 0.8212533704879323,
"eval_loss": 0.5427994728088379,
"eval_precision": 0.8444195978517209,
"eval_recall": 0.8117198335644937,
"eval_runtime": 23.1273,
"eval_samples_per_second": 124.701,
"eval_steps_per_second": 15.609,
"step": 3852
},
{
"epoch": 13.0,
"grad_norm": 7.792755126953125,
"learning_rate": 6.80973725492743e-05,
"loss": 0.3798,
"step": 4173
},
{
"epoch": 13.0,
"eval_accuracy": 0.8498613037447988,
"eval_f1": 0.8466618158413786,
"eval_loss": 0.49677935242652893,
"eval_precision": 0.8470322647118588,
"eval_recall": 0.8498613037447988,
"eval_runtime": 22.6968,
"eval_samples_per_second": 127.066,
"eval_steps_per_second": 15.905,
"step": 4173
},
{
"epoch": 14.0,
"grad_norm": 12.735553741455078,
"learning_rate": 6.187211370157784e-05,
"loss": 0.3912,
"step": 4494
},
{
"epoch": 14.0,
"eval_accuracy": 0.8443134535367545,
"eval_f1": 0.8460191446346724,
"eval_loss": 0.5338780879974365,
"eval_precision": 0.8530926029478479,
"eval_recall": 0.8443134535367545,
"eval_runtime": 22.4911,
"eval_samples_per_second": 128.229,
"eval_steps_per_second": 16.051,
"step": 4494
},
{
"epoch": 15.0,
"grad_norm": 1.7020573616027832,
"learning_rate": 5.544457272166217e-05,
"loss": 0.3002,
"step": 4815
},
{
"epoch": 15.0,
"eval_accuracy": 0.84500693481276,
"eval_f1": 0.8481459887384893,
"eval_loss": 0.521908164024353,
"eval_precision": 0.8547788879749323,
"eval_recall": 0.84500693481276,
"eval_runtime": 22.516,
"eval_samples_per_second": 128.087,
"eval_steps_per_second": 16.033,
"step": 4815
},
{
"epoch": 16.0,
"grad_norm": 10.823068618774414,
"learning_rate": 4.894460661440583e-05,
"loss": 0.2744,
"step": 5136
},
{
"epoch": 16.0,
"eval_accuracy": 0.8203883495145631,
"eval_f1": 0.827968123001256,
"eval_loss": 0.636923611164093,
"eval_precision": 0.8481543275037586,
"eval_recall": 0.8203883495145631,
"eval_runtime": 22.8235,
"eval_samples_per_second": 126.361,
"eval_steps_per_second": 15.817,
"step": 5136
},
{
"epoch": 17.0,
"grad_norm": 1.4310046434402466,
"learning_rate": 4.244239774409037e-05,
"loss": 0.2251,
"step": 5457
},
{
"epoch": 17.0,
"eval_accuracy": 0.8571428571428571,
"eval_f1": 0.8555949794732268,
"eval_loss": 0.515616238117218,
"eval_precision": 0.8560689662280283,
"eval_recall": 0.8571428571428571,
"eval_runtime": 22.9434,
"eval_samples_per_second": 125.701,
"eval_steps_per_second": 15.734,
"step": 5457
},
{
"epoch": 18.0,
"grad_norm": 13.193678855895996,
"learning_rate": 3.606895852147351e-05,
"loss": 0.2187,
"step": 5778
},
{
"epoch": 18.0,
"eval_accuracy": 0.8457004160887656,
"eval_f1": 0.849063653899852,
"eval_loss": 0.5825002789497375,
"eval_precision": 0.8549951611862869,
"eval_recall": 0.8457004160887656,
"eval_runtime": 23.1083,
"eval_samples_per_second": 124.804,
"eval_steps_per_second": 15.622,
"step": 5778
},
{
"epoch": 19.0,
"grad_norm": 0.2162574678659439,
"learning_rate": 2.9932882319894417e-05,
"loss": 0.1767,
"step": 6099
},
{
"epoch": 19.0,
"eval_accuracy": 0.852635228848821,
"eval_f1": 0.8550909208177824,
"eval_loss": 0.5693491697311401,
"eval_precision": 0.8604835069555719,
"eval_recall": 0.852635228848821,
"eval_runtime": 22.8714,
"eval_samples_per_second": 126.097,
"eval_steps_per_second": 15.784,
"step": 6099
},
{
"epoch": 19.0,
"step": 6099,
"total_flos": 7.550537882380222e+18,
"train_loss": 0.6395374234923966,
"train_runtime": 2241.3643,
"train_samples_per_second": 228.789,
"train_steps_per_second": 14.322
}
],
"logging_steps": 500,
"max_steps": 32100,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"total_flos": 7.550537882380222e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}