vit-weight-decay-1e-2 / trainer_state.json
sharren's picture
🍻 cheers
ee979e5 verified
{
"best_metric": 0.4994313716888428,
"best_model_checkpoint": "./vit-weight-decay-1e-2/checkpoint-5457",
"epoch": 27.0,
"eval_steps": 500,
"global_step": 8667,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 27.05959129333496,
"learning_rate": 2.6004922067268256e-05,
"loss": 1.7124,
"step": 321
},
{
"epoch": 1.0,
"eval_accuracy": 0.6924410540915396,
"eval_f1": 0.603022144115533,
"eval_loss": 0.8697461485862732,
"eval_precision": 0.6656411815509768,
"eval_recall": 0.6924410540915396,
"eval_runtime": 24.2005,
"eval_samples_per_second": 119.171,
"eval_steps_per_second": 14.917,
"step": 321
},
{
"epoch": 2.0,
"grad_norm": 31.123794555664062,
"learning_rate": 5.233798195242002e-05,
"loss": 1.1476,
"step": 642
},
{
"epoch": 2.0,
"eval_accuracy": 0.6990291262135923,
"eval_f1": 0.714853056600681,
"eval_loss": 0.7271208763122559,
"eval_precision": 0.7684437503161472,
"eval_recall": 0.6990291262135923,
"eval_runtime": 23.1919,
"eval_samples_per_second": 124.354,
"eval_steps_per_second": 15.566,
"step": 642
},
{
"epoch": 3.0,
"grad_norm": 4.652539253234863,
"learning_rate": 7.867104183757178e-05,
"loss": 1.0734,
"step": 963
},
{
"epoch": 3.0,
"eval_accuracy": 0.7687239944521498,
"eval_f1": 0.7417221561170182,
"eval_loss": 0.6440889239311218,
"eval_precision": 0.7568176957187778,
"eval_recall": 0.7687239944521498,
"eval_runtime": 23.9098,
"eval_samples_per_second": 120.62,
"eval_steps_per_second": 15.098,
"step": 963
},
{
"epoch": 4.0,
"grad_norm": 26.467485427856445,
"learning_rate": 9.99845966779335e-05,
"loss": 1.0271,
"step": 1284
},
{
"epoch": 4.0,
"eval_accuracy": 0.7773925104022191,
"eval_f1": 0.7814315937772335,
"eval_loss": 0.5854852199554443,
"eval_precision": 0.78834249113171,
"eval_recall": 0.7773925104022191,
"eval_runtime": 23.2363,
"eval_samples_per_second": 124.116,
"eval_steps_per_second": 15.536,
"step": 1284
},
{
"epoch": 5.0,
"grad_norm": 9.265125274658203,
"learning_rate": 9.93971225198763e-05,
"loss": 0.9158,
"step": 1605
},
{
"epoch": 5.0,
"eval_accuracy": 0.7635228848821082,
"eval_f1": 0.7661961115296833,
"eval_loss": 0.700226366519928,
"eval_precision": 0.7929662231425049,
"eval_recall": 0.7635228848821082,
"eval_runtime": 23.9812,
"eval_samples_per_second": 120.261,
"eval_steps_per_second": 15.053,
"step": 1605
},
{
"epoch": 6.0,
"grad_norm": 7.993426322937012,
"learning_rate": 9.796799913911281e-05,
"loss": 0.9167,
"step": 1926
},
{
"epoch": 6.0,
"eval_accuracy": 0.7812066574202496,
"eval_f1": 0.7900429974573581,
"eval_loss": 0.5867117047309875,
"eval_precision": 0.806466933291998,
"eval_recall": 0.7812066574202496,
"eval_runtime": 23.5885,
"eval_samples_per_second": 122.263,
"eval_steps_per_second": 15.304,
"step": 1926
},
{
"epoch": 7.0,
"grad_norm": 8.043280601501465,
"learning_rate": 9.572157654878572e-05,
"loss": 0.786,
"step": 2247
},
{
"epoch": 7.0,
"eval_accuracy": 0.7340499306518724,
"eval_f1": 0.751486114744647,
"eval_loss": 0.6516677737236023,
"eval_precision": 0.8047120902571854,
"eval_recall": 0.7340499306518724,
"eval_runtime": 24.7074,
"eval_samples_per_second": 116.726,
"eval_steps_per_second": 14.611,
"step": 2247
},
{
"epoch": 8.0,
"grad_norm": 3.0447545051574707,
"learning_rate": 9.26961302542397e-05,
"loss": 0.7406,
"step": 2568
},
{
"epoch": 8.0,
"eval_accuracy": 0.7066574202496533,
"eval_f1": 0.7330088250984578,
"eval_loss": 0.6647158265113831,
"eval_precision": 0.8133714017605812,
"eval_recall": 0.7066574202496533,
"eval_runtime": 24.3728,
"eval_samples_per_second": 118.329,
"eval_steps_per_second": 14.812,
"step": 2568
},
{
"epoch": 9.0,
"grad_norm": 3.4238717555999756,
"learning_rate": 8.89432090986511e-05,
"loss": 0.682,
"step": 2889
},
{
"epoch": 9.0,
"eval_accuracy": 0.8228155339805825,
"eval_f1": 0.8207044848679155,
"eval_loss": 0.510608434677124,
"eval_precision": 0.8230905088203688,
"eval_recall": 0.8228155339805825,
"eval_runtime": 24.4807,
"eval_samples_per_second": 117.807,
"eval_steps_per_second": 14.746,
"step": 2889
},
{
"epoch": 10.0,
"grad_norm": 11.989164352416992,
"learning_rate": 8.45267569518721e-05,
"loss": 0.6427,
"step": 3210
},
{
"epoch": 10.0,
"eval_accuracy": 0.8165742024965326,
"eval_f1": 0.8221884365803362,
"eval_loss": 0.5032415390014648,
"eval_precision": 0.835376334433007,
"eval_recall": 0.8165742024965326,
"eval_runtime": 23.2354,
"eval_samples_per_second": 124.121,
"eval_steps_per_second": 15.537,
"step": 3210
},
{
"epoch": 11.0,
"grad_norm": 34.21263885498047,
"learning_rate": 7.952202320752798e-05,
"loss": 0.5663,
"step": 3531
},
{
"epoch": 11.0,
"eval_accuracy": 0.8151872399445215,
"eval_f1": 0.8216396746544562,
"eval_loss": 0.5357819199562073,
"eval_precision": 0.8325502393616235,
"eval_recall": 0.8151872399445215,
"eval_runtime": 24.4554,
"eval_samples_per_second": 117.929,
"eval_steps_per_second": 14.762,
"step": 3531
},
{
"epoch": 12.0,
"grad_norm": 44.03895568847656,
"learning_rate": 7.401428065178325e-05,
"loss": 0.5395,
"step": 3852
},
{
"epoch": 12.0,
"eval_accuracy": 0.8248959778085991,
"eval_f1": 0.8298561156988427,
"eval_loss": 0.5487632155418396,
"eval_precision": 0.8391637957530821,
"eval_recall": 0.8248959778085991,
"eval_runtime": 23.3681,
"eval_samples_per_second": 123.416,
"eval_steps_per_second": 15.448,
"step": 3852
},
{
"epoch": 13.0,
"grad_norm": 27.093250274658203,
"learning_rate": 6.80973725492743e-05,
"loss": 0.4468,
"step": 4173
},
{
"epoch": 13.0,
"eval_accuracy": 0.8231622746185853,
"eval_f1": 0.8260339295474823,
"eval_loss": 0.578988254070282,
"eval_precision": 0.8397229815779756,
"eval_recall": 0.8231622746185853,
"eval_runtime": 24.8132,
"eval_samples_per_second": 116.229,
"eval_steps_per_second": 14.549,
"step": 4173
},
{
"epoch": 14.0,
"grad_norm": 5.494964122772217,
"learning_rate": 6.187211370157784e-05,
"loss": 0.4247,
"step": 4494
},
{
"epoch": 14.0,
"eval_accuracy": 0.8415395284327323,
"eval_f1": 0.8448790763195109,
"eval_loss": 0.5437958240509033,
"eval_precision": 0.8570099067237934,
"eval_recall": 0.8415395284327323,
"eval_runtime": 23.4933,
"eval_samples_per_second": 122.758,
"eval_steps_per_second": 15.366,
"step": 4494
},
{
"epoch": 15.0,
"grad_norm": 5.022720813751221,
"learning_rate": 5.544457272166217e-05,
"loss": 0.3495,
"step": 4815
},
{
"epoch": 15.0,
"eval_accuracy": 0.8453536754507628,
"eval_f1": 0.846725089255697,
"eval_loss": 0.5135474801063538,
"eval_precision": 0.8518606648557052,
"eval_recall": 0.8453536754507628,
"eval_runtime": 22.9349,
"eval_samples_per_second": 125.747,
"eval_steps_per_second": 15.74,
"step": 4815
},
{
"epoch": 16.0,
"grad_norm": 21.08028221130371,
"learning_rate": 4.894460661440583e-05,
"loss": 0.3039,
"step": 5136
},
{
"epoch": 16.0,
"eval_accuracy": 0.8408460471567267,
"eval_f1": 0.8447631505343497,
"eval_loss": 0.5631198287010193,
"eval_precision": 0.8520329480153485,
"eval_recall": 0.8408460471567267,
"eval_runtime": 22.9052,
"eval_samples_per_second": 125.91,
"eval_steps_per_second": 15.761,
"step": 5136
},
{
"epoch": 17.0,
"grad_norm": 0.3130456805229187,
"learning_rate": 4.244239774409037e-05,
"loss": 0.2602,
"step": 5457
},
{
"epoch": 17.0,
"eval_accuracy": 0.8602635228848821,
"eval_f1": 0.8599959330449201,
"eval_loss": 0.4994313716888428,
"eval_precision": 0.8617536928422816,
"eval_recall": 0.8602635228848821,
"eval_runtime": 24.0474,
"eval_samples_per_second": 119.93,
"eval_steps_per_second": 15.012,
"step": 5457
},
{
"epoch": 18.0,
"grad_norm": 13.50483226776123,
"learning_rate": 3.606895852147351e-05,
"loss": 0.2616,
"step": 5778
},
{
"epoch": 18.0,
"eval_accuracy": 0.8564493758668515,
"eval_f1": 0.8585397395861224,
"eval_loss": 0.5405685901641846,
"eval_precision": 0.8621724878350435,
"eval_recall": 0.8564493758668515,
"eval_runtime": 23.9163,
"eval_samples_per_second": 120.587,
"eval_steps_per_second": 15.094,
"step": 5778
},
{
"epoch": 19.0,
"grad_norm": 0.11348175257444382,
"learning_rate": 2.9932882319894417e-05,
"loss": 0.1876,
"step": 6099
},
{
"epoch": 19.0,
"eval_accuracy": 0.848127600554785,
"eval_f1": 0.8525273952669322,
"eval_loss": 0.5612274408340454,
"eval_precision": 0.8629290477513907,
"eval_recall": 0.848127600554785,
"eval_runtime": 24.2059,
"eval_samples_per_second": 119.145,
"eval_steps_per_second": 14.914,
"step": 6099
},
{
"epoch": 20.0,
"grad_norm": 1.7572256326675415,
"learning_rate": 2.4138718220394167e-05,
"loss": 0.2052,
"step": 6420
},
{
"epoch": 20.0,
"eval_accuracy": 0.8429264909847434,
"eval_f1": 0.8427565344137952,
"eval_loss": 0.6802518367767334,
"eval_precision": 0.8502279276035353,
"eval_recall": 0.8429264909847434,
"eval_runtime": 23.6557,
"eval_samples_per_second": 121.916,
"eval_steps_per_second": 15.261,
"step": 6420
},
{
"epoch": 21.0,
"grad_norm": 11.348849296569824,
"learning_rate": 1.8785189659922232e-05,
"loss": 0.1533,
"step": 6741
},
{
"epoch": 21.0,
"eval_accuracy": 0.8734396671289875,
"eval_f1": 0.870883732217841,
"eval_loss": 0.546351432800293,
"eval_precision": 0.8698420453848273,
"eval_recall": 0.8734396671289875,
"eval_runtime": 23.8505,
"eval_samples_per_second": 120.92,
"eval_steps_per_second": 15.136,
"step": 6741
},
{
"epoch": 22.0,
"grad_norm": 54.87451171875,
"learning_rate": 1.396351233934956e-05,
"loss": 0.1175,
"step": 7062
},
{
"epoch": 22.0,
"eval_accuracy": 0.8685852981969486,
"eval_f1": 0.8672514399266257,
"eval_loss": 0.5572792291641235,
"eval_precision": 0.8667361486336195,
"eval_recall": 0.8685852981969486,
"eval_runtime": 23.4628,
"eval_samples_per_second": 122.918,
"eval_steps_per_second": 15.386,
"step": 7062
},
{
"epoch": 23.0,
"grad_norm": 0.37277576327323914,
"learning_rate": 9.755840051487997e-06,
"loss": 0.1218,
"step": 7383
},
{
"epoch": 23.0,
"eval_accuracy": 0.8703190013869625,
"eval_f1": 0.8669113876377326,
"eval_loss": 0.6043308973312378,
"eval_precision": 0.8680623901339135,
"eval_recall": 0.8703190013869625,
"eval_runtime": 23.6979,
"eval_samples_per_second": 121.698,
"eval_steps_per_second": 15.233,
"step": 7383
},
{
"epoch": 24.0,
"grad_norm": 1.1696586608886719,
"learning_rate": 6.233864909760889e-06,
"loss": 0.114,
"step": 7704
},
{
"epoch": 24.0,
"eval_accuracy": 0.871012482662968,
"eval_f1": 0.8692580136075093,
"eval_loss": 0.5944604277610779,
"eval_precision": 0.8705845000186183,
"eval_recall": 0.871012482662968,
"eval_runtime": 22.9273,
"eval_samples_per_second": 125.789,
"eval_steps_per_second": 15.745,
"step": 7704
},
{
"epoch": 25.0,
"grad_norm": 0.24613961577415466,
"learning_rate": 3.457595827424931e-06,
"loss": 0.104,
"step": 8025
},
{
"epoch": 25.0,
"eval_accuracy": 0.8765603328710125,
"eval_f1": 0.8751858009450587,
"eval_loss": 0.5850355625152588,
"eval_precision": 0.8753430843268125,
"eval_recall": 0.8765603328710125,
"eval_runtime": 23.185,
"eval_samples_per_second": 124.391,
"eval_steps_per_second": 15.57,
"step": 8025
},
{
"epoch": 26.0,
"grad_norm": 15.901623725891113,
"learning_rate": 1.4743360601349622e-06,
"loss": 0.0752,
"step": 8346
},
{
"epoch": 26.0,
"eval_accuracy": 0.8782940360610264,
"eval_f1": 0.8757212451946566,
"eval_loss": 0.5867504477500916,
"eval_precision": 0.8747328684004348,
"eval_recall": 0.8782940360610264,
"eval_runtime": 24.8645,
"eval_samples_per_second": 115.989,
"eval_steps_per_second": 14.519,
"step": 8346
},
{
"epoch": 27.0,
"grad_norm": 0.1058092936873436,
"learning_rate": 3.1787723291717977e-07,
"loss": 0.1309,
"step": 8667
},
{
"epoch": 27.0,
"eval_accuracy": 0.8786407766990292,
"eval_f1": 0.876119808222944,
"eval_loss": 0.5839141011238098,
"eval_precision": 0.875311708483151,
"eval_recall": 0.8786407766990292,
"eval_runtime": 24.8375,
"eval_samples_per_second": 116.115,
"eval_steps_per_second": 14.534,
"step": 8667
},
{
"epoch": 27.0,
"step": 8667,
"total_flos": 1.0729711727592948e+19,
"train_loss": 0.5187536703752126,
"train_runtime": 3224.0652,
"train_samples_per_second": 159.054,
"train_steps_per_second": 9.956
}
],
"logging_steps": 500,
"max_steps": 32100,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"total_flos": 1.0729711727592948e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}