vit-beta2-0.99 / trainer_state.json
sharren's picture
🍻 cheers
067b341 verified
raw
history blame contribute delete
No virus
15.5 kB
{
"best_metric": 0.5248246192932129,
"best_model_checkpoint": "./vit-beta2-0.99/checkpoint-5778",
"epoch": 28.0,
"eval_steps": 500,
"global_step": 8988,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 27.36143684387207,
"learning_rate": 1.8291979226774382e-05,
"loss": 1.7217,
"step": 321
},
{
"epoch": 1.0,
"eval_accuracy": 0.7035367545076283,
"eval_f1": 0.6527006206875058,
"eval_loss": 1.0190271139144897,
"eval_precision": 0.6635371340134015,
"eval_recall": 0.7035367545076283,
"eval_runtime": 22.7678,
"eval_samples_per_second": 126.67,
"eval_steps_per_second": 15.856,
"step": 321
},
{
"epoch": 2.0,
"grad_norm": 32.75101089477539,
"learning_rate": 3.681477207155222e-05,
"loss": 1.1622,
"step": 642
},
{
"epoch": 2.0,
"eval_accuracy": 0.705617198335645,
"eval_f1": 0.7191659456414141,
"eval_loss": 0.7385604977607727,
"eval_precision": 0.7575809849286596,
"eval_recall": 0.705617198335645,
"eval_runtime": 23.0256,
"eval_samples_per_second": 125.252,
"eval_steps_per_second": 15.678,
"step": 642
},
{
"epoch": 3.0,
"grad_norm": 7.780852317810059,
"learning_rate": 5.5337564916330066e-05,
"loss": 1.0368,
"step": 963
},
{
"epoch": 3.0,
"eval_accuracy": 0.7517337031900139,
"eval_f1": 0.7214294760102591,
"eval_loss": 0.655035138130188,
"eval_precision": 0.7563510144231254,
"eval_recall": 0.7517337031900139,
"eval_runtime": 22.8211,
"eval_samples_per_second": 126.374,
"eval_steps_per_second": 15.819,
"step": 963
},
{
"epoch": 4.0,
"grad_norm": 30.053781509399414,
"learning_rate": 7.386035776110792e-05,
"loss": 0.9653,
"step": 1284
},
{
"epoch": 4.0,
"eval_accuracy": 0.7843273231622746,
"eval_f1": 0.7863050211732793,
"eval_loss": 0.5640625953674316,
"eval_precision": 0.7948170600442105,
"eval_recall": 0.7843273231622746,
"eval_runtime": 22.4916,
"eval_samples_per_second": 128.226,
"eval_steps_per_second": 16.05,
"step": 1284
},
{
"epoch": 5.0,
"grad_norm": 10.72180461883545,
"learning_rate": 9.238315060588575e-05,
"loss": 0.9272,
"step": 1605
},
{
"epoch": 5.0,
"eval_accuracy": 0.6768377253814147,
"eval_f1": 0.703514416314774,
"eval_loss": 0.7956904172897339,
"eval_precision": 0.7959127541379581,
"eval_recall": 0.6768377253814147,
"eval_runtime": 22.4157,
"eval_samples_per_second": 128.66,
"eval_steps_per_second": 16.105,
"step": 1605
},
{
"epoch": 6.0,
"grad_norm": 9.665838241577148,
"learning_rate": 9.984715255878176e-05,
"loss": 0.9878,
"step": 1926
},
{
"epoch": 6.0,
"eval_accuracy": 0.7871012482662968,
"eval_f1": 0.7903728309886872,
"eval_loss": 0.580937922000885,
"eval_precision": 0.8061861183645468,
"eval_recall": 0.7871012482662968,
"eval_runtime": 22.8489,
"eval_samples_per_second": 126.221,
"eval_steps_per_second": 15.799,
"step": 1926
},
{
"epoch": 7.0,
"grad_norm": 7.413944244384766,
"learning_rate": 9.889061131437471e-05,
"loss": 0.872,
"step": 2247
},
{
"epoch": 7.0,
"eval_accuracy": 0.7215672676837726,
"eval_f1": 0.7441866963098906,
"eval_loss": 0.681545078754425,
"eval_precision": 0.8081398492651465,
"eval_recall": 0.7215672676837726,
"eval_runtime": 22.5223,
"eval_samples_per_second": 128.051,
"eval_steps_per_second": 16.029,
"step": 2247
},
{
"epoch": 8.0,
"grad_norm": 3.1963610649108887,
"learning_rate": 9.707265436104638e-05,
"loss": 0.7998,
"step": 2568
},
{
"epoch": 8.0,
"eval_accuracy": 0.7558945908460472,
"eval_f1": 0.7722745826002211,
"eval_loss": 0.6104105114936829,
"eval_precision": 0.8143413614427701,
"eval_recall": 0.7558945908460472,
"eval_runtime": 22.1997,
"eval_samples_per_second": 129.912,
"eval_steps_per_second": 16.261,
"step": 2568
},
{
"epoch": 9.0,
"grad_norm": 5.708278656005859,
"learning_rate": 9.44253127296151e-05,
"loss": 0.733,
"step": 2889
},
{
"epoch": 9.0,
"eval_accuracy": 0.8148404993065187,
"eval_f1": 0.817195928987888,
"eval_loss": 0.5296399593353271,
"eval_precision": 0.8254366092776788,
"eval_recall": 0.8148404993065187,
"eval_runtime": 22.2658,
"eval_samples_per_second": 129.526,
"eval_steps_per_second": 16.213,
"step": 2889
},
{
"epoch": 10.0,
"grad_norm": 8.459081649780273,
"learning_rate": 9.099523058358976e-05,
"loss": 0.6957,
"step": 3210
},
{
"epoch": 10.0,
"eval_accuracy": 0.7995839112343966,
"eval_f1": 0.8052016823215509,
"eval_loss": 0.579708456993103,
"eval_precision": 0.8322303167903897,
"eval_recall": 0.7995839112343966,
"eval_runtime": 22.874,
"eval_samples_per_second": 126.082,
"eval_steps_per_second": 15.782,
"step": 3210
},
{
"epoch": 11.0,
"grad_norm": 23.16131591796875,
"learning_rate": 8.684284338417735e-05,
"loss": 0.6271,
"step": 3531
},
{
"epoch": 11.0,
"eval_accuracy": 0.7933425797503467,
"eval_f1": 0.8057886771155975,
"eval_loss": 0.5925618410110474,
"eval_precision": 0.8342721256554735,
"eval_recall": 0.7933425797503467,
"eval_runtime": 22.4086,
"eval_samples_per_second": 128.7,
"eval_steps_per_second": 16.11,
"step": 3531
},
{
"epoch": 12.0,
"grad_norm": 13.86120319366455,
"learning_rate": 8.204131306302357e-05,
"loss": 0.5614,
"step": 3852
},
{
"epoch": 12.0,
"eval_accuracy": 0.7919556171983356,
"eval_f1": 0.8060480658501534,
"eval_loss": 0.58785080909729,
"eval_precision": 0.8384030380760077,
"eval_recall": 0.7919556171983356,
"eval_runtime": 22.7711,
"eval_samples_per_second": 126.652,
"eval_steps_per_second": 15.853,
"step": 3852
},
{
"epoch": 13.0,
"grad_norm": 14.25642204284668,
"learning_rate": 7.667523896413962e-05,
"loss": 0.4576,
"step": 4173
},
{
"epoch": 13.0,
"eval_accuracy": 0.8138002773925104,
"eval_f1": 0.802756356097153,
"eval_loss": 0.6664562225341797,
"eval_precision": 0.8312195418319966,
"eval_recall": 0.8138002773925104,
"eval_runtime": 22.7626,
"eval_samples_per_second": 126.699,
"eval_steps_per_second": 15.859,
"step": 4173
},
{
"epoch": 14.0,
"grad_norm": 2.4907939434051514,
"learning_rate": 7.083916726724684e-05,
"loss": 0.4645,
"step": 4494
},
{
"epoch": 14.0,
"eval_accuracy": 0.8294036061026352,
"eval_f1": 0.8329373166180268,
"eval_loss": 0.5514557957649231,
"eval_precision": 0.8470000675264909,
"eval_recall": 0.8294036061026352,
"eval_runtime": 22.6903,
"eval_samples_per_second": 127.103,
"eval_steps_per_second": 15.91,
"step": 4494
},
{
"epoch": 15.0,
"grad_norm": 6.062983512878418,
"learning_rate": 6.463592515537568e-05,
"loss": 0.3913,
"step": 4815
},
{
"epoch": 15.0,
"eval_accuracy": 0.8224687933425797,
"eval_f1": 0.8288240804348163,
"eval_loss": 0.5473943948745728,
"eval_precision": 0.8465516026472886,
"eval_recall": 0.8224687933425797,
"eval_runtime": 22.9153,
"eval_samples_per_second": 125.855,
"eval_steps_per_second": 15.754,
"step": 4815
},
{
"epoch": 16.0,
"grad_norm": 8.832720756530762,
"learning_rate": 5.8174809077430184e-05,
"loss": 0.3693,
"step": 5136
},
{
"epoch": 16.0,
"eval_accuracy": 0.823509015256588,
"eval_f1": 0.8308490392951902,
"eval_loss": 0.5769267082214355,
"eval_precision": 0.8463977879369676,
"eval_recall": 0.823509015256588,
"eval_runtime": 22.4364,
"eval_samples_per_second": 128.541,
"eval_steps_per_second": 16.09,
"step": 5136
},
{
"epoch": 17.0,
"grad_norm": 3.554053783416748,
"learning_rate": 5.156965902716534e-05,
"loss": 0.2794,
"step": 5457
},
{
"epoch": 17.0,
"eval_accuracy": 0.8509015256588072,
"eval_f1": 0.851625286103631,
"eval_loss": 0.5327965617179871,
"eval_precision": 0.8571269483137886,
"eval_recall": 0.8509015256588072,
"eval_runtime": 22.5058,
"eval_samples_per_second": 128.145,
"eval_steps_per_second": 16.04,
"step": 5457
},
{
"epoch": 18.0,
"grad_norm": 12.536526679992676,
"learning_rate": 4.493685276832998e-05,
"loss": 0.2677,
"step": 5778
},
{
"epoch": 18.0,
"eval_accuracy": 0.8623439667128987,
"eval_f1": 0.8596257980493167,
"eval_loss": 0.5248246192932129,
"eval_precision": 0.8583558881059454,
"eval_recall": 0.8623439667128987,
"eval_runtime": 23.0138,
"eval_samples_per_second": 125.316,
"eval_steps_per_second": 15.686,
"step": 5778
},
{
"epoch": 19.0,
"grad_norm": 0.8805328011512756,
"learning_rate": 3.839325534621579e-05,
"loss": 0.2104,
"step": 6099
},
{
"epoch": 19.0,
"eval_accuracy": 0.8432732316227461,
"eval_f1": 0.8473236873027821,
"eval_loss": 0.6283748745918274,
"eval_precision": 0.8572165883311328,
"eval_recall": 0.8432732316227461,
"eval_runtime": 22.0808,
"eval_samples_per_second": 130.611,
"eval_steps_per_second": 16.349,
"step": 6099
},
{
"epoch": 20.0,
"grad_norm": 0.04168611392378807,
"learning_rate": 3.2073473743477955e-05,
"loss": 0.2459,
"step": 6420
},
{
"epoch": 20.0,
"eval_accuracy": 0.8543689320388349,
"eval_f1": 0.8554941368452752,
"eval_loss": 0.6137195229530334,
"eval_precision": 0.8595513589850039,
"eval_recall": 0.8543689320388349,
"eval_runtime": 22.5224,
"eval_samples_per_second": 128.05,
"eval_steps_per_second": 16.028,
"step": 6420
},
{
"epoch": 21.0,
"grad_norm": 3.362337350845337,
"learning_rate": 2.604941738980618e-05,
"loss": 0.1769,
"step": 6741
},
{
"epoch": 21.0,
"eval_accuracy": 0.8637309292649098,
"eval_f1": 0.8566147037629545,
"eval_loss": 0.5959635376930237,
"eval_precision": 0.8572769111277326,
"eval_recall": 0.8637309292649098,
"eval_runtime": 22.6796,
"eval_samples_per_second": 127.162,
"eval_steps_per_second": 15.917,
"step": 6741
},
{
"epoch": 22.0,
"grad_norm": 32.62522506713867,
"learning_rate": 2.0447352243517255e-05,
"loss": 0.1294,
"step": 7062
},
{
"epoch": 22.0,
"eval_accuracy": 0.8699722607489597,
"eval_f1": 0.868659902198456,
"eval_loss": 0.5843542814254761,
"eval_precision": 0.8687454531579298,
"eval_recall": 0.8699722607489597,
"eval_runtime": 22.7408,
"eval_samples_per_second": 126.82,
"eval_steps_per_second": 15.875,
"step": 7062
},
{
"epoch": 23.0,
"grad_norm": 4.362905502319336,
"learning_rate": 1.536598246865575e-05,
"loss": 0.1597,
"step": 7383
},
{
"epoch": 23.0,
"eval_accuracy": 0.866504854368932,
"eval_f1": 0.8589306843328246,
"eval_loss": 0.6580309867858887,
"eval_precision": 0.8603887819342586,
"eval_recall": 0.866504854368932,
"eval_runtime": 22.8065,
"eval_samples_per_second": 126.455,
"eval_steps_per_second": 15.829,
"step": 7383
},
{
"epoch": 24.0,
"grad_norm": 1.3405486345291138,
"learning_rate": 1.0894837969414489e-05,
"loss": 0.1227,
"step": 7704
},
{
"epoch": 24.0,
"eval_accuracy": 0.8730929264909847,
"eval_f1": 0.8712364081669894,
"eval_loss": 0.6225888729095459,
"eval_precision": 0.8720005451749013,
"eval_recall": 0.8730929264909847,
"eval_runtime": 22.4336,
"eval_samples_per_second": 128.557,
"eval_steps_per_second": 16.092,
"step": 7704
},
{
"epoch": 25.0,
"grad_norm": 0.08982106298208237,
"learning_rate": 7.112696940726155e-06,
"loss": 0.1054,
"step": 8025
},
{
"epoch": 25.0,
"eval_accuracy": 0.8751733703190014,
"eval_f1": 0.8720900906443607,
"eval_loss": 0.6197877526283264,
"eval_precision": 0.8727615417504975,
"eval_recall": 0.8751733703190014,
"eval_runtime": 22.3517,
"eval_samples_per_second": 129.028,
"eval_steps_per_second": 16.151,
"step": 8025
},
{
"epoch": 26.0,
"grad_norm": 16.338762283325195,
"learning_rate": 4.094394131694684e-06,
"loss": 0.0945,
"step": 8346
},
{
"epoch": 26.0,
"eval_accuracy": 0.8793342579750347,
"eval_f1": 0.8764130000059128,
"eval_loss": 0.6049804091453552,
"eval_precision": 0.8757375120219936,
"eval_recall": 0.8793342579750347,
"eval_runtime": 22.9615,
"eval_samples_per_second": 125.601,
"eval_steps_per_second": 15.722,
"step": 8346
},
{
"epoch": 27.0,
"grad_norm": 0.07402696460485458,
"learning_rate": 1.8742732027931087e-06,
"loss": 0.1242,
"step": 8667
},
{
"epoch": 27.0,
"eval_accuracy": 0.8828016643550625,
"eval_f1": 0.8797538930980352,
"eval_loss": 0.6077802181243896,
"eval_precision": 0.8788497875866979,
"eval_recall": 0.8828016643550625,
"eval_runtime": 22.9003,
"eval_samples_per_second": 125.937,
"eval_steps_per_second": 15.764,
"step": 8667
},
{
"epoch": 28.0,
"grad_norm": 0.06073630228638649,
"learning_rate": 5.020912943263345e-07,
"loss": 0.0819,
"step": 8988
},
{
"epoch": 28.0,
"eval_accuracy": 0.8796809986130375,
"eval_f1": 0.8755693874564652,
"eval_loss": 0.6189650893211365,
"eval_precision": 0.8748178599277232,
"eval_recall": 0.8796809986130375,
"eval_runtime": 23.1664,
"eval_samples_per_second": 124.49,
"eval_steps_per_second": 15.583,
"step": 8988
},
{
"epoch": 28.0,
"step": 8988,
"total_flos": 1.1127108458244538e+19,
"train_loss": 0.5275238134301817,
"train_runtime": 3329.2548,
"train_samples_per_second": 154.028,
"train_steps_per_second": 9.642
}
],
"logging_steps": 500,
"max_steps": 32100,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"total_flos": 1.1127108458244538e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}