videomae-base-finetuned-elderf1 / trainer_state.json
minhah's picture
End of training
cec16d1 verified
{
"best_metric": 0.34081346423562414,
"best_model_checkpoint": "videomae-base-finetuned-elderf1/checkpoint-73",
"epoch": 9.0875,
"eval_steps": 500,
"global_step": 720,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 4.3531270027160645,
"learning_rate": 0.0001388888888888889,
"loss": 1.7775,
"step": 10
},
{
"epoch": 0.03,
"grad_norm": 5.537198543548584,
"learning_rate": 0.0002777777777777778,
"loss": 1.6877,
"step": 20
},
{
"epoch": 0.04,
"grad_norm": 5.151556968688965,
"learning_rate": 0.0004166666666666667,
"loss": 1.7997,
"step": 30
},
{
"epoch": 0.06,
"grad_norm": 5.214244365692139,
"learning_rate": 0.0005555555555555556,
"loss": 1.8051,
"step": 40
},
{
"epoch": 0.07,
"grad_norm": 1.6136474609375,
"learning_rate": 0.0006944444444444445,
"loss": 1.7065,
"step": 50
},
{
"epoch": 0.08,
"grad_norm": 5.423031330108643,
"learning_rate": 0.0008333333333333334,
"loss": 1.7922,
"step": 60
},
{
"epoch": 0.1,
"grad_norm": 3.5037877559661865,
"learning_rate": 0.0009722222222222222,
"loss": 1.7358,
"step": 70
},
{
"epoch": 0.1,
"eval_accuracy": 0.34081346423562414,
"eval_loss": 1.692335844039917,
"eval_runtime": 379.7283,
"eval_samples_per_second": 3.755,
"eval_steps_per_second": 0.237,
"step": 73
},
{
"epoch": 1.01,
"grad_norm": 3.8850014209747314,
"learning_rate": 0.0009876543209876543,
"loss": 1.7738,
"step": 80
},
{
"epoch": 1.02,
"grad_norm": 6.418526649475098,
"learning_rate": 0.0009722222222222222,
"loss": 1.8192,
"step": 90
},
{
"epoch": 1.04,
"grad_norm": 6.298271179199219,
"learning_rate": 0.0009567901234567902,
"loss": 1.7833,
"step": 100
},
{
"epoch": 1.05,
"grad_norm": 4.652337551116943,
"learning_rate": 0.000941358024691358,
"loss": 1.7534,
"step": 110
},
{
"epoch": 1.07,
"grad_norm": 3.369626522064209,
"learning_rate": 0.000925925925925926,
"loss": 1.7376,
"step": 120
},
{
"epoch": 1.08,
"grad_norm": 2.182107448577881,
"learning_rate": 0.0009104938271604939,
"loss": 1.6406,
"step": 130
},
{
"epoch": 1.09,
"grad_norm": 3.763148069381714,
"learning_rate": 0.0008950617283950618,
"loss": 1.7163,
"step": 140
},
{
"epoch": 1.1,
"eval_accuracy": 0.3373071528751753,
"eval_loss": 1.66623854637146,
"eval_runtime": 421.0248,
"eval_samples_per_second": 3.387,
"eval_steps_per_second": 0.214,
"step": 146
},
{
"epoch": 2.01,
"grad_norm": 4.109099864959717,
"learning_rate": 0.0008796296296296296,
"loss": 1.6734,
"step": 150
},
{
"epoch": 2.02,
"grad_norm": 3.7187507152557373,
"learning_rate": 0.0008641975308641975,
"loss": 1.7535,
"step": 160
},
{
"epoch": 2.03,
"grad_norm": 2.9596164226531982,
"learning_rate": 0.0008487654320987654,
"loss": 1.7282,
"step": 170
},
{
"epoch": 2.05,
"grad_norm": 5.126859188079834,
"learning_rate": 0.0008333333333333334,
"loss": 1.7021,
"step": 180
},
{
"epoch": 2.06,
"grad_norm": 2.36877703666687,
"learning_rate": 0.0008179012345679012,
"loss": 1.6977,
"step": 190
},
{
"epoch": 2.08,
"grad_norm": 4.42868185043335,
"learning_rate": 0.0008024691358024692,
"loss": 1.6882,
"step": 200
},
{
"epoch": 2.09,
"grad_norm": 3.4869942665100098,
"learning_rate": 0.0007870370370370372,
"loss": 1.7018,
"step": 210
},
{
"epoch": 2.1,
"eval_accuracy": 0.34081346423562414,
"eval_loss": 1.6378456354141235,
"eval_runtime": 423.5487,
"eval_samples_per_second": 3.367,
"eval_steps_per_second": 0.212,
"step": 219
},
{
"epoch": 3.0,
"grad_norm": 3.7394754886627197,
"learning_rate": 0.0007716049382716049,
"loss": 1.7356,
"step": 220
},
{
"epoch": 3.02,
"grad_norm": 1.3488916158676147,
"learning_rate": 0.0007561728395061729,
"loss": 1.7024,
"step": 230
},
{
"epoch": 3.03,
"grad_norm": 3.196521282196045,
"learning_rate": 0.0007407407407407407,
"loss": 1.7094,
"step": 240
},
{
"epoch": 3.04,
"grad_norm": 3.9984352588653564,
"learning_rate": 0.0007253086419753087,
"loss": 1.6481,
"step": 250
},
{
"epoch": 3.06,
"grad_norm": 3.6886866092681885,
"learning_rate": 0.0007098765432098766,
"loss": 1.652,
"step": 260
},
{
"epoch": 3.07,
"grad_norm": 2.245149850845337,
"learning_rate": 0.0006944444444444445,
"loss": 1.7479,
"step": 270
},
{
"epoch": 3.08,
"grad_norm": 4.62326192855835,
"learning_rate": 0.0006790123456790124,
"loss": 1.7129,
"step": 280
},
{
"epoch": 3.1,
"grad_norm": 4.474867343902588,
"learning_rate": 0.0006635802469135802,
"loss": 1.7334,
"step": 290
},
{
"epoch": 3.1,
"eval_accuracy": 0.34011220196353437,
"eval_loss": 1.6562532186508179,
"eval_runtime": 383.4418,
"eval_samples_per_second": 3.719,
"eval_steps_per_second": 0.235,
"step": 292
},
{
"epoch": 4.01,
"grad_norm": 1.4795947074890137,
"learning_rate": 0.0006481481481481481,
"loss": 1.7427,
"step": 300
},
{
"epoch": 4.03,
"grad_norm": 2.173116683959961,
"learning_rate": 0.0006327160493827161,
"loss": 1.6894,
"step": 310
},
{
"epoch": 4.04,
"grad_norm": 2.731816291809082,
"learning_rate": 0.0006172839506172839,
"loss": 1.6818,
"step": 320
},
{
"epoch": 4.05,
"grad_norm": 2.120103120803833,
"learning_rate": 0.0006018518518518519,
"loss": 1.6782,
"step": 330
},
{
"epoch": 4.07,
"grad_norm": 3.7916502952575684,
"learning_rate": 0.0005864197530864199,
"loss": 1.7388,
"step": 340
},
{
"epoch": 4.08,
"grad_norm": 2.852003812789917,
"learning_rate": 0.0005709876543209876,
"loss": 1.7422,
"step": 350
},
{
"epoch": 4.09,
"grad_norm": 6.300606727600098,
"learning_rate": 0.0005555555555555556,
"loss": 1.672,
"step": 360
},
{
"epoch": 4.1,
"eval_accuracy": 0.23983169705469845,
"eval_loss": 1.6567574739456177,
"eval_runtime": 310.9306,
"eval_samples_per_second": 4.586,
"eval_steps_per_second": 0.289,
"step": 365
},
{
"epoch": 5.01,
"grad_norm": 5.469892501831055,
"learning_rate": 0.0005401234567901234,
"loss": 1.6958,
"step": 370
},
{
"epoch": 5.02,
"grad_norm": 3.4026269912719727,
"learning_rate": 0.0005246913580246914,
"loss": 1.7383,
"step": 380
},
{
"epoch": 5.03,
"grad_norm": 2.7335870265960693,
"learning_rate": 0.0005092592592592593,
"loss": 1.6763,
"step": 390
},
{
"epoch": 5.05,
"grad_norm": 4.079995155334473,
"learning_rate": 0.0004938271604938272,
"loss": 1.693,
"step": 400
},
{
"epoch": 5.06,
"grad_norm": 2.8691930770874023,
"learning_rate": 0.0004783950617283951,
"loss": 1.7293,
"step": 410
},
{
"epoch": 5.08,
"grad_norm": 2.4010772705078125,
"learning_rate": 0.000462962962962963,
"loss": 1.6812,
"step": 420
},
{
"epoch": 5.09,
"grad_norm": 4.233634948730469,
"learning_rate": 0.0004475308641975309,
"loss": 1.7095,
"step": 430
},
{
"epoch": 5.1,
"eval_accuracy": 0.3387096774193548,
"eval_loss": 1.6313395500183105,
"eval_runtime": 306.7441,
"eval_samples_per_second": 4.649,
"eval_steps_per_second": 0.293,
"step": 438
},
{
"epoch": 6.0,
"grad_norm": 2.844386100769043,
"learning_rate": 0.00043209876543209873,
"loss": 1.6502,
"step": 440
},
{
"epoch": 6.02,
"grad_norm": 3.7761685848236084,
"learning_rate": 0.0004166666666666667,
"loss": 1.6892,
"step": 450
},
{
"epoch": 6.03,
"grad_norm": 2.8903658390045166,
"learning_rate": 0.0004012345679012346,
"loss": 1.6006,
"step": 460
},
{
"epoch": 6.04,
"grad_norm": 2.548739194869995,
"learning_rate": 0.00038580246913580245,
"loss": 1.7113,
"step": 470
},
{
"epoch": 6.06,
"grad_norm": 3.1980948448181152,
"learning_rate": 0.00037037037037037035,
"loss": 1.7259,
"step": 480
},
{
"epoch": 6.07,
"grad_norm": 3.119049549102783,
"learning_rate": 0.0003549382716049383,
"loss": 1.6634,
"step": 490
},
{
"epoch": 6.09,
"grad_norm": 3.465067148208618,
"learning_rate": 0.0003395061728395062,
"loss": 1.71,
"step": 500
},
{
"epoch": 6.1,
"grad_norm": 1.507797122001648,
"learning_rate": 0.00032407407407407406,
"loss": 1.7119,
"step": 510
},
{
"epoch": 6.1,
"eval_accuracy": 0.34081346423562414,
"eval_loss": 1.6309115886688232,
"eval_runtime": 295.3653,
"eval_samples_per_second": 4.828,
"eval_steps_per_second": 0.305,
"step": 511
},
{
"epoch": 7.01,
"grad_norm": 1.939512848854065,
"learning_rate": 0.00030864197530864197,
"loss": 1.6486,
"step": 520
},
{
"epoch": 7.03,
"grad_norm": 2.4698586463928223,
"learning_rate": 0.00029320987654320993,
"loss": 1.6754,
"step": 530
},
{
"epoch": 7.04,
"grad_norm": 1.9678858518600464,
"learning_rate": 0.0002777777777777778,
"loss": 1.6025,
"step": 540
},
{
"epoch": 7.05,
"grad_norm": 3.6356217861175537,
"learning_rate": 0.0002623456790123457,
"loss": 1.6792,
"step": 550
},
{
"epoch": 7.07,
"grad_norm": 2.781039237976074,
"learning_rate": 0.0002469135802469136,
"loss": 1.7165,
"step": 560
},
{
"epoch": 7.08,
"grad_norm": 4.021714687347412,
"learning_rate": 0.0002314814814814815,
"loss": 1.6836,
"step": 570
},
{
"epoch": 7.1,
"grad_norm": 4.392849922180176,
"learning_rate": 0.00021604938271604937,
"loss": 1.6981,
"step": 580
},
{
"epoch": 7.1,
"eval_accuracy": 0.3288920056100982,
"eval_loss": 1.6518133878707886,
"eval_runtime": 319.5699,
"eval_samples_per_second": 4.462,
"eval_steps_per_second": 0.282,
"step": 584
},
{
"epoch": 8.01,
"grad_norm": 2.291691303253174,
"learning_rate": 0.0002006172839506173,
"loss": 1.6548,
"step": 590
},
{
"epoch": 8.02,
"grad_norm": 4.06191873550415,
"learning_rate": 0.00018518518518518518,
"loss": 1.5957,
"step": 600
},
{
"epoch": 8.04,
"grad_norm": 3.735381603240967,
"learning_rate": 0.0001697530864197531,
"loss": 1.6757,
"step": 610
},
{
"epoch": 8.05,
"grad_norm": 3.7696151733398438,
"learning_rate": 0.00015432098765432098,
"loss": 1.6349,
"step": 620
},
{
"epoch": 8.06,
"grad_norm": 2.210860013961792,
"learning_rate": 0.0001388888888888889,
"loss": 1.7561,
"step": 630
},
{
"epoch": 8.08,
"grad_norm": 2.812994956970215,
"learning_rate": 0.0001234567901234568,
"loss": 1.6331,
"step": 640
},
{
"epoch": 8.09,
"grad_norm": 3.606325626373291,
"learning_rate": 0.00010802469135802468,
"loss": 1.7066,
"step": 650
},
{
"epoch": 8.1,
"eval_accuracy": 0.33099579242636745,
"eval_loss": 1.6313475370407104,
"eval_runtime": 304.3275,
"eval_samples_per_second": 4.686,
"eval_steps_per_second": 0.296,
"step": 657
},
{
"epoch": 9.0,
"grad_norm": 2.993828296661377,
"learning_rate": 9.259259259259259e-05,
"loss": 1.6485,
"step": 660
},
{
"epoch": 9.02,
"grad_norm": 1.9592925310134888,
"learning_rate": 7.716049382716049e-05,
"loss": 1.6776,
"step": 670
},
{
"epoch": 9.03,
"grad_norm": 3.105025291442871,
"learning_rate": 6.17283950617284e-05,
"loss": 1.6466,
"step": 680
},
{
"epoch": 9.05,
"grad_norm": 3.643643856048584,
"learning_rate": 4.6296296296296294e-05,
"loss": 1.6544,
"step": 690
},
{
"epoch": 9.06,
"grad_norm": 2.5237057209014893,
"learning_rate": 3.08641975308642e-05,
"loss": 1.6616,
"step": 700
},
{
"epoch": 9.07,
"grad_norm": 3.834527015686035,
"learning_rate": 1.54320987654321e-05,
"loss": 1.6271,
"step": 710
},
{
"epoch": 9.09,
"grad_norm": 1.777999997138977,
"learning_rate": 0.0,
"loss": 1.6476,
"step": 720
},
{
"epoch": 9.09,
"eval_accuracy": 0.3288920056100982,
"eval_loss": 1.6337770223617554,
"eval_runtime": 340.2408,
"eval_samples_per_second": 4.191,
"eval_steps_per_second": 0.265,
"step": 720
},
{
"epoch": 9.09,
"step": 720,
"total_flos": 1.4231811343419113e+19,
"train_loss": 1.6995894723468357,
"train_runtime": 6776.3985,
"train_samples_per_second": 1.7,
"train_steps_per_second": 0.106
},
{
"epoch": 9.09,
"eval_accuracy": 0.34811715481171546,
"eval_loss": 1.703281283378601,
"eval_runtime": 256.932,
"eval_samples_per_second": 4.651,
"eval_steps_per_second": 0.292,
"step": 720
},
{
"epoch": 9.09,
"eval_accuracy": 0.34811715481171546,
"eval_loss": 1.7031110525131226,
"eval_runtime": 266.484,
"eval_samples_per_second": 4.484,
"eval_steps_per_second": 0.281,
"step": 720
}
],
"logging_steps": 10,
"max_steps": 720,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"total_flos": 1.4231811343419113e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}