chest-beit-base-finetuned / trainer_state.json
selmamalak's picture
End of training
dd053e9 verified
{
"best_metric": 0.9141630901287554,
"best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuned-lora-medmnistv2/checkpoint-63",
"epoch": 9.882352941176471,
"eval_steps": 500,
"global_step": 630,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.16,
"grad_norm": 2.7611124515533447,
"learning_rate": 0.004920634920634921,
"loss": 0.6696,
"step": 10
},
{
"epoch": 0.31,
"grad_norm": 3.1516690254211426,
"learning_rate": 0.004841269841269842,
"loss": 0.5037,
"step": 20
},
{
"epoch": 0.47,
"grad_norm": 1.6632912158966064,
"learning_rate": 0.0047619047619047615,
"loss": 0.5343,
"step": 30
},
{
"epoch": 0.63,
"grad_norm": 15.660941123962402,
"learning_rate": 0.004682539682539683,
"loss": 0.7623,
"step": 40
},
{
"epoch": 0.78,
"grad_norm": 1.5720008611679077,
"learning_rate": 0.004603174603174603,
"loss": 0.5904,
"step": 50
},
{
"epoch": 0.94,
"grad_norm": 3.288884401321411,
"learning_rate": 0.004523809523809524,
"loss": 0.4775,
"step": 60
},
{
"epoch": 0.99,
"eval_accuracy": 0.9141630901287554,
"eval_f1": 0.8903401432261271,
"eval_loss": 0.22644232213497162,
"eval_precision": 0.884993651288677,
"eval_recall": 0.8961752704933064,
"eval_runtime": 12.2128,
"eval_samples_per_second": 95.391,
"eval_steps_per_second": 5.977,
"step": 63
},
{
"epoch": 1.1,
"grad_norm": 2.6683709621429443,
"learning_rate": 0.0044603174603174605,
"loss": 0.4832,
"step": 70
},
{
"epoch": 1.25,
"grad_norm": 1.2820926904678345,
"learning_rate": 0.004380952380952381,
"loss": 0.5064,
"step": 80
},
{
"epoch": 1.41,
"grad_norm": 5.157268047332764,
"learning_rate": 0.004301587301587302,
"loss": 0.4561,
"step": 90
},
{
"epoch": 1.57,
"grad_norm": 0.9834429025650024,
"learning_rate": 0.004222222222222223,
"loss": 0.3981,
"step": 100
},
{
"epoch": 1.73,
"grad_norm": 7.78388786315918,
"learning_rate": 0.0041428571428571434,
"loss": 0.5016,
"step": 110
},
{
"epoch": 1.88,
"grad_norm": 12.203431129455566,
"learning_rate": 0.004063492063492063,
"loss": 0.7117,
"step": 120
},
{
"epoch": 1.99,
"eval_accuracy": 0.7390557939914163,
"eval_f1": 0.4249753208292202,
"eval_loss": 0.4007842242717743,
"eval_precision": 0.36952789699570815,
"eval_recall": 0.5,
"eval_runtime": 12.6185,
"eval_samples_per_second": 92.325,
"eval_steps_per_second": 5.785,
"step": 127
},
{
"epoch": 2.04,
"grad_norm": 9.85489273071289,
"learning_rate": 0.003992063492063492,
"loss": 0.4575,
"step": 130
},
{
"epoch": 2.2,
"grad_norm": 2.926109552383423,
"learning_rate": 0.003912698412698413,
"loss": 0.4562,
"step": 140
},
{
"epoch": 2.35,
"grad_norm": 0.5027350187301636,
"learning_rate": 0.0038333333333333336,
"loss": 0.417,
"step": 150
},
{
"epoch": 2.51,
"grad_norm": 0.8259086608886719,
"learning_rate": 0.003753968253968254,
"loss": 0.3953,
"step": 160
},
{
"epoch": 2.67,
"grad_norm": 2.8119688034057617,
"learning_rate": 0.003674603174603175,
"loss": 0.4175,
"step": 170
},
{
"epoch": 2.82,
"grad_norm": 2.306551218032837,
"learning_rate": 0.0035952380952380954,
"loss": 0.4226,
"step": 180
},
{
"epoch": 2.98,
"grad_norm": 6.00565242767334,
"learning_rate": 0.003515873015873016,
"loss": 0.4115,
"step": 190
},
{
"epoch": 3.0,
"eval_accuracy": 0.8154506437768241,
"eval_f1": 0.7957275475643482,
"eval_loss": 0.43578293919563293,
"eval_precision": 0.7870816708492059,
"eval_recall": 0.8645050125313283,
"eval_runtime": 12.1326,
"eval_samples_per_second": 96.022,
"eval_steps_per_second": 6.017,
"step": 191
},
{
"epoch": 3.14,
"grad_norm": 1.8762693405151367,
"learning_rate": 0.0034365079365079364,
"loss": 0.3876,
"step": 200
},
{
"epoch": 3.29,
"grad_norm": 1.1961586475372314,
"learning_rate": 0.003357142857142857,
"loss": 0.3232,
"step": 210
},
{
"epoch": 3.45,
"grad_norm": 0.5015142560005188,
"learning_rate": 0.003277777777777778,
"loss": 0.4061,
"step": 220
},
{
"epoch": 3.61,
"grad_norm": 2.750140428543091,
"learning_rate": 0.003198412698412698,
"loss": 0.372,
"step": 230
},
{
"epoch": 3.76,
"grad_norm": 2.029989242553711,
"learning_rate": 0.003119047619047619,
"loss": 0.3498,
"step": 240
},
{
"epoch": 3.92,
"grad_norm": 2.366288661956787,
"learning_rate": 0.0030396825396825397,
"loss": 0.3631,
"step": 250
},
{
"epoch": 4.0,
"eval_accuracy": 0.8798283261802575,
"eval_f1": 0.8517509925684618,
"eval_loss": 0.30910125374794006,
"eval_precision": 0.8380756825290672,
"eval_recall": 0.8708184332783178,
"eval_runtime": 12.051,
"eval_samples_per_second": 96.673,
"eval_steps_per_second": 6.058,
"step": 255
},
{
"epoch": 4.08,
"grad_norm": 0.5986310839653015,
"learning_rate": 0.0029603174603174604,
"loss": 0.3413,
"step": 260
},
{
"epoch": 4.24,
"grad_norm": 4.78090763092041,
"learning_rate": 0.0028809523809523807,
"loss": 0.384,
"step": 270
},
{
"epoch": 4.39,
"grad_norm": 3.392075300216675,
"learning_rate": 0.0028015873015873015,
"loss": 0.3579,
"step": 280
},
{
"epoch": 4.55,
"grad_norm": 1.982884407043457,
"learning_rate": 0.0027222222222222222,
"loss": 0.3397,
"step": 290
},
{
"epoch": 4.71,
"grad_norm": 4.884500980377197,
"learning_rate": 0.002642857142857143,
"loss": 0.3609,
"step": 300
},
{
"epoch": 4.86,
"grad_norm": 0.9579557776451111,
"learning_rate": 0.0025634920634920633,
"loss": 0.3794,
"step": 310
},
{
"epoch": 4.99,
"eval_accuracy": 0.8798283261802575,
"eval_f1": 0.8494965377744312,
"eval_loss": 0.2801721394062042,
"eval_precision": 0.8392582443135881,
"eval_recall": 0.8623062992847974,
"eval_runtime": 12.197,
"eval_samples_per_second": 95.516,
"eval_steps_per_second": 5.985,
"step": 318
},
{
"epoch": 5.02,
"grad_norm": 1.1442433595657349,
"learning_rate": 0.002484126984126984,
"loss": 0.3566,
"step": 320
},
{
"epoch": 5.18,
"grad_norm": 4.931128978729248,
"learning_rate": 0.0024047619047619048,
"loss": 0.4874,
"step": 330
},
{
"epoch": 5.33,
"grad_norm": 2.474273443222046,
"learning_rate": 0.0023253968253968255,
"loss": 0.375,
"step": 340
},
{
"epoch": 5.49,
"grad_norm": 2.144062042236328,
"learning_rate": 0.0022460317460317463,
"loss": 0.368,
"step": 350
},
{
"epoch": 5.65,
"grad_norm": 0.6386366486549377,
"learning_rate": 0.002166666666666667,
"loss": 0.3755,
"step": 360
},
{
"epoch": 5.8,
"grad_norm": 3.0398051738739014,
"learning_rate": 0.0020873015873015873,
"loss": 0.3248,
"step": 370
},
{
"epoch": 5.96,
"grad_norm": 2.5777676105499268,
"learning_rate": 0.002007936507936508,
"loss": 0.3713,
"step": 380
},
{
"epoch": 5.99,
"eval_accuracy": 0.8772532188841202,
"eval_f1": 0.8449126373928156,
"eval_loss": 0.2805176377296448,
"eval_precision": 0.8370533441922019,
"eval_recall": 0.8541800385109115,
"eval_runtime": 11.9639,
"eval_samples_per_second": 97.376,
"eval_steps_per_second": 6.102,
"step": 382
},
{
"epoch": 6.12,
"grad_norm": 0.9707914590835571,
"learning_rate": 0.0019285714285714288,
"loss": 0.3227,
"step": 390
},
{
"epoch": 6.27,
"grad_norm": 1.1869500875473022,
"learning_rate": 0.0018492063492063493,
"loss": 0.3271,
"step": 400
},
{
"epoch": 6.43,
"grad_norm": 0.6983945369720459,
"learning_rate": 0.00176984126984127,
"loss": 0.3709,
"step": 410
},
{
"epoch": 6.59,
"grad_norm": 1.19561767578125,
"learning_rate": 0.0016904761904761906,
"loss": 0.3629,
"step": 420
},
{
"epoch": 6.75,
"grad_norm": 1.7224721908569336,
"learning_rate": 0.0016111111111111113,
"loss": 0.3182,
"step": 430
},
{
"epoch": 6.9,
"grad_norm": 2.228806972503662,
"learning_rate": 0.0015317460317460319,
"loss": 0.3953,
"step": 440
},
{
"epoch": 7.0,
"eval_accuracy": 0.8583690987124464,
"eval_f1": 0.836744521418762,
"eval_loss": 0.3396996855735779,
"eval_precision": 0.8185304398119043,
"eval_recall": 0.8871569166819487,
"eval_runtime": 12.0715,
"eval_samples_per_second": 96.508,
"eval_steps_per_second": 6.047,
"step": 446
},
{
"epoch": 7.06,
"grad_norm": 3.1751720905303955,
"learning_rate": 0.0014523809523809526,
"loss": 0.3753,
"step": 450
},
{
"epoch": 7.22,
"grad_norm": 2.4551353454589844,
"learning_rate": 0.0013730158730158731,
"loss": 0.3061,
"step": 460
},
{
"epoch": 7.37,
"grad_norm": 1.707070231437683,
"learning_rate": 0.0012936507936507939,
"loss": 0.3525,
"step": 470
},
{
"epoch": 7.53,
"grad_norm": 0.43897438049316406,
"learning_rate": 0.0012142857142857144,
"loss": 0.3032,
"step": 480
},
{
"epoch": 7.69,
"grad_norm": 2.149365186691284,
"learning_rate": 0.001134920634920635,
"loss": 0.3031,
"step": 490
},
{
"epoch": 7.84,
"grad_norm": 1.5541889667510986,
"learning_rate": 0.0010555555555555557,
"loss": 0.3199,
"step": 500
},
{
"epoch": 8.0,
"grad_norm": 0.5194265842437744,
"learning_rate": 0.0009761904761904762,
"loss": 0.3218,
"step": 510
},
{
"epoch": 8.0,
"eval_accuracy": 0.8669527896995708,
"eval_f1": 0.8448168439144914,
"eval_loss": 0.3072466254234314,
"eval_precision": 0.8257109927719459,
"eval_recall": 0.8897720673635308,
"eval_runtime": 12.0816,
"eval_samples_per_second": 96.428,
"eval_steps_per_second": 6.042,
"step": 510
},
{
"epoch": 8.16,
"grad_norm": 0.4812917709350586,
"learning_rate": 0.0008968253968253968,
"loss": 0.3105,
"step": 520
},
{
"epoch": 8.31,
"grad_norm": 3.871387004852295,
"learning_rate": 0.0008174603174603175,
"loss": 0.3398,
"step": 530
},
{
"epoch": 8.47,
"grad_norm": 0.3649653196334839,
"learning_rate": 0.000746031746031746,
"loss": 0.3169,
"step": 540
},
{
"epoch": 8.63,
"grad_norm": 0.4373646676540375,
"learning_rate": 0.0006666666666666666,
"loss": 0.3019,
"step": 550
},
{
"epoch": 8.78,
"grad_norm": 0.21746236085891724,
"learning_rate": 0.0005873015873015873,
"loss": 0.3463,
"step": 560
},
{
"epoch": 8.94,
"grad_norm": 1.6411595344543457,
"learning_rate": 0.0005079365079365079,
"loss": 0.3219,
"step": 570
},
{
"epoch": 8.99,
"eval_accuracy": 0.896137339055794,
"eval_f1": 0.870789179160911,
"eval_loss": 0.263265997171402,
"eval_precision": 0.8582184517497349,
"eval_recall": 0.887172198789657,
"eval_runtime": 12.1817,
"eval_samples_per_second": 95.635,
"eval_steps_per_second": 5.993,
"step": 573
},
{
"epoch": 9.1,
"grad_norm": 1.573140025138855,
"learning_rate": 0.0004285714285714286,
"loss": 0.3078,
"step": 580
},
{
"epoch": 9.25,
"grad_norm": 0.6549800634384155,
"learning_rate": 0.00034920634920634924,
"loss": 0.2872,
"step": 590
},
{
"epoch": 9.41,
"grad_norm": 0.4417751133441925,
"learning_rate": 0.0002698412698412699,
"loss": 0.3256,
"step": 600
},
{
"epoch": 9.57,
"grad_norm": 3.714104413986206,
"learning_rate": 0.0001904761904761905,
"loss": 0.361,
"step": 610
},
{
"epoch": 9.73,
"grad_norm": 0.5447297096252441,
"learning_rate": 0.00011111111111111112,
"loss": 0.3773,
"step": 620
},
{
"epoch": 9.88,
"grad_norm": 1.347410798072815,
"learning_rate": 3.1746031746031745e-05,
"loss": 0.3049,
"step": 630
},
{
"epoch": 9.88,
"eval_accuracy": 0.8927038626609443,
"eval_f1": 0.8684775000564475,
"eval_loss": 0.27391761541366577,
"eval_precision": 0.8528169402296,
"eval_recall": 0.8912334189131366,
"eval_runtime": 12.284,
"eval_samples_per_second": 94.839,
"eval_steps_per_second": 5.943,
"step": 630
},
{
"epoch": 9.88,
"step": 630,
"total_flos": 3.142570654487126e+18,
"train_loss": 0.3948629246817695,
"train_runtime": 760.2155,
"train_samples_per_second": 53.63,
"train_steps_per_second": 0.829
}
],
"logging_steps": 10,
"max_steps": 630,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 3.142570654487126e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}