chest-deit-base-finetuned / trainer_state.json
selmamalak's picture
End of training
60d4103 verified
{
"best_metric": 0.975107296137339,
"best_model_checkpoint": "deit-base-patch16-224-finetuned-lora-medmnistv2/checkpoint-446",
"epoch": 9.882352941176471,
"eval_steps": 500,
"global_step": 630,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.16,
"grad_norm": 1.1300164461135864,
"learning_rate": 0.004920634920634921,
"loss": 0.4737,
"step": 10
},
{
"epoch": 0.31,
"grad_norm": 0.7213650941848755,
"learning_rate": 0.004841269841269842,
"loss": 0.6061,
"step": 20
},
{
"epoch": 0.47,
"grad_norm": 3.0624804496765137,
"learning_rate": 0.0047619047619047615,
"loss": 0.2754,
"step": 30
},
{
"epoch": 0.63,
"grad_norm": 0.509834885597229,
"learning_rate": 0.004682539682539683,
"loss": 0.2462,
"step": 40
},
{
"epoch": 0.78,
"grad_norm": 1.2371031045913696,
"learning_rate": 0.004603174603174603,
"loss": 0.205,
"step": 50
},
{
"epoch": 0.94,
"grad_norm": 1.2106962203979492,
"learning_rate": 0.004523809523809524,
"loss": 0.2556,
"step": 60
},
{
"epoch": 0.99,
"eval_accuracy": 0.9184549356223176,
"eval_f1": 0.8822492390220755,
"eval_loss": 0.20187422633171082,
"eval_precision": 0.9446022031823746,
"eval_recall": 0.8469420502475702,
"eval_runtime": 11.2928,
"eval_samples_per_second": 103.163,
"eval_steps_per_second": 6.464,
"step": 63
},
{
"epoch": 1.1,
"grad_norm": 2.5079874992370605,
"learning_rate": 0.0044444444444444444,
"loss": 0.282,
"step": 70
},
{
"epoch": 1.25,
"grad_norm": 0.6824792623519897,
"learning_rate": 0.004365079365079365,
"loss": 0.2291,
"step": 80
},
{
"epoch": 1.41,
"grad_norm": 1.294320821762085,
"learning_rate": 0.004285714285714286,
"loss": 0.208,
"step": 90
},
{
"epoch": 1.57,
"grad_norm": 2.1044976711273193,
"learning_rate": 0.004206349206349207,
"loss": 0.2288,
"step": 100
},
{
"epoch": 1.73,
"grad_norm": 0.8730837106704712,
"learning_rate": 0.0041269841269841265,
"loss": 0.3071,
"step": 110
},
{
"epoch": 1.88,
"grad_norm": 1.6219590902328491,
"learning_rate": 0.004047619047619048,
"loss": 0.2302,
"step": 120
},
{
"epoch": 1.99,
"eval_accuracy": 0.9613733905579399,
"eval_f1": 0.9513881612438695,
"eval_loss": 0.10982219129800797,
"eval_precision": 0.9396367521367521,
"eval_recall": 0.965355461825295,
"eval_runtime": 11.2742,
"eval_samples_per_second": 103.333,
"eval_steps_per_second": 6.475,
"step": 127
},
{
"epoch": 2.04,
"grad_norm": 1.7205462455749512,
"learning_rate": 0.003968253968253968,
"loss": 0.214,
"step": 130
},
{
"epoch": 2.2,
"grad_norm": 0.4880439341068268,
"learning_rate": 0.003888888888888889,
"loss": 0.2757,
"step": 140
},
{
"epoch": 2.35,
"grad_norm": 1.5420565605163574,
"learning_rate": 0.0038095238095238095,
"loss": 0.2314,
"step": 150
},
{
"epoch": 2.51,
"grad_norm": 0.872795820236206,
"learning_rate": 0.0037301587301587303,
"loss": 0.2689,
"step": 160
},
{
"epoch": 2.67,
"grad_norm": 1.3670737743377686,
"learning_rate": 0.0036507936507936506,
"loss": 0.1787,
"step": 170
},
{
"epoch": 2.82,
"grad_norm": 0.9697684049606323,
"learning_rate": 0.0035714285714285718,
"loss": 0.1962,
"step": 180
},
{
"epoch": 2.98,
"grad_norm": 0.6434329152107239,
"learning_rate": 0.003492063492063492,
"loss": 0.2258,
"step": 190
},
{
"epoch": 3.0,
"eval_accuracy": 0.9622317596566523,
"eval_f1": 0.9496135003577972,
"eval_loss": 0.11506476998329163,
"eval_precision": 0.9640835246121029,
"eval_recall": 0.9372077296900788,
"eval_runtime": 11.448,
"eval_samples_per_second": 101.764,
"eval_steps_per_second": 6.377,
"step": 191
},
{
"epoch": 3.14,
"grad_norm": 0.3758108913898468,
"learning_rate": 0.003412698412698413,
"loss": 0.2048,
"step": 200
},
{
"epoch": 3.29,
"grad_norm": 0.3552361726760864,
"learning_rate": 0.003333333333333333,
"loss": 0.1136,
"step": 210
},
{
"epoch": 3.45,
"grad_norm": 0.3793877959251404,
"learning_rate": 0.0032539682539682543,
"loss": 0.2048,
"step": 220
},
{
"epoch": 3.61,
"grad_norm": 0.6903932094573975,
"learning_rate": 0.0031746031746031746,
"loss": 0.2696,
"step": 230
},
{
"epoch": 3.76,
"grad_norm": 0.628604531288147,
"learning_rate": 0.0030952380952380953,
"loss": 0.1585,
"step": 240
},
{
"epoch": 3.92,
"grad_norm": 0.7250388860702515,
"learning_rate": 0.0030158730158730157,
"loss": 0.1465,
"step": 250
},
{
"epoch": 4.0,
"eval_accuracy": 0.9725321888412017,
"eval_f1": 0.9643166172765768,
"eval_loss": 0.07334637641906738,
"eval_precision": 0.9653162002256106,
"eval_recall": 0.9633286722904824,
"eval_runtime": 11.4298,
"eval_samples_per_second": 101.927,
"eval_steps_per_second": 6.387,
"step": 255
},
{
"epoch": 4.08,
"grad_norm": 0.4553186893463135,
"learning_rate": 0.002936507936507937,
"loss": 0.1609,
"step": 260
},
{
"epoch": 4.24,
"grad_norm": 0.3900231420993805,
"learning_rate": 0.002857142857142857,
"loss": 0.1517,
"step": 270
},
{
"epoch": 4.39,
"grad_norm": 0.45594316720962524,
"learning_rate": 0.002777777777777778,
"loss": 0.1705,
"step": 280
},
{
"epoch": 4.55,
"grad_norm": 0.46610358357429504,
"learning_rate": 0.002698412698412698,
"loss": 0.1451,
"step": 290
},
{
"epoch": 4.71,
"grad_norm": 0.557108461856842,
"learning_rate": 0.0026190476190476194,
"loss": 0.1582,
"step": 300
},
{
"epoch": 4.86,
"grad_norm": 0.4486098885536194,
"learning_rate": 0.0025396825396825397,
"loss": 0.1763,
"step": 310
},
{
"epoch": 4.99,
"eval_accuracy": 0.9725321888412017,
"eval_f1": 0.9639271421162228,
"eval_loss": 0.07630692422389984,
"eval_precision": 0.9702764832336926,
"eval_recall": 0.9580085885445321,
"eval_runtime": 11.1034,
"eval_samples_per_second": 104.923,
"eval_steps_per_second": 6.575,
"step": 318
},
{
"epoch": 5.02,
"grad_norm": 0.5927975177764893,
"learning_rate": 0.0024603174603174604,
"loss": 0.1563,
"step": 320
},
{
"epoch": 5.18,
"grad_norm": 0.5728869438171387,
"learning_rate": 0.0023809523809523807,
"loss": 0.1666,
"step": 330
},
{
"epoch": 5.33,
"grad_norm": 0.34924712777137756,
"learning_rate": 0.0023015873015873015,
"loss": 0.1425,
"step": 340
},
{
"epoch": 5.49,
"grad_norm": 0.487810343503952,
"learning_rate": 0.0022222222222222222,
"loss": 0.1157,
"step": 350
},
{
"epoch": 5.65,
"grad_norm": 0.5470776557922363,
"learning_rate": 0.002142857142857143,
"loss": 0.1503,
"step": 360
},
{
"epoch": 5.8,
"grad_norm": 0.9323746562004089,
"learning_rate": 0.0020634920634920633,
"loss": 0.1813,
"step": 370
},
{
"epoch": 5.96,
"grad_norm": 0.4948749542236328,
"learning_rate": 0.001984126984126984,
"loss": 0.1627,
"step": 380
},
{
"epoch": 5.99,
"eval_accuracy": 0.9570815450643777,
"eval_f1": 0.9465564950602603,
"eval_loss": 0.10568992793560028,
"eval_precision": 0.9315479562643401,
"eval_recall": 0.965643911608289,
"eval_runtime": 11.3843,
"eval_samples_per_second": 102.334,
"eval_steps_per_second": 6.412,
"step": 382
},
{
"epoch": 6.12,
"grad_norm": 0.5047426819801331,
"learning_rate": 0.0019047619047619048,
"loss": 0.1335,
"step": 390
},
{
"epoch": 6.27,
"grad_norm": 0.3176202178001404,
"learning_rate": 0.0018253968253968253,
"loss": 0.0897,
"step": 400
},
{
"epoch": 6.43,
"grad_norm": 0.42543891072273254,
"learning_rate": 0.001746031746031746,
"loss": 0.1356,
"step": 410
},
{
"epoch": 6.59,
"grad_norm": 0.35515210032463074,
"learning_rate": 0.0016666666666666666,
"loss": 0.1324,
"step": 420
},
{
"epoch": 6.75,
"grad_norm": 0.900916337966919,
"learning_rate": 0.0015873015873015873,
"loss": 0.1435,
"step": 430
},
{
"epoch": 6.9,
"grad_norm": 0.6653035283088684,
"learning_rate": 0.0015079365079365078,
"loss": 0.1509,
"step": 440
},
{
"epoch": 7.0,
"eval_accuracy": 0.975107296137339,
"eval_f1": 0.9680345077825117,
"eval_loss": 0.07007680833339691,
"eval_precision": 0.9637800176993805,
"eval_recall": 0.9725189498135582,
"eval_runtime": 12.1771,
"eval_samples_per_second": 95.672,
"eval_steps_per_second": 5.995,
"step": 446
},
{
"epoch": 7.06,
"grad_norm": 0.31296506524086,
"learning_rate": 0.0014285714285714286,
"loss": 0.1322,
"step": 450
},
{
"epoch": 7.22,
"grad_norm": 0.6464645862579346,
"learning_rate": 0.001349206349206349,
"loss": 0.1457,
"step": 460
},
{
"epoch": 7.37,
"grad_norm": 1.0385708808898926,
"learning_rate": 0.0012698412698412698,
"loss": 0.1278,
"step": 470
},
{
"epoch": 7.53,
"grad_norm": 0.6095046401023865,
"learning_rate": 0.0011904761904761904,
"loss": 0.1168,
"step": 480
},
{
"epoch": 7.69,
"grad_norm": 0.7724207639694214,
"learning_rate": 0.0011111111111111111,
"loss": 0.1182,
"step": 490
},
{
"epoch": 7.84,
"grad_norm": 0.4225674569606781,
"learning_rate": 0.0010317460317460316,
"loss": 0.1325,
"step": 500
},
{
"epoch": 8.0,
"grad_norm": 0.47218218445777893,
"learning_rate": 0.0009523809523809524,
"loss": 0.1209,
"step": 510
},
{
"epoch": 8.0,
"eval_accuracy": 0.9570815450643777,
"eval_f1": 0.9465564950602603,
"eval_loss": 0.10465892404317856,
"eval_precision": 0.9315479562643401,
"eval_recall": 0.965643911608289,
"eval_runtime": 11.3686,
"eval_samples_per_second": 102.475,
"eval_steps_per_second": 6.421,
"step": 510
},
{
"epoch": 8.16,
"grad_norm": 0.296578973531723,
"learning_rate": 0.000873015873015873,
"loss": 0.1006,
"step": 520
},
{
"epoch": 8.31,
"grad_norm": 0.9204122424125671,
"learning_rate": 0.0007936507936507937,
"loss": 0.1202,
"step": 530
},
{
"epoch": 8.47,
"grad_norm": 0.5195232629776001,
"learning_rate": 0.0007142857142857143,
"loss": 0.1052,
"step": 540
},
{
"epoch": 8.63,
"grad_norm": 0.29583191871643066,
"learning_rate": 0.0006349206349206349,
"loss": 0.1285,
"step": 550
},
{
"epoch": 8.78,
"grad_norm": 0.4851154386997223,
"learning_rate": 0.0005555555555555556,
"loss": 0.1271,
"step": 560
},
{
"epoch": 8.94,
"grad_norm": 0.47467851638793945,
"learning_rate": 0.0004761904761904762,
"loss": 0.0961,
"step": 570
},
{
"epoch": 8.99,
"eval_accuracy": 0.9733905579399141,
"eval_f1": 0.966177521420591,
"eval_loss": 0.07205679267644882,
"eval_precision": 0.9577373642293522,
"eval_recall": 0.975613576624488,
"eval_runtime": 11.2412,
"eval_samples_per_second": 103.637,
"eval_steps_per_second": 6.494,
"step": 573
},
{
"epoch": 9.1,
"grad_norm": 0.29292458295822144,
"learning_rate": 0.0003968253968253968,
"loss": 0.096,
"step": 580
},
{
"epoch": 9.25,
"grad_norm": 0.513671338558197,
"learning_rate": 0.00031746031746031746,
"loss": 0.0871,
"step": 590
},
{
"epoch": 9.41,
"grad_norm": 0.8143641948699951,
"learning_rate": 0.0002380952380952381,
"loss": 0.1297,
"step": 600
},
{
"epoch": 9.57,
"grad_norm": 0.4731471836566925,
"learning_rate": 0.00015873015873015873,
"loss": 0.1175,
"step": 610
},
{
"epoch": 9.73,
"grad_norm": 0.30293089151382446,
"learning_rate": 7.936507936507937e-05,
"loss": 0.1166,
"step": 620
},
{
"epoch": 9.88,
"grad_norm": 0.3364422023296356,
"learning_rate": 0.0,
"loss": 0.1063,
"step": 630
},
{
"epoch": 9.88,
"eval_accuracy": 0.9622317596566523,
"eval_f1": 0.9526072485207102,
"eval_loss": 0.08854742348194122,
"eval_precision": 0.9398335630995462,
"eval_recall": 0.9680642154165903,
"eval_runtime": 11.4305,
"eval_samples_per_second": 101.92,
"eval_steps_per_second": 6.386,
"step": 630
},
{
"epoch": 9.88,
"step": 630,
"total_flos": 3.1439051980091965e+18,
"train_loss": 0.1791202223490155,
"train_runtime": 695.1527,
"train_samples_per_second": 58.649,
"train_steps_per_second": 0.906
}
],
"logging_steps": 10,
"max_steps": 630,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 3.1439051980091965e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}