mansee's picture
End of training
b110cba
raw
history blame
No virus
41.4 kB
{
"best_metric": 0.7280604310153299,
"best_model_checkpoint": "swin-tiny-patch4-window7-224-img_orientation/checkpoint-3160",
"epoch": 9.984202211690363,
"eval_steps": 500,
"global_step": 3160,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 1.5822784810126583e-06,
"loss": 1.4093,
"step": 10
},
{
"epoch": 0.06,
"learning_rate": 3.1645569620253167e-06,
"loss": 1.3983,
"step": 20
},
{
"epoch": 0.09,
"learning_rate": 4.746835443037975e-06,
"loss": 1.402,
"step": 30
},
{
"epoch": 0.13,
"learning_rate": 6.329113924050633e-06,
"loss": 1.3799,
"step": 40
},
{
"epoch": 0.16,
"learning_rate": 7.911392405063292e-06,
"loss": 1.3363,
"step": 50
},
{
"epoch": 0.19,
"learning_rate": 9.49367088607595e-06,
"loss": 1.3071,
"step": 60
},
{
"epoch": 0.22,
"learning_rate": 1.1075949367088608e-05,
"loss": 1.2847,
"step": 70
},
{
"epoch": 0.25,
"learning_rate": 1.2658227848101267e-05,
"loss": 1.2458,
"step": 80
},
{
"epoch": 0.28,
"learning_rate": 1.4240506329113925e-05,
"loss": 1.1755,
"step": 90
},
{
"epoch": 0.32,
"learning_rate": 1.5822784810126583e-05,
"loss": 1.1232,
"step": 100
},
{
"epoch": 0.35,
"learning_rate": 1.7405063291139243e-05,
"loss": 1.0368,
"step": 110
},
{
"epoch": 0.38,
"learning_rate": 1.89873417721519e-05,
"loss": 0.9468,
"step": 120
},
{
"epoch": 0.41,
"learning_rate": 2.056962025316456e-05,
"loss": 0.867,
"step": 130
},
{
"epoch": 0.44,
"learning_rate": 2.2151898734177217e-05,
"loss": 0.8772,
"step": 140
},
{
"epoch": 0.47,
"learning_rate": 2.3734177215189873e-05,
"loss": 0.7954,
"step": 150
},
{
"epoch": 0.51,
"learning_rate": 2.5316455696202533e-05,
"loss": 0.7559,
"step": 160
},
{
"epoch": 0.54,
"learning_rate": 2.689873417721519e-05,
"loss": 0.8245,
"step": 170
},
{
"epoch": 0.57,
"learning_rate": 2.848101265822785e-05,
"loss": 0.7358,
"step": 180
},
{
"epoch": 0.6,
"learning_rate": 3.0063291139240506e-05,
"loss": 0.7766,
"step": 190
},
{
"epoch": 0.63,
"learning_rate": 3.1645569620253167e-05,
"loss": 0.7109,
"step": 200
},
{
"epoch": 0.66,
"learning_rate": 3.322784810126582e-05,
"loss": 0.738,
"step": 210
},
{
"epoch": 0.7,
"learning_rate": 3.4810126582278487e-05,
"loss": 0.7185,
"step": 220
},
{
"epoch": 0.73,
"learning_rate": 3.639240506329114e-05,
"loss": 0.7008,
"step": 230
},
{
"epoch": 0.76,
"learning_rate": 3.79746835443038e-05,
"loss": 0.7009,
"step": 240
},
{
"epoch": 0.79,
"learning_rate": 3.9556962025316456e-05,
"loss": 0.677,
"step": 250
},
{
"epoch": 0.82,
"learning_rate": 4.113924050632912e-05,
"loss": 0.6927,
"step": 260
},
{
"epoch": 0.85,
"learning_rate": 4.2721518987341776e-05,
"loss": 0.7135,
"step": 270
},
{
"epoch": 0.88,
"learning_rate": 4.430379746835443e-05,
"loss": 0.6918,
"step": 280
},
{
"epoch": 0.92,
"learning_rate": 4.588607594936709e-05,
"loss": 0.7152,
"step": 290
},
{
"epoch": 0.95,
"learning_rate": 4.7468354430379746e-05,
"loss": 0.695,
"step": 300
},
{
"epoch": 0.98,
"learning_rate": 4.905063291139241e-05,
"loss": 0.6659,
"step": 310
},
{
"epoch": 1.0,
"eval_accuracy": 0.6849588980226616,
"eval_loss": 0.5535573363304138,
"eval_runtime": 33.1556,
"eval_samples_per_second": 135.754,
"eval_steps_per_second": 4.253,
"step": 316
},
{
"epoch": 1.01,
"learning_rate": 4.9929676511955e-05,
"loss": 0.7086,
"step": 320
},
{
"epoch": 1.04,
"learning_rate": 4.975386779184248e-05,
"loss": 0.6302,
"step": 330
},
{
"epoch": 1.07,
"learning_rate": 4.957805907172996e-05,
"loss": 0.6449,
"step": 340
},
{
"epoch": 1.11,
"learning_rate": 4.940225035161744e-05,
"loss": 0.6431,
"step": 350
},
{
"epoch": 1.14,
"learning_rate": 4.9226441631504925e-05,
"loss": 0.6261,
"step": 360
},
{
"epoch": 1.17,
"learning_rate": 4.905063291139241e-05,
"loss": 0.7035,
"step": 370
},
{
"epoch": 1.2,
"learning_rate": 4.887482419127989e-05,
"loss": 0.6453,
"step": 380
},
{
"epoch": 1.23,
"learning_rate": 4.869901547116737e-05,
"loss": 0.624,
"step": 390
},
{
"epoch": 1.26,
"learning_rate": 4.852320675105486e-05,
"loss": 0.6493,
"step": 400
},
{
"epoch": 1.3,
"learning_rate": 4.8347398030942336e-05,
"loss": 0.6148,
"step": 410
},
{
"epoch": 1.33,
"learning_rate": 4.817158931082982e-05,
"loss": 0.6194,
"step": 420
},
{
"epoch": 1.36,
"learning_rate": 4.7995780590717305e-05,
"loss": 0.6322,
"step": 430
},
{
"epoch": 1.39,
"learning_rate": 4.7819971870604783e-05,
"loss": 0.6201,
"step": 440
},
{
"epoch": 1.42,
"learning_rate": 4.764416315049227e-05,
"loss": 0.6492,
"step": 450
},
{
"epoch": 1.45,
"learning_rate": 4.7468354430379746e-05,
"loss": 0.5862,
"step": 460
},
{
"epoch": 1.48,
"learning_rate": 4.729254571026723e-05,
"loss": 0.6019,
"step": 470
},
{
"epoch": 1.52,
"learning_rate": 4.7116736990154716e-05,
"loss": 0.6338,
"step": 480
},
{
"epoch": 1.55,
"learning_rate": 4.6940928270042194e-05,
"loss": 0.6159,
"step": 490
},
{
"epoch": 1.58,
"learning_rate": 4.676511954992968e-05,
"loss": 0.6494,
"step": 500
},
{
"epoch": 1.61,
"learning_rate": 4.6589310829817164e-05,
"loss": 0.606,
"step": 510
},
{
"epoch": 1.64,
"learning_rate": 4.641350210970464e-05,
"loss": 0.62,
"step": 520
},
{
"epoch": 1.67,
"learning_rate": 4.623769338959213e-05,
"loss": 0.6263,
"step": 530
},
{
"epoch": 1.71,
"learning_rate": 4.606188466947961e-05,
"loss": 0.5747,
"step": 540
},
{
"epoch": 1.74,
"learning_rate": 4.588607594936709e-05,
"loss": 0.6158,
"step": 550
},
{
"epoch": 1.77,
"learning_rate": 4.5710267229254575e-05,
"loss": 0.5928,
"step": 560
},
{
"epoch": 1.8,
"learning_rate": 4.553445850914206e-05,
"loss": 0.5981,
"step": 570
},
{
"epoch": 1.83,
"learning_rate": 4.535864978902954e-05,
"loss": 0.6166,
"step": 580
},
{
"epoch": 1.86,
"learning_rate": 4.518284106891702e-05,
"loss": 0.6215,
"step": 590
},
{
"epoch": 1.9,
"learning_rate": 4.50070323488045e-05,
"loss": 0.6185,
"step": 600
},
{
"epoch": 1.93,
"learning_rate": 4.4831223628691985e-05,
"loss": 0.5896,
"step": 610
},
{
"epoch": 1.96,
"learning_rate": 4.465541490857947e-05,
"loss": 0.5894,
"step": 620
},
{
"epoch": 1.99,
"learning_rate": 4.447960618846695e-05,
"loss": 0.5971,
"step": 630
},
{
"epoch": 2.0,
"eval_accuracy": 0.7169517884914464,
"eval_loss": 0.4985543489456177,
"eval_runtime": 33.2772,
"eval_samples_per_second": 135.258,
"eval_steps_per_second": 4.237,
"step": 633
},
{
"epoch": 2.02,
"learning_rate": 4.430379746835443e-05,
"loss": 0.5902,
"step": 640
},
{
"epoch": 2.05,
"learning_rate": 4.412798874824192e-05,
"loss": 0.5861,
"step": 650
},
{
"epoch": 2.09,
"learning_rate": 4.3952180028129396e-05,
"loss": 0.5626,
"step": 660
},
{
"epoch": 2.12,
"learning_rate": 4.377637130801688e-05,
"loss": 0.537,
"step": 670
},
{
"epoch": 2.15,
"learning_rate": 4.3600562587904366e-05,
"loss": 0.554,
"step": 680
},
{
"epoch": 2.18,
"learning_rate": 4.3424753867791844e-05,
"loss": 0.5916,
"step": 690
},
{
"epoch": 2.21,
"learning_rate": 4.324894514767933e-05,
"loss": 0.5525,
"step": 700
},
{
"epoch": 2.24,
"learning_rate": 4.307313642756681e-05,
"loss": 0.5568,
"step": 710
},
{
"epoch": 2.27,
"learning_rate": 4.289732770745429e-05,
"loss": 0.5474,
"step": 720
},
{
"epoch": 2.31,
"learning_rate": 4.2721518987341776e-05,
"loss": 0.5914,
"step": 730
},
{
"epoch": 2.34,
"learning_rate": 4.2545710267229255e-05,
"loss": 0.5652,
"step": 740
},
{
"epoch": 2.37,
"learning_rate": 4.236990154711674e-05,
"loss": 0.5814,
"step": 750
},
{
"epoch": 2.4,
"learning_rate": 4.2194092827004224e-05,
"loss": 0.5703,
"step": 760
},
{
"epoch": 2.43,
"learning_rate": 4.20182841068917e-05,
"loss": 0.564,
"step": 770
},
{
"epoch": 2.46,
"learning_rate": 4.184247538677919e-05,
"loss": 0.5755,
"step": 780
},
{
"epoch": 2.5,
"learning_rate": 4.166666666666667e-05,
"loss": 0.5765,
"step": 790
},
{
"epoch": 2.53,
"learning_rate": 4.149085794655415e-05,
"loss": 0.567,
"step": 800
},
{
"epoch": 2.56,
"learning_rate": 4.1315049226441635e-05,
"loss": 0.5398,
"step": 810
},
{
"epoch": 2.59,
"learning_rate": 4.113924050632912e-05,
"loss": 0.5746,
"step": 820
},
{
"epoch": 2.62,
"learning_rate": 4.09634317862166e-05,
"loss": 0.5443,
"step": 830
},
{
"epoch": 2.65,
"learning_rate": 4.078762306610408e-05,
"loss": 0.5384,
"step": 840
},
{
"epoch": 2.69,
"learning_rate": 4.061181434599156e-05,
"loss": 0.5861,
"step": 850
},
{
"epoch": 2.72,
"learning_rate": 4.0436005625879046e-05,
"loss": 0.5332,
"step": 860
},
{
"epoch": 2.75,
"learning_rate": 4.026019690576653e-05,
"loss": 0.5776,
"step": 870
},
{
"epoch": 2.78,
"learning_rate": 4.008438818565401e-05,
"loss": 0.548,
"step": 880
},
{
"epoch": 2.81,
"learning_rate": 3.9908579465541493e-05,
"loss": 0.5758,
"step": 890
},
{
"epoch": 2.84,
"learning_rate": 3.973277074542898e-05,
"loss": 0.5691,
"step": 900
},
{
"epoch": 2.88,
"learning_rate": 3.9556962025316456e-05,
"loss": 0.5284,
"step": 910
},
{
"epoch": 2.91,
"learning_rate": 3.938115330520394e-05,
"loss": 0.5518,
"step": 920
},
{
"epoch": 2.94,
"learning_rate": 3.9205344585091426e-05,
"loss": 0.5507,
"step": 930
},
{
"epoch": 2.97,
"learning_rate": 3.9029535864978904e-05,
"loss": 0.5782,
"step": 940
},
{
"epoch": 3.0,
"eval_accuracy": 0.717173961341924,
"eval_loss": 0.48247379064559937,
"eval_runtime": 33.4685,
"eval_samples_per_second": 134.485,
"eval_steps_per_second": 4.213,
"step": 949
},
{
"epoch": 3.0,
"learning_rate": 3.885372714486639e-05,
"loss": 0.5663,
"step": 950
},
{
"epoch": 3.03,
"learning_rate": 3.867791842475387e-05,
"loss": 0.5212,
"step": 960
},
{
"epoch": 3.06,
"learning_rate": 3.850210970464135e-05,
"loss": 0.5294,
"step": 970
},
{
"epoch": 3.1,
"learning_rate": 3.832630098452884e-05,
"loss": 0.525,
"step": 980
},
{
"epoch": 3.13,
"learning_rate": 3.8150492264416315e-05,
"loss": 0.5303,
"step": 990
},
{
"epoch": 3.16,
"learning_rate": 3.79746835443038e-05,
"loss": 0.5189,
"step": 1000
},
{
"epoch": 3.19,
"learning_rate": 3.7798874824191285e-05,
"loss": 0.5325,
"step": 1010
},
{
"epoch": 3.22,
"learning_rate": 3.762306610407876e-05,
"loss": 0.5308,
"step": 1020
},
{
"epoch": 3.25,
"learning_rate": 3.744725738396625e-05,
"loss": 0.5326,
"step": 1030
},
{
"epoch": 3.29,
"learning_rate": 3.727144866385373e-05,
"loss": 0.5045,
"step": 1040
},
{
"epoch": 3.32,
"learning_rate": 3.709563994374121e-05,
"loss": 0.5444,
"step": 1050
},
{
"epoch": 3.35,
"learning_rate": 3.6919831223628695e-05,
"loss": 0.5263,
"step": 1060
},
{
"epoch": 3.38,
"learning_rate": 3.674402250351618e-05,
"loss": 0.5251,
"step": 1070
},
{
"epoch": 3.41,
"learning_rate": 3.656821378340366e-05,
"loss": 0.5168,
"step": 1080
},
{
"epoch": 3.44,
"learning_rate": 3.639240506329114e-05,
"loss": 0.5121,
"step": 1090
},
{
"epoch": 3.48,
"learning_rate": 3.621659634317862e-05,
"loss": 0.5179,
"step": 1100
},
{
"epoch": 3.51,
"learning_rate": 3.6040787623066106e-05,
"loss": 0.5435,
"step": 1110
},
{
"epoch": 3.54,
"learning_rate": 3.586497890295359e-05,
"loss": 0.5158,
"step": 1120
},
{
"epoch": 3.57,
"learning_rate": 3.568917018284107e-05,
"loss": 0.4941,
"step": 1130
},
{
"epoch": 3.6,
"learning_rate": 3.551336146272855e-05,
"loss": 0.5679,
"step": 1140
},
{
"epoch": 3.63,
"learning_rate": 3.533755274261604e-05,
"loss": 0.5137,
"step": 1150
},
{
"epoch": 3.67,
"learning_rate": 3.516174402250352e-05,
"loss": 0.5221,
"step": 1160
},
{
"epoch": 3.7,
"learning_rate": 3.4985935302391e-05,
"loss": 0.5304,
"step": 1170
},
{
"epoch": 3.73,
"learning_rate": 3.4810126582278487e-05,
"loss": 0.5066,
"step": 1180
},
{
"epoch": 3.76,
"learning_rate": 3.4634317862165965e-05,
"loss": 0.5467,
"step": 1190
},
{
"epoch": 3.79,
"learning_rate": 3.445850914205345e-05,
"loss": 0.5249,
"step": 1200
},
{
"epoch": 3.82,
"learning_rate": 3.428270042194093e-05,
"loss": 0.5639,
"step": 1210
},
{
"epoch": 3.85,
"learning_rate": 3.410689170182841e-05,
"loss": 0.522,
"step": 1220
},
{
"epoch": 3.89,
"learning_rate": 3.39310829817159e-05,
"loss": 0.5195,
"step": 1230
},
{
"epoch": 3.92,
"learning_rate": 3.3755274261603375e-05,
"loss": 0.4986,
"step": 1240
},
{
"epoch": 3.95,
"learning_rate": 3.357946554149086e-05,
"loss": 0.5187,
"step": 1250
},
{
"epoch": 3.98,
"learning_rate": 3.3403656821378345e-05,
"loss": 0.5428,
"step": 1260
},
{
"epoch": 4.0,
"eval_accuracy": 0.7140635414352366,
"eval_loss": 0.46642085909843445,
"eval_runtime": 33.1603,
"eval_samples_per_second": 135.735,
"eval_steps_per_second": 4.252,
"step": 1266
},
{
"epoch": 4.01,
"learning_rate": 3.322784810126582e-05,
"loss": 0.5234,
"step": 1270
},
{
"epoch": 4.04,
"learning_rate": 3.305203938115331e-05,
"loss": 0.5088,
"step": 1280
},
{
"epoch": 4.08,
"learning_rate": 3.287623066104079e-05,
"loss": 0.4914,
"step": 1290
},
{
"epoch": 4.11,
"learning_rate": 3.270042194092827e-05,
"loss": 0.5056,
"step": 1300
},
{
"epoch": 4.14,
"learning_rate": 3.2524613220815756e-05,
"loss": 0.5277,
"step": 1310
},
{
"epoch": 4.17,
"learning_rate": 3.234880450070324e-05,
"loss": 0.5003,
"step": 1320
},
{
"epoch": 4.2,
"learning_rate": 3.217299578059072e-05,
"loss": 0.4859,
"step": 1330
},
{
"epoch": 4.23,
"learning_rate": 3.1997187060478204e-05,
"loss": 0.5113,
"step": 1340
},
{
"epoch": 4.27,
"learning_rate": 3.182137834036568e-05,
"loss": 0.5146,
"step": 1350
},
{
"epoch": 4.3,
"learning_rate": 3.1645569620253167e-05,
"loss": 0.522,
"step": 1360
},
{
"epoch": 4.33,
"learning_rate": 3.146976090014065e-05,
"loss": 0.486,
"step": 1370
},
{
"epoch": 4.36,
"learning_rate": 3.129395218002813e-05,
"loss": 0.5025,
"step": 1380
},
{
"epoch": 4.39,
"learning_rate": 3.111814345991561e-05,
"loss": 0.5099,
"step": 1390
},
{
"epoch": 4.42,
"learning_rate": 3.09423347398031e-05,
"loss": 0.5161,
"step": 1400
},
{
"epoch": 4.45,
"learning_rate": 3.076652601969058e-05,
"loss": 0.4902,
"step": 1410
},
{
"epoch": 4.49,
"learning_rate": 3.059071729957806e-05,
"loss": 0.5406,
"step": 1420
},
{
"epoch": 4.52,
"learning_rate": 3.0414908579465547e-05,
"loss": 0.5025,
"step": 1430
},
{
"epoch": 4.55,
"learning_rate": 3.0239099859353025e-05,
"loss": 0.4895,
"step": 1440
},
{
"epoch": 4.58,
"learning_rate": 3.0063291139240506e-05,
"loss": 0.5104,
"step": 1450
},
{
"epoch": 4.61,
"learning_rate": 2.9887482419127988e-05,
"loss": 0.5078,
"step": 1460
},
{
"epoch": 4.64,
"learning_rate": 2.9711673699015473e-05,
"loss": 0.5011,
"step": 1470
},
{
"epoch": 4.68,
"learning_rate": 2.9535864978902954e-05,
"loss": 0.5105,
"step": 1480
},
{
"epoch": 4.71,
"learning_rate": 2.9360056258790436e-05,
"loss": 0.5066,
"step": 1490
},
{
"epoch": 4.74,
"learning_rate": 2.9184247538677924e-05,
"loss": 0.4699,
"step": 1500
},
{
"epoch": 4.77,
"learning_rate": 2.9008438818565402e-05,
"loss": 0.5125,
"step": 1510
},
{
"epoch": 4.8,
"learning_rate": 2.8832630098452884e-05,
"loss": 0.4876,
"step": 1520
},
{
"epoch": 4.83,
"learning_rate": 2.8656821378340365e-05,
"loss": 0.5184,
"step": 1530
},
{
"epoch": 4.87,
"learning_rate": 2.848101265822785e-05,
"loss": 0.4957,
"step": 1540
},
{
"epoch": 4.9,
"learning_rate": 2.830520393811533e-05,
"loss": 0.5218,
"step": 1550
},
{
"epoch": 4.93,
"learning_rate": 2.8129395218002813e-05,
"loss": 0.478,
"step": 1560
},
{
"epoch": 4.96,
"learning_rate": 2.7953586497890294e-05,
"loss": 0.5133,
"step": 1570
},
{
"epoch": 4.99,
"learning_rate": 2.777777777777778e-05,
"loss": 0.5131,
"step": 1580
},
{
"epoch": 5.0,
"eval_accuracy": 0.7149522328371473,
"eval_loss": 0.47850412130355835,
"eval_runtime": 34.3593,
"eval_samples_per_second": 130.998,
"eval_steps_per_second": 4.104,
"step": 1582
},
{
"epoch": 5.02,
"learning_rate": 2.760196905766526e-05,
"loss": 0.5038,
"step": 1590
},
{
"epoch": 5.06,
"learning_rate": 2.7426160337552742e-05,
"loss": 0.4844,
"step": 1600
},
{
"epoch": 5.09,
"learning_rate": 2.7250351617440227e-05,
"loss": 0.4941,
"step": 1610
},
{
"epoch": 5.12,
"learning_rate": 2.707454289732771e-05,
"loss": 0.4742,
"step": 1620
},
{
"epoch": 5.15,
"learning_rate": 2.689873417721519e-05,
"loss": 0.4599,
"step": 1630
},
{
"epoch": 5.18,
"learning_rate": 2.672292545710267e-05,
"loss": 0.5227,
"step": 1640
},
{
"epoch": 5.21,
"learning_rate": 2.6547116736990156e-05,
"loss": 0.4716,
"step": 1650
},
{
"epoch": 5.24,
"learning_rate": 2.6371308016877638e-05,
"loss": 0.4848,
"step": 1660
},
{
"epoch": 5.28,
"learning_rate": 2.619549929676512e-05,
"loss": 0.4654,
"step": 1670
},
{
"epoch": 5.31,
"learning_rate": 2.6019690576652604e-05,
"loss": 0.4686,
"step": 1680
},
{
"epoch": 5.34,
"learning_rate": 2.5843881856540085e-05,
"loss": 0.4513,
"step": 1690
},
{
"epoch": 5.37,
"learning_rate": 2.5668073136427567e-05,
"loss": 0.4758,
"step": 1700
},
{
"epoch": 5.4,
"learning_rate": 2.549226441631505e-05,
"loss": 0.4773,
"step": 1710
},
{
"epoch": 5.43,
"learning_rate": 2.5316455696202533e-05,
"loss": 0.4661,
"step": 1720
},
{
"epoch": 5.47,
"learning_rate": 2.5140646976090015e-05,
"loss": 0.5042,
"step": 1730
},
{
"epoch": 5.5,
"learning_rate": 2.49648382559775e-05,
"loss": 0.4584,
"step": 1740
},
{
"epoch": 5.53,
"learning_rate": 2.478902953586498e-05,
"loss": 0.4758,
"step": 1750
},
{
"epoch": 5.56,
"learning_rate": 2.4613220815752462e-05,
"loss": 0.4803,
"step": 1760
},
{
"epoch": 5.59,
"learning_rate": 2.4437412095639944e-05,
"loss": 0.4805,
"step": 1770
},
{
"epoch": 5.62,
"learning_rate": 2.426160337552743e-05,
"loss": 0.4807,
"step": 1780
},
{
"epoch": 5.66,
"learning_rate": 2.408579465541491e-05,
"loss": 0.4921,
"step": 1790
},
{
"epoch": 5.69,
"learning_rate": 2.3909985935302392e-05,
"loss": 0.4705,
"step": 1800
},
{
"epoch": 5.72,
"learning_rate": 2.3734177215189873e-05,
"loss": 0.4972,
"step": 1810
},
{
"epoch": 5.75,
"learning_rate": 2.3558368495077358e-05,
"loss": 0.4872,
"step": 1820
},
{
"epoch": 5.78,
"learning_rate": 2.338255977496484e-05,
"loss": 0.4633,
"step": 1830
},
{
"epoch": 5.81,
"learning_rate": 2.320675105485232e-05,
"loss": 0.5115,
"step": 1840
},
{
"epoch": 5.85,
"learning_rate": 2.3030942334739806e-05,
"loss": 0.4849,
"step": 1850
},
{
"epoch": 5.88,
"learning_rate": 2.2855133614627287e-05,
"loss": 0.5129,
"step": 1860
},
{
"epoch": 5.91,
"learning_rate": 2.267932489451477e-05,
"loss": 0.4727,
"step": 1870
},
{
"epoch": 5.94,
"learning_rate": 2.250351617440225e-05,
"loss": 0.4904,
"step": 1880
},
{
"epoch": 5.97,
"learning_rate": 2.2327707454289735e-05,
"loss": 0.4851,
"step": 1890
},
{
"epoch": 6.0,
"eval_accuracy": 0.7225061097533881,
"eval_loss": 0.47057706117630005,
"eval_runtime": 34.9373,
"eval_samples_per_second": 128.831,
"eval_steps_per_second": 4.036,
"step": 1899
},
{
"epoch": 6.0,
"learning_rate": 2.2151898734177217e-05,
"loss": 0.4668,
"step": 1900
},
{
"epoch": 6.03,
"learning_rate": 2.1976090014064698e-05,
"loss": 0.4821,
"step": 1910
},
{
"epoch": 6.07,
"learning_rate": 2.1800281293952183e-05,
"loss": 0.4689,
"step": 1920
},
{
"epoch": 6.1,
"learning_rate": 2.1624472573839664e-05,
"loss": 0.4384,
"step": 1930
},
{
"epoch": 6.13,
"learning_rate": 2.1448663853727146e-05,
"loss": 0.4348,
"step": 1940
},
{
"epoch": 6.16,
"learning_rate": 2.1272855133614627e-05,
"loss": 0.4707,
"step": 1950
},
{
"epoch": 6.19,
"learning_rate": 2.1097046413502112e-05,
"loss": 0.4661,
"step": 1960
},
{
"epoch": 6.22,
"learning_rate": 2.0921237693389594e-05,
"loss": 0.4809,
"step": 1970
},
{
"epoch": 6.26,
"learning_rate": 2.0745428973277075e-05,
"loss": 0.4811,
"step": 1980
},
{
"epoch": 6.29,
"learning_rate": 2.056962025316456e-05,
"loss": 0.4604,
"step": 1990
},
{
"epoch": 6.32,
"learning_rate": 2.039381153305204e-05,
"loss": 0.4435,
"step": 2000
},
{
"epoch": 6.35,
"learning_rate": 2.0218002812939523e-05,
"loss": 0.4801,
"step": 2010
},
{
"epoch": 6.38,
"learning_rate": 2.0042194092827004e-05,
"loss": 0.4808,
"step": 2020
},
{
"epoch": 6.41,
"learning_rate": 1.986638537271449e-05,
"loss": 0.4588,
"step": 2030
},
{
"epoch": 6.45,
"learning_rate": 1.969057665260197e-05,
"loss": 0.4518,
"step": 2040
},
{
"epoch": 6.48,
"learning_rate": 1.9514767932489452e-05,
"loss": 0.435,
"step": 2050
},
{
"epoch": 6.51,
"learning_rate": 1.9338959212376934e-05,
"loss": 0.5161,
"step": 2060
},
{
"epoch": 6.54,
"learning_rate": 1.916315049226442e-05,
"loss": 0.4665,
"step": 2070
},
{
"epoch": 6.57,
"learning_rate": 1.89873417721519e-05,
"loss": 0.4403,
"step": 2080
},
{
"epoch": 6.6,
"learning_rate": 1.881153305203938e-05,
"loss": 0.4782,
"step": 2090
},
{
"epoch": 6.64,
"learning_rate": 1.8635724331926866e-05,
"loss": 0.4756,
"step": 2100
},
{
"epoch": 6.67,
"learning_rate": 1.8459915611814348e-05,
"loss": 0.472,
"step": 2110
},
{
"epoch": 6.7,
"learning_rate": 1.828410689170183e-05,
"loss": 0.4347,
"step": 2120
},
{
"epoch": 6.73,
"learning_rate": 1.810829817158931e-05,
"loss": 0.4708,
"step": 2130
},
{
"epoch": 6.76,
"learning_rate": 1.7932489451476795e-05,
"loss": 0.4573,
"step": 2140
},
{
"epoch": 6.79,
"learning_rate": 1.7756680731364274e-05,
"loss": 0.4791,
"step": 2150
},
{
"epoch": 6.82,
"learning_rate": 1.758087201125176e-05,
"loss": 0.4355,
"step": 2160
},
{
"epoch": 6.86,
"learning_rate": 1.7405063291139243e-05,
"loss": 0.4981,
"step": 2170
},
{
"epoch": 6.89,
"learning_rate": 1.7229254571026725e-05,
"loss": 0.4776,
"step": 2180
},
{
"epoch": 6.92,
"learning_rate": 1.7053445850914206e-05,
"loss": 0.4564,
"step": 2190
},
{
"epoch": 6.95,
"learning_rate": 1.6877637130801688e-05,
"loss": 0.4516,
"step": 2200
},
{
"epoch": 6.98,
"learning_rate": 1.6701828410689173e-05,
"loss": 0.4457,
"step": 2210
},
{
"epoch": 7.0,
"eval_accuracy": 0.7187291712952677,
"eval_loss": 0.4728682339191437,
"eval_runtime": 33.7148,
"eval_samples_per_second": 133.502,
"eval_steps_per_second": 4.182,
"step": 2215
},
{
"epoch": 7.01,
"learning_rate": 1.6526019690576654e-05,
"loss": 0.4374,
"step": 2220
},
{
"epoch": 7.05,
"learning_rate": 1.6350210970464135e-05,
"loss": 0.4525,
"step": 2230
},
{
"epoch": 7.08,
"learning_rate": 1.617440225035162e-05,
"loss": 0.4369,
"step": 2240
},
{
"epoch": 7.11,
"learning_rate": 1.5998593530239102e-05,
"loss": 0.4517,
"step": 2250
},
{
"epoch": 7.14,
"learning_rate": 1.5822784810126583e-05,
"loss": 0.4763,
"step": 2260
},
{
"epoch": 7.17,
"learning_rate": 1.5646976090014065e-05,
"loss": 0.4582,
"step": 2270
},
{
"epoch": 7.2,
"learning_rate": 1.547116736990155e-05,
"loss": 0.4737,
"step": 2280
},
{
"epoch": 7.24,
"learning_rate": 1.529535864978903e-05,
"loss": 0.4456,
"step": 2290
},
{
"epoch": 7.27,
"learning_rate": 1.5119549929676513e-05,
"loss": 0.4535,
"step": 2300
},
{
"epoch": 7.3,
"learning_rate": 1.4943741209563994e-05,
"loss": 0.469,
"step": 2310
},
{
"epoch": 7.33,
"learning_rate": 1.4767932489451477e-05,
"loss": 0.4567,
"step": 2320
},
{
"epoch": 7.36,
"learning_rate": 1.4592123769338962e-05,
"loss": 0.4264,
"step": 2330
},
{
"epoch": 7.39,
"learning_rate": 1.4416315049226442e-05,
"loss": 0.4565,
"step": 2340
},
{
"epoch": 7.42,
"learning_rate": 1.4240506329113925e-05,
"loss": 0.4376,
"step": 2350
},
{
"epoch": 7.46,
"learning_rate": 1.4064697609001406e-05,
"loss": 0.4415,
"step": 2360
},
{
"epoch": 7.49,
"learning_rate": 1.388888888888889e-05,
"loss": 0.459,
"step": 2370
},
{
"epoch": 7.52,
"learning_rate": 1.3713080168776371e-05,
"loss": 0.4562,
"step": 2380
},
{
"epoch": 7.55,
"learning_rate": 1.3537271448663854e-05,
"loss": 0.4733,
"step": 2390
},
{
"epoch": 7.58,
"learning_rate": 1.3361462728551336e-05,
"loss": 0.4324,
"step": 2400
},
{
"epoch": 7.61,
"learning_rate": 1.3185654008438819e-05,
"loss": 0.4385,
"step": 2410
},
{
"epoch": 7.65,
"learning_rate": 1.3009845288326302e-05,
"loss": 0.466,
"step": 2420
},
{
"epoch": 7.68,
"learning_rate": 1.2834036568213783e-05,
"loss": 0.4525,
"step": 2430
},
{
"epoch": 7.71,
"learning_rate": 1.2658227848101267e-05,
"loss": 0.4319,
"step": 2440
},
{
"epoch": 7.74,
"learning_rate": 1.248241912798875e-05,
"loss": 0.4374,
"step": 2450
},
{
"epoch": 7.77,
"learning_rate": 1.2306610407876231e-05,
"loss": 0.4327,
"step": 2460
},
{
"epoch": 7.8,
"learning_rate": 1.2130801687763714e-05,
"loss": 0.4557,
"step": 2470
},
{
"epoch": 7.84,
"learning_rate": 1.1954992967651196e-05,
"loss": 0.4401,
"step": 2480
},
{
"epoch": 7.87,
"learning_rate": 1.1779184247538679e-05,
"loss": 0.4614,
"step": 2490
},
{
"epoch": 7.9,
"learning_rate": 1.160337552742616e-05,
"loss": 0.4298,
"step": 2500
},
{
"epoch": 7.93,
"learning_rate": 1.1427566807313644e-05,
"loss": 0.4452,
"step": 2510
},
{
"epoch": 7.96,
"learning_rate": 1.1251758087201125e-05,
"loss": 0.4393,
"step": 2520
},
{
"epoch": 7.99,
"learning_rate": 1.1075949367088608e-05,
"loss": 0.4407,
"step": 2530
},
{
"epoch": 8.0,
"eval_accuracy": 0.7207287269495668,
"eval_loss": 0.47592467069625854,
"eval_runtime": 33.9139,
"eval_samples_per_second": 132.718,
"eval_steps_per_second": 4.158,
"step": 2532
},
{
"epoch": 8.03,
"learning_rate": 1.0900140646976091e-05,
"loss": 0.4452,
"step": 2540
},
{
"epoch": 8.06,
"learning_rate": 1.0724331926863573e-05,
"loss": 0.473,
"step": 2550
},
{
"epoch": 8.09,
"learning_rate": 1.0548523206751056e-05,
"loss": 0.4202,
"step": 2560
},
{
"epoch": 8.12,
"learning_rate": 1.0372714486638538e-05,
"loss": 0.423,
"step": 2570
},
{
"epoch": 8.15,
"learning_rate": 1.019690576652602e-05,
"loss": 0.3977,
"step": 2580
},
{
"epoch": 8.18,
"learning_rate": 1.0021097046413502e-05,
"loss": 0.4587,
"step": 2590
},
{
"epoch": 8.21,
"learning_rate": 9.845288326300985e-06,
"loss": 0.4175,
"step": 2600
},
{
"epoch": 8.25,
"learning_rate": 9.669479606188467e-06,
"loss": 0.4288,
"step": 2610
},
{
"epoch": 8.28,
"learning_rate": 9.49367088607595e-06,
"loss": 0.4552,
"step": 2620
},
{
"epoch": 8.31,
"learning_rate": 9.317862165963433e-06,
"loss": 0.4408,
"step": 2630
},
{
"epoch": 8.34,
"learning_rate": 9.142053445850915e-06,
"loss": 0.4519,
"step": 2640
},
{
"epoch": 8.37,
"learning_rate": 8.966244725738398e-06,
"loss": 0.414,
"step": 2650
},
{
"epoch": 8.4,
"learning_rate": 8.79043600562588e-06,
"loss": 0.4362,
"step": 2660
},
{
"epoch": 8.44,
"learning_rate": 8.614627285513362e-06,
"loss": 0.4411,
"step": 2670
},
{
"epoch": 8.47,
"learning_rate": 8.438818565400844e-06,
"loss": 0.4271,
"step": 2680
},
{
"epoch": 8.5,
"learning_rate": 8.263009845288327e-06,
"loss": 0.4731,
"step": 2690
},
{
"epoch": 8.53,
"learning_rate": 8.08720112517581e-06,
"loss": 0.4188,
"step": 2700
},
{
"epoch": 8.56,
"learning_rate": 7.911392405063292e-06,
"loss": 0.4252,
"step": 2710
},
{
"epoch": 8.59,
"learning_rate": 7.735583684950775e-06,
"loss": 0.4256,
"step": 2720
},
{
"epoch": 8.63,
"learning_rate": 7.559774964838256e-06,
"loss": 0.4429,
"step": 2730
},
{
"epoch": 8.66,
"learning_rate": 7.3839662447257386e-06,
"loss": 0.416,
"step": 2740
},
{
"epoch": 8.69,
"learning_rate": 7.208157524613221e-06,
"loss": 0.416,
"step": 2750
},
{
"epoch": 8.72,
"learning_rate": 7.032348804500703e-06,
"loss": 0.4236,
"step": 2760
},
{
"epoch": 8.75,
"learning_rate": 6.8565400843881855e-06,
"loss": 0.4331,
"step": 2770
},
{
"epoch": 8.78,
"learning_rate": 6.680731364275668e-06,
"loss": 0.4439,
"step": 2780
},
{
"epoch": 8.82,
"learning_rate": 6.504922644163151e-06,
"loss": 0.4286,
"step": 2790
},
{
"epoch": 8.85,
"learning_rate": 6.329113924050633e-06,
"loss": 0.4333,
"step": 2800
},
{
"epoch": 8.88,
"learning_rate": 6.153305203938116e-06,
"loss": 0.45,
"step": 2810
},
{
"epoch": 8.91,
"learning_rate": 5.977496483825598e-06,
"loss": 0.4307,
"step": 2820
},
{
"epoch": 8.94,
"learning_rate": 5.80168776371308e-06,
"loss": 0.4156,
"step": 2830
},
{
"epoch": 8.97,
"learning_rate": 5.6258790436005626e-06,
"loss": 0.4636,
"step": 2840
},
{
"epoch": 9.0,
"eval_accuracy": 0.7249500111086425,
"eval_loss": 0.47323304414749146,
"eval_runtime": 33.1442,
"eval_samples_per_second": 135.8,
"eval_steps_per_second": 4.254,
"step": 2848
},
{
"epoch": 9.0,
"learning_rate": 5.450070323488046e-06,
"loss": 0.439,
"step": 2850
},
{
"epoch": 9.04,
"learning_rate": 5.274261603375528e-06,
"loss": 0.4224,
"step": 2860
},
{
"epoch": 9.07,
"learning_rate": 5.09845288326301e-06,
"loss": 0.4397,
"step": 2870
},
{
"epoch": 9.1,
"learning_rate": 4.922644163150493e-06,
"loss": 0.4092,
"step": 2880
},
{
"epoch": 9.13,
"learning_rate": 4.746835443037975e-06,
"loss": 0.4246,
"step": 2890
},
{
"epoch": 9.16,
"learning_rate": 4.571026722925457e-06,
"loss": 0.4304,
"step": 2900
},
{
"epoch": 9.19,
"learning_rate": 4.39521800281294e-06,
"loss": 0.4292,
"step": 2910
},
{
"epoch": 9.23,
"learning_rate": 4.219409282700422e-06,
"loss": 0.3956,
"step": 2920
},
{
"epoch": 9.26,
"learning_rate": 4.043600562587905e-06,
"loss": 0.4152,
"step": 2930
},
{
"epoch": 9.29,
"learning_rate": 3.867791842475387e-06,
"loss": 0.4449,
"step": 2940
},
{
"epoch": 9.32,
"learning_rate": 3.6919831223628693e-06,
"loss": 0.4262,
"step": 2950
},
{
"epoch": 9.35,
"learning_rate": 3.5161744022503516e-06,
"loss": 0.4303,
"step": 2960
},
{
"epoch": 9.38,
"learning_rate": 3.340365682137834e-06,
"loss": 0.3949,
"step": 2970
},
{
"epoch": 9.42,
"learning_rate": 3.1645569620253167e-06,
"loss": 0.4235,
"step": 2980
},
{
"epoch": 9.45,
"learning_rate": 2.988748241912799e-06,
"loss": 0.4213,
"step": 2990
},
{
"epoch": 9.48,
"learning_rate": 2.8129395218002813e-06,
"loss": 0.419,
"step": 3000
},
{
"epoch": 9.51,
"learning_rate": 2.637130801687764e-06,
"loss": 0.4503,
"step": 3010
},
{
"epoch": 9.54,
"learning_rate": 2.4613220815752463e-06,
"loss": 0.4302,
"step": 3020
},
{
"epoch": 9.57,
"learning_rate": 2.2855133614627286e-06,
"loss": 0.429,
"step": 3030
},
{
"epoch": 9.61,
"learning_rate": 2.109704641350211e-06,
"loss": 0.4133,
"step": 3040
},
{
"epoch": 9.64,
"learning_rate": 1.9338959212376937e-06,
"loss": 0.4417,
"step": 3050
},
{
"epoch": 9.67,
"learning_rate": 1.7580872011251758e-06,
"loss": 0.4096,
"step": 3060
},
{
"epoch": 9.7,
"learning_rate": 1.5822784810126583e-06,
"loss": 0.4601,
"step": 3070
},
{
"epoch": 9.73,
"learning_rate": 1.4064697609001406e-06,
"loss": 0.4201,
"step": 3080
},
{
"epoch": 9.76,
"learning_rate": 1.2306610407876232e-06,
"loss": 0.4253,
"step": 3090
},
{
"epoch": 9.79,
"learning_rate": 1.0548523206751055e-06,
"loss": 0.4238,
"step": 3100
},
{
"epoch": 9.83,
"learning_rate": 8.790436005625879e-07,
"loss": 0.4152,
"step": 3110
},
{
"epoch": 9.86,
"learning_rate": 7.032348804500703e-07,
"loss": 0.4091,
"step": 3120
},
{
"epoch": 9.89,
"learning_rate": 5.274261603375527e-07,
"loss": 0.4348,
"step": 3130
},
{
"epoch": 9.92,
"learning_rate": 3.5161744022503516e-07,
"loss": 0.4046,
"step": 3140
},
{
"epoch": 9.95,
"learning_rate": 1.7580872011251758e-07,
"loss": 0.3884,
"step": 3150
},
{
"epoch": 9.98,
"learning_rate": 0.0,
"loss": 0.4212,
"step": 3160
},
{
"epoch": 9.98,
"eval_accuracy": 0.7280604310153299,
"eval_loss": 0.4742942750453949,
"eval_runtime": 32.6052,
"eval_samples_per_second": 138.046,
"eval_steps_per_second": 4.324,
"step": 3160
},
{
"epoch": 9.98,
"step": 3160,
"total_flos": 1.0051627680166625e+19,
"train_loss": 0.5403492726857149,
"train_runtime": 6645.2491,
"train_samples_per_second": 60.946,
"train_steps_per_second": 0.476
}
],
"logging_steps": 10,
"max_steps": 3160,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 1.0051627680166625e+19,
"trial_name": null,
"trial_params": null
}