Karelito00's picture
End of training
b09c06c
{
"best_metric": 0.9935,
"best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuned-mnist/checkpoint-2811",
"epoch": 2.9994666666666667,
"global_step": 2811,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.7730496453900712e-06,
"loss": 2.3843,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 3.5460992907801423e-06,
"loss": 2.27,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 5.319148936170213e-06,
"loss": 2.1311,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 7.092198581560285e-06,
"loss": 1.9981,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 8.865248226950355e-06,
"loss": 1.761,
"step": 50
},
{
"epoch": 0.06,
"learning_rate": 1.0638297872340426e-05,
"loss": 1.4946,
"step": 60
},
{
"epoch": 0.07,
"learning_rate": 1.2411347517730498e-05,
"loss": 1.2439,
"step": 70
},
{
"epoch": 0.09,
"learning_rate": 1.418439716312057e-05,
"loss": 1.0823,
"step": 80
},
{
"epoch": 0.1,
"learning_rate": 1.595744680851064e-05,
"loss": 0.9241,
"step": 90
},
{
"epoch": 0.11,
"learning_rate": 1.773049645390071e-05,
"loss": 0.8283,
"step": 100
},
{
"epoch": 0.12,
"learning_rate": 1.950354609929078e-05,
"loss": 0.8041,
"step": 110
},
{
"epoch": 0.13,
"learning_rate": 2.1276595744680852e-05,
"loss": 0.6482,
"step": 120
},
{
"epoch": 0.14,
"learning_rate": 2.3049645390070924e-05,
"loss": 0.6448,
"step": 130
},
{
"epoch": 0.15,
"learning_rate": 2.4822695035460995e-05,
"loss": 0.5983,
"step": 140
},
{
"epoch": 0.16,
"learning_rate": 2.6595744680851064e-05,
"loss": 0.5754,
"step": 150
},
{
"epoch": 0.17,
"learning_rate": 2.836879432624114e-05,
"loss": 0.5485,
"step": 160
},
{
"epoch": 0.18,
"learning_rate": 3.0141843971631207e-05,
"loss": 0.5681,
"step": 170
},
{
"epoch": 0.19,
"learning_rate": 3.191489361702128e-05,
"loss": 0.48,
"step": 180
},
{
"epoch": 0.2,
"learning_rate": 3.3687943262411347e-05,
"loss": 0.5461,
"step": 190
},
{
"epoch": 0.21,
"learning_rate": 3.546099290780142e-05,
"loss": 0.5761,
"step": 200
},
{
"epoch": 0.22,
"learning_rate": 3.723404255319149e-05,
"loss": 0.4957,
"step": 210
},
{
"epoch": 0.23,
"learning_rate": 3.900709219858156e-05,
"loss": 0.4965,
"step": 220
},
{
"epoch": 0.25,
"learning_rate": 4.078014184397163e-05,
"loss": 0.473,
"step": 230
},
{
"epoch": 0.26,
"learning_rate": 4.2553191489361704e-05,
"loss": 0.4948,
"step": 240
},
{
"epoch": 0.27,
"learning_rate": 4.432624113475177e-05,
"loss": 0.5289,
"step": 250
},
{
"epoch": 0.28,
"learning_rate": 4.609929078014185e-05,
"loss": 0.4895,
"step": 260
},
{
"epoch": 0.29,
"learning_rate": 4.787234042553192e-05,
"loss": 0.5204,
"step": 270
},
{
"epoch": 0.3,
"learning_rate": 4.964539007092199e-05,
"loss": 0.5589,
"step": 280
},
{
"epoch": 0.31,
"learning_rate": 4.984183471727956e-05,
"loss": 0.4661,
"step": 290
},
{
"epoch": 0.32,
"learning_rate": 4.964412811387901e-05,
"loss": 0.4783,
"step": 300
},
{
"epoch": 0.33,
"learning_rate": 4.944642151047845e-05,
"loss": 0.5219,
"step": 310
},
{
"epoch": 0.34,
"learning_rate": 4.92487149070779e-05,
"loss": 0.5128,
"step": 320
},
{
"epoch": 0.35,
"learning_rate": 4.905100830367734e-05,
"loss": 0.4896,
"step": 330
},
{
"epoch": 0.36,
"learning_rate": 4.8853301700276796e-05,
"loss": 0.4123,
"step": 340
},
{
"epoch": 0.37,
"learning_rate": 4.865559509687624e-05,
"loss": 0.4216,
"step": 350
},
{
"epoch": 0.38,
"learning_rate": 4.8457888493475686e-05,
"loss": 0.4611,
"step": 360
},
{
"epoch": 0.39,
"learning_rate": 4.826018189007513e-05,
"loss": 0.469,
"step": 370
},
{
"epoch": 0.41,
"learning_rate": 4.806247528667458e-05,
"loss": 0.4072,
"step": 380
},
{
"epoch": 0.42,
"learning_rate": 4.786476868327402e-05,
"loss": 0.4476,
"step": 390
},
{
"epoch": 0.43,
"learning_rate": 4.766706207987347e-05,
"loss": 0.4469,
"step": 400
},
{
"epoch": 0.44,
"learning_rate": 4.746935547647291e-05,
"loss": 0.4529,
"step": 410
},
{
"epoch": 0.45,
"learning_rate": 4.727164887307236e-05,
"loss": 0.4699,
"step": 420
},
{
"epoch": 0.46,
"learning_rate": 4.707394226967181e-05,
"loss": 0.4249,
"step": 430
},
{
"epoch": 0.47,
"learning_rate": 4.687623566627126e-05,
"loss": 0.439,
"step": 440
},
{
"epoch": 0.48,
"learning_rate": 4.66785290628707e-05,
"loss": 0.4137,
"step": 450
},
{
"epoch": 0.49,
"learning_rate": 4.6480822459470146e-05,
"loss": 0.5142,
"step": 460
},
{
"epoch": 0.5,
"learning_rate": 4.6283115856069595e-05,
"loss": 0.4532,
"step": 470
},
{
"epoch": 0.51,
"learning_rate": 4.608540925266904e-05,
"loss": 0.4514,
"step": 480
},
{
"epoch": 0.52,
"learning_rate": 4.588770264926849e-05,
"loss": 0.4166,
"step": 490
},
{
"epoch": 0.53,
"learning_rate": 4.568999604586793e-05,
"loss": 0.4584,
"step": 500
},
{
"epoch": 0.54,
"learning_rate": 4.549228944246738e-05,
"loss": 0.3461,
"step": 510
},
{
"epoch": 0.55,
"learning_rate": 4.529458283906683e-05,
"loss": 0.4783,
"step": 520
},
{
"epoch": 0.57,
"learning_rate": 4.5096876235666277e-05,
"loss": 0.4474,
"step": 530
},
{
"epoch": 0.58,
"learning_rate": 4.489916963226572e-05,
"loss": 0.4403,
"step": 540
},
{
"epoch": 0.59,
"learning_rate": 4.4701463028865166e-05,
"loss": 0.3663,
"step": 550
},
{
"epoch": 0.6,
"learning_rate": 4.450375642546461e-05,
"loss": 0.3791,
"step": 560
},
{
"epoch": 0.61,
"learning_rate": 4.430604982206406e-05,
"loss": 0.4573,
"step": 570
},
{
"epoch": 0.62,
"learning_rate": 4.4108343218663504e-05,
"loss": 0.3962,
"step": 580
},
{
"epoch": 0.63,
"learning_rate": 4.391063661526295e-05,
"loss": 0.3705,
"step": 590
},
{
"epoch": 0.64,
"learning_rate": 4.371293001186239e-05,
"loss": 0.4082,
"step": 600
},
{
"epoch": 0.65,
"learning_rate": 4.351522340846185e-05,
"loss": 0.4173,
"step": 610
},
{
"epoch": 0.66,
"learning_rate": 4.331751680506129e-05,
"loss": 0.4122,
"step": 620
},
{
"epoch": 0.67,
"learning_rate": 4.311981020166074e-05,
"loss": 0.4279,
"step": 630
},
{
"epoch": 0.68,
"learning_rate": 4.2922103598260186e-05,
"loss": 0.3369,
"step": 640
},
{
"epoch": 0.69,
"learning_rate": 4.272439699485963e-05,
"loss": 0.3995,
"step": 650
},
{
"epoch": 0.7,
"learning_rate": 4.252669039145908e-05,
"loss": 0.3735,
"step": 660
},
{
"epoch": 0.71,
"learning_rate": 4.232898378805852e-05,
"loss": 0.356,
"step": 670
},
{
"epoch": 0.73,
"learning_rate": 4.213127718465797e-05,
"loss": 0.3741,
"step": 680
},
{
"epoch": 0.74,
"learning_rate": 4.193357058125741e-05,
"loss": 0.4291,
"step": 690
},
{
"epoch": 0.75,
"learning_rate": 4.173586397785687e-05,
"loss": 0.4473,
"step": 700
},
{
"epoch": 0.76,
"learning_rate": 4.153815737445631e-05,
"loss": 0.3914,
"step": 710
},
{
"epoch": 0.77,
"learning_rate": 4.134045077105576e-05,
"loss": 0.3241,
"step": 720
},
{
"epoch": 0.78,
"learning_rate": 4.11427441676552e-05,
"loss": 0.3986,
"step": 730
},
{
"epoch": 0.79,
"learning_rate": 4.0945037564254647e-05,
"loss": 0.4287,
"step": 740
},
{
"epoch": 0.8,
"learning_rate": 4.0747330960854095e-05,
"loss": 0.4011,
"step": 750
},
{
"epoch": 0.81,
"learning_rate": 4.054962435745354e-05,
"loss": 0.388,
"step": 760
},
{
"epoch": 0.82,
"learning_rate": 4.0351917754052984e-05,
"loss": 0.3904,
"step": 770
},
{
"epoch": 0.83,
"learning_rate": 4.015421115065243e-05,
"loss": 0.3528,
"step": 780
},
{
"epoch": 0.84,
"learning_rate": 3.995650454725188e-05,
"loss": 0.3805,
"step": 790
},
{
"epoch": 0.85,
"learning_rate": 3.975879794385133e-05,
"loss": 0.3698,
"step": 800
},
{
"epoch": 0.86,
"learning_rate": 3.956109134045078e-05,
"loss": 0.3454,
"step": 810
},
{
"epoch": 0.87,
"learning_rate": 3.936338473705022e-05,
"loss": 0.4112,
"step": 820
},
{
"epoch": 0.89,
"learning_rate": 3.9165678133649666e-05,
"loss": 0.344,
"step": 830
},
{
"epoch": 0.9,
"learning_rate": 3.8967971530249114e-05,
"loss": 0.3694,
"step": 840
},
{
"epoch": 0.91,
"learning_rate": 3.877026492684856e-05,
"loss": 0.3867,
"step": 850
},
{
"epoch": 0.92,
"learning_rate": 3.8572558323448004e-05,
"loss": 0.3531,
"step": 860
},
{
"epoch": 0.93,
"learning_rate": 3.837485172004745e-05,
"loss": 0.397,
"step": 870
},
{
"epoch": 0.94,
"learning_rate": 3.817714511664689e-05,
"loss": 0.3702,
"step": 880
},
{
"epoch": 0.95,
"learning_rate": 3.797943851324635e-05,
"loss": 0.3751,
"step": 890
},
{
"epoch": 0.96,
"learning_rate": 3.778173190984579e-05,
"loss": 0.3294,
"step": 900
},
{
"epoch": 0.97,
"learning_rate": 3.758402530644524e-05,
"loss": 0.3028,
"step": 910
},
{
"epoch": 0.98,
"learning_rate": 3.738631870304468e-05,
"loss": 0.4089,
"step": 920
},
{
"epoch": 0.99,
"learning_rate": 3.7188612099644134e-05,
"loss": 0.3376,
"step": 930
},
{
"epoch": 1.0,
"eval_accuracy": 0.9855,
"eval_loss": 0.044587597250938416,
"eval_runtime": 131.1221,
"eval_samples_per_second": 76.265,
"eval_steps_per_second": 4.767,
"step": 937
},
{
"epoch": 1.0,
"learning_rate": 3.6990905496243575e-05,
"loss": 0.4091,
"step": 940
},
{
"epoch": 1.01,
"learning_rate": 3.679319889284302e-05,
"loss": 0.3623,
"step": 950
},
{
"epoch": 1.02,
"learning_rate": 3.6595492289442465e-05,
"loss": 0.3532,
"step": 960
},
{
"epoch": 1.04,
"learning_rate": 3.639778568604191e-05,
"loss": 0.3457,
"step": 970
},
{
"epoch": 1.05,
"learning_rate": 3.620007908264136e-05,
"loss": 0.3359,
"step": 980
},
{
"epoch": 1.06,
"learning_rate": 3.600237247924081e-05,
"loss": 0.3162,
"step": 990
},
{
"epoch": 1.07,
"learning_rate": 3.580466587584026e-05,
"loss": 0.3081,
"step": 1000
},
{
"epoch": 1.08,
"learning_rate": 3.56069592724397e-05,
"loss": 0.4274,
"step": 1010
},
{
"epoch": 1.09,
"learning_rate": 3.540925266903915e-05,
"loss": 0.3806,
"step": 1020
},
{
"epoch": 1.1,
"learning_rate": 3.5211546065638595e-05,
"loss": 0.4025,
"step": 1030
},
{
"epoch": 1.11,
"learning_rate": 3.501383946223804e-05,
"loss": 0.3973,
"step": 1040
},
{
"epoch": 1.12,
"learning_rate": 3.4816132858837484e-05,
"loss": 0.3603,
"step": 1050
},
{
"epoch": 1.13,
"learning_rate": 3.461842625543693e-05,
"loss": 0.3805,
"step": 1060
},
{
"epoch": 1.14,
"learning_rate": 3.442071965203638e-05,
"loss": 0.3542,
"step": 1070
},
{
"epoch": 1.15,
"learning_rate": 3.422301304863583e-05,
"loss": 0.3334,
"step": 1080
},
{
"epoch": 1.16,
"learning_rate": 3.402530644523527e-05,
"loss": 0.3736,
"step": 1090
},
{
"epoch": 1.17,
"learning_rate": 3.382759984183472e-05,
"loss": 0.3524,
"step": 1100
},
{
"epoch": 1.18,
"learning_rate": 3.3629893238434166e-05,
"loss": 0.4211,
"step": 1110
},
{
"epoch": 1.2,
"learning_rate": 3.3432186635033614e-05,
"loss": 0.3224,
"step": 1120
},
{
"epoch": 1.21,
"learning_rate": 3.3234480031633056e-05,
"loss": 0.3446,
"step": 1130
},
{
"epoch": 1.22,
"learning_rate": 3.3036773428232504e-05,
"loss": 0.3062,
"step": 1140
},
{
"epoch": 1.23,
"learning_rate": 3.283906682483195e-05,
"loss": 0.3114,
"step": 1150
},
{
"epoch": 1.24,
"learning_rate": 3.26413602214314e-05,
"loss": 0.3816,
"step": 1160
},
{
"epoch": 1.25,
"learning_rate": 3.244365361803085e-05,
"loss": 0.3216,
"step": 1170
},
{
"epoch": 1.26,
"learning_rate": 3.224594701463029e-05,
"loss": 0.3618,
"step": 1180
},
{
"epoch": 1.27,
"learning_rate": 3.204824041122974e-05,
"loss": 0.3515,
"step": 1190
},
{
"epoch": 1.28,
"learning_rate": 3.185053380782918e-05,
"loss": 0.3023,
"step": 1200
},
{
"epoch": 1.29,
"learning_rate": 3.1652827204428634e-05,
"loss": 0.3348,
"step": 1210
},
{
"epoch": 1.3,
"learning_rate": 3.1455120601028075e-05,
"loss": 0.3047,
"step": 1220
},
{
"epoch": 1.31,
"learning_rate": 3.125741399762752e-05,
"loss": 0.4023,
"step": 1230
},
{
"epoch": 1.32,
"learning_rate": 3.1059707394226965e-05,
"loss": 0.3379,
"step": 1240
},
{
"epoch": 1.33,
"learning_rate": 3.086200079082642e-05,
"loss": 0.2975,
"step": 1250
},
{
"epoch": 1.34,
"learning_rate": 3.066429418742586e-05,
"loss": 0.3445,
"step": 1260
},
{
"epoch": 1.36,
"learning_rate": 3.046658758402531e-05,
"loss": 0.3401,
"step": 1270
},
{
"epoch": 1.37,
"learning_rate": 3.0268880980624754e-05,
"loss": 0.2998,
"step": 1280
},
{
"epoch": 1.38,
"learning_rate": 3.00711743772242e-05,
"loss": 0.289,
"step": 1290
},
{
"epoch": 1.39,
"learning_rate": 2.987346777382365e-05,
"loss": 0.3816,
"step": 1300
},
{
"epoch": 1.4,
"learning_rate": 2.9675761170423095e-05,
"loss": 0.3213,
"step": 1310
},
{
"epoch": 1.41,
"learning_rate": 2.947805456702254e-05,
"loss": 0.3604,
"step": 1320
},
{
"epoch": 1.42,
"learning_rate": 2.9280347963621984e-05,
"loss": 0.2953,
"step": 1330
},
{
"epoch": 1.43,
"learning_rate": 2.9082641360221436e-05,
"loss": 0.3474,
"step": 1340
},
{
"epoch": 1.44,
"learning_rate": 2.888493475682088e-05,
"loss": 0.3291,
"step": 1350
},
{
"epoch": 1.45,
"learning_rate": 2.8687228153420325e-05,
"loss": 0.3229,
"step": 1360
},
{
"epoch": 1.46,
"learning_rate": 2.848952155001977e-05,
"loss": 0.3118,
"step": 1370
},
{
"epoch": 1.47,
"learning_rate": 2.8291814946619215e-05,
"loss": 0.2957,
"step": 1380
},
{
"epoch": 1.48,
"learning_rate": 2.8094108343218666e-05,
"loss": 0.3061,
"step": 1390
},
{
"epoch": 1.49,
"learning_rate": 2.789640173981811e-05,
"loss": 0.3489,
"step": 1400
},
{
"epoch": 1.5,
"learning_rate": 2.7698695136417556e-05,
"loss": 0.3754,
"step": 1410
},
{
"epoch": 1.52,
"learning_rate": 2.7500988533017004e-05,
"loss": 0.3633,
"step": 1420
},
{
"epoch": 1.53,
"learning_rate": 2.7303281929616452e-05,
"loss": 0.3709,
"step": 1430
},
{
"epoch": 1.54,
"learning_rate": 2.71055753262159e-05,
"loss": 0.3616,
"step": 1440
},
{
"epoch": 1.55,
"learning_rate": 2.6907868722815345e-05,
"loss": 0.3712,
"step": 1450
},
{
"epoch": 1.56,
"learning_rate": 2.671016211941479e-05,
"loss": 0.2578,
"step": 1460
},
{
"epoch": 1.57,
"learning_rate": 2.6512455516014234e-05,
"loss": 0.3222,
"step": 1470
},
{
"epoch": 1.58,
"learning_rate": 2.6314748912613686e-05,
"loss": 0.323,
"step": 1480
},
{
"epoch": 1.59,
"learning_rate": 2.611704230921313e-05,
"loss": 0.3349,
"step": 1490
},
{
"epoch": 1.6,
"learning_rate": 2.5919335705812575e-05,
"loss": 0.3152,
"step": 1500
},
{
"epoch": 1.61,
"learning_rate": 2.572162910241202e-05,
"loss": 0.3529,
"step": 1510
},
{
"epoch": 1.62,
"learning_rate": 2.5523922499011465e-05,
"loss": 0.2843,
"step": 1520
},
{
"epoch": 1.63,
"learning_rate": 2.5326215895610916e-05,
"loss": 0.3035,
"step": 1530
},
{
"epoch": 1.64,
"learning_rate": 2.512850929221036e-05,
"loss": 0.3331,
"step": 1540
},
{
"epoch": 1.65,
"learning_rate": 2.4930802688809806e-05,
"loss": 0.4147,
"step": 1550
},
{
"epoch": 1.66,
"learning_rate": 2.4733096085409254e-05,
"loss": 0.2968,
"step": 1560
},
{
"epoch": 1.68,
"learning_rate": 2.45353894820087e-05,
"loss": 0.3199,
"step": 1570
},
{
"epoch": 1.69,
"learning_rate": 2.4337682878608147e-05,
"loss": 0.3143,
"step": 1580
},
{
"epoch": 1.7,
"learning_rate": 2.413997627520759e-05,
"loss": 0.3115,
"step": 1590
},
{
"epoch": 1.71,
"learning_rate": 2.394226967180704e-05,
"loss": 0.3396,
"step": 1600
},
{
"epoch": 1.72,
"learning_rate": 2.3744563068406488e-05,
"loss": 0.375,
"step": 1610
},
{
"epoch": 1.73,
"learning_rate": 2.3546856465005932e-05,
"loss": 0.3709,
"step": 1620
},
{
"epoch": 1.74,
"learning_rate": 2.334914986160538e-05,
"loss": 0.2714,
"step": 1630
},
{
"epoch": 1.75,
"learning_rate": 2.3151443258204825e-05,
"loss": 0.3532,
"step": 1640
},
{
"epoch": 1.76,
"learning_rate": 2.2953736654804273e-05,
"loss": 0.3799,
"step": 1650
},
{
"epoch": 1.77,
"learning_rate": 2.2756030051403718e-05,
"loss": 0.3241,
"step": 1660
},
{
"epoch": 1.78,
"learning_rate": 2.2558323448003166e-05,
"loss": 0.368,
"step": 1670
},
{
"epoch": 1.79,
"learning_rate": 2.236061684460261e-05,
"loss": 0.3099,
"step": 1680
},
{
"epoch": 1.8,
"learning_rate": 2.2162910241202056e-05,
"loss": 0.3517,
"step": 1690
},
{
"epoch": 1.81,
"learning_rate": 2.1965203637801504e-05,
"loss": 0.2803,
"step": 1700
},
{
"epoch": 1.82,
"learning_rate": 2.176749703440095e-05,
"loss": 0.3505,
"step": 1710
},
{
"epoch": 1.84,
"learning_rate": 2.1569790431000397e-05,
"loss": 0.3268,
"step": 1720
},
{
"epoch": 1.85,
"learning_rate": 2.137208382759984e-05,
"loss": 0.2774,
"step": 1730
},
{
"epoch": 1.86,
"learning_rate": 2.117437722419929e-05,
"loss": 0.3158,
"step": 1740
},
{
"epoch": 1.87,
"learning_rate": 2.0976670620798734e-05,
"loss": 0.3687,
"step": 1750
},
{
"epoch": 1.88,
"learning_rate": 2.0778964017398182e-05,
"loss": 0.3239,
"step": 1760
},
{
"epoch": 1.89,
"learning_rate": 2.058125741399763e-05,
"loss": 0.3351,
"step": 1770
},
{
"epoch": 1.9,
"learning_rate": 2.0383550810597075e-05,
"loss": 0.341,
"step": 1780
},
{
"epoch": 1.91,
"learning_rate": 2.0185844207196523e-05,
"loss": 0.2696,
"step": 1790
},
{
"epoch": 1.92,
"learning_rate": 1.9988137603795968e-05,
"loss": 0.3837,
"step": 1800
},
{
"epoch": 1.93,
"learning_rate": 1.9790431000395416e-05,
"loss": 0.3441,
"step": 1810
},
{
"epoch": 1.94,
"learning_rate": 1.959272439699486e-05,
"loss": 0.258,
"step": 1820
},
{
"epoch": 1.95,
"learning_rate": 1.9395017793594306e-05,
"loss": 0.3381,
"step": 1830
},
{
"epoch": 1.96,
"learning_rate": 1.9197311190193754e-05,
"loss": 0.2887,
"step": 1840
},
{
"epoch": 1.97,
"learning_rate": 1.89996045867932e-05,
"loss": 0.3031,
"step": 1850
},
{
"epoch": 1.98,
"learning_rate": 1.8801897983392647e-05,
"loss": 0.3168,
"step": 1860
},
{
"epoch": 2.0,
"learning_rate": 1.860419137999209e-05,
"loss": 0.318,
"step": 1870
},
{
"epoch": 2.0,
"eval_accuracy": 0.9916,
"eval_loss": 0.026178531348705292,
"eval_runtime": 131.1473,
"eval_samples_per_second": 76.25,
"eval_steps_per_second": 4.766,
"step": 1874
},
{
"epoch": 2.01,
"learning_rate": 1.840648477659154e-05,
"loss": 0.3821,
"step": 1880
},
{
"epoch": 2.02,
"learning_rate": 1.8208778173190984e-05,
"loss": 0.3132,
"step": 1890
},
{
"epoch": 2.03,
"learning_rate": 1.8011071569790432e-05,
"loss": 0.3423,
"step": 1900
},
{
"epoch": 2.04,
"learning_rate": 1.7813364966389877e-05,
"loss": 0.3407,
"step": 1910
},
{
"epoch": 2.05,
"learning_rate": 1.7615658362989322e-05,
"loss": 0.2871,
"step": 1920
},
{
"epoch": 2.06,
"learning_rate": 1.741795175958877e-05,
"loss": 0.2987,
"step": 1930
},
{
"epoch": 2.07,
"learning_rate": 1.7220245156188218e-05,
"loss": 0.3316,
"step": 1940
},
{
"epoch": 2.08,
"learning_rate": 1.7022538552787666e-05,
"loss": 0.263,
"step": 1950
},
{
"epoch": 2.09,
"learning_rate": 1.682483194938711e-05,
"loss": 0.2606,
"step": 1960
},
{
"epoch": 2.1,
"learning_rate": 1.662712534598656e-05,
"loss": 0.2799,
"step": 1970
},
{
"epoch": 2.11,
"learning_rate": 1.6429418742586004e-05,
"loss": 0.3024,
"step": 1980
},
{
"epoch": 2.12,
"learning_rate": 1.623171213918545e-05,
"loss": 0.2587,
"step": 1990
},
{
"epoch": 2.13,
"learning_rate": 1.6034005535784897e-05,
"loss": 0.2674,
"step": 2000
},
{
"epoch": 2.15,
"learning_rate": 1.583629893238434e-05,
"loss": 0.292,
"step": 2010
},
{
"epoch": 2.16,
"learning_rate": 1.563859232898379e-05,
"loss": 0.2998,
"step": 2020
},
{
"epoch": 2.17,
"learning_rate": 1.5440885725583234e-05,
"loss": 0.2867,
"step": 2030
},
{
"epoch": 2.18,
"learning_rate": 1.5243179122182682e-05,
"loss": 0.2567,
"step": 2040
},
{
"epoch": 2.19,
"learning_rate": 1.5045472518782127e-05,
"loss": 0.3127,
"step": 2050
},
{
"epoch": 2.2,
"learning_rate": 1.4847765915381575e-05,
"loss": 0.3159,
"step": 2060
},
{
"epoch": 2.21,
"learning_rate": 1.4650059311981022e-05,
"loss": 0.2666,
"step": 2070
},
{
"epoch": 2.22,
"learning_rate": 1.4452352708580466e-05,
"loss": 0.3152,
"step": 2080
},
{
"epoch": 2.23,
"learning_rate": 1.4254646105179915e-05,
"loss": 0.2772,
"step": 2090
},
{
"epoch": 2.24,
"learning_rate": 1.405693950177936e-05,
"loss": 0.3362,
"step": 2100
},
{
"epoch": 2.25,
"learning_rate": 1.3859232898378807e-05,
"loss": 0.2909,
"step": 2110
},
{
"epoch": 2.26,
"learning_rate": 1.3661526294978252e-05,
"loss": 0.3371,
"step": 2120
},
{
"epoch": 2.27,
"learning_rate": 1.34638196915777e-05,
"loss": 0.3314,
"step": 2130
},
{
"epoch": 2.28,
"learning_rate": 1.3266113088177145e-05,
"loss": 0.3195,
"step": 2140
},
{
"epoch": 2.29,
"learning_rate": 1.3068406484776591e-05,
"loss": 0.2603,
"step": 2150
},
{
"epoch": 2.31,
"learning_rate": 1.287069988137604e-05,
"loss": 0.2797,
"step": 2160
},
{
"epoch": 2.32,
"learning_rate": 1.2672993277975484e-05,
"loss": 0.2906,
"step": 2170
},
{
"epoch": 2.33,
"learning_rate": 1.2475286674574932e-05,
"loss": 0.342,
"step": 2180
},
{
"epoch": 2.34,
"learning_rate": 1.2277580071174377e-05,
"loss": 0.3176,
"step": 2190
},
{
"epoch": 2.35,
"learning_rate": 1.2079873467773824e-05,
"loss": 0.271,
"step": 2200
},
{
"epoch": 2.36,
"learning_rate": 1.188216686437327e-05,
"loss": 0.3251,
"step": 2210
},
{
"epoch": 2.37,
"learning_rate": 1.1684460260972716e-05,
"loss": 0.2421,
"step": 2220
},
{
"epoch": 2.38,
"learning_rate": 1.1486753657572163e-05,
"loss": 0.3071,
"step": 2230
},
{
"epoch": 2.39,
"learning_rate": 1.128904705417161e-05,
"loss": 0.3017,
"step": 2240
},
{
"epoch": 2.4,
"learning_rate": 1.1091340450771057e-05,
"loss": 0.3107,
"step": 2250
},
{
"epoch": 2.41,
"learning_rate": 1.0893633847370504e-05,
"loss": 0.2682,
"step": 2260
},
{
"epoch": 2.42,
"learning_rate": 1.0695927243969949e-05,
"loss": 0.3018,
"step": 2270
},
{
"epoch": 2.43,
"learning_rate": 1.0498220640569395e-05,
"loss": 0.3179,
"step": 2280
},
{
"epoch": 2.44,
"learning_rate": 1.0300514037168841e-05,
"loss": 0.3172,
"step": 2290
},
{
"epoch": 2.45,
"learning_rate": 1.0102807433768288e-05,
"loss": 0.3421,
"step": 2300
},
{
"epoch": 2.47,
"learning_rate": 9.905100830367734e-06,
"loss": 0.2955,
"step": 2310
},
{
"epoch": 2.48,
"learning_rate": 9.70739422696718e-06,
"loss": 0.3146,
"step": 2320
},
{
"epoch": 2.49,
"learning_rate": 9.509687623566627e-06,
"loss": 0.2883,
"step": 2330
},
{
"epoch": 2.5,
"learning_rate": 9.311981020166075e-06,
"loss": 0.3252,
"step": 2340
},
{
"epoch": 2.51,
"learning_rate": 9.11427441676552e-06,
"loss": 0.3297,
"step": 2350
},
{
"epoch": 2.52,
"learning_rate": 8.916567813364967e-06,
"loss": 0.3002,
"step": 2360
},
{
"epoch": 2.53,
"learning_rate": 8.718861209964413e-06,
"loss": 0.2354,
"step": 2370
},
{
"epoch": 2.54,
"learning_rate": 8.52115460656386e-06,
"loss": 0.2808,
"step": 2380
},
{
"epoch": 2.55,
"learning_rate": 8.323448003163306e-06,
"loss": 0.3158,
"step": 2390
},
{
"epoch": 2.56,
"learning_rate": 8.125741399762752e-06,
"loss": 0.3408,
"step": 2400
},
{
"epoch": 2.57,
"learning_rate": 7.928034796362199e-06,
"loss": 0.3253,
"step": 2410
},
{
"epoch": 2.58,
"learning_rate": 7.730328192961645e-06,
"loss": 0.2898,
"step": 2420
},
{
"epoch": 2.59,
"learning_rate": 7.532621589561091e-06,
"loss": 0.3144,
"step": 2430
},
{
"epoch": 2.6,
"learning_rate": 7.334914986160538e-06,
"loss": 0.3143,
"step": 2440
},
{
"epoch": 2.61,
"learning_rate": 7.137208382759984e-06,
"loss": 0.2758,
"step": 2450
},
{
"epoch": 2.63,
"learning_rate": 6.939501779359431e-06,
"loss": 0.2996,
"step": 2460
},
{
"epoch": 2.64,
"learning_rate": 6.741795175958877e-06,
"loss": 0.3002,
"step": 2470
},
{
"epoch": 2.65,
"learning_rate": 6.544088572558324e-06,
"loss": 0.3579,
"step": 2480
},
{
"epoch": 2.66,
"learning_rate": 6.346381969157771e-06,
"loss": 0.2395,
"step": 2490
},
{
"epoch": 2.67,
"learning_rate": 6.1486753657572165e-06,
"loss": 0.242,
"step": 2500
},
{
"epoch": 2.68,
"learning_rate": 5.950968762356663e-06,
"loss": 0.2256,
"step": 2510
},
{
"epoch": 2.69,
"learning_rate": 5.753262158956109e-06,
"loss": 0.2807,
"step": 2520
},
{
"epoch": 2.7,
"learning_rate": 5.555555555555556e-06,
"loss": 0.3264,
"step": 2530
},
{
"epoch": 2.71,
"learning_rate": 5.357848952155002e-06,
"loss": 0.323,
"step": 2540
},
{
"epoch": 2.72,
"learning_rate": 5.160142348754449e-06,
"loss": 0.2416,
"step": 2550
},
{
"epoch": 2.73,
"learning_rate": 4.962435745353895e-06,
"loss": 0.2796,
"step": 2560
},
{
"epoch": 2.74,
"learning_rate": 4.7647291419533415e-06,
"loss": 0.2513,
"step": 2570
},
{
"epoch": 2.75,
"learning_rate": 4.567022538552788e-06,
"loss": 0.2681,
"step": 2580
},
{
"epoch": 2.76,
"learning_rate": 4.369315935152234e-06,
"loss": 0.3121,
"step": 2590
},
{
"epoch": 2.77,
"learning_rate": 4.171609331751681e-06,
"loss": 0.2579,
"step": 2600
},
{
"epoch": 2.79,
"learning_rate": 3.973902728351126e-06,
"loss": 0.2899,
"step": 2610
},
{
"epoch": 2.8,
"learning_rate": 3.7761961249505733e-06,
"loss": 0.2676,
"step": 2620
},
{
"epoch": 2.81,
"learning_rate": 3.57848952155002e-06,
"loss": 0.2652,
"step": 2630
},
{
"epoch": 2.82,
"learning_rate": 3.3807829181494666e-06,
"loss": 0.2804,
"step": 2640
},
{
"epoch": 2.83,
"learning_rate": 3.1830763147489126e-06,
"loss": 0.2689,
"step": 2650
},
{
"epoch": 2.84,
"learning_rate": 2.985369711348359e-06,
"loss": 0.2918,
"step": 2660
},
{
"epoch": 2.85,
"learning_rate": 2.787663107947806e-06,
"loss": 0.2602,
"step": 2670
},
{
"epoch": 2.86,
"learning_rate": 2.589956504547252e-06,
"loss": 0.2595,
"step": 2680
},
{
"epoch": 2.87,
"learning_rate": 2.3922499011466983e-06,
"loss": 0.268,
"step": 2690
},
{
"epoch": 2.88,
"learning_rate": 2.1945432977461447e-06,
"loss": 0.238,
"step": 2700
},
{
"epoch": 2.89,
"learning_rate": 1.996836694345591e-06,
"loss": 0.2787,
"step": 2710
},
{
"epoch": 2.9,
"learning_rate": 1.7991300909450376e-06,
"loss": 0.2723,
"step": 2720
},
{
"epoch": 2.91,
"learning_rate": 1.6014234875444842e-06,
"loss": 0.2974,
"step": 2730
},
{
"epoch": 2.92,
"learning_rate": 1.4037168841439304e-06,
"loss": 0.2928,
"step": 2740
},
{
"epoch": 2.93,
"learning_rate": 1.2060102807433769e-06,
"loss": 0.2526,
"step": 2750
},
{
"epoch": 2.95,
"learning_rate": 1.0083036773428233e-06,
"loss": 0.2957,
"step": 2760
},
{
"epoch": 2.96,
"learning_rate": 8.105970739422697e-07,
"loss": 0.2813,
"step": 2770
},
{
"epoch": 2.97,
"learning_rate": 6.12890470541716e-07,
"loss": 0.3048,
"step": 2780
},
{
"epoch": 2.98,
"learning_rate": 4.151838671411625e-07,
"loss": 0.2735,
"step": 2790
},
{
"epoch": 2.99,
"learning_rate": 2.1747726374060896e-07,
"loss": 0.2681,
"step": 2800
},
{
"epoch": 3.0,
"learning_rate": 1.977066034005536e-08,
"loss": 0.2374,
"step": 2810
},
{
"epoch": 3.0,
"eval_accuracy": 0.9935,
"eval_loss": 0.02018115483224392,
"eval_runtime": 131.0943,
"eval_samples_per_second": 76.281,
"eval_steps_per_second": 4.768,
"step": 2811
},
{
"epoch": 3.0,
"step": 2811,
"total_flos": 1.3941117625304089e+19,
"train_loss": 0.3983713053842746,
"train_runtime": 6676.1657,
"train_samples_per_second": 26.962,
"train_steps_per_second": 0.421
}
],
"max_steps": 2811,
"num_train_epochs": 3,
"total_flos": 1.3941117625304089e+19,
"trial_name": null,
"trial_params": null
}