{ "best_metric": 0.9935, "best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuned-mnist/checkpoint-2811", "epoch": 2.9994666666666667, "global_step": 2811, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.7730496453900712e-06, "loss": 2.3843, "step": 10 }, { "epoch": 0.02, "learning_rate": 3.5460992907801423e-06, "loss": 2.27, "step": 20 }, { "epoch": 0.03, "learning_rate": 5.319148936170213e-06, "loss": 2.1311, "step": 30 }, { "epoch": 0.04, "learning_rate": 7.092198581560285e-06, "loss": 1.9981, "step": 40 }, { "epoch": 0.05, "learning_rate": 8.865248226950355e-06, "loss": 1.761, "step": 50 }, { "epoch": 0.06, "learning_rate": 1.0638297872340426e-05, "loss": 1.4946, "step": 60 }, { "epoch": 0.07, "learning_rate": 1.2411347517730498e-05, "loss": 1.2439, "step": 70 }, { "epoch": 0.09, "learning_rate": 1.418439716312057e-05, "loss": 1.0823, "step": 80 }, { "epoch": 0.1, "learning_rate": 1.595744680851064e-05, "loss": 0.9241, "step": 90 }, { "epoch": 0.11, "learning_rate": 1.773049645390071e-05, "loss": 0.8283, "step": 100 }, { "epoch": 0.12, "learning_rate": 1.950354609929078e-05, "loss": 0.8041, "step": 110 }, { "epoch": 0.13, "learning_rate": 2.1276595744680852e-05, "loss": 0.6482, "step": 120 }, { "epoch": 0.14, "learning_rate": 2.3049645390070924e-05, "loss": 0.6448, "step": 130 }, { "epoch": 0.15, "learning_rate": 2.4822695035460995e-05, "loss": 0.5983, "step": 140 }, { "epoch": 0.16, "learning_rate": 2.6595744680851064e-05, "loss": 0.5754, "step": 150 }, { "epoch": 0.17, "learning_rate": 2.836879432624114e-05, "loss": 0.5485, "step": 160 }, { "epoch": 0.18, "learning_rate": 3.0141843971631207e-05, "loss": 0.5681, "step": 170 }, { "epoch": 0.19, "learning_rate": 3.191489361702128e-05, "loss": 0.48, "step": 180 }, { "epoch": 0.2, "learning_rate": 3.3687943262411347e-05, "loss": 0.5461, "step": 190 }, { "epoch": 0.21, "learning_rate": 3.546099290780142e-05, "loss": 0.5761, "step": 200 }, { "epoch": 0.22, "learning_rate": 3.723404255319149e-05, "loss": 0.4957, "step": 210 }, { "epoch": 0.23, "learning_rate": 3.900709219858156e-05, "loss": 0.4965, "step": 220 }, { "epoch": 0.25, "learning_rate": 4.078014184397163e-05, "loss": 0.473, "step": 230 }, { "epoch": 0.26, "learning_rate": 4.2553191489361704e-05, "loss": 0.4948, "step": 240 }, { "epoch": 0.27, "learning_rate": 4.432624113475177e-05, "loss": 0.5289, "step": 250 }, { "epoch": 0.28, "learning_rate": 4.609929078014185e-05, "loss": 0.4895, "step": 260 }, { "epoch": 0.29, "learning_rate": 4.787234042553192e-05, "loss": 0.5204, "step": 270 }, { "epoch": 0.3, "learning_rate": 4.964539007092199e-05, "loss": 0.5589, "step": 280 }, { "epoch": 0.31, "learning_rate": 4.984183471727956e-05, "loss": 0.4661, "step": 290 }, { "epoch": 0.32, "learning_rate": 4.964412811387901e-05, "loss": 0.4783, "step": 300 }, { "epoch": 0.33, "learning_rate": 4.944642151047845e-05, "loss": 0.5219, "step": 310 }, { "epoch": 0.34, "learning_rate": 4.92487149070779e-05, "loss": 0.5128, "step": 320 }, { "epoch": 0.35, "learning_rate": 4.905100830367734e-05, "loss": 0.4896, "step": 330 }, { "epoch": 0.36, "learning_rate": 4.8853301700276796e-05, "loss": 0.4123, "step": 340 }, { "epoch": 0.37, "learning_rate": 4.865559509687624e-05, "loss": 0.4216, "step": 350 }, { "epoch": 0.38, "learning_rate": 4.8457888493475686e-05, "loss": 0.4611, "step": 360 }, { "epoch": 0.39, "learning_rate": 4.826018189007513e-05, "loss": 0.469, "step": 370 }, { "epoch": 0.41, "learning_rate": 4.806247528667458e-05, "loss": 0.4072, "step": 380 }, { "epoch": 0.42, "learning_rate": 4.786476868327402e-05, "loss": 0.4476, "step": 390 }, { "epoch": 0.43, "learning_rate": 4.766706207987347e-05, "loss": 0.4469, "step": 400 }, { "epoch": 0.44, "learning_rate": 4.746935547647291e-05, "loss": 0.4529, "step": 410 }, { "epoch": 0.45, "learning_rate": 4.727164887307236e-05, "loss": 0.4699, "step": 420 }, { "epoch": 0.46, "learning_rate": 4.707394226967181e-05, "loss": 0.4249, "step": 430 }, { "epoch": 0.47, "learning_rate": 4.687623566627126e-05, "loss": 0.439, "step": 440 }, { "epoch": 0.48, "learning_rate": 4.66785290628707e-05, "loss": 0.4137, "step": 450 }, { "epoch": 0.49, "learning_rate": 4.6480822459470146e-05, "loss": 0.5142, "step": 460 }, { "epoch": 0.5, "learning_rate": 4.6283115856069595e-05, "loss": 0.4532, "step": 470 }, { "epoch": 0.51, "learning_rate": 4.608540925266904e-05, "loss": 0.4514, "step": 480 }, { "epoch": 0.52, "learning_rate": 4.588770264926849e-05, "loss": 0.4166, "step": 490 }, { "epoch": 0.53, "learning_rate": 4.568999604586793e-05, "loss": 0.4584, "step": 500 }, { "epoch": 0.54, "learning_rate": 4.549228944246738e-05, "loss": 0.3461, "step": 510 }, { "epoch": 0.55, "learning_rate": 4.529458283906683e-05, "loss": 0.4783, "step": 520 }, { "epoch": 0.57, "learning_rate": 4.5096876235666277e-05, "loss": 0.4474, "step": 530 }, { "epoch": 0.58, "learning_rate": 4.489916963226572e-05, "loss": 0.4403, "step": 540 }, { "epoch": 0.59, "learning_rate": 4.4701463028865166e-05, "loss": 0.3663, "step": 550 }, { "epoch": 0.6, "learning_rate": 4.450375642546461e-05, "loss": 0.3791, "step": 560 }, { "epoch": 0.61, "learning_rate": 4.430604982206406e-05, "loss": 0.4573, "step": 570 }, { "epoch": 0.62, "learning_rate": 4.4108343218663504e-05, "loss": 0.3962, "step": 580 }, { "epoch": 0.63, "learning_rate": 4.391063661526295e-05, "loss": 0.3705, "step": 590 }, { "epoch": 0.64, "learning_rate": 4.371293001186239e-05, "loss": 0.4082, "step": 600 }, { "epoch": 0.65, "learning_rate": 4.351522340846185e-05, "loss": 0.4173, "step": 610 }, { "epoch": 0.66, "learning_rate": 4.331751680506129e-05, "loss": 0.4122, "step": 620 }, { "epoch": 0.67, "learning_rate": 4.311981020166074e-05, "loss": 0.4279, "step": 630 }, { "epoch": 0.68, "learning_rate": 4.2922103598260186e-05, "loss": 0.3369, "step": 640 }, { "epoch": 0.69, "learning_rate": 4.272439699485963e-05, "loss": 0.3995, "step": 650 }, { "epoch": 0.7, "learning_rate": 4.252669039145908e-05, "loss": 0.3735, "step": 660 }, { "epoch": 0.71, "learning_rate": 4.232898378805852e-05, "loss": 0.356, "step": 670 }, { "epoch": 0.73, "learning_rate": 4.213127718465797e-05, "loss": 0.3741, "step": 680 }, { "epoch": 0.74, "learning_rate": 4.193357058125741e-05, "loss": 0.4291, "step": 690 }, { "epoch": 0.75, "learning_rate": 4.173586397785687e-05, "loss": 0.4473, "step": 700 }, { "epoch": 0.76, "learning_rate": 4.153815737445631e-05, "loss": 0.3914, "step": 710 }, { "epoch": 0.77, "learning_rate": 4.134045077105576e-05, "loss": 0.3241, "step": 720 }, { "epoch": 0.78, "learning_rate": 4.11427441676552e-05, "loss": 0.3986, "step": 730 }, { "epoch": 0.79, "learning_rate": 4.0945037564254647e-05, "loss": 0.4287, "step": 740 }, { "epoch": 0.8, "learning_rate": 4.0747330960854095e-05, "loss": 0.4011, "step": 750 }, { "epoch": 0.81, "learning_rate": 4.054962435745354e-05, "loss": 0.388, "step": 760 }, { "epoch": 0.82, "learning_rate": 4.0351917754052984e-05, "loss": 0.3904, "step": 770 }, { "epoch": 0.83, "learning_rate": 4.015421115065243e-05, "loss": 0.3528, "step": 780 }, { "epoch": 0.84, "learning_rate": 3.995650454725188e-05, "loss": 0.3805, "step": 790 }, { "epoch": 0.85, "learning_rate": 3.975879794385133e-05, "loss": 0.3698, "step": 800 }, { "epoch": 0.86, "learning_rate": 3.956109134045078e-05, "loss": 0.3454, "step": 810 }, { "epoch": 0.87, "learning_rate": 3.936338473705022e-05, "loss": 0.4112, "step": 820 }, { "epoch": 0.89, "learning_rate": 3.9165678133649666e-05, "loss": 0.344, "step": 830 }, { "epoch": 0.9, "learning_rate": 3.8967971530249114e-05, "loss": 0.3694, "step": 840 }, { "epoch": 0.91, "learning_rate": 3.877026492684856e-05, "loss": 0.3867, "step": 850 }, { "epoch": 0.92, "learning_rate": 3.8572558323448004e-05, "loss": 0.3531, "step": 860 }, { "epoch": 0.93, "learning_rate": 3.837485172004745e-05, "loss": 0.397, "step": 870 }, { "epoch": 0.94, "learning_rate": 3.817714511664689e-05, "loss": 0.3702, "step": 880 }, { "epoch": 0.95, "learning_rate": 3.797943851324635e-05, "loss": 0.3751, "step": 890 }, { "epoch": 0.96, "learning_rate": 3.778173190984579e-05, "loss": 0.3294, "step": 900 }, { "epoch": 0.97, "learning_rate": 3.758402530644524e-05, "loss": 0.3028, "step": 910 }, { "epoch": 0.98, "learning_rate": 3.738631870304468e-05, "loss": 0.4089, "step": 920 }, { "epoch": 0.99, "learning_rate": 3.7188612099644134e-05, "loss": 0.3376, "step": 930 }, { "epoch": 1.0, "eval_accuracy": 0.9855, "eval_loss": 0.044587597250938416, "eval_runtime": 131.1221, "eval_samples_per_second": 76.265, "eval_steps_per_second": 4.767, "step": 937 }, { "epoch": 1.0, "learning_rate": 3.6990905496243575e-05, "loss": 0.4091, "step": 940 }, { "epoch": 1.01, "learning_rate": 3.679319889284302e-05, "loss": 0.3623, "step": 950 }, { "epoch": 1.02, "learning_rate": 3.6595492289442465e-05, "loss": 0.3532, "step": 960 }, { "epoch": 1.04, "learning_rate": 3.639778568604191e-05, "loss": 0.3457, "step": 970 }, { "epoch": 1.05, "learning_rate": 3.620007908264136e-05, "loss": 0.3359, "step": 980 }, { "epoch": 1.06, "learning_rate": 3.600237247924081e-05, "loss": 0.3162, "step": 990 }, { "epoch": 1.07, "learning_rate": 3.580466587584026e-05, "loss": 0.3081, "step": 1000 }, { "epoch": 1.08, "learning_rate": 3.56069592724397e-05, "loss": 0.4274, "step": 1010 }, { "epoch": 1.09, "learning_rate": 3.540925266903915e-05, "loss": 0.3806, "step": 1020 }, { "epoch": 1.1, "learning_rate": 3.5211546065638595e-05, "loss": 0.4025, "step": 1030 }, { "epoch": 1.11, "learning_rate": 3.501383946223804e-05, "loss": 0.3973, "step": 1040 }, { "epoch": 1.12, "learning_rate": 3.4816132858837484e-05, "loss": 0.3603, "step": 1050 }, { "epoch": 1.13, "learning_rate": 3.461842625543693e-05, "loss": 0.3805, "step": 1060 }, { "epoch": 1.14, "learning_rate": 3.442071965203638e-05, "loss": 0.3542, "step": 1070 }, { "epoch": 1.15, "learning_rate": 3.422301304863583e-05, "loss": 0.3334, "step": 1080 }, { "epoch": 1.16, "learning_rate": 3.402530644523527e-05, "loss": 0.3736, "step": 1090 }, { "epoch": 1.17, "learning_rate": 3.382759984183472e-05, "loss": 0.3524, "step": 1100 }, { "epoch": 1.18, "learning_rate": 3.3629893238434166e-05, "loss": 0.4211, "step": 1110 }, { "epoch": 1.2, "learning_rate": 3.3432186635033614e-05, "loss": 0.3224, "step": 1120 }, { "epoch": 1.21, "learning_rate": 3.3234480031633056e-05, "loss": 0.3446, "step": 1130 }, { "epoch": 1.22, "learning_rate": 3.3036773428232504e-05, "loss": 0.3062, "step": 1140 }, { "epoch": 1.23, "learning_rate": 3.283906682483195e-05, "loss": 0.3114, "step": 1150 }, { "epoch": 1.24, "learning_rate": 3.26413602214314e-05, "loss": 0.3816, "step": 1160 }, { "epoch": 1.25, "learning_rate": 3.244365361803085e-05, "loss": 0.3216, "step": 1170 }, { "epoch": 1.26, "learning_rate": 3.224594701463029e-05, "loss": 0.3618, "step": 1180 }, { "epoch": 1.27, "learning_rate": 3.204824041122974e-05, "loss": 0.3515, "step": 1190 }, { "epoch": 1.28, "learning_rate": 3.185053380782918e-05, "loss": 0.3023, "step": 1200 }, { "epoch": 1.29, "learning_rate": 3.1652827204428634e-05, "loss": 0.3348, "step": 1210 }, { "epoch": 1.3, "learning_rate": 3.1455120601028075e-05, "loss": 0.3047, "step": 1220 }, { "epoch": 1.31, "learning_rate": 3.125741399762752e-05, "loss": 0.4023, "step": 1230 }, { "epoch": 1.32, "learning_rate": 3.1059707394226965e-05, "loss": 0.3379, "step": 1240 }, { "epoch": 1.33, "learning_rate": 3.086200079082642e-05, "loss": 0.2975, "step": 1250 }, { "epoch": 1.34, "learning_rate": 3.066429418742586e-05, "loss": 0.3445, "step": 1260 }, { "epoch": 1.36, "learning_rate": 3.046658758402531e-05, "loss": 0.3401, "step": 1270 }, { "epoch": 1.37, "learning_rate": 3.0268880980624754e-05, "loss": 0.2998, "step": 1280 }, { "epoch": 1.38, "learning_rate": 3.00711743772242e-05, "loss": 0.289, "step": 1290 }, { "epoch": 1.39, "learning_rate": 2.987346777382365e-05, "loss": 0.3816, "step": 1300 }, { "epoch": 1.4, "learning_rate": 2.9675761170423095e-05, "loss": 0.3213, "step": 1310 }, { "epoch": 1.41, "learning_rate": 2.947805456702254e-05, "loss": 0.3604, "step": 1320 }, { "epoch": 1.42, "learning_rate": 2.9280347963621984e-05, "loss": 0.2953, "step": 1330 }, { "epoch": 1.43, "learning_rate": 2.9082641360221436e-05, "loss": 0.3474, "step": 1340 }, { "epoch": 1.44, "learning_rate": 2.888493475682088e-05, "loss": 0.3291, "step": 1350 }, { "epoch": 1.45, "learning_rate": 2.8687228153420325e-05, "loss": 0.3229, "step": 1360 }, { "epoch": 1.46, "learning_rate": 2.848952155001977e-05, "loss": 0.3118, "step": 1370 }, { "epoch": 1.47, "learning_rate": 2.8291814946619215e-05, "loss": 0.2957, "step": 1380 }, { "epoch": 1.48, "learning_rate": 2.8094108343218666e-05, "loss": 0.3061, "step": 1390 }, { "epoch": 1.49, "learning_rate": 2.789640173981811e-05, "loss": 0.3489, "step": 1400 }, { "epoch": 1.5, "learning_rate": 2.7698695136417556e-05, "loss": 0.3754, "step": 1410 }, { "epoch": 1.52, "learning_rate": 2.7500988533017004e-05, "loss": 0.3633, "step": 1420 }, { "epoch": 1.53, "learning_rate": 2.7303281929616452e-05, "loss": 0.3709, "step": 1430 }, { "epoch": 1.54, "learning_rate": 2.71055753262159e-05, "loss": 0.3616, "step": 1440 }, { "epoch": 1.55, "learning_rate": 2.6907868722815345e-05, "loss": 0.3712, "step": 1450 }, { "epoch": 1.56, "learning_rate": 2.671016211941479e-05, "loss": 0.2578, "step": 1460 }, { "epoch": 1.57, "learning_rate": 2.6512455516014234e-05, "loss": 0.3222, "step": 1470 }, { "epoch": 1.58, "learning_rate": 2.6314748912613686e-05, "loss": 0.323, "step": 1480 }, { "epoch": 1.59, "learning_rate": 2.611704230921313e-05, "loss": 0.3349, "step": 1490 }, { "epoch": 1.6, "learning_rate": 2.5919335705812575e-05, "loss": 0.3152, "step": 1500 }, { "epoch": 1.61, "learning_rate": 2.572162910241202e-05, "loss": 0.3529, "step": 1510 }, { "epoch": 1.62, "learning_rate": 2.5523922499011465e-05, "loss": 0.2843, "step": 1520 }, { "epoch": 1.63, "learning_rate": 2.5326215895610916e-05, "loss": 0.3035, "step": 1530 }, { "epoch": 1.64, "learning_rate": 2.512850929221036e-05, "loss": 0.3331, "step": 1540 }, { "epoch": 1.65, "learning_rate": 2.4930802688809806e-05, "loss": 0.4147, "step": 1550 }, { "epoch": 1.66, "learning_rate": 2.4733096085409254e-05, "loss": 0.2968, "step": 1560 }, { "epoch": 1.68, "learning_rate": 2.45353894820087e-05, "loss": 0.3199, "step": 1570 }, { "epoch": 1.69, "learning_rate": 2.4337682878608147e-05, "loss": 0.3143, "step": 1580 }, { "epoch": 1.7, "learning_rate": 2.413997627520759e-05, "loss": 0.3115, "step": 1590 }, { "epoch": 1.71, "learning_rate": 2.394226967180704e-05, "loss": 0.3396, "step": 1600 }, { "epoch": 1.72, "learning_rate": 2.3744563068406488e-05, "loss": 0.375, "step": 1610 }, { "epoch": 1.73, "learning_rate": 2.3546856465005932e-05, "loss": 0.3709, "step": 1620 }, { "epoch": 1.74, "learning_rate": 2.334914986160538e-05, "loss": 0.2714, "step": 1630 }, { "epoch": 1.75, "learning_rate": 2.3151443258204825e-05, "loss": 0.3532, "step": 1640 }, { "epoch": 1.76, "learning_rate": 2.2953736654804273e-05, "loss": 0.3799, "step": 1650 }, { "epoch": 1.77, "learning_rate": 2.2756030051403718e-05, "loss": 0.3241, "step": 1660 }, { "epoch": 1.78, "learning_rate": 2.2558323448003166e-05, "loss": 0.368, "step": 1670 }, { "epoch": 1.79, "learning_rate": 2.236061684460261e-05, "loss": 0.3099, "step": 1680 }, { "epoch": 1.8, "learning_rate": 2.2162910241202056e-05, "loss": 0.3517, "step": 1690 }, { "epoch": 1.81, "learning_rate": 2.1965203637801504e-05, "loss": 0.2803, "step": 1700 }, { "epoch": 1.82, "learning_rate": 2.176749703440095e-05, "loss": 0.3505, "step": 1710 }, { "epoch": 1.84, "learning_rate": 2.1569790431000397e-05, "loss": 0.3268, "step": 1720 }, { "epoch": 1.85, "learning_rate": 2.137208382759984e-05, "loss": 0.2774, "step": 1730 }, { "epoch": 1.86, "learning_rate": 2.117437722419929e-05, "loss": 0.3158, "step": 1740 }, { "epoch": 1.87, "learning_rate": 2.0976670620798734e-05, "loss": 0.3687, "step": 1750 }, { "epoch": 1.88, "learning_rate": 2.0778964017398182e-05, "loss": 0.3239, "step": 1760 }, { "epoch": 1.89, "learning_rate": 2.058125741399763e-05, "loss": 0.3351, "step": 1770 }, { "epoch": 1.9, "learning_rate": 2.0383550810597075e-05, "loss": 0.341, "step": 1780 }, { "epoch": 1.91, "learning_rate": 2.0185844207196523e-05, "loss": 0.2696, "step": 1790 }, { "epoch": 1.92, "learning_rate": 1.9988137603795968e-05, "loss": 0.3837, "step": 1800 }, { "epoch": 1.93, "learning_rate": 1.9790431000395416e-05, "loss": 0.3441, "step": 1810 }, { "epoch": 1.94, "learning_rate": 1.959272439699486e-05, "loss": 0.258, "step": 1820 }, { "epoch": 1.95, "learning_rate": 1.9395017793594306e-05, "loss": 0.3381, "step": 1830 }, { "epoch": 1.96, "learning_rate": 1.9197311190193754e-05, "loss": 0.2887, "step": 1840 }, { "epoch": 1.97, "learning_rate": 1.89996045867932e-05, "loss": 0.3031, "step": 1850 }, { "epoch": 1.98, "learning_rate": 1.8801897983392647e-05, "loss": 0.3168, "step": 1860 }, { "epoch": 2.0, "learning_rate": 1.860419137999209e-05, "loss": 0.318, "step": 1870 }, { "epoch": 2.0, "eval_accuracy": 0.9916, "eval_loss": 0.026178531348705292, "eval_runtime": 131.1473, "eval_samples_per_second": 76.25, "eval_steps_per_second": 4.766, "step": 1874 }, { "epoch": 2.01, "learning_rate": 1.840648477659154e-05, "loss": 0.3821, "step": 1880 }, { "epoch": 2.02, "learning_rate": 1.8208778173190984e-05, "loss": 0.3132, "step": 1890 }, { "epoch": 2.03, "learning_rate": 1.8011071569790432e-05, "loss": 0.3423, "step": 1900 }, { "epoch": 2.04, "learning_rate": 1.7813364966389877e-05, "loss": 0.3407, "step": 1910 }, { "epoch": 2.05, "learning_rate": 1.7615658362989322e-05, "loss": 0.2871, "step": 1920 }, { "epoch": 2.06, "learning_rate": 1.741795175958877e-05, "loss": 0.2987, "step": 1930 }, { "epoch": 2.07, "learning_rate": 1.7220245156188218e-05, "loss": 0.3316, "step": 1940 }, { "epoch": 2.08, "learning_rate": 1.7022538552787666e-05, "loss": 0.263, "step": 1950 }, { "epoch": 2.09, "learning_rate": 1.682483194938711e-05, "loss": 0.2606, "step": 1960 }, { "epoch": 2.1, "learning_rate": 1.662712534598656e-05, "loss": 0.2799, "step": 1970 }, { "epoch": 2.11, "learning_rate": 1.6429418742586004e-05, "loss": 0.3024, "step": 1980 }, { "epoch": 2.12, "learning_rate": 1.623171213918545e-05, "loss": 0.2587, "step": 1990 }, { "epoch": 2.13, "learning_rate": 1.6034005535784897e-05, "loss": 0.2674, "step": 2000 }, { "epoch": 2.15, "learning_rate": 1.583629893238434e-05, "loss": 0.292, "step": 2010 }, { "epoch": 2.16, "learning_rate": 1.563859232898379e-05, "loss": 0.2998, "step": 2020 }, { "epoch": 2.17, "learning_rate": 1.5440885725583234e-05, "loss": 0.2867, "step": 2030 }, { "epoch": 2.18, "learning_rate": 1.5243179122182682e-05, "loss": 0.2567, "step": 2040 }, { "epoch": 2.19, "learning_rate": 1.5045472518782127e-05, "loss": 0.3127, "step": 2050 }, { "epoch": 2.2, "learning_rate": 1.4847765915381575e-05, "loss": 0.3159, "step": 2060 }, { "epoch": 2.21, "learning_rate": 1.4650059311981022e-05, "loss": 0.2666, "step": 2070 }, { "epoch": 2.22, "learning_rate": 1.4452352708580466e-05, "loss": 0.3152, "step": 2080 }, { "epoch": 2.23, "learning_rate": 1.4254646105179915e-05, "loss": 0.2772, "step": 2090 }, { "epoch": 2.24, "learning_rate": 1.405693950177936e-05, "loss": 0.3362, "step": 2100 }, { "epoch": 2.25, "learning_rate": 1.3859232898378807e-05, "loss": 0.2909, "step": 2110 }, { "epoch": 2.26, "learning_rate": 1.3661526294978252e-05, "loss": 0.3371, "step": 2120 }, { "epoch": 2.27, "learning_rate": 1.34638196915777e-05, "loss": 0.3314, "step": 2130 }, { "epoch": 2.28, "learning_rate": 1.3266113088177145e-05, "loss": 0.3195, "step": 2140 }, { "epoch": 2.29, "learning_rate": 1.3068406484776591e-05, "loss": 0.2603, "step": 2150 }, { "epoch": 2.31, "learning_rate": 1.287069988137604e-05, "loss": 0.2797, "step": 2160 }, { "epoch": 2.32, "learning_rate": 1.2672993277975484e-05, "loss": 0.2906, "step": 2170 }, { "epoch": 2.33, "learning_rate": 1.2475286674574932e-05, "loss": 0.342, "step": 2180 }, { "epoch": 2.34, "learning_rate": 1.2277580071174377e-05, "loss": 0.3176, "step": 2190 }, { "epoch": 2.35, "learning_rate": 1.2079873467773824e-05, "loss": 0.271, "step": 2200 }, { "epoch": 2.36, "learning_rate": 1.188216686437327e-05, "loss": 0.3251, "step": 2210 }, { "epoch": 2.37, "learning_rate": 1.1684460260972716e-05, "loss": 0.2421, "step": 2220 }, { "epoch": 2.38, "learning_rate": 1.1486753657572163e-05, "loss": 0.3071, "step": 2230 }, { "epoch": 2.39, "learning_rate": 1.128904705417161e-05, "loss": 0.3017, "step": 2240 }, { "epoch": 2.4, "learning_rate": 1.1091340450771057e-05, "loss": 0.3107, "step": 2250 }, { "epoch": 2.41, "learning_rate": 1.0893633847370504e-05, "loss": 0.2682, "step": 2260 }, { "epoch": 2.42, "learning_rate": 1.0695927243969949e-05, "loss": 0.3018, "step": 2270 }, { "epoch": 2.43, "learning_rate": 1.0498220640569395e-05, "loss": 0.3179, "step": 2280 }, { "epoch": 2.44, "learning_rate": 1.0300514037168841e-05, "loss": 0.3172, "step": 2290 }, { "epoch": 2.45, "learning_rate": 1.0102807433768288e-05, "loss": 0.3421, "step": 2300 }, { "epoch": 2.47, "learning_rate": 9.905100830367734e-06, "loss": 0.2955, "step": 2310 }, { "epoch": 2.48, "learning_rate": 9.70739422696718e-06, "loss": 0.3146, "step": 2320 }, { "epoch": 2.49, "learning_rate": 9.509687623566627e-06, "loss": 0.2883, "step": 2330 }, { "epoch": 2.5, "learning_rate": 9.311981020166075e-06, "loss": 0.3252, "step": 2340 }, { "epoch": 2.51, "learning_rate": 9.11427441676552e-06, "loss": 0.3297, "step": 2350 }, { "epoch": 2.52, "learning_rate": 8.916567813364967e-06, "loss": 0.3002, "step": 2360 }, { "epoch": 2.53, "learning_rate": 8.718861209964413e-06, "loss": 0.2354, "step": 2370 }, { "epoch": 2.54, "learning_rate": 8.52115460656386e-06, "loss": 0.2808, "step": 2380 }, { "epoch": 2.55, "learning_rate": 8.323448003163306e-06, "loss": 0.3158, "step": 2390 }, { "epoch": 2.56, "learning_rate": 8.125741399762752e-06, "loss": 0.3408, "step": 2400 }, { "epoch": 2.57, "learning_rate": 7.928034796362199e-06, "loss": 0.3253, "step": 2410 }, { "epoch": 2.58, "learning_rate": 7.730328192961645e-06, "loss": 0.2898, "step": 2420 }, { "epoch": 2.59, "learning_rate": 7.532621589561091e-06, "loss": 0.3144, "step": 2430 }, { "epoch": 2.6, "learning_rate": 7.334914986160538e-06, "loss": 0.3143, "step": 2440 }, { "epoch": 2.61, "learning_rate": 7.137208382759984e-06, "loss": 0.2758, "step": 2450 }, { "epoch": 2.63, "learning_rate": 6.939501779359431e-06, "loss": 0.2996, "step": 2460 }, { "epoch": 2.64, "learning_rate": 6.741795175958877e-06, "loss": 0.3002, "step": 2470 }, { "epoch": 2.65, "learning_rate": 6.544088572558324e-06, "loss": 0.3579, "step": 2480 }, { "epoch": 2.66, "learning_rate": 6.346381969157771e-06, "loss": 0.2395, "step": 2490 }, { "epoch": 2.67, "learning_rate": 6.1486753657572165e-06, "loss": 0.242, "step": 2500 }, { "epoch": 2.68, "learning_rate": 5.950968762356663e-06, "loss": 0.2256, "step": 2510 }, { "epoch": 2.69, "learning_rate": 5.753262158956109e-06, "loss": 0.2807, "step": 2520 }, { "epoch": 2.7, "learning_rate": 5.555555555555556e-06, "loss": 0.3264, "step": 2530 }, { "epoch": 2.71, "learning_rate": 5.357848952155002e-06, "loss": 0.323, "step": 2540 }, { "epoch": 2.72, "learning_rate": 5.160142348754449e-06, "loss": 0.2416, "step": 2550 }, { "epoch": 2.73, "learning_rate": 4.962435745353895e-06, "loss": 0.2796, "step": 2560 }, { "epoch": 2.74, "learning_rate": 4.7647291419533415e-06, "loss": 0.2513, "step": 2570 }, { "epoch": 2.75, "learning_rate": 4.567022538552788e-06, "loss": 0.2681, "step": 2580 }, { "epoch": 2.76, "learning_rate": 4.369315935152234e-06, "loss": 0.3121, "step": 2590 }, { "epoch": 2.77, "learning_rate": 4.171609331751681e-06, "loss": 0.2579, "step": 2600 }, { "epoch": 2.79, "learning_rate": 3.973902728351126e-06, "loss": 0.2899, "step": 2610 }, { "epoch": 2.8, "learning_rate": 3.7761961249505733e-06, "loss": 0.2676, "step": 2620 }, { "epoch": 2.81, "learning_rate": 3.57848952155002e-06, "loss": 0.2652, "step": 2630 }, { "epoch": 2.82, "learning_rate": 3.3807829181494666e-06, "loss": 0.2804, "step": 2640 }, { "epoch": 2.83, "learning_rate": 3.1830763147489126e-06, "loss": 0.2689, "step": 2650 }, { "epoch": 2.84, "learning_rate": 2.985369711348359e-06, "loss": 0.2918, "step": 2660 }, { "epoch": 2.85, "learning_rate": 2.787663107947806e-06, "loss": 0.2602, "step": 2670 }, { "epoch": 2.86, "learning_rate": 2.589956504547252e-06, "loss": 0.2595, "step": 2680 }, { "epoch": 2.87, "learning_rate": 2.3922499011466983e-06, "loss": 0.268, "step": 2690 }, { "epoch": 2.88, "learning_rate": 2.1945432977461447e-06, "loss": 0.238, "step": 2700 }, { "epoch": 2.89, "learning_rate": 1.996836694345591e-06, "loss": 0.2787, "step": 2710 }, { "epoch": 2.9, "learning_rate": 1.7991300909450376e-06, "loss": 0.2723, "step": 2720 }, { "epoch": 2.91, "learning_rate": 1.6014234875444842e-06, "loss": 0.2974, "step": 2730 }, { "epoch": 2.92, "learning_rate": 1.4037168841439304e-06, "loss": 0.2928, "step": 2740 }, { "epoch": 2.93, "learning_rate": 1.2060102807433769e-06, "loss": 0.2526, "step": 2750 }, { "epoch": 2.95, "learning_rate": 1.0083036773428233e-06, "loss": 0.2957, "step": 2760 }, { "epoch": 2.96, "learning_rate": 8.105970739422697e-07, "loss": 0.2813, "step": 2770 }, { "epoch": 2.97, "learning_rate": 6.12890470541716e-07, "loss": 0.3048, "step": 2780 }, { "epoch": 2.98, "learning_rate": 4.151838671411625e-07, "loss": 0.2735, "step": 2790 }, { "epoch": 2.99, "learning_rate": 2.1747726374060896e-07, "loss": 0.2681, "step": 2800 }, { "epoch": 3.0, "learning_rate": 1.977066034005536e-08, "loss": 0.2374, "step": 2810 }, { "epoch": 3.0, "eval_accuracy": 0.9935, "eval_loss": 0.02018115483224392, "eval_runtime": 131.0943, "eval_samples_per_second": 76.281, "eval_steps_per_second": 4.768, "step": 2811 }, { "epoch": 3.0, "step": 2811, "total_flos": 1.3941117625304089e+19, "train_loss": 0.3983713053842746, "train_runtime": 6676.1657, "train_samples_per_second": 26.962, "train_steps_per_second": 0.421 } ], "max_steps": 2811, "num_train_epochs": 3, "total_flos": 1.3941117625304089e+19, "trial_name": null, "trial_params": null }