{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.6260504201680672, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.0000000000000003e-06, "loss": 4.6151, "step": 10 }, { "epoch": 0.02, "learning_rate": 4.000000000000001e-06, "loss": 4.6009, "step": 20 }, { "epoch": 0.03, "learning_rate": 6e-06, "loss": 4.5892, "step": 30 }, { "epoch": 0.04, "learning_rate": 8.000000000000001e-06, "loss": 4.4933, "step": 40 }, { "epoch": 0.05, "learning_rate": 1e-05, "loss": 4.4838, "step": 50 }, { "epoch": 0.06, "learning_rate": 1.2e-05, "loss": 4.0367, "step": 60 }, { "epoch": 0.07, "learning_rate": 1.4e-05, "loss": 3.6247, "step": 70 }, { "epoch": 0.08, "learning_rate": 1.6000000000000003e-05, "loss": 3.329, "step": 80 }, { "epoch": 0.09, "learning_rate": 1.8e-05, "loss": 3.1007, "step": 90 }, { "epoch": 0.11, "learning_rate": 2e-05, "loss": 2.8089, "step": 100 }, { "epoch": 0.12, "learning_rate": 1.992743105950653e-05, "loss": 2.5354, "step": 110 }, { "epoch": 0.13, "learning_rate": 1.9854862119013064e-05, "loss": 2.2403, "step": 120 }, { "epoch": 0.14, "learning_rate": 1.9782293178519596e-05, "loss": 2.7885, "step": 130 }, { "epoch": 0.15, "learning_rate": 1.9709724238026125e-05, "loss": 2.4836, "step": 140 }, { "epoch": 0.16, "learning_rate": 1.9637155297532658e-05, "loss": 2.3569, "step": 150 }, { "epoch": 0.17, "learning_rate": 1.9564586357039187e-05, "loss": 2.4299, "step": 160 }, { "epoch": 0.18, "learning_rate": 1.949201741654572e-05, "loss": 2.0131, "step": 170 }, { "epoch": 0.19, "learning_rate": 1.9419448476052253e-05, "loss": 1.8533, "step": 180 }, { "epoch": 0.2, "learning_rate": 1.9346879535558782e-05, "loss": 2.0193, "step": 190 }, { "epoch": 0.21, "learning_rate": 1.9274310595065315e-05, "loss": 2.0059, "step": 200 }, { "epoch": 0.22, "learning_rate": 1.9201741654571844e-05, "loss": 1.6821, "step": 210 }, { "epoch": 0.23, "learning_rate": 1.9129172714078376e-05, "loss": 1.8958, "step": 220 }, { "epoch": 0.24, "learning_rate": 1.905660377358491e-05, "loss": 1.9811, "step": 230 }, { "epoch": 0.25, "learning_rate": 1.898403483309144e-05, "loss": 2.0177, "step": 240 }, { "epoch": 0.26, "learning_rate": 1.8911465892597968e-05, "loss": 1.9885, "step": 250 }, { "epoch": 0.27, "learning_rate": 1.88388969521045e-05, "loss": 1.8754, "step": 260 }, { "epoch": 0.28, "learning_rate": 1.8766328011611033e-05, "loss": 1.5953, "step": 270 }, { "epoch": 0.29, "learning_rate": 1.8693759071117562e-05, "loss": 1.9287, "step": 280 }, { "epoch": 0.3, "learning_rate": 1.8621190130624095e-05, "loss": 1.7173, "step": 290 }, { "epoch": 0.32, "learning_rate": 1.8548621190130624e-05, "loss": 1.8167, "step": 300 }, { "epoch": 0.33, "learning_rate": 1.8476052249637157e-05, "loss": 1.3269, "step": 310 }, { "epoch": 0.34, "learning_rate": 1.840348330914369e-05, "loss": 1.5738, "step": 320 }, { "epoch": 0.35, "learning_rate": 1.833091436865022e-05, "loss": 1.5026, "step": 330 }, { "epoch": 0.36, "learning_rate": 1.825834542815675e-05, "loss": 1.6312, "step": 340 }, { "epoch": 0.37, "learning_rate": 1.8185776487663284e-05, "loss": 1.8379, "step": 350 }, { "epoch": 0.38, "learning_rate": 1.8113207547169813e-05, "loss": 1.242, "step": 360 }, { "epoch": 0.39, "learning_rate": 1.8040638606676342e-05, "loss": 1.5145, "step": 370 }, { "epoch": 0.4, "learning_rate": 1.7968069666182875e-05, "loss": 1.326, "step": 380 }, { "epoch": 0.41, "learning_rate": 1.7895500725689404e-05, "loss": 1.2579, "step": 390 }, { "epoch": 0.42, "learning_rate": 1.7822931785195937e-05, "loss": 1.4427, "step": 400 }, { "epoch": 0.43, "learning_rate": 1.775036284470247e-05, "loss": 1.2246, "step": 410 }, { "epoch": 0.44, "learning_rate": 1.7677793904209e-05, "loss": 1.3649, "step": 420 }, { "epoch": 0.45, "learning_rate": 1.760522496371553e-05, "loss": 1.5865, "step": 430 }, { "epoch": 0.46, "learning_rate": 1.7532656023222064e-05, "loss": 1.0269, "step": 440 }, { "epoch": 0.47, "learning_rate": 1.7460087082728593e-05, "loss": 1.2287, "step": 450 }, { "epoch": 0.48, "learning_rate": 1.7387518142235126e-05, "loss": 1.231, "step": 460 }, { "epoch": 0.49, "learning_rate": 1.7314949201741655e-05, "loss": 1.4882, "step": 470 }, { "epoch": 0.5, "learning_rate": 1.7242380261248185e-05, "loss": 1.3311, "step": 480 }, { "epoch": 0.51, "learning_rate": 1.716981132075472e-05, "loss": 0.8074, "step": 490 }, { "epoch": 0.53, "learning_rate": 1.709724238026125e-05, "loss": 1.3885, "step": 500 }, { "epoch": 0.54, "learning_rate": 1.702467343976778e-05, "loss": 0.7269, "step": 510 }, { "epoch": 0.55, "learning_rate": 1.6952104499274312e-05, "loss": 1.0517, "step": 520 }, { "epoch": 0.56, "learning_rate": 1.6879535558780844e-05, "loss": 1.0979, "step": 530 }, { "epoch": 0.57, "learning_rate": 1.6806966618287374e-05, "loss": 1.2165, "step": 540 }, { "epoch": 0.58, "learning_rate": 1.6734397677793906e-05, "loss": 0.9271, "step": 550 }, { "epoch": 0.59, "learning_rate": 1.6661828737300436e-05, "loss": 1.1317, "step": 560 }, { "epoch": 0.6, "learning_rate": 1.6589259796806968e-05, "loss": 1.1018, "step": 570 }, { "epoch": 0.61, "learning_rate": 1.65166908563135e-05, "loss": 1.054, "step": 580 }, { "epoch": 0.62, "learning_rate": 1.644412191582003e-05, "loss": 1.0754, "step": 590 }, { "epoch": 0.63, "learning_rate": 1.6371552975326563e-05, "loss": 0.8159, "step": 600 }, { "epoch": 0.64, "learning_rate": 1.6298984034833092e-05, "loss": 0.929, "step": 610 }, { "epoch": 0.65, "learning_rate": 1.6226415094339625e-05, "loss": 1.1673, "step": 620 }, { "epoch": 0.66, "learning_rate": 1.6153846153846154e-05, "loss": 0.8993, "step": 630 }, { "epoch": 0.67, "learning_rate": 1.6081277213352687e-05, "loss": 0.8426, "step": 640 }, { "epoch": 0.68, "learning_rate": 1.6008708272859216e-05, "loss": 0.9685, "step": 650 }, { "epoch": 0.69, "learning_rate": 1.593613933236575e-05, "loss": 0.7475, "step": 660 }, { "epoch": 0.7, "learning_rate": 1.586357039187228e-05, "loss": 0.4813, "step": 670 }, { "epoch": 0.71, "learning_rate": 1.579100145137881e-05, "loss": 0.8771, "step": 680 }, { "epoch": 0.72, "learning_rate": 1.5718432510885343e-05, "loss": 0.8842, "step": 690 }, { "epoch": 0.74, "learning_rate": 1.5645863570391872e-05, "loss": 1.0122, "step": 700 }, { "epoch": 0.75, "learning_rate": 1.5573294629898405e-05, "loss": 0.6247, "step": 710 }, { "epoch": 0.76, "learning_rate": 1.5500725689404938e-05, "loss": 0.9389, "step": 720 }, { "epoch": 0.77, "learning_rate": 1.5428156748911467e-05, "loss": 0.3921, "step": 730 }, { "epoch": 0.78, "learning_rate": 1.5355587808417996e-05, "loss": 0.695, "step": 740 }, { "epoch": 0.79, "learning_rate": 1.5283018867924532e-05, "loss": 0.4541, "step": 750 }, { "epoch": 0.8, "learning_rate": 1.5210449927431061e-05, "loss": 0.6469, "step": 760 }, { "epoch": 0.81, "learning_rate": 1.5137880986937592e-05, "loss": 0.6391, "step": 770 }, { "epoch": 0.82, "learning_rate": 1.5065312046444123e-05, "loss": 0.6501, "step": 780 }, { "epoch": 0.83, "learning_rate": 1.4992743105950653e-05, "loss": 0.9943, "step": 790 }, { "epoch": 0.84, "learning_rate": 1.4920174165457187e-05, "loss": 0.5541, "step": 800 }, { "epoch": 0.85, "learning_rate": 1.4847605224963716e-05, "loss": 0.7708, "step": 810 }, { "epoch": 0.86, "learning_rate": 1.4775036284470247e-05, "loss": 0.9068, "step": 820 }, { "epoch": 0.87, "learning_rate": 1.4702467343976778e-05, "loss": 0.8202, "step": 830 }, { "epoch": 0.88, "learning_rate": 1.462989840348331e-05, "loss": 0.6628, "step": 840 }, { "epoch": 0.89, "learning_rate": 1.4557329462989842e-05, "loss": 0.768, "step": 850 }, { "epoch": 0.9, "learning_rate": 1.4484760522496373e-05, "loss": 0.829, "step": 860 }, { "epoch": 0.91, "learning_rate": 1.4412191582002904e-05, "loss": 0.9561, "step": 870 }, { "epoch": 0.92, "learning_rate": 1.4339622641509435e-05, "loss": 0.6783, "step": 880 }, { "epoch": 0.93, "learning_rate": 1.4267053701015967e-05, "loss": 0.7017, "step": 890 }, { "epoch": 0.95, "learning_rate": 1.4194484760522498e-05, "loss": 0.6462, "step": 900 }, { "epoch": 0.96, "learning_rate": 1.4121915820029029e-05, "loss": 0.648, "step": 910 }, { "epoch": 0.97, "learning_rate": 1.4049346879535558e-05, "loss": 0.4731, "step": 920 }, { "epoch": 0.98, "learning_rate": 1.3976777939042093e-05, "loss": 0.6257, "step": 930 }, { "epoch": 0.99, "learning_rate": 1.3904208998548622e-05, "loss": 0.4245, "step": 940 }, { "epoch": 1.0, "learning_rate": 1.3831640058055153e-05, "loss": 0.687, "step": 950 }, { "epoch": 1.0, "eval_Accuracy": 0.9114854517611026, "eval_F1": 0.8950475194731157, "eval_loss": 0.5604398846626282, "eval_runtime": 98.5377, "eval_samples_per_second": 33.135, "eval_steps_per_second": 2.08, "step": 952 }, { "epoch": 1.01, "learning_rate": 1.3759071117561684e-05, "loss": 0.3493, "step": 960 }, { "epoch": 1.02, "learning_rate": 1.3686502177068215e-05, "loss": 0.9263, "step": 970 }, { "epoch": 1.03, "learning_rate": 1.3613933236574748e-05, "loss": 0.4981, "step": 980 }, { "epoch": 1.04, "learning_rate": 1.3541364296081278e-05, "loss": 0.4574, "step": 990 }, { "epoch": 1.05, "learning_rate": 1.346879535558781e-05, "loss": 0.6115, "step": 1000 }, { "epoch": 1.06, "learning_rate": 1.339622641509434e-05, "loss": 0.5973, "step": 1010 }, { "epoch": 1.07, "learning_rate": 1.3323657474600873e-05, "loss": 0.4881, "step": 1020 }, { "epoch": 1.08, "learning_rate": 1.3251088534107404e-05, "loss": 0.4824, "step": 1030 }, { "epoch": 1.09, "learning_rate": 1.3178519593613935e-05, "loss": 0.7372, "step": 1040 }, { "epoch": 1.1, "learning_rate": 1.3105950653120464e-05, "loss": 0.7515, "step": 1050 }, { "epoch": 1.11, "learning_rate": 1.3033381712626995e-05, "loss": 0.446, "step": 1060 }, { "epoch": 1.12, "learning_rate": 1.2960812772133528e-05, "loss": 0.6326, "step": 1070 }, { "epoch": 1.13, "learning_rate": 1.2888243831640059e-05, "loss": 0.7661, "step": 1080 }, { "epoch": 1.14, "learning_rate": 1.281567489114659e-05, "loss": 0.4974, "step": 1090 }, { "epoch": 1.16, "learning_rate": 1.274310595065312e-05, "loss": 0.7276, "step": 1100 }, { "epoch": 1.17, "learning_rate": 1.2670537010159653e-05, "loss": 0.3897, "step": 1110 }, { "epoch": 1.18, "learning_rate": 1.2597968069666184e-05, "loss": 0.7384, "step": 1120 }, { "epoch": 1.19, "learning_rate": 1.2525399129172715e-05, "loss": 0.7007, "step": 1130 }, { "epoch": 1.2, "learning_rate": 1.2452830188679246e-05, "loss": 0.6739, "step": 1140 }, { "epoch": 1.21, "learning_rate": 1.2380261248185777e-05, "loss": 0.5027, "step": 1150 }, { "epoch": 1.22, "learning_rate": 1.230769230769231e-05, "loss": 0.5931, "step": 1160 }, { "epoch": 1.23, "learning_rate": 1.223512336719884e-05, "loss": 0.3967, "step": 1170 }, { "epoch": 1.24, "learning_rate": 1.216255442670537e-05, "loss": 0.5215, "step": 1180 }, { "epoch": 1.25, "learning_rate": 1.2089985486211901e-05, "loss": 0.5034, "step": 1190 }, { "epoch": 1.26, "learning_rate": 1.2017416545718435e-05, "loss": 0.4521, "step": 1200 }, { "epoch": 1.27, "learning_rate": 1.1944847605224965e-05, "loss": 0.255, "step": 1210 }, { "epoch": 1.28, "learning_rate": 1.1872278664731495e-05, "loss": 0.2349, "step": 1220 }, { "epoch": 1.29, "learning_rate": 1.1799709724238026e-05, "loss": 0.6059, "step": 1230 }, { "epoch": 1.3, "learning_rate": 1.1727140783744557e-05, "loss": 0.3443, "step": 1240 }, { "epoch": 1.31, "learning_rate": 1.165457184325109e-05, "loss": 0.3367, "step": 1250 }, { "epoch": 1.32, "learning_rate": 1.1582002902757621e-05, "loss": 0.4613, "step": 1260 }, { "epoch": 1.33, "learning_rate": 1.1509433962264152e-05, "loss": 0.3399, "step": 1270 }, { "epoch": 1.34, "learning_rate": 1.1436865021770683e-05, "loss": 0.5677, "step": 1280 }, { "epoch": 1.36, "learning_rate": 1.1364296081277216e-05, "loss": 0.8075, "step": 1290 }, { "epoch": 1.37, "learning_rate": 1.1291727140783746e-05, "loss": 0.4384, "step": 1300 }, { "epoch": 1.38, "learning_rate": 1.1219158200290277e-05, "loss": 0.3861, "step": 1310 }, { "epoch": 1.39, "learning_rate": 1.1146589259796807e-05, "loss": 0.1245, "step": 1320 }, { "epoch": 1.4, "learning_rate": 1.1074020319303338e-05, "loss": 0.6668, "step": 1330 }, { "epoch": 1.41, "learning_rate": 1.100145137880987e-05, "loss": 0.7036, "step": 1340 }, { "epoch": 1.42, "learning_rate": 1.0928882438316401e-05, "loss": 0.205, "step": 1350 }, { "epoch": 1.43, "learning_rate": 1.0856313497822932e-05, "loss": 0.588, "step": 1360 }, { "epoch": 1.44, "learning_rate": 1.0783744557329463e-05, "loss": 0.7293, "step": 1370 }, { "epoch": 1.45, "learning_rate": 1.0711175616835996e-05, "loss": 0.602, "step": 1380 }, { "epoch": 1.46, "learning_rate": 1.0638606676342527e-05, "loss": 0.3027, "step": 1390 }, { "epoch": 1.47, "learning_rate": 1.0566037735849058e-05, "loss": 0.4086, "step": 1400 }, { "epoch": 1.48, "learning_rate": 1.0493468795355589e-05, "loss": 0.4706, "step": 1410 }, { "epoch": 1.49, "learning_rate": 1.0420899854862121e-05, "loss": 0.2685, "step": 1420 }, { "epoch": 1.5, "learning_rate": 1.0348330914368652e-05, "loss": 0.4058, "step": 1430 }, { "epoch": 1.51, "learning_rate": 1.0275761973875183e-05, "loss": 0.4807, "step": 1440 }, { "epoch": 1.52, "learning_rate": 1.0203193033381712e-05, "loss": 0.3557, "step": 1450 }, { "epoch": 1.53, "learning_rate": 1.0130624092888243e-05, "loss": 0.3329, "step": 1460 }, { "epoch": 1.54, "learning_rate": 1.0058055152394776e-05, "loss": 0.2153, "step": 1470 }, { "epoch": 1.55, "learning_rate": 9.985486211901307e-06, "loss": 0.7515, "step": 1480 }, { "epoch": 1.57, "learning_rate": 9.912917271407838e-06, "loss": 0.3258, "step": 1490 }, { "epoch": 1.58, "learning_rate": 9.84034833091437e-06, "loss": 0.1792, "step": 1500 }, { "epoch": 1.59, "learning_rate": 9.7677793904209e-06, "loss": 0.5118, "step": 1510 }, { "epoch": 1.6, "learning_rate": 9.69521044992743e-06, "loss": 0.3737, "step": 1520 }, { "epoch": 1.61, "learning_rate": 9.622641509433963e-06, "loss": 0.3137, "step": 1530 }, { "epoch": 1.62, "learning_rate": 9.550072568940494e-06, "loss": 0.473, "step": 1540 }, { "epoch": 1.63, "learning_rate": 9.477503628447025e-06, "loss": 0.5268, "step": 1550 }, { "epoch": 1.64, "learning_rate": 9.404934687953556e-06, "loss": 0.4378, "step": 1560 }, { "epoch": 1.65, "learning_rate": 9.332365747460089e-06, "loss": 0.4031, "step": 1570 }, { "epoch": 1.66, "learning_rate": 9.259796806966618e-06, "loss": 0.7125, "step": 1580 }, { "epoch": 1.67, "learning_rate": 9.187227866473151e-06, "loss": 0.3404, "step": 1590 }, { "epoch": 1.68, "learning_rate": 9.114658925979682e-06, "loss": 0.4025, "step": 1600 }, { "epoch": 1.69, "learning_rate": 9.042089985486213e-06, "loss": 0.6217, "step": 1610 }, { "epoch": 1.7, "learning_rate": 8.969521044992744e-06, "loss": 0.4097, "step": 1620 }, { "epoch": 1.71, "learning_rate": 8.896952104499275e-06, "loss": 0.4337, "step": 1630 }, { "epoch": 1.72, "learning_rate": 8.824383164005806e-06, "loss": 0.2964, "step": 1640 }, { "epoch": 1.73, "learning_rate": 8.751814223512337e-06, "loss": 0.5013, "step": 1650 }, { "epoch": 1.74, "learning_rate": 8.67924528301887e-06, "loss": 0.6303, "step": 1660 }, { "epoch": 1.75, "learning_rate": 8.6066763425254e-06, "loss": 0.3938, "step": 1670 }, { "epoch": 1.76, "learning_rate": 8.534107402031931e-06, "loss": 0.1885, "step": 1680 }, { "epoch": 1.78, "learning_rate": 8.461538461538462e-06, "loss": 0.3159, "step": 1690 }, { "epoch": 1.79, "learning_rate": 8.388969521044995e-06, "loss": 0.5674, "step": 1700 }, { "epoch": 1.8, "learning_rate": 8.316400580551524e-06, "loss": 0.1875, "step": 1710 }, { "epoch": 1.81, "learning_rate": 8.243831640058055e-06, "loss": 0.4107, "step": 1720 }, { "epoch": 1.82, "learning_rate": 8.171262699564588e-06, "loss": 0.693, "step": 1730 }, { "epoch": 1.83, "learning_rate": 8.098693759071119e-06, "loss": 0.5109, "step": 1740 }, { "epoch": 1.84, "learning_rate": 8.02612481857765e-06, "loss": 0.3853, "step": 1750 }, { "epoch": 1.85, "learning_rate": 7.95355587808418e-06, "loss": 0.4673, "step": 1760 }, { "epoch": 1.86, "learning_rate": 7.880986937590711e-06, "loss": 0.2653, "step": 1770 }, { "epoch": 1.87, "learning_rate": 7.808417997097242e-06, "loss": 0.3816, "step": 1780 }, { "epoch": 1.88, "learning_rate": 7.735849056603775e-06, "loss": 0.2759, "step": 1790 }, { "epoch": 1.89, "learning_rate": 7.663280116110306e-06, "loss": 0.2267, "step": 1800 }, { "epoch": 1.9, "learning_rate": 7.590711175616836e-06, "loss": 0.3894, "step": 1810 }, { "epoch": 1.91, "learning_rate": 7.518142235123368e-06, "loss": 0.1902, "step": 1820 }, { "epoch": 1.92, "learning_rate": 7.445573294629899e-06, "loss": 0.3042, "step": 1830 }, { "epoch": 1.93, "learning_rate": 7.373004354136431e-06, "loss": 0.2981, "step": 1840 }, { "epoch": 1.94, "learning_rate": 7.300435413642961e-06, "loss": 0.2297, "step": 1850 }, { "epoch": 1.95, "learning_rate": 7.227866473149493e-06, "loss": 0.2528, "step": 1860 }, { "epoch": 1.96, "learning_rate": 7.1552975326560235e-06, "loss": 0.2361, "step": 1870 }, { "epoch": 1.97, "learning_rate": 7.082728592162555e-06, "loss": 0.327, "step": 1880 }, { "epoch": 1.99, "learning_rate": 7.010159651669086e-06, "loss": 0.2838, "step": 1890 }, { "epoch": 2.0, "learning_rate": 6.937590711175617e-06, "loss": 0.3027, "step": 1900 }, { "epoch": 2.0, "eval_Accuracy": 0.935375191424196, "eval_F1": 0.9231380335717158, "eval_loss": 0.42070162296295166, "eval_runtime": 98.699, "eval_samples_per_second": 33.08, "eval_steps_per_second": 2.077, "step": 1904 }, { "epoch": 2.01, "learning_rate": 6.865021770682149e-06, "loss": 0.3852, "step": 1910 }, { "epoch": 2.02, "learning_rate": 6.792452830188679e-06, "loss": 0.4178, "step": 1920 }, { "epoch": 2.03, "learning_rate": 6.719883889695211e-06, "loss": 0.4089, "step": 1930 }, { "epoch": 2.04, "learning_rate": 6.647314949201742e-06, "loss": 0.3711, "step": 1940 }, { "epoch": 2.05, "learning_rate": 6.574746008708274e-06, "loss": 0.4505, "step": 1950 }, { "epoch": 2.06, "learning_rate": 6.502177068214805e-06, "loss": 0.2162, "step": 1960 }, { "epoch": 2.07, "learning_rate": 6.429608127721336e-06, "loss": 0.2023, "step": 1970 }, { "epoch": 2.08, "learning_rate": 6.357039187227867e-06, "loss": 0.2912, "step": 1980 }, { "epoch": 2.09, "learning_rate": 6.2844702467343975e-06, "loss": 0.4556, "step": 1990 }, { "epoch": 2.1, "learning_rate": 6.211901306240929e-06, "loss": 0.3712, "step": 2000 }, { "epoch": 2.11, "learning_rate": 6.13933236574746e-06, "loss": 0.407, "step": 2010 }, { "epoch": 2.12, "learning_rate": 6.066763425253992e-06, "loss": 0.3238, "step": 2020 }, { "epoch": 2.13, "learning_rate": 5.994194484760523e-06, "loss": 0.118, "step": 2030 }, { "epoch": 2.14, "learning_rate": 5.921625544267055e-06, "loss": 0.1698, "step": 2040 }, { "epoch": 2.15, "learning_rate": 5.849056603773585e-06, "loss": 0.3557, "step": 2050 }, { "epoch": 2.16, "learning_rate": 5.776487663280117e-06, "loss": 0.2857, "step": 2060 }, { "epoch": 2.17, "learning_rate": 5.703918722786648e-06, "loss": 0.6379, "step": 2070 }, { "epoch": 2.18, "learning_rate": 5.6313497822931794e-06, "loss": 0.7017, "step": 2080 }, { "epoch": 2.2, "learning_rate": 5.55878084179971e-06, "loss": 0.2319, "step": 2090 }, { "epoch": 2.21, "learning_rate": 5.486211901306241e-06, "loss": 0.2963, "step": 2100 }, { "epoch": 2.22, "learning_rate": 5.413642960812773e-06, "loss": 0.284, "step": 2110 }, { "epoch": 2.23, "learning_rate": 5.341074020319303e-06, "loss": 0.2335, "step": 2120 }, { "epoch": 2.24, "learning_rate": 5.268505079825835e-06, "loss": 0.3575, "step": 2130 }, { "epoch": 2.25, "learning_rate": 5.195936139332366e-06, "loss": 0.3061, "step": 2140 }, { "epoch": 2.26, "learning_rate": 5.123367198838898e-06, "loss": 0.2509, "step": 2150 }, { "epoch": 2.27, "learning_rate": 5.050798258345429e-06, "loss": 0.669, "step": 2160 }, { "epoch": 2.28, "learning_rate": 4.97822931785196e-06, "loss": 0.3276, "step": 2170 }, { "epoch": 2.29, "learning_rate": 4.905660377358491e-06, "loss": 0.2792, "step": 2180 }, { "epoch": 2.3, "learning_rate": 4.8330914368650224e-06, "loss": 0.3574, "step": 2190 }, { "epoch": 2.31, "learning_rate": 4.760522496371553e-06, "loss": 0.1847, "step": 2200 }, { "epoch": 2.32, "learning_rate": 4.687953555878084e-06, "loss": 0.3664, "step": 2210 }, { "epoch": 2.33, "learning_rate": 4.615384615384616e-06, "loss": 0.313, "step": 2220 }, { "epoch": 2.34, "learning_rate": 4.542815674891147e-06, "loss": 0.4165, "step": 2230 }, { "epoch": 2.35, "learning_rate": 4.470246734397678e-06, "loss": 0.2627, "step": 2240 }, { "epoch": 2.36, "learning_rate": 4.397677793904209e-06, "loss": 0.4678, "step": 2250 }, { "epoch": 2.37, "learning_rate": 4.325108853410741e-06, "loss": 0.4254, "step": 2260 }, { "epoch": 2.38, "learning_rate": 4.252539912917272e-06, "loss": 0.2166, "step": 2270 }, { "epoch": 2.39, "learning_rate": 4.179970972423803e-06, "loss": 0.2313, "step": 2280 }, { "epoch": 2.41, "learning_rate": 4.1074020319303345e-06, "loss": 0.2503, "step": 2290 }, { "epoch": 2.42, "learning_rate": 4.0348330914368655e-06, "loss": 0.2127, "step": 2300 }, { "epoch": 2.43, "learning_rate": 3.962264150943396e-06, "loss": 0.1339, "step": 2310 }, { "epoch": 2.44, "learning_rate": 3.889695210449927e-06, "loss": 0.1467, "step": 2320 }, { "epoch": 2.45, "learning_rate": 3.817126269956458e-06, "loss": 0.3517, "step": 2330 }, { "epoch": 2.46, "learning_rate": 3.74455732946299e-06, "loss": 0.1513, "step": 2340 }, { "epoch": 2.47, "learning_rate": 3.671988388969521e-06, "loss": 0.1561, "step": 2350 }, { "epoch": 2.48, "learning_rate": 3.5994194484760525e-06, "loss": 0.2388, "step": 2360 }, { "epoch": 2.49, "learning_rate": 3.526850507982584e-06, "loss": 0.5018, "step": 2370 }, { "epoch": 2.5, "learning_rate": 3.454281567489115e-06, "loss": 0.3328, "step": 2380 }, { "epoch": 2.51, "learning_rate": 3.381712626995646e-06, "loss": 0.4347, "step": 2390 }, { "epoch": 2.52, "learning_rate": 3.3091436865021775e-06, "loss": 0.3993, "step": 2400 }, { "epoch": 2.53, "learning_rate": 3.236574746008709e-06, "loss": 0.4217, "step": 2410 }, { "epoch": 2.54, "learning_rate": 3.1640058055152394e-06, "loss": 0.651, "step": 2420 }, { "epoch": 2.55, "learning_rate": 3.091436865021771e-06, "loss": 0.3912, "step": 2430 }, { "epoch": 2.56, "learning_rate": 3.018867924528302e-06, "loss": 0.2198, "step": 2440 }, { "epoch": 2.57, "learning_rate": 2.946298984034833e-06, "loss": 0.1841, "step": 2450 }, { "epoch": 2.58, "learning_rate": 2.8737300435413645e-06, "loss": 0.2908, "step": 2460 }, { "epoch": 2.59, "learning_rate": 2.801161103047896e-06, "loss": 0.2179, "step": 2470 }, { "epoch": 2.61, "learning_rate": 2.728592162554427e-06, "loss": 0.1909, "step": 2480 }, { "epoch": 2.62, "learning_rate": 2.6560232220609582e-06, "loss": 0.4162, "step": 2490 }, { "epoch": 2.63, "learning_rate": 2.5834542815674896e-06, "loss": 0.3347, "step": 2500 } ], "max_steps": 2856, "num_train_epochs": 3, "total_flos": 5266095390720000.0, "trial_name": null, "trial_params": null }