{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9995764506565015, "eval_steps": 500, "global_step": 1180, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.6949152542372882e-06, "loss": 1.9981, "step": 1 }, { "epoch": 0.0, "learning_rate": 8.47457627118644e-06, "loss": 2.0358, "step": 5 }, { "epoch": 0.01, "learning_rate": 1.694915254237288e-05, "loss": 1.9674, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.5423728813559322e-05, "loss": 1.6753, "step": 15 }, { "epoch": 0.02, "learning_rate": 3.389830508474576e-05, "loss": 1.277, "step": 20 }, { "epoch": 0.02, "learning_rate": 4.2372881355932206e-05, "loss": 0.9453, "step": 25 }, { "epoch": 0.03, "learning_rate": 5.0847457627118643e-05, "loss": 0.7953, "step": 30 }, { "epoch": 0.03, "learning_rate": 5.932203389830509e-05, "loss": 0.7426, "step": 35 }, { "epoch": 0.03, "learning_rate": 6.779661016949152e-05, "loss": 0.6909, "step": 40 }, { "epoch": 0.04, "learning_rate": 7.627118644067796e-05, "loss": 0.723, "step": 45 }, { "epoch": 0.04, "learning_rate": 8.474576271186441e-05, "loss": 0.6646, "step": 50 }, { "epoch": 0.05, "learning_rate": 9.322033898305085e-05, "loss": 0.6685, "step": 55 }, { "epoch": 0.05, "learning_rate": 0.00010169491525423729, "loss": 0.6565, "step": 60 }, { "epoch": 0.06, "learning_rate": 0.00011016949152542372, "loss": 0.6032, "step": 65 }, { "epoch": 0.06, "learning_rate": 0.00011864406779661017, "loss": 0.6357, "step": 70 }, { "epoch": 0.06, "learning_rate": 0.0001271186440677966, "loss": 0.641, "step": 75 }, { "epoch": 0.07, "learning_rate": 0.00013559322033898305, "loss": 0.581, "step": 80 }, { "epoch": 0.07, "learning_rate": 0.00014406779661016949, "loss": 0.5263, "step": 85 }, { "epoch": 0.08, "learning_rate": 0.00015254237288135592, "loss": 0.5573, "step": 90 }, { "epoch": 0.08, "learning_rate": 0.00016101694915254236, "loss": 0.4486, "step": 95 }, { "epoch": 0.08, "learning_rate": 0.00016949152542372882, "loss": 0.4374, "step": 100 }, { "epoch": 0.09, "learning_rate": 0.00017796610169491526, "loss": 0.4425, "step": 105 }, { "epoch": 0.09, "learning_rate": 0.0001864406779661017, "loss": 0.4849, "step": 110 }, { "epoch": 0.1, "learning_rate": 0.00019491525423728814, "loss": 0.3484, "step": 115 }, { "epoch": 0.1, "learning_rate": 0.00019999824983320177, "loss": 0.3903, "step": 120 }, { "epoch": 0.11, "learning_rate": 0.00019997856116026692, "loss": 0.3913, "step": 125 }, { "epoch": 0.11, "learning_rate": 0.00019993700042749937, "loss": 0.3313, "step": 130 }, { "epoch": 0.11, "learning_rate": 0.00019987357672703672, "loss": 0.3447, "step": 135 }, { "epoch": 0.12, "learning_rate": 0.0001997883039339234, "loss": 0.3978, "step": 140 }, { "epoch": 0.12, "learning_rate": 0.000199681200703075, "loss": 0.3689, "step": 145 }, { "epoch": 0.13, "learning_rate": 0.0001995522904651977, "loss": 0.3055, "step": 150 }, { "epoch": 0.13, "learning_rate": 0.00019940160142166172, "loss": 0.3499, "step": 155 }, { "epoch": 0.14, "learning_rate": 0.00019922916653833248, "loss": 0.3107, "step": 160 }, { "epoch": 0.14, "learning_rate": 0.00019903502353835813, "loss": 0.313, "step": 165 }, { "epoch": 0.14, "learning_rate": 0.00019881921489391737, "loss": 0.3267, "step": 170 }, { "epoch": 0.15, "learning_rate": 0.00019858178781692774, "loss": 0.3396, "step": 175 }, { "epoch": 0.15, "learning_rate": 0.00019832279424871719, "loss": 0.3315, "step": 180 }, { "epoch": 0.16, "learning_rate": 0.00019804229084866102, "loss": 0.2894, "step": 185 }, { "epoch": 0.16, "learning_rate": 0.00019774033898178667, "loss": 0.2326, "step": 190 }, { "epoch": 0.17, "learning_rate": 0.00019741700470534906, "loss": 0.2904, "step": 195 }, { "epoch": 0.17, "learning_rate": 0.00019707235875437933, "loss": 0.2489, "step": 200 }, { "epoch": 0.17, "learning_rate": 0.00019670647652621041, "loss": 0.231, "step": 205 }, { "epoch": 0.18, "learning_rate": 0.00019631943806398252, "loss": 0.2691, "step": 210 }, { "epoch": 0.18, "learning_rate": 0.0001959113280391322, "loss": 0.3092, "step": 215 }, { "epoch": 0.19, "learning_rate": 0.0001954822357328692, "loss": 0.2461, "step": 220 }, { "epoch": 0.19, "learning_rate": 0.0001950322550166444, "loss": 0.2451, "step": 225 }, { "epoch": 0.19, "learning_rate": 0.00019456148433161387, "loss": 0.2183, "step": 230 }, { "epoch": 0.2, "learning_rate": 0.00019407002666710336, "loss": 0.2332, "step": 235 }, { "epoch": 0.2, "learning_rate": 0.00019355798953807715, "loss": 0.2203, "step": 240 }, { "epoch": 0.21, "learning_rate": 0.00019302548496161765, "loss": 0.2358, "step": 245 }, { "epoch": 0.21, "learning_rate": 0.0001924726294324196, "loss": 0.2191, "step": 250 }, { "epoch": 0.22, "learning_rate": 0.00019189954389730468, "loss": 0.2226, "step": 255 }, { "epoch": 0.22, "learning_rate": 0.00019130635372876246, "loss": 0.2194, "step": 260 }, { "epoch": 0.22, "learning_rate": 0.00019069318869752283, "loss": 0.1904, "step": 265 }, { "epoch": 0.23, "learning_rate": 0.00019006018294416647, "loss": 0.2449, "step": 270 }, { "epoch": 0.23, "learning_rate": 0.00018940747494977932, "loss": 0.2084, "step": 275 }, { "epoch": 0.24, "learning_rate": 0.00018873520750565718, "loss": 0.2174, "step": 280 }, { "epoch": 0.24, "learning_rate": 0.00018804352768206782, "loss": 0.2243, "step": 285 }, { "epoch": 0.25, "learning_rate": 0.00018733258679607672, "loss": 0.1996, "step": 290 }, { "epoch": 0.25, "learning_rate": 0.00018660254037844388, "loss": 0.2254, "step": 295 }, { "epoch": 0.25, "learning_rate": 0.00018585354813959858, "loss": 0.2027, "step": 300 }, { "epoch": 0.26, "learning_rate": 0.00018508577393470013, "loss": 0.2096, "step": 305 }, { "epoch": 0.26, "learning_rate": 0.00018429938572779152, "loss": 0.1661, "step": 310 }, { "epoch": 0.27, "learning_rate": 0.00018349455555505446, "loss": 0.2263, "step": 315 }, { "epoch": 0.27, "learning_rate": 0.00018267145948717338, "loss": 0.1851, "step": 320 }, { "epoch": 0.28, "learning_rate": 0.0001818302775908169, "loss": 0.1796, "step": 325 }, { "epoch": 0.28, "learning_rate": 0.00018097119388924525, "loss": 0.1906, "step": 330 }, { "epoch": 0.28, "learning_rate": 0.0001800943963220516, "loss": 0.1766, "step": 335 }, { "epoch": 0.29, "learning_rate": 0.0001792000767040474, "loss": 0.1615, "step": 340 }, { "epoch": 0.29, "learning_rate": 0.00017828843068329924, "loss": 0.1624, "step": 345 }, { "epoch": 0.3, "learning_rate": 0.00017735965769832753, "loss": 0.1694, "step": 350 }, { "epoch": 0.3, "learning_rate": 0.00017641396093447592, "loss": 0.18, "step": 355 }, { "epoch": 0.3, "learning_rate": 0.00017545154727946065, "loss": 0.1672, "step": 360 }, { "epoch": 0.31, "learning_rate": 0.00017447262727811062, "loss": 0.1708, "step": 365 }, { "epoch": 0.31, "learning_rate": 0.00017347741508630672, "loss": 0.1739, "step": 370 }, { "epoch": 0.32, "learning_rate": 0.00017246612842413173, "loss": 0.1595, "step": 375 }, { "epoch": 0.32, "learning_rate": 0.00017143898852824003, "loss": 0.1739, "step": 380 }, { "epoch": 0.33, "learning_rate": 0.0001703962201034583, "loss": 0.1701, "step": 385 }, { "epoch": 0.33, "learning_rate": 0.00016933805127362743, "loss": 0.1549, "step": 390 }, { "epoch": 0.33, "learning_rate": 0.00016826471353169646, "loss": 0.1693, "step": 395 }, { "epoch": 0.34, "learning_rate": 0.0001671764416890793, "loss": 0.1541, "step": 400 }, { "epoch": 0.34, "learning_rate": 0.00016607347382428578, "loss": 0.1554, "step": 405 }, { "epoch": 0.35, "learning_rate": 0.0001649560512308378, "loss": 0.1635, "step": 410 }, { "epoch": 0.35, "learning_rate": 0.00016382441836448202, "loss": 0.1499, "step": 415 }, { "epoch": 0.36, "learning_rate": 0.00016267882278971101, "loss": 0.1501, "step": 420 }, { "epoch": 0.36, "learning_rate": 0.0001615195151256042, "loss": 0.1634, "step": 425 }, { "epoch": 0.36, "learning_rate": 0.0001603467489910004, "loss": 0.1623, "step": 430 }, { "epoch": 0.37, "learning_rate": 0.00015916078094901445, "loss": 0.1666, "step": 435 }, { "epoch": 0.37, "learning_rate": 0.00015796187045090943, "loss": 0.1718, "step": 440 }, { "epoch": 0.38, "learning_rate": 0.0001567502797793372, "loss": 0.162, "step": 445 }, { "epoch": 0.38, "learning_rate": 0.0001555262739909594, "loss": 0.166, "step": 450 }, { "epoch": 0.39, "learning_rate": 0.00015429012085846182, "loss": 0.1682, "step": 455 }, { "epoch": 0.39, "learning_rate": 0.00015304209081197425, "loss": 0.172, "step": 460 }, { "epoch": 0.39, "learning_rate": 0.00015178245687990938, "loss": 0.1414, "step": 465 }, { "epoch": 0.4, "learning_rate": 0.00015051149462923285, "loss": 0.1778, "step": 470 }, { "epoch": 0.4, "learning_rate": 0.00014922948210517822, "loss": 0.1411, "step": 475 }, { "epoch": 0.41, "learning_rate": 0.00014793669977041977, "loss": 0.156, "step": 480 }, { "epoch": 0.41, "learning_rate": 0.00014663343044371633, "loss": 0.1383, "step": 485 }, { "epoch": 0.42, "learning_rate": 0.00014531995923803973, "loss": 0.1653, "step": 490 }, { "epoch": 0.42, "learning_rate": 0.00014399657349820134, "loss": 0.1507, "step": 495 }, { "epoch": 0.42, "learning_rate": 0.00014266356273799044, "loss": 0.1413, "step": 500 }, { "epoch": 0.43, "learning_rate": 0.00014132121857683783, "loss": 0.1659, "step": 505 }, { "epoch": 0.43, "learning_rate": 0.0001399698346760192, "loss": 0.1468, "step": 510 }, { "epoch": 0.44, "learning_rate": 0.00013860970667441147, "loss": 0.1211, "step": 515 }, { "epoch": 0.44, "learning_rate": 0.0001372411321238166, "loss": 0.1353, "step": 520 }, { "epoch": 0.44, "learning_rate": 0.00013586441042386693, "loss": 0.1566, "step": 525 }, { "epoch": 0.45, "learning_rate": 0.00013447984275652636, "loss": 0.1449, "step": 530 }, { "epoch": 0.45, "learning_rate": 0.0001330877320202014, "loss": 0.1635, "step": 535 }, { "epoch": 0.46, "learning_rate": 0.0001316883827634769, "loss": 0.135, "step": 540 }, { "epoch": 0.46, "learning_rate": 0.00013028210111849076, "loss": 0.1441, "step": 545 }, { "epoch": 0.47, "learning_rate": 0.0001288691947339621, "loss": 0.1637, "step": 550 }, { "epoch": 0.47, "learning_rate": 0.00012744997270788775, "loss": 0.1479, "step": 555 }, { "epoch": 0.47, "learning_rate": 0.00012602474551992165, "loss": 0.1404, "step": 560 }, { "epoch": 0.48, "learning_rate": 0.00012459382496345198, "loss": 0.1442, "step": 565 }, { "epoch": 0.48, "learning_rate": 0.00012315752407739093, "loss": 0.1464, "step": 570 }, { "epoch": 0.49, "learning_rate": 0.0001217161570776919, "loss": 0.1337, "step": 575 }, { "epoch": 0.49, "learning_rate": 0.00012027003928860937, "loss": 0.1497, "step": 580 }, { "epoch": 0.5, "learning_rate": 0.00011881948707371608, "loss": 0.1451, "step": 585 }, { "epoch": 0.5, "learning_rate": 0.00011736481776669306, "loss": 0.1421, "step": 590 }, { "epoch": 0.5, "learning_rate": 0.00011590634960190721, "loss": 0.1286, "step": 595 }, { "epoch": 0.51, "learning_rate": 0.00011444440164479215, "loss": 0.1421, "step": 600 }, { "epoch": 0.51, "learning_rate": 0.00011297929372204692, "loss": 0.1374, "step": 605 }, { "epoch": 0.52, "learning_rate": 0.00011151134635166829, "loss": 0.1502, "step": 610 }, { "epoch": 0.52, "learning_rate": 0.000110040880672832, "loss": 0.159, "step": 615 }, { "epoch": 0.53, "learning_rate": 0.00010856821837563769, "loss": 0.1316, "step": 620 }, { "epoch": 0.53, "learning_rate": 0.00010709368163073386, "loss": 0.1271, "step": 625 }, { "epoch": 0.53, "learning_rate": 0.00010561759301883714, "loss": 0.1289, "step": 630 }, { "epoch": 0.54, "learning_rate": 0.00010414027546016222, "loss": 0.146, "step": 635 }, { "epoch": 0.54, "learning_rate": 0.00010266205214377748, "loss": 0.1417, "step": 640 }, { "epoch": 0.55, "learning_rate": 0.00010118324645690161, "loss": 0.1563, "step": 645 }, { "epoch": 0.55, "learning_rate": 9.970418191415703e-05, "loss": 0.1432, "step": 650 }, { "epoch": 0.55, "learning_rate": 9.822518208679542e-05, "loss": 0.1336, "step": 655 }, { "epoch": 0.56, "learning_rate": 9.674657053191079e-05, "loss": 0.1216, "step": 660 }, { "epoch": 0.56, "learning_rate": 9.526867072165581e-05, "loss": 0.1379, "step": 665 }, { "epoch": 0.57, "learning_rate": 9.379180597247661e-05, "loss": 0.1471, "step": 670 }, { "epoch": 0.57, "learning_rate": 9.231629937438142e-05, "loss": 0.1385, "step": 675 }, { "epoch": 0.58, "learning_rate": 9.084247372025937e-05, "loss": 0.1532, "step": 680 }, { "epoch": 0.58, "learning_rate": 8.937065143526347e-05, "loss": 0.1337, "step": 685 }, { "epoch": 0.58, "learning_rate": 8.790115450627485e-05, "loss": 0.1539, "step": 690 }, { "epoch": 0.59, "learning_rate": 8.64343044114622e-05, "loss": 0.1218, "step": 695 }, { "epoch": 0.59, "learning_rate": 8.497042204995299e-05, "loss": 0.1282, "step": 700 }, { "epoch": 0.6, "learning_rate": 8.350982767163104e-05, "loss": 0.1215, "step": 705 }, { "epoch": 0.6, "learning_rate": 8.205284080707634e-05, "loss": 0.129, "step": 710 }, { "epoch": 0.61, "learning_rate": 8.059978019766219e-05, "loss": 0.1435, "step": 715 }, { "epoch": 0.61, "learning_rate": 7.915096372582466e-05, "loss": 0.1378, "step": 720 }, { "epoch": 0.61, "learning_rate": 7.770670834552074e-05, "loss": 0.1361, "step": 725 }, { "epoch": 0.62, "learning_rate": 7.626733001288851e-05, "loss": 0.127, "step": 730 }, { "epoch": 0.62, "learning_rate": 7.483314361712685e-05, "loss": 0.1344, "step": 735 }, { "epoch": 0.63, "learning_rate": 7.340446291160732e-05, "loss": 0.1439, "step": 740 }, { "epoch": 0.63, "learning_rate": 7.198160044523554e-05, "loss": 0.1239, "step": 745 }, { "epoch": 0.64, "learning_rate": 7.056486749407551e-05, "loss": 0.135, "step": 750 }, { "epoch": 0.64, "learning_rate": 6.915457399325242e-05, "loss": 0.1532, "step": 755 }, { "epoch": 0.64, "learning_rate": 6.775102846914911e-05, "loss": 0.1206, "step": 760 }, { "epoch": 0.65, "learning_rate": 6.635453797191034e-05, "loss": 0.1312, "step": 765 }, { "epoch": 0.65, "learning_rate": 6.496540800827036e-05, "loss": 0.1366, "step": 770 }, { "epoch": 0.66, "learning_rate": 6.358394247471778e-05, "loss": 0.1473, "step": 775 }, { "epoch": 0.66, "learning_rate": 6.221044359101317e-05, "loss": 0.13, "step": 780 }, { "epoch": 0.66, "learning_rate": 6.084521183407286e-05, "loss": 0.1126, "step": 785 }, { "epoch": 0.67, "learning_rate": 5.9488545872234645e-05, "loss": 0.1235, "step": 790 }, { "epoch": 0.67, "learning_rate": 5.8140742499918756e-05, "loss": 0.1425, "step": 795 }, { "epoch": 0.68, "learning_rate": 5.680209657269871e-05, "loss": 0.1386, "step": 800 }, { "epoch": 0.68, "learning_rate": 5.547290094279687e-05, "loss": 0.1325, "step": 805 }, { "epoch": 0.69, "learning_rate": 5.4153446395017537e-05, "loss": 0.1237, "step": 810 }, { "epoch": 0.69, "learning_rate": 5.2844021583132983e-05, "loss": 0.126, "step": 815 }, { "epoch": 0.69, "learning_rate": 5.1544912966734994e-05, "loss": 0.1205, "step": 820 }, { "epoch": 0.7, "learning_rate": 5.025640474856732e-05, "loss": 0.1358, "step": 825 }, { "epoch": 0.7, "learning_rate": 4.897877881235091e-05, "loss": 0.1269, "step": 830 }, { "epoch": 0.71, "learning_rate": 4.771231466111725e-05, "loss": 0.1173, "step": 835 }, { "epoch": 0.71, "learning_rate": 4.645728935606194e-05, "loss": 0.1261, "step": 840 }, { "epoch": 0.72, "learning_rate": 4.5213977455932785e-05, "loss": 0.1302, "step": 845 }, { "epoch": 0.72, "learning_rate": 4.398265095696539e-05, "loss": 0.1237, "step": 850 }, { "epoch": 0.72, "learning_rate": 4.2763579233379025e-05, "loss": 0.1214, "step": 855 }, { "epoch": 0.73, "learning_rate": 4.155702897844641e-05, "loss": 0.1409, "step": 860 }, { "epoch": 0.73, "learning_rate": 4.036326414614985e-05, "loss": 0.1182, "step": 865 }, { "epoch": 0.74, "learning_rate": 3.918254589343683e-05, "loss": 0.138, "step": 870 }, { "epoch": 0.74, "learning_rate": 3.8015132523087116e-05, "loss": 0.1221, "step": 875 }, { "epoch": 0.75, "learning_rate": 3.686127942720463e-05, "loss": 0.1381, "step": 880 }, { "epoch": 0.75, "learning_rate": 3.5721239031346066e-05, "loss": 0.1386, "step": 885 }, { "epoch": 0.75, "learning_rate": 3.459526073929818e-05, "loss": 0.1145, "step": 890 }, { "epoch": 0.76, "learning_rate": 3.3483590878516435e-05, "loss": 0.1108, "step": 895 }, { "epoch": 0.76, "learning_rate": 3.2386472646236566e-05, "loss": 0.1218, "step": 900 }, { "epoch": 0.77, "learning_rate": 3.1304146056271024e-05, "loss": 0.1142, "step": 905 }, { "epoch": 0.77, "learning_rate": 3.0236847886501542e-05, "loss": 0.128, "step": 910 }, { "epoch": 0.78, "learning_rate": 2.9184811627079987e-05, "loss": 0.1268, "step": 915 }, { "epoch": 0.78, "learning_rate": 2.814826742934823e-05, "loss": 0.1271, "step": 920 }, { "epoch": 0.78, "learning_rate": 2.7127442055488615e-05, "loss": 0.1205, "step": 925 }, { "epoch": 0.79, "learning_rate": 2.6122558828915646e-05, "loss": 0.1196, "step": 930 }, { "epoch": 0.79, "learning_rate": 2.5133837585420084e-05, "loss": 0.1305, "step": 935 }, { "epoch": 0.8, "learning_rate": 2.4161494625076165e-05, "loss": 0.1213, "step": 940 }, { "epoch": 0.8, "learning_rate": 2.3205742664922004e-05, "loss": 0.1308, "step": 945 }, { "epoch": 0.8, "learning_rate": 2.2266790792424097e-05, "loss": 0.1229, "step": 950 }, { "epoch": 0.81, "learning_rate": 2.1344844419735755e-05, "loss": 0.1212, "step": 955 }, { "epoch": 0.81, "learning_rate": 2.0440105238759687e-05, "loss": 0.138, "step": 960 }, { "epoch": 0.82, "learning_rate": 1.9552771177024242e-05, "loss": 0.119, "step": 965 }, { "epoch": 0.82, "learning_rate": 1.868303635438332e-05, "loss": 0.118, "step": 970 }, { "epoch": 0.83, "learning_rate": 1.7831091040549397e-05, "loss": 0.1286, "step": 975 }, { "epoch": 0.83, "learning_rate": 1.6997121613468458e-05, "loss": 0.1292, "step": 980 }, { "epoch": 0.83, "learning_rate": 1.6181310518546856e-05, "loss": 0.1232, "step": 985 }, { "epoch": 0.84, "learning_rate": 1.5383836228737814e-05, "loss": 0.1282, "step": 990 }, { "epoch": 0.84, "learning_rate": 1.4604873205497727e-05, "loss": 0.1235, "step": 995 }, { "epoch": 0.85, "learning_rate": 1.3844591860619383e-05, "loss": 0.1266, "step": 1000 }, { "epoch": 0.85, "learning_rate": 1.3103158518951618e-05, "loss": 0.1248, "step": 1005 }, { "epoch": 0.86, "learning_rate": 1.2380735382012576e-05, "loss": 0.1321, "step": 1010 }, { "epoch": 0.86, "learning_rate": 1.1677480492505588e-05, "loss": 0.1261, "step": 1015 }, { "epoch": 0.86, "learning_rate": 1.0993547699744366e-05, "loss": 0.1274, "step": 1020 }, { "epoch": 0.87, "learning_rate": 1.0329086625995843e-05, "loss": 0.1243, "step": 1025 }, { "epoch": 0.87, "learning_rate": 9.684242633747642e-06, "loss": 0.1493, "step": 1030 }, { "epoch": 0.88, "learning_rate": 9.059156793907541e-06, "loss": 0.1342, "step": 1035 }, { "epoch": 0.88, "learning_rate": 8.453965854941748e-06, "loss": 0.1203, "step": 1040 }, { "epoch": 0.89, "learning_rate": 7.868802212958703e-06, "loss": 0.1182, "step": 1045 }, { "epoch": 0.89, "learning_rate": 7.3037938827451805e-06, "loss": 0.1373, "step": 1050 }, { "epoch": 0.89, "learning_rate": 6.759064469760823e-06, "loss": 0.135, "step": 1055 }, { "epoch": 0.9, "learning_rate": 6.234733143097216e-06, "loss": 0.1093, "step": 1060 }, { "epoch": 0.9, "learning_rate": 5.730914609407634e-06, "loss": 0.1354, "step": 1065 }, { "epoch": 0.91, "learning_rate": 5.247719087812897e-06, "loss": 0.1156, "step": 1070 }, { "epoch": 0.91, "learning_rate": 4.785252285789077e-06, "loss": 0.1157, "step": 1075 }, { "epoch": 0.91, "learning_rate": 4.3436153760420654e-06, "loss": 0.1402, "step": 1080 }, { "epoch": 0.92, "learning_rate": 3.922904974374309e-06, "loss": 0.1291, "step": 1085 }, { "epoch": 0.92, "learning_rate": 3.5232131185484076e-06, "loss": 0.1371, "step": 1090 }, { "epoch": 0.93, "learning_rate": 3.1446272481522543e-06, "loss": 0.1198, "step": 1095 }, { "epoch": 0.93, "learning_rate": 2.78723018547008e-06, "loss": 0.1456, "step": 1100 }, { "epoch": 0.94, "learning_rate": 2.451100117363603e-06, "loss": 0.1208, "step": 1105 }, { "epoch": 0.94, "learning_rate": 2.1363105781673888e-06, "loss": 0.1089, "step": 1110 }, { "epoch": 0.94, "learning_rate": 1.842930433601775e-06, "loss": 0.1166, "step": 1115 }, { "epoch": 0.95, "learning_rate": 1.5710238657074217e-06, "loss": 0.1258, "step": 1120 }, { "epoch": 0.95, "learning_rate": 1.3206503588042762e-06, "loss": 0.1338, "step": 1125 }, { "epoch": 0.96, "learning_rate": 1.0918646864784166e-06, "loss": 0.1282, "step": 1130 }, { "epoch": 0.96, "learning_rate": 8.847168995992916e-07, "loss": 0.1266, "step": 1135 }, { "epoch": 0.97, "learning_rate": 6.992523153702691e-07, "loss": 0.1218, "step": 1140 }, { "epoch": 0.97, "learning_rate": 5.35511507414721e-07, "loss": 0.1172, "step": 1145 }, { "epoch": 0.97, "learning_rate": 3.9353029689974674e-07, "loss": 0.1367, "step": 1150 }, { "epoch": 0.98, "learning_rate": 2.733397446997632e-07, "loss": 0.1164, "step": 1155 }, { "epoch": 0.98, "learning_rate": 1.7496614460135176e-07, "loss": 0.1091, "step": 1160 }, { "epoch": 0.99, "learning_rate": 9.843101755101502e-08, "loss": 0.1402, "step": 1165 }, { "epoch": 0.99, "learning_rate": 4.375110694713191e-08, "loss": 0.1242, "step": 1170 }, { "epoch": 1.0, "learning_rate": 1.0938374977020882e-08, "loss": 0.122, "step": 1175 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.1162, "step": 1180 }, { "epoch": 1.0, "eval_loss": 0.10465715825557709, "eval_runtime": 0.1015, "eval_samples_per_second": 49.268, "eval_steps_per_second": 9.854, "step": 1180 }, { "epoch": 1.0, "step": 1180, "total_flos": 6.174977435684045e+16, "train_loss": 0.2257307288000139, "train_runtime": 713.6543, "train_samples_per_second": 13.23, "train_steps_per_second": 1.653 } ], "logging_steps": 5, "max_steps": 1180, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 6.174977435684045e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }