{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.9951749095295535, "eval_steps": 500, "global_step": 2484, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 1.606425702811245e-05, "loss": 1.1216, "step": 20 }, { "epoch": 0.06, "learning_rate": 3.132530120481928e-05, "loss": 0.9435, "step": 40 }, { "epoch": 0.1, "learning_rate": 4.738955823293173e-05, "loss": 0.7337, "step": 60 }, { "epoch": 0.13, "learning_rate": 6.345381526104418e-05, "loss": 0.4553, "step": 80 }, { "epoch": 0.16, "learning_rate": 7.951807228915663e-05, "loss": 0.2854, "step": 100 }, { "epoch": 0.19, "learning_rate": 9.558232931726909e-05, "loss": 0.2014, "step": 120 }, { "epoch": 0.23, "learning_rate": 0.00011164658634538152, "loss": 0.1777, "step": 140 }, { "epoch": 0.26, "learning_rate": 0.00012771084337349396, "loss": 0.164, "step": 160 }, { "epoch": 0.29, "learning_rate": 0.00014377510040160642, "loss": 0.1561, "step": 180 }, { "epoch": 0.32, "learning_rate": 0.00015983935742971888, "loss": 0.1513, "step": 200 }, { "epoch": 0.35, "learning_rate": 0.00017590361445783134, "loss": 0.1481, "step": 220 }, { "epoch": 0.39, "learning_rate": 0.00019196787148594377, "loss": 0.1468, "step": 240 }, { "epoch": 0.42, "learning_rate": 0.00019910514541387027, "loss": 0.1437, "step": 260 }, { "epoch": 0.45, "learning_rate": 0.00019731543624161075, "loss": 0.1408, "step": 280 }, { "epoch": 0.48, "learning_rate": 0.00019552572706935123, "loss": 0.1414, "step": 300 }, { "epoch": 0.51, "learning_rate": 0.00019373601789709173, "loss": 0.137, "step": 320 }, { "epoch": 0.55, "learning_rate": 0.0001919463087248322, "loss": 0.1371, "step": 340 }, { "epoch": 0.58, "learning_rate": 0.0001901565995525727, "loss": 0.1367, "step": 360 }, { "epoch": 0.61, "learning_rate": 0.0001883668903803132, "loss": 0.1358, "step": 380 }, { "epoch": 0.64, "learning_rate": 0.0001865771812080537, "loss": 0.1362, "step": 400 }, { "epoch": 0.68, "learning_rate": 0.0001847874720357942, "loss": 0.1339, "step": 420 }, { "epoch": 0.71, "learning_rate": 0.0001829977628635347, "loss": 0.1345, "step": 440 }, { "epoch": 0.74, "learning_rate": 0.00018120805369127517, "loss": 0.1309, "step": 460 }, { "epoch": 0.77, "learning_rate": 0.00017941834451901567, "loss": 0.1315, "step": 480 }, { "epoch": 0.8, "learning_rate": 0.00017762863534675615, "loss": 0.1353, "step": 500 }, { "epoch": 0.84, "learning_rate": 0.00017583892617449665, "loss": 0.1339, "step": 520 }, { "epoch": 0.87, "learning_rate": 0.00017404921700223716, "loss": 0.1319, "step": 540 }, { "epoch": 0.9, "learning_rate": 0.00017225950782997763, "loss": 0.1328, "step": 560 }, { "epoch": 0.93, "learning_rate": 0.00017046979865771814, "loss": 0.1332, "step": 580 }, { "epoch": 0.97, "learning_rate": 0.0001686800894854586, "loss": 0.131, "step": 600 }, { "epoch": 1.0, "learning_rate": 0.00016689038031319912, "loss": 0.1278, "step": 620 }, { "epoch": 1.03, "learning_rate": 0.00016510067114093962, "loss": 0.1331, "step": 640 }, { "epoch": 1.06, "learning_rate": 0.0001633109619686801, "loss": 0.1286, "step": 660 }, { "epoch": 1.09, "learning_rate": 0.0001615212527964206, "loss": 0.1276, "step": 680 }, { "epoch": 1.13, "learning_rate": 0.00015973154362416107, "loss": 0.1287, "step": 700 }, { "epoch": 1.16, "learning_rate": 0.00015794183445190158, "loss": 0.1312, "step": 720 }, { "epoch": 1.19, "learning_rate": 0.00015615212527964208, "loss": 0.1298, "step": 740 }, { "epoch": 1.22, "learning_rate": 0.00015436241610738256, "loss": 0.1288, "step": 760 }, { "epoch": 1.25, "learning_rate": 0.00015257270693512303, "loss": 0.1297, "step": 780 }, { "epoch": 1.29, "learning_rate": 0.00015078299776286354, "loss": 0.129, "step": 800 }, { "epoch": 1.32, "learning_rate": 0.00014899328859060404, "loss": 0.1286, "step": 820 }, { "epoch": 1.35, "learning_rate": 0.00014720357941834454, "loss": 0.1289, "step": 840 }, { "epoch": 1.38, "learning_rate": 0.00014541387024608502, "loss": 0.1285, "step": 860 }, { "epoch": 1.42, "learning_rate": 0.0001436241610738255, "loss": 0.1283, "step": 880 }, { "epoch": 1.45, "learning_rate": 0.00014183445190156602, "loss": 0.1247, "step": 900 }, { "epoch": 1.48, "learning_rate": 0.0001400447427293065, "loss": 0.1279, "step": 920 }, { "epoch": 1.51, "learning_rate": 0.00013825503355704698, "loss": 0.1249, "step": 940 }, { "epoch": 1.54, "learning_rate": 0.00013646532438478748, "loss": 0.1244, "step": 960 }, { "epoch": 1.58, "learning_rate": 0.00013467561521252796, "loss": 0.1259, "step": 980 }, { "epoch": 1.61, "learning_rate": 0.00013288590604026846, "loss": 0.1257, "step": 1000 }, { "epoch": 1.64, "learning_rate": 0.00013109619686800896, "loss": 0.1249, "step": 1020 }, { "epoch": 1.67, "learning_rate": 0.00012930648769574944, "loss": 0.1244, "step": 1040 }, { "epoch": 1.7, "learning_rate": 0.00012751677852348994, "loss": 0.125, "step": 1060 }, { "epoch": 1.74, "learning_rate": 0.00012572706935123044, "loss": 0.1213, "step": 1080 }, { "epoch": 1.77, "learning_rate": 0.00012393736017897092, "loss": 0.1235, "step": 1100 }, { "epoch": 1.8, "learning_rate": 0.00012214765100671142, "loss": 0.1265, "step": 1120 }, { "epoch": 1.83, "learning_rate": 0.0001203579418344519, "loss": 0.1245, "step": 1140 }, { "epoch": 1.87, "learning_rate": 0.00011856823266219239, "loss": 0.1245, "step": 1160 }, { "epoch": 1.9, "learning_rate": 0.0001167785234899329, "loss": 0.1249, "step": 1180 }, { "epoch": 1.93, "learning_rate": 0.00011498881431767338, "loss": 0.1252, "step": 1200 }, { "epoch": 1.96, "learning_rate": 0.00011319910514541387, "loss": 0.1236, "step": 1220 }, { "epoch": 1.99, "learning_rate": 0.00011140939597315436, "loss": 0.1213, "step": 1240 }, { "epoch": 2.03, "learning_rate": 0.00010961968680089485, "loss": 0.1254, "step": 1260 }, { "epoch": 2.06, "learning_rate": 0.00010782997762863535, "loss": 0.1221, "step": 1280 }, { "epoch": 2.09, "learning_rate": 0.00010604026845637584, "loss": 0.1207, "step": 1300 }, { "epoch": 2.12, "learning_rate": 0.00010425055928411633, "loss": 0.1218, "step": 1320 }, { "epoch": 2.16, "learning_rate": 0.00010246085011185682, "loss": 0.1244, "step": 1340 }, { "epoch": 2.19, "learning_rate": 0.00010067114093959733, "loss": 0.1218, "step": 1360 }, { "epoch": 2.22, "learning_rate": 9.888143176733782e-05, "loss": 0.1245, "step": 1380 }, { "epoch": 2.25, "learning_rate": 9.70917225950783e-05, "loss": 0.1229, "step": 1400 }, { "epoch": 2.28, "learning_rate": 9.53020134228188e-05, "loss": 0.123, "step": 1420 }, { "epoch": 2.32, "learning_rate": 9.351230425055928e-05, "loss": 0.1214, "step": 1440 }, { "epoch": 2.35, "learning_rate": 9.172259507829977e-05, "loss": 0.1231, "step": 1460 }, { "epoch": 2.38, "learning_rate": 8.993288590604028e-05, "loss": 0.123, "step": 1480 }, { "epoch": 2.41, "learning_rate": 8.814317673378077e-05, "loss": 0.123, "step": 1500 }, { "epoch": 2.44, "learning_rate": 8.635346756152126e-05, "loss": 0.1179, "step": 1520 }, { "epoch": 2.48, "learning_rate": 8.456375838926175e-05, "loss": 0.1233, "step": 1540 }, { "epoch": 2.51, "learning_rate": 8.277404921700224e-05, "loss": 0.1195, "step": 1560 }, { "epoch": 2.54, "learning_rate": 8.098434004474274e-05, "loss": 0.1195, "step": 1580 }, { "epoch": 2.57, "learning_rate": 7.919463087248322e-05, "loss": 0.1204, "step": 1600 }, { "epoch": 2.61, "learning_rate": 7.740492170022372e-05, "loss": 0.1197, "step": 1620 }, { "epoch": 2.64, "learning_rate": 7.561521252796421e-05, "loss": 0.1195, "step": 1640 }, { "epoch": 2.67, "learning_rate": 7.382550335570471e-05, "loss": 0.1205, "step": 1660 }, { "epoch": 2.7, "learning_rate": 7.203579418344519e-05, "loss": 0.1205, "step": 1680 }, { "epoch": 2.73, "learning_rate": 7.024608501118568e-05, "loss": 0.116, "step": 1700 }, { "epoch": 2.77, "learning_rate": 6.845637583892618e-05, "loss": 0.1184, "step": 1720 }, { "epoch": 2.8, "learning_rate": 6.666666666666667e-05, "loss": 0.1225, "step": 1740 }, { "epoch": 2.83, "learning_rate": 6.487695749440716e-05, "loss": 0.1195, "step": 1760 }, { "epoch": 2.86, "learning_rate": 6.308724832214765e-05, "loss": 0.1189, "step": 1780 }, { "epoch": 2.9, "learning_rate": 6.129753914988815e-05, "loss": 0.1206, "step": 1800 }, { "epoch": 2.93, "learning_rate": 5.9507829977628635e-05, "loss": 0.1199, "step": 1820 }, { "epoch": 2.96, "learning_rate": 5.771812080536914e-05, "loss": 0.1189, "step": 1840 }, { "epoch": 2.99, "learning_rate": 5.592841163310962e-05, "loss": 0.1174, "step": 1860 }, { "epoch": 3.02, "learning_rate": 5.413870246085011e-05, "loss": 0.1197, "step": 1880 }, { "epoch": 3.06, "learning_rate": 5.234899328859061e-05, "loss": 0.119, "step": 1900 }, { "epoch": 3.09, "learning_rate": 5.05592841163311e-05, "loss": 0.1161, "step": 1920 }, { "epoch": 3.12, "learning_rate": 4.8769574944071586e-05, "loss": 0.1171, "step": 1940 }, { "epoch": 3.15, "learning_rate": 4.697986577181208e-05, "loss": 0.1201, "step": 1960 }, { "epoch": 3.18, "learning_rate": 4.519015659955257e-05, "loss": 0.1191, "step": 1980 }, { "epoch": 3.22, "learning_rate": 4.340044742729307e-05, "loss": 0.1193, "step": 2000 }, { "epoch": 3.25, "learning_rate": 4.161073825503356e-05, "loss": 0.119, "step": 2020 }, { "epoch": 3.28, "learning_rate": 3.9821029082774055e-05, "loss": 0.1188, "step": 2040 }, { "epoch": 3.31, "learning_rate": 3.8031319910514545e-05, "loss": 0.1165, "step": 2060 }, { "epoch": 3.35, "learning_rate": 3.6241610738255034e-05, "loss": 0.1204, "step": 2080 }, { "epoch": 3.38, "learning_rate": 3.4451901565995524e-05, "loss": 0.1196, "step": 2100 }, { "epoch": 3.41, "learning_rate": 3.266219239373602e-05, "loss": 0.1186, "step": 2120 }, { "epoch": 3.44, "learning_rate": 3.087248322147651e-05, "loss": 0.1135, "step": 2140 }, { "epoch": 3.47, "learning_rate": 2.9082774049217003e-05, "loss": 0.1196, "step": 2160 }, { "epoch": 3.51, "learning_rate": 2.7293064876957496e-05, "loss": 0.1161, "step": 2180 }, { "epoch": 3.54, "learning_rate": 2.550335570469799e-05, "loss": 0.1162, "step": 2200 }, { "epoch": 3.57, "learning_rate": 2.371364653243848e-05, "loss": 0.1165, "step": 2220 }, { "epoch": 3.6, "learning_rate": 2.192393736017897e-05, "loss": 0.1164, "step": 2240 }, { "epoch": 3.63, "learning_rate": 2.013422818791946e-05, "loss": 0.116, "step": 2260 }, { "epoch": 3.67, "learning_rate": 1.8344519015659954e-05, "loss": 0.1172, "step": 2280 }, { "epoch": 3.7, "learning_rate": 1.6554809843400447e-05, "loss": 0.117, "step": 2300 }, { "epoch": 3.73, "learning_rate": 1.4765100671140942e-05, "loss": 0.1127, "step": 2320 }, { "epoch": 3.76, "learning_rate": 1.2975391498881432e-05, "loss": 0.1157, "step": 2340 }, { "epoch": 3.8, "learning_rate": 1.1185682326621925e-05, "loss": 0.1193, "step": 2360 }, { "epoch": 3.83, "learning_rate": 9.395973154362418e-06, "loss": 0.1153, "step": 2380 }, { "epoch": 3.86, "learning_rate": 7.606263982102908e-06, "loss": 0.1164, "step": 2400 }, { "epoch": 3.89, "learning_rate": 5.8165548098434e-06, "loss": 0.1181, "step": 2420 }, { "epoch": 3.92, "learning_rate": 4.026845637583892e-06, "loss": 0.1166, "step": 2440 }, { "epoch": 3.96, "learning_rate": 2.237136465324385e-06, "loss": 0.1156, "step": 2460 }, { "epoch": 3.99, "learning_rate": 4.4742729306487696e-07, "loss": 0.1148, "step": 2480 } ], "logging_steps": 20, "max_steps": 2484, "num_train_epochs": 4, "save_steps": 500, "total_flos": 1.2566003090639028e+19, "trial_name": null, "trial_params": null }