{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 724, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.999411623120412e-05, "loss": 2.1959, "step": 5 }, { "epoch": 0.03, "learning_rate": 4.997646769431532e-05, "loss": 1.7787, "step": 10 }, { "epoch": 0.04, "learning_rate": 4.9947062696526445e-05, "loss": 1.167, "step": 15 }, { "epoch": 0.06, "learning_rate": 4.990591507881416e-05, "loss": 2.0242, "step": 20 }, { "epoch": 0.07, "learning_rate": 4.9853044209423996e-05, "loss": 2.2836, "step": 25 }, { "epoch": 0.08, "learning_rate": 4.9788474974753686e-05, "loss": 1.9629, "step": 30 }, { "epoch": 0.1, "learning_rate": 4.971223776763907e-05, "loss": 2.5583, "step": 35 }, { "epoch": 0.11, "learning_rate": 4.962436847304818e-05, "loss": 1.9105, "step": 40 }, { "epoch": 0.12, "learning_rate": 4.9524908451190096e-05, "loss": 2.1563, "step": 45 }, { "epoch": 0.14, "learning_rate": 4.9413904518046674e-05, "loss": 2.3299, "step": 50 }, { "epoch": 0.15, "learning_rate": 4.929140892333616e-05, "loss": 1.4083, "step": 55 }, { "epoch": 0.17, "learning_rate": 4.9157479325919156e-05, "loss": 1.7837, "step": 60 }, { "epoch": 0.18, "learning_rate": 4.901217876665858e-05, "loss": 1.4737, "step": 65 }, { "epoch": 0.19, "learning_rate": 4.8855575638746135e-05, "loss": 2.1072, "step": 70 }, { "epoch": 0.21, "learning_rate": 4.868774365550962e-05, "loss": 0.8345, "step": 75 }, { "epoch": 0.22, "learning_rate": 4.850876181571592e-05, "loss": 2.4821, "step": 80 }, { "epoch": 0.23, "learning_rate": 4.831871436638613e-05, "loss": 1.6394, "step": 85 }, { "epoch": 0.25, "learning_rate": 4.811769076314044e-05, "loss": 1.6403, "step": 90 }, { "epoch": 0.26, "learning_rate": 4.790578562809116e-05, "loss": 1.6487, "step": 95 }, { "epoch": 0.28, "learning_rate": 4.7683098705304e-05, "loss": 1.751, "step": 100 }, { "epoch": 0.29, "learning_rate": 4.7449734813848345e-05, "loss": 1.7423, "step": 105 }, { "epoch": 0.3, "learning_rate": 4.720580379845883e-05, "loss": 1.9435, "step": 110 }, { "epoch": 0.32, "learning_rate": 4.695142047783118e-05, "loss": 1.2112, "step": 115 }, { "epoch": 0.33, "learning_rate": 4.668670459057692e-05, "loss": 1.5366, "step": 120 }, { "epoch": 0.35, "learning_rate": 4.641178073886224e-05, "loss": 1.1842, "step": 125 }, { "epoch": 0.36, "learning_rate": 4.6126778329757516e-05, "loss": 1.2011, "step": 130 }, { "epoch": 0.37, "learning_rate": 4.583183151432527e-05, "loss": 1.2053, "step": 135 }, { "epoch": 0.39, "learning_rate": 4.5527079124475045e-05, "loss": 1.4354, "step": 140 }, { "epoch": 0.4, "learning_rate": 4.521266460761497e-05, "loss": 1.5641, "step": 145 }, { "epoch": 0.41, "learning_rate": 4.488873595913091e-05, "loss": 1.7298, "step": 150 }, { "epoch": 0.43, "learning_rate": 4.4555445652724795e-05, "loss": 1.2547, "step": 155 }, { "epoch": 0.44, "learning_rate": 4.4212950568645007e-05, "loss": 1.6051, "step": 160 }, { "epoch": 0.46, "learning_rate": 4.386141191984262e-05, "loss": 1.5891, "step": 165 }, { "epoch": 0.47, "learning_rate": 4.350099517608823e-05, "loss": 1.2693, "step": 170 }, { "epoch": 0.48, "learning_rate": 4.313186998608506e-05, "loss": 1.5962, "step": 175 }, { "epoch": 0.5, "learning_rate": 4.275421009761509e-05, "loss": 1.5196, "step": 180 }, { "epoch": 0.51, "learning_rate": 4.236819327575571e-05, "loss": 1.6221, "step": 185 }, { "epoch": 0.52, "learning_rate": 4.197400121920539e-05, "loss": 1.1342, "step": 190 }, { "epoch": 0.54, "learning_rate": 4.1571819474757894e-05, "loss": 2.024, "step": 195 }, { "epoch": 0.55, "learning_rate": 4.116183734996509e-05, "loss": 1.3701, "step": 200 }, { "epoch": 0.57, "learning_rate": 4.074424782402958e-05, "loss": 1.7944, "step": 205 }, { "epoch": 0.58, "learning_rate": 4.031924745696915e-05, "loss": 1.561, "step": 210 }, { "epoch": 0.59, "learning_rate": 3.988703629709564e-05, "loss": 1.828, "step": 215 }, { "epoch": 0.61, "learning_rate": 3.944781778685189e-05, "loss": 1.2159, "step": 220 }, { "epoch": 0.62, "learning_rate": 3.900179866705112e-05, "loss": 1.6311, "step": 225 }, { "epoch": 0.64, "learning_rate": 3.854918887956369e-05, "loss": 1.8962, "step": 230 }, { "epoch": 0.65, "learning_rate": 3.809020146849714e-05, "loss": 1.3081, "step": 235 }, { "epoch": 0.66, "learning_rate": 3.7625052479916015e-05, "loss": 1.6926, "step": 240 }, { "epoch": 0.68, "learning_rate": 3.715396086014869e-05, "loss": 1.9407, "step": 245 }, { "epoch": 0.69, "learning_rate": 3.667714835272895e-05, "loss": 1.3894, "step": 250 }, { "epoch": 0.7, "learning_rate": 3.6194839394021e-05, "loss": 1.2711, "step": 255 }, { "epoch": 0.72, "learning_rate": 3.570726100757693e-05, "loss": 2.0264, "step": 260 }, { "epoch": 0.73, "learning_rate": 3.5214642697276426e-05, "loss": 2.0718, "step": 265 }, { "epoch": 0.75, "learning_rate": 3.471721633929885e-05, "loss": 1.3508, "step": 270 }, { "epoch": 0.76, "learning_rate": 3.421521607297888e-05, "loss": 1.4645, "step": 275 }, { "epoch": 0.77, "learning_rate": 3.370887819059672e-05, "loss": 1.9456, "step": 280 }, { "epoch": 0.79, "learning_rate": 3.319844102615497e-05, "loss": 1.6282, "step": 285 }, { "epoch": 0.8, "learning_rate": 3.268414484319445e-05, "loss": 1.7652, "step": 290 }, { "epoch": 0.81, "learning_rate": 3.216623172170183e-05, "loss": 1.6169, "step": 295 }, { "epoch": 0.83, "learning_rate": 3.164494544416215e-05, "loss": 1.3647, "step": 300 }, { "epoch": 0.84, "learning_rate": 3.11205313808101e-05, "loss": 2.0791, "step": 305 }, { "epoch": 0.86, "learning_rate": 3.059323637413385e-05, "loss": 1.8486, "step": 310 }, { "epoch": 0.87, "learning_rate": 3.0063308622685903e-05, "loss": 1.0746, "step": 315 }, { "epoch": 0.88, "learning_rate": 2.9530997564255725e-05, "loss": 0.779, "step": 320 }, { "epoch": 0.9, "learning_rate": 2.8996553758458916e-05, "loss": 1.7561, "step": 325 }, { "epoch": 0.91, "learning_rate": 2.8460228768798506e-05, "loss": 0.8527, "step": 330 }, { "epoch": 0.93, "learning_rate": 2.792227504425359e-05, "loss": 1.211, "step": 335 }, { "epoch": 0.94, "learning_rate": 2.738294580045119e-05, "loss": 1.4435, "step": 340 }, { "epoch": 0.95, "learning_rate": 2.6842494900477365e-05, "loss": 1.4192, "step": 345 }, { "epoch": 0.97, "learning_rate": 2.6301176735383382e-05, "loss": 1.7033, "step": 350 }, { "epoch": 0.98, "learning_rate": 2.57592461044435e-05, "loss": 1.4952, "step": 355 }, { "epoch": 0.99, "learning_rate": 2.521695809522061e-05, "loss": 1.3207, "step": 360 }, { "epoch": 1.01, "learning_rate": 2.467456796349607e-05, "loss": 1.2689, "step": 365 }, { "epoch": 1.02, "learning_rate": 2.4132331013120453e-05, "loss": 1.5812, "step": 370 }, { "epoch": 1.04, "learning_rate": 2.3590502475841642e-05, "loss": 1.8583, "step": 375 }, { "epoch": 1.05, "learning_rate": 2.304933739116688e-05, "loss": 1.7619, "step": 380 }, { "epoch": 1.06, "learning_rate": 2.2509090486315246e-05, "loss": 1.5837, "step": 385 }, { "epoch": 1.08, "learning_rate": 2.1970016056317203e-05, "loss": 1.6458, "step": 390 }, { "epoch": 1.09, "learning_rate": 2.1432367844317558e-05, "loss": 1.4172, "step": 395 }, { "epoch": 1.1, "learning_rate": 2.0896398922138122e-05, "loss": 1.4327, "step": 400 }, { "epoch": 1.12, "learning_rate": 2.0362361571156505e-05, "loss": 1.0046, "step": 405 }, { "epoch": 1.13, "learning_rate": 1.9830507163556816e-05, "loss": 1.2355, "step": 410 }, { "epoch": 1.15, "learning_rate": 1.930108604400846e-05, "loss": 1.224, "step": 415 }, { "epoch": 1.16, "learning_rate": 1.8774347411828472e-05, "loss": 1.7486, "step": 420 }, { "epoch": 1.17, "learning_rate": 1.825053920368306e-05, "loss": 1.6091, "step": 425 }, { "epoch": 1.19, "learning_rate": 1.772990797688344e-05, "loss": 1.4323, "step": 430 }, { "epoch": 1.2, "learning_rate": 1.7212698793330916e-05, "loss": 1.0638, "step": 435 }, { "epoch": 1.22, "learning_rate": 1.6699155104165904e-05, "loss": 1.6245, "step": 440 }, { "epoch": 1.23, "learning_rate": 1.61895186351751e-05, "loss": 1.1679, "step": 445 }, { "epoch": 1.24, "learning_rate": 1.568402927301076e-05, "loss": 1.7155, "step": 450 }, { "epoch": 1.26, "learning_rate": 1.5182924952275768e-05, "loss": 1.2734, "step": 455 }, { "epoch": 1.27, "learning_rate": 1.4686441543527374e-05, "loss": 1.7568, "step": 460 }, { "epoch": 1.28, "learning_rate": 1.4194812742252638e-05, "loss": 1.7407, "step": 465 }, { "epoch": 1.3, "learning_rate": 1.3708269958867565e-05, "loss": 1.5895, "step": 470 }, { "epoch": 1.31, "learning_rate": 1.322704220979187e-05, "loss": 2.2967, "step": 475 }, { "epoch": 1.33, "learning_rate": 1.2751356009650681e-05, "loss": 1.4595, "step": 480 }, { "epoch": 1.34, "learning_rate": 1.2281435264653665e-05, "loss": 1.6341, "step": 485 }, { "epoch": 1.35, "learning_rate": 1.1817501167202099e-05, "loss": 1.3562, "step": 490 }, { "epoch": 1.37, "learning_rate": 1.1359772091773263e-05, "loss": 1.6777, "step": 495 }, { "epoch": 1.38, "learning_rate": 1.0908463492131227e-05, "loss": 1.0831, "step": 500 }, { "epoch": 1.4, "learning_rate": 1.0463787799912465e-05, "loss": 1.0575, "step": 505 }, { "epoch": 1.41, "learning_rate": 1.0025954324633948e-05, "loss": 1.3363, "step": 510 }, { "epoch": 1.42, "learning_rate": 9.595169155170852e-06, "loss": 0.9582, "step": 515 }, { "epoch": 1.44, "learning_rate": 9.171635062750189e-06, "loss": 1.8537, "step": 520 }, { "epoch": 1.45, "learning_rate": 8.755551405506143e-06, "loss": 1.372, "step": 525 }, { "epoch": 1.46, "learning_rate": 8.347114034641806e-06, "loss": 1.6832, "step": 530 }, { "epoch": 1.48, "learning_rate": 7.9465152022418e-06, "loss": 0.8202, "step": 535 }, { "epoch": 1.49, "learning_rate": 7.5539434707789266e-06, "loss": 1.4924, "step": 540 }, { "epoch": 1.51, "learning_rate": 7.169583624357451e-06, "loss": 0.8526, "step": 545 }, { "epoch": 1.52, "learning_rate": 6.793616581735062e-06, "loss": 1.8087, "step": 550 }, { "epoch": 1.53, "learning_rate": 6.42621931116405e-06, "loss": 1.5891, "step": 555 }, { "epoch": 1.55, "learning_rate": 6.067564747092094e-06, "loss": 1.9362, "step": 560 }, { "epoch": 1.56, "learning_rate": 5.717821708761822e-06, "loss": 1.0487, "step": 565 }, { "epoch": 1.57, "learning_rate": 5.377154820747271e-06, "loss": 1.6339, "step": 570 }, { "epoch": 1.59, "learning_rate": 5.045724435464874e-06, "loss": 1.2953, "step": 575 }, { "epoch": 1.6, "learning_rate": 4.72368655769535e-06, "loss": 1.0077, "step": 580 }, { "epoch": 1.62, "learning_rate": 4.411192771152004e-06, "loss": 1.1702, "step": 585 }, { "epoch": 1.63, "learning_rate": 4.108390167130044e-06, "loss": 1.5571, "step": 590 }, { "epoch": 1.64, "learning_rate": 3.8154212752704976e-06, "loss": 1.3669, "step": 595 }, { "epoch": 1.66, "learning_rate": 3.532423996471307e-06, "loss": 1.6162, "step": 600 }, { "epoch": 1.67, "learning_rate": 3.259531537977123e-06, "loss": 0.4967, "step": 605 }, { "epoch": 1.69, "learning_rate": 2.9968723506784953e-06, "loss": 1.066, "step": 610 }, { "epoch": 1.7, "learning_rate": 2.7445700686498545e-06, "loss": 1.4617, "step": 615 }, { "epoch": 1.71, "learning_rate": 2.502743450954714e-06, "loss": 1.3762, "step": 620 }, { "epoch": 1.73, "learning_rate": 2.271506325745662e-06, "loss": 1.6011, "step": 625 }, { "epoch": 1.74, "learning_rate": 2.050967536685233e-06, "loss": 1.7583, "step": 630 }, { "epoch": 1.75, "learning_rate": 1.8412308917130611e-06, "loss": 1.4909, "step": 635 }, { "epoch": 1.77, "learning_rate": 1.6423951141833011e-06, "loss": 0.8267, "step": 640 }, { "epoch": 1.78, "learning_rate": 1.4545537963954247e-06, "loss": 1.3815, "step": 645 }, { "epoch": 1.8, "learning_rate": 1.2777953555401678e-06, "loss": 1.618, "step": 650 }, { "epoch": 1.81, "learning_rate": 1.1122029920814236e-06, "loss": 1.6904, "step": 655 }, { "epoch": 1.82, "learning_rate": 9.578546505936676e-07, "loss": 1.1602, "step": 660 }, { "epoch": 1.84, "learning_rate": 8.148229830733295e-07, "loss": 1.7147, "step": 665 }, { "epoch": 1.85, "learning_rate": 6.831753147413827e-07, "loss": 1.3837, "step": 670 }, { "epoch": 1.86, "learning_rate": 5.629736123532653e-07, "loss": 1.5324, "step": 675 }, { "epoch": 1.88, "learning_rate": 4.5427445503103684e-07, "loss": 0.9563, "step": 680 }, { "epoch": 1.89, "learning_rate": 3.571290076314959e-07, "loss": 1.2545, "step": 685 }, { "epoch": 1.91, "learning_rate": 2.7158299666280864e-07, "loss": 1.5162, "step": 690 }, { "epoch": 1.92, "learning_rate": 1.9767668876096713e-07, "loss": 1.3806, "step": 695 }, { "epoch": 1.93, "learning_rate": 1.3544487173623443e-07, "loss": 1.4588, "step": 700 }, { "epoch": 1.95, "learning_rate": 8.491683819846219e-08, "loss": 2.3318, "step": 705 }, { "epoch": 1.96, "learning_rate": 4.611637176901162e-08, "loss": 1.021, "step": 710 }, { "epoch": 1.98, "learning_rate": 1.9061735885772536e-08, "loss": 1.5645, "step": 715 }, { "epoch": 1.99, "learning_rate": 3.76566520653987e-09, "loss": 1.6854, "step": 720 }, { "epoch": 2.0, "step": 724, "total_flos": 3.95850929775575e+16, "train_loss": 1.5255728854658854, "train_runtime": 1381.7862, "train_samples_per_second": 0.524, "train_steps_per_second": 0.524 } ], "logging_steps": 5, "max_steps": 724, "num_train_epochs": 2, "save_steps": 5000, "total_flos": 3.95850929775575e+16, "trial_name": null, "trial_params": null }