{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.024089983172439, "global_step": 8500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 9.92912827781715e-09, "loss": 2.0407, "step": 100 }, { "epoch": 0.14, "learning_rate": 9.858256555634302e-09, "loss": 2.0254, "step": 200 }, { "epoch": 0.21, "learning_rate": 9.787384833451452e-09, "loss": 2.0, "step": 300 }, { "epoch": 0.28, "learning_rate": 9.716513111268604e-09, "loss": 2.021, "step": 400 }, { "epoch": 0.35, "learning_rate": 9.645641389085754e-09, "loss": 2.021, "step": 500 }, { "epoch": 0.43, "learning_rate": 9.574769666902906e-09, "loss": 2.0231, "step": 600 }, { "epoch": 0.5, "learning_rate": 9.503897944720057e-09, "loss": 2.0069, "step": 700 }, { "epoch": 0.57, "learning_rate": 9.433026222537208e-09, "loss": 2.012, "step": 800 }, { "epoch": 0.64, "learning_rate": 9.362154500354359e-09, "loss": 2.0211, "step": 900 }, { "epoch": 0.71, "learning_rate": 9.291282778171509e-09, "loss": 2.0124, "step": 1000 }, { "epoch": 0.78, "learning_rate": 9.22041105598866e-09, "loss": 2.0022, "step": 1100 }, { "epoch": 0.85, "learning_rate": 9.149539333805812e-09, "loss": 2.0183, "step": 1200 }, { "epoch": 0.92, "learning_rate": 9.078667611622963e-09, "loss": 2.0216, "step": 1300 }, { "epoch": 0.99, "learning_rate": 9.007795889440113e-09, "loss": 2.0148, "step": 1400 }, { "epoch": 1.06, "learning_rate": 8.936924167257265e-09, "loss": 2.037, "step": 1500 }, { "epoch": 1.13, "learning_rate": 8.866052445074415e-09, "loss": 2.0141, "step": 1600 }, { "epoch": 1.2, "learning_rate": 8.795180722891567e-09, "loss": 2.0392, "step": 1700 }, { "epoch": 1.28, "learning_rate": 8.724309000708719e-09, "loss": 2.0102, "step": 1800 }, { "epoch": 1.35, "learning_rate": 8.653437278525869e-09, "loss": 2.0282, "step": 1900 }, { "epoch": 1.42, "learning_rate": 8.582565556343019e-09, "loss": 2.0227, "step": 2000 }, { "epoch": 1.49, "learning_rate": 8.51169383416017e-09, "loss": 2.0205, "step": 2100 }, { "epoch": 1.56, "learning_rate": 8.440822111977321e-09, "loss": 2.0152, "step": 2200 }, { "epoch": 1.63, "learning_rate": 8.369950389794471e-09, "loss": 2.0001, "step": 2300 }, { "epoch": 1.7, "learning_rate": 8.299078667611623e-09, "loss": 2.0182, "step": 2400 }, { "epoch": 1.77, "learning_rate": 8.228206945428775e-09, "loss": 2.0066, "step": 2500 }, { "epoch": 1.84, "learning_rate": 8.157335223245925e-09, "loss": 2.0176, "step": 2600 }, { "epoch": 1.91, "learning_rate": 8.086463501063075e-09, "loss": 2.0317, "step": 2700 }, { "epoch": 1.98, "learning_rate": 8.015591778880227e-09, "loss": 2.0088, "step": 2800 }, { "epoch": 2.06, "learning_rate": 7.944720056697377e-09, "loss": 2.0326, "step": 2900 }, { "epoch": 2.13, "learning_rate": 7.873848334514529e-09, "loss": 2.0085, "step": 3000 }, { "epoch": 2.2, "learning_rate": 7.802976612331681e-09, "loss": 2.0435, "step": 3100 }, { "epoch": 2.27, "learning_rate": 7.732104890148831e-09, "loss": 2.0367, "step": 3200 }, { "epoch": 2.34, "learning_rate": 7.661233167965981e-09, "loss": 2.0237, "step": 3300 }, { "epoch": 2.41, "learning_rate": 7.590361445783133e-09, "loss": 2.0296, "step": 3400 }, { "epoch": 2.48, "learning_rate": 7.519489723600283e-09, "loss": 2.0079, "step": 3500 }, { "epoch": 2.55, "learning_rate": 7.4486180014174344e-09, "loss": 2.0106, "step": 3600 }, { "epoch": 2.62, "learning_rate": 7.3777462792345854e-09, "loss": 2.0207, "step": 3700 }, { "epoch": 2.69, "learning_rate": 7.306874557051737e-09, "loss": 2.0215, "step": 3800 }, { "epoch": 2.76, "learning_rate": 7.2360028348688875e-09, "loss": 2.018, "step": 3900 }, { "epoch": 2.83, "learning_rate": 7.1651311126860385e-09, "loss": 1.996, "step": 4000 }, { "epoch": 2.91, "learning_rate": 7.0942593905031895e-09, "loss": 2.0043, "step": 4100 }, { "epoch": 2.98, "learning_rate": 7.0233876683203405e-09, "loss": 2.0007, "step": 4200 }, { "epoch": 3.05, "learning_rate": 6.952515946137491e-09, "loss": 2.0034, "step": 4300 }, { "epoch": 3.12, "learning_rate": 6.881644223954642e-09, "loss": 2.0066, "step": 4400 }, { "epoch": 3.19, "learning_rate": 6.810772501771794e-09, "loss": 1.9741, "step": 4500 }, { "epoch": 3.26, "learning_rate": 6.739900779588945e-09, "loss": 1.9767, "step": 4600 }, { "epoch": 3.33, "learning_rate": 6.669029057406096e-09, "loss": 1.9898, "step": 4700 }, { "epoch": 3.4, "learning_rate": 6.598157335223246e-09, "loss": 1.9723, "step": 4800 }, { "epoch": 3.47, "learning_rate": 6.527285613040397e-09, "loss": 1.9864, "step": 4900 }, { "epoch": 3.54, "learning_rate": 6.456413890857548e-09, "loss": 1.9762, "step": 5000 }, { "epoch": 3.61, "learning_rate": 6.385542168674698e-09, "loss": 2.0763, "step": 5100 }, { "epoch": 3.69, "learning_rate": 6.31467044649185e-09, "loss": 2.065, "step": 5200 }, { "epoch": 3.76, "learning_rate": 6.243798724309001e-09, "loss": 2.06, "step": 5300 }, { "epoch": 3.83, "learning_rate": 6.172927002126152e-09, "loss": 2.0403, "step": 5400 }, { "epoch": 3.9, "learning_rate": 6.102055279943303e-09, "loss": 2.0715, "step": 5500 }, { "epoch": 3.97, "learning_rate": 6.031183557760453e-09, "loss": 2.0762, "step": 5600 }, { "epoch": 4.04, "learning_rate": 5.960311835577604e-09, "loss": 2.0365, "step": 5700 }, { "epoch": 4.11, "learning_rate": 5.889440113394756e-09, "loss": 2.0154, "step": 5800 }, { "epoch": 4.18, "learning_rate": 5.818568391211907e-09, "loss": 2.0166, "step": 5900 }, { "epoch": 4.25, "learning_rate": 5.747696669029058e-09, "loss": 2.0339, "step": 6000 }, { "epoch": 4.32, "learning_rate": 5.676824946846208e-09, "loss": 2.0451, "step": 6100 }, { "epoch": 4.39, "learning_rate": 5.605953224663359e-09, "loss": 2.023, "step": 6200 }, { "epoch": 4.46, "learning_rate": 5.53508150248051e-09, "loss": 2.0222, "step": 6300 }, { "epoch": 4.54, "learning_rate": 5.464209780297661e-09, "loss": 2.0037, "step": 6400 }, { "epoch": 4.61, "learning_rate": 5.393338058114813e-09, "loss": 2.0116, "step": 6500 }, { "epoch": 4.68, "learning_rate": 5.322466335931963e-09, "loss": 2.0236, "step": 6600 }, { "epoch": 4.75, "learning_rate": 5.251594613749114e-09, "loss": 2.0302, "step": 6700 }, { "epoch": 4.82, "learning_rate": 5.180722891566265e-09, "loss": 2.0236, "step": 6800 }, { "epoch": 4.89, "learning_rate": 5.109851169383416e-09, "loss": 2.015, "step": 6900 }, { "epoch": 4.96, "learning_rate": 5.038979447200567e-09, "loss": 2.0037, "step": 7000 }, { "epoch": 5.03, "learning_rate": 4.9681077250177184e-09, "loss": 2.0319, "step": 7100 }, { "epoch": 5.1, "learning_rate": 4.897236002834869e-09, "loss": 2.0271, "step": 7200 }, { "epoch": 5.17, "learning_rate": 4.82636428065202e-09, "loss": 2.0337, "step": 7300 }, { "epoch": 5.24, "learning_rate": 4.755492558469171e-09, "loss": 2.0305, "step": 7400 }, { "epoch": 5.32, "learning_rate": 4.684620836286322e-09, "loss": 2.0261, "step": 7500 }, { "epoch": 5.39, "learning_rate": 4.613749114103473e-09, "loss": 2.0234, "step": 7600 }, { "epoch": 5.46, "learning_rate": 4.542877391920624e-09, "loss": 2.0209, "step": 7700 }, { "epoch": 5.53, "learning_rate": 4.472005669737775e-09, "loss": 2.0166, "step": 7800 }, { "epoch": 5.6, "learning_rate": 4.401133947554926e-09, "loss": 2.0268, "step": 7900 }, { "epoch": 5.67, "learning_rate": 4.330262225372077e-09, "loss": 2.0199, "step": 8000 }, { "epoch": 5.74, "learning_rate": 4.259390503189228e-09, "loss": 2.005, "step": 8100 }, { "epoch": 5.81, "learning_rate": 4.188518781006379e-09, "loss": 2.0096, "step": 8200 }, { "epoch": 5.88, "learning_rate": 4.117647058823529e-09, "loss": 2.0239, "step": 8300 }, { "epoch": 5.95, "learning_rate": 4.046775336640681e-09, "loss": 2.0096, "step": 8400 }, { "epoch": 6.02, "learning_rate": 3.975903614457831e-09, "loss": 2.0432, "step": 8500 } ], "max_steps": 14110, "num_train_epochs": 10, "total_flos": 6.087535537968384e+17, "trial_name": null, "trial_params": null }