{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 200, "global_step": 10600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.94, "learning_rate": 1.4528301886792454e-06, "loss": 129.1763, "step": 100 }, { "epoch": 1.89, "learning_rate": 3.3207547169811323e-06, "loss": 118.0881, "step": 200 }, { "epoch": 2.83, "learning_rate": 5.188679245283019e-06, "loss": 98.9424, "step": 300 }, { "epoch": 3.77, "learning_rate": 7.0754716981132075e-06, "loss": 43.3696, "step": 400 }, { "epoch": 4.72, "learning_rate": 8.962264150943398e-06, "loss": 23.1622, "step": 500 }, { "epoch": 5.66, "learning_rate": 1.0849056603773586e-05, "loss": 16.4914, "step": 600 }, { "epoch": 6.6, "learning_rate": 1.2735849056603775e-05, "loss": 11.8071, "step": 700 }, { "epoch": 7.55, "learning_rate": 1.4622641509433963e-05, "loss": 8.6248, "step": 800 }, { "epoch": 8.49, "learning_rate": 1.650943396226415e-05, "loss": 6.542, "step": 900 }, { "epoch": 9.43, "learning_rate": 1.839622641509434e-05, "loss": 5.467, "step": 1000 }, { "epoch": 10.38, "learning_rate": 1.9999878001459436e-05, "loss": 4.8661, "step": 1100 }, { "epoch": 11.32, "learning_rate": 1.9992830039282252e-05, "loss": 4.5895, "step": 1200 }, { "epoch": 12.26, "learning_rate": 1.997494649494382e-05, "loss": 4.4634, "step": 1300 }, { "epoch": 13.21, "learning_rate": 1.994624676020936e-05, "loss": 4.4113, "step": 1400 }, { "epoch": 14.15, "learning_rate": 1.990676195522527e-05, "loss": 4.3831, "step": 1500 }, { "epoch": 15.09, "learning_rate": 1.9856534894774423e-05, "loss": 4.3747, "step": 1600 }, { "epoch": 16.04, "learning_rate": 1.9795620041850602e-05, "loss": 4.3661, "step": 1700 }, { "epoch": 16.98, "learning_rate": 1.972408344860231e-05, "loss": 4.3533, "step": 1800 }, { "epoch": 17.92, "learning_rate": 1.9642002684710065e-05, "loss": 4.3362, "step": 1900 }, { "epoch": 18.87, "learning_rate": 1.9549466753274843e-05, "loss": 4.3183, "step": 2000 }, { "epoch": 19.81, "learning_rate": 1.9446575994308842e-05, "loss": 4.3012, "step": 2100 }, { "epoch": 20.75, "learning_rate": 1.933344197593319e-05, "loss": 4.2824, "step": 2200 }, { "epoch": 21.7, "learning_rate": 1.921018737340071e-05, "loss": 4.2574, "step": 2300 }, { "epoch": 22.64, "learning_rate": 1.9076945836074716e-05, "loss": 4.2355, "step": 2400 }, { "epoch": 23.58, "learning_rate": 1.8933861842508258e-05, "loss": 4.2128, "step": 2500 }, { "epoch": 24.53, "learning_rate": 1.8781090543780876e-05, "loss": 4.1852, "step": 2600 }, { "epoch": 25.47, "learning_rate": 1.8618797595262748e-05, "loss": 4.153, "step": 2700 }, { "epoch": 26.42, "learning_rate": 1.8447158976988656e-05, "loss": 4.0818, "step": 2800 }, { "epoch": 27.36, "learning_rate": 1.8266360802836542e-05, "loss": 3.9215, "step": 2900 }, { "epoch": 28.3, "learning_rate": 1.8076599118717634e-05, "loss": 3.7038, "step": 3000 }, { "epoch": 29.25, "learning_rate": 1.7878079689996806e-05, "loss": 3.3915, "step": 3100 }, { "epoch": 30.19, "learning_rate": 1.7671017778373914e-05, "loss": 2.9503, "step": 3200 }, { "epoch": 31.13, "learning_rate": 1.745563790846781e-05, "loss": 2.5289, "step": 3300 }, { "epoch": 32.08, "learning_rate": 1.7232173624356307e-05, "loss": 2.1726, "step": 3400 }, { "epoch": 33.02, "learning_rate": 1.7000867236335987e-05, "loss": 1.8909, "step": 3500 }, { "epoch": 33.96, "learning_rate": 1.676196955817653e-05, "loss": 1.6745, "step": 3600 }, { "epoch": 34.91, "learning_rate": 1.651573963515436e-05, "loss": 1.5141, "step": 3700 }, { "epoch": 35.85, "learning_rate": 1.6262444463160627e-05, "loss": 1.3735, "step": 3800 }, { "epoch": 36.79, "learning_rate": 1.6002358699188035e-05, "loss": 1.2418, "step": 3900 }, { "epoch": 37.74, "learning_rate": 1.573576436351046e-05, "loss": 1.1395, "step": 4000 }, { "epoch": 38.68, "learning_rate": 1.5462950533878318e-05, "loss": 1.0523, "step": 4100 }, { "epoch": 39.62, "learning_rate": 1.5184213032061217e-05, "loss": 0.9775, "step": 4200 }, { "epoch": 40.57, "learning_rate": 1.490272451088505e-05, "loss": 0.9198, "step": 4300 }, { "epoch": 41.51, "learning_rate": 1.4613104080651812e-05, "loss": 0.8618, "step": 4400 }, { "epoch": 42.45, "learning_rate": 1.4321451438907927e-05, "loss": 0.8094, "step": 4500 }, { "epoch": 43.4, "learning_rate": 1.4022191400246923e-05, "loss": 0.7578, "step": 4600 }, { "epoch": 44.34, "learning_rate": 1.3718569955936632e-05, "loss": 0.7338, "step": 4700 }, { "epoch": 45.28, "learning_rate": 1.3410916333542502e-05, "loss": 0.7023, "step": 4800 }, { "epoch": 46.23, "learning_rate": 1.309956413286455e-05, "loss": 0.6598, "step": 4900 }, { "epoch": 47.17, "learning_rate": 1.2784850964203184e-05, "loss": 0.6316, "step": 5000 }, { "epoch": 48.11, "learning_rate": 1.2467118082276324e-05, "loss": 0.6309, "step": 5100 }, { "epoch": 49.06, "learning_rate": 1.2146710016184722e-05, "loss": 0.5876, "step": 5200 }, { "epoch": 50.0, "learning_rate": 1.1823974195826782e-05, "loss": 0.5738, "step": 5300 }, { "epoch": 50.94, "learning_rate": 1.1499260575167908e-05, "loss": 0.5557, "step": 5400 }, { "epoch": 51.89, "learning_rate": 1.1172921252772938e-05, "loss": 0.5642, "step": 5500 }, { "epoch": 52.83, "learning_rate": 1.0845310090013116e-05, "loss": 0.5474, "step": 5600 }, { "epoch": 53.77, "learning_rate": 1.0516782327361563e-05, "loss": 0.5307, "step": 5700 }, { "epoch": 54.72, "learning_rate": 1.0187694199193371e-05, "loss": 0.5044, "step": 5800 }, { "epoch": 55.66, "learning_rate": 9.858402547507968e-06, "loss": 0.4961, "step": 5900 }, { "epoch": 56.6, "learning_rate": 9.529264434992576e-06, "loss": 0.5012, "step": 6000 }, { "epoch": 57.55, "learning_rate": 9.203919337260903e-06, "loss": 0.4874, "step": 6100 }, { "epoch": 58.49, "learning_rate": 8.876148009288813e-06, "loss": 0.4643, "step": 6200 }, { "epoch": 59.43, "learning_rate": 8.549595314129907e-06, "loss": 0.4569, "step": 6300 }, { "epoch": 60.38, "learning_rate": 8.224615344523123e-06, "loss": 0.4487, "step": 6400 }, { "epoch": 61.32, "learning_rate": 7.901560487844973e-06, "loss": 0.4353, "step": 6500 }, { "epoch": 62.26, "learning_rate": 7.580781044003324e-06, "loss": 0.4271, "step": 6600 }, { "epoch": 63.21, "learning_rate": 7.262624845594721e-06, "loss": 0.4138, "step": 6700 }, { "epoch": 64.15, "learning_rate": 6.947436880737089e-06, "loss": 0.4108, "step": 6800 }, { "epoch": 65.09, "learning_rate": 6.635558918986797e-06, "loss": 0.4056, "step": 6900 }, { "epoch": 66.04, "learning_rate": 6.327329140745751e-06, "loss": 0.3992, "step": 7000 }, { "epoch": 66.98, "learning_rate": 6.023081770560307e-06, "loss": 0.3955, "step": 7100 }, { "epoch": 67.92, "learning_rate": 5.723146714709664e-06, "loss": 0.3944, "step": 7200 }, { "epoch": 68.87, "learning_rate": 5.427849203476738e-06, "loss": 0.3883, "step": 7300 }, { "epoch": 69.81, "learning_rate": 5.140387256937587e-06, "loss": 0.3825, "step": 7400 }, { "epoch": 70.75, "learning_rate": 4.855265798347217e-06, "loss": 0.3769, "step": 7500 }, { "epoch": 71.7, "learning_rate": 4.575722958618642e-06, "loss": 0.3704, "step": 7600 }, { "epoch": 72.64, "learning_rate": 4.302061856027213e-06, "loss": 0.3676, "step": 7700 }, { "epoch": 73.58, "learning_rate": 4.034579231070851e-06, "loss": 0.3682, "step": 7800 }, { "epoch": 74.53, "learning_rate": 3.7735651247037275e-06, "loss": 0.3609, "step": 7900 }, { "epoch": 75.47, "learning_rate": 3.5193025638344956e-06, "loss": 0.3605, "step": 8000 }, { "epoch": 76.42, "learning_rate": 3.272067254430137e-06, "loss": 0.3554, "step": 8100 }, { "epoch": 77.36, "learning_rate": 3.0321272825581327e-06, "loss": 0.3493, "step": 8200 }, { "epoch": 78.3, "learning_rate": 2.7997428236911684e-06, "loss": 0.347, "step": 8300 }, { "epoch": 79.25, "learning_rate": 2.5751658605896003e-06, "loss": 0.3516, "step": 8400 }, { "epoch": 80.19, "learning_rate": 2.3586399100675516e-06, "loss": 0.3489, "step": 8500 }, { "epoch": 81.13, "learning_rate": 2.1503997589389334e-06, "loss": 0.3416, "step": 8600 }, { "epoch": 82.08, "learning_rate": 1.9506712094297386e-06, "loss": 0.3398, "step": 8700 }, { "epoch": 83.02, "learning_rate": 1.7596708343326285e-06, "loss": 0.3412, "step": 8800 }, { "epoch": 83.96, "learning_rate": 1.5776057421693126e-06, "loss": 0.3394, "step": 8900 }, { "epoch": 84.91, "learning_rate": 1.4046733526154088e-06, "loss": 0.3409, "step": 9000 }, { "epoch": 85.85, "learning_rate": 1.2410611824312334e-06, "loss": 0.3364, "step": 9100 }, { "epoch": 86.79, "learning_rate": 1.0884402169237329e-06, "loss": 0.338, "step": 9200 }, { "epoch": 87.74, "learning_rate": 9.438929780459661e-07, "loss": 0.3391, "step": 9300 }, { "epoch": 88.68, "learning_rate": 8.091655991389668e-07, "loss": 0.3327, "step": 9400 }, { "epoch": 89.62, "learning_rate": 6.844041699068349e-07, "loss": 0.3319, "step": 9500 }, { "epoch": 90.57, "learning_rate": 5.708401570907373e-07, "loss": 0.3362, "step": 9600 }, { "epoch": 91.51, "learning_rate": 4.6630268912521757e-07, "loss": 0.3307, "step": 9700 }, { "epoch": 92.45, "learning_rate": 3.7210294932142566e-07, "loss": 0.3338, "step": 9800 }, { "epoch": 93.4, "learning_rate": 2.8834308181833304e-07, "loss": 0.3329, "step": 9900 }, { "epoch": 94.34, "learning_rate": 2.151139104288702e-07, "loss": 0.3297, "step": 10000 }, { "epoch": 95.28, "learning_rate": 1.5249484015643078e-07, "loss": 0.332, "step": 10100 }, { "epoch": 96.23, "learning_rate": 1.0055377109319475e-07, "loss": 0.3324, "step": 10200 }, { "epoch": 97.17, "learning_rate": 5.934702479363341e-08, "loss": 0.3319, "step": 10300 }, { "epoch": 98.11, "learning_rate": 2.8919283203038274e-08, "loss": 0.3301, "step": 10400 }, { "epoch": 99.06, "learning_rate": 9.30354020729496e-09, "loss": 0.333, "step": 10500 }, { "epoch": 100.0, "learning_rate": 5.210658564336335e-10, "loss": 0.3296, "step": 10600 }, { "epoch": 100.0, "step": 10600, "total_flos": 6.483430015500288e+16, "train_loss": 5.64003086198051, "train_runtime": 5748.6483, "train_samples_per_second": 58.866, "train_steps_per_second": 1.844 } ], "logging_steps": 100, "max_steps": 10600, "num_train_epochs": 100, "save_steps": 100, "total_flos": 6.483430015500288e+16, "trial_name": null, "trial_params": null }