{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 125600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4, "learning_rate": 2.988296178343949e-05, "loss": 7.6975, "step": 500 }, { "epoch": 0.8, "learning_rate": 2.9763535031847134e-05, "loss": 4.413, "step": 1000 }, { "epoch": 1.19, "learning_rate": 2.964410828025478e-05, "loss": 3.5707, "step": 1500 }, { "epoch": 1.59, "learning_rate": 2.952468152866242e-05, "loss": 3.4966, "step": 2000 }, { "epoch": 1.99, "learning_rate": 2.9405254777070064e-05, "loss": 3.7477, "step": 2500 }, { "epoch": 2.39, "learning_rate": 2.9285828025477707e-05, "loss": 3.4618, "step": 3000 }, { "epoch": 2.79, "learning_rate": 2.916640127388535e-05, "loss": 3.4774, "step": 3500 }, { "epoch": 3.18, "learning_rate": 2.9046974522292994e-05, "loss": 3.4214, "step": 4000 }, { "epoch": 3.58, "learning_rate": 2.8927786624203823e-05, "loss": 2.5773, "step": 4500 }, { "epoch": 3.98, "learning_rate": 2.8808359872611467e-05, "loss": 3.2549, "step": 5000 }, { "epoch": 4.38, "learning_rate": 2.868893312101911e-05, "loss": 2.4922, "step": 5500 }, { "epoch": 4.78, "learning_rate": 2.856950636942675e-05, "loss": 2.6262, "step": 6000 }, { "epoch": 5.18, "learning_rate": 2.845031847133758e-05, "loss": 2.5596, "step": 6500 }, { "epoch": 5.57, "learning_rate": 2.8331130573248408e-05, "loss": 2.7913, "step": 7000 }, { "epoch": 5.97, "learning_rate": 2.821170382165605e-05, "loss": 3.0112, "step": 7500 }, { "epoch": 6.37, "learning_rate": 2.8092277070063698e-05, "loss": 2.6159, "step": 8000 }, { "epoch": 6.77, "learning_rate": 2.7972850318471338e-05, "loss": 3.9984, "step": 8500 }, { "epoch": 7.17, "learning_rate": 2.785342356687898e-05, "loss": 2.6099, "step": 9000 }, { "epoch": 7.56, "learning_rate": 2.7733996815286625e-05, "loss": 2.5667, "step": 9500 }, { "epoch": 7.96, "learning_rate": 2.7614570063694268e-05, "loss": 2.5934, "step": 10000 }, { "epoch": 8.36, "learning_rate": 2.7495382165605094e-05, "loss": 2.7481, "step": 10500 }, { "epoch": 8.76, "learning_rate": 2.737595541401274e-05, "loss": 3.0303, "step": 11000 }, { "epoch": 9.16, "learning_rate": 2.7256528662420384e-05, "loss": 2.8187, "step": 11500 }, { "epoch": 9.55, "learning_rate": 2.7137101910828027e-05, "loss": 2.8765, "step": 12000 }, { "epoch": 9.95, "learning_rate": 2.7017914012738853e-05, "loss": 2.8891, "step": 12500 }, { "epoch": 10.35, "learning_rate": 2.6898487261146496e-05, "loss": 2.6341, "step": 13000 }, { "epoch": 10.75, "learning_rate": 2.677906050955414e-05, "loss": 2.9864, "step": 13500 }, { "epoch": 11.15, "learning_rate": 2.6659633757961786e-05, "loss": 2.3945, "step": 14000 }, { "epoch": 11.54, "learning_rate": 2.6540207006369426e-05, "loss": 2.4969, "step": 14500 }, { "epoch": 11.94, "learning_rate": 2.6421019108280255e-05, "loss": 3.4386, "step": 15000 }, { "epoch": 12.34, "learning_rate": 2.63015923566879e-05, "loss": 2.1529, "step": 15500 }, { "epoch": 12.74, "learning_rate": 2.6182165605095542e-05, "loss": 2.4111, "step": 16000 }, { "epoch": 13.14, "learning_rate": 2.6062738853503186e-05, "loss": 2.1625, "step": 16500 }, { "epoch": 13.54, "learning_rate": 2.594355095541401e-05, "loss": 2.221, "step": 17000 }, { "epoch": 13.93, "learning_rate": 2.5824124203821655e-05, "loss": 2.3075, "step": 17500 }, { "epoch": 14.33, "learning_rate": 2.57046974522293e-05, "loss": 2.0438, "step": 18000 }, { "epoch": 14.73, "learning_rate": 2.5585270700636945e-05, "loss": 2.2311, "step": 18500 }, { "epoch": 15.13, "learning_rate": 2.5465843949044585e-05, "loss": 2.3121, "step": 19000 }, { "epoch": 15.53, "learning_rate": 2.5346417197452228e-05, "loss": 1.8979, "step": 19500 }, { "epoch": 15.92, "learning_rate": 2.5226990445859875e-05, "loss": 2.0793, "step": 20000 }, { "epoch": 16.32, "learning_rate": 2.5107563694267518e-05, "loss": 2.4946, "step": 20500 }, { "epoch": 16.72, "learning_rate": 2.4988136942675158e-05, "loss": 2.6039, "step": 21000 }, { "epoch": 17.12, "learning_rate": 2.4868949044585987e-05, "loss": 2.3325, "step": 21500 }, { "epoch": 17.52, "learning_rate": 2.4749761146496816e-05, "loss": 2.5405, "step": 22000 }, { "epoch": 17.91, "learning_rate": 2.463033439490446e-05, "loss": 2.2666, "step": 22500 }, { "epoch": 18.31, "learning_rate": 2.4510907643312103e-05, "loss": 1.8855, "step": 23000 }, { "epoch": 18.71, "learning_rate": 2.439171974522293e-05, "loss": 2.5188, "step": 23500 }, { "epoch": 19.11, "learning_rate": 2.4272292993630572e-05, "loss": 2.0856, "step": 24000 }, { "epoch": 19.51, "learning_rate": 2.415286624203822e-05, "loss": 2.04, "step": 24500 }, { "epoch": 19.9, "learning_rate": 2.4033439490445862e-05, "loss": 2.0085, "step": 25000 }, { "epoch": 20.3, "learning_rate": 2.3914012738853502e-05, "loss": 2.2144, "step": 25500 }, { "epoch": 20.7, "learning_rate": 2.3794585987261145e-05, "loss": 2.1031, "step": 26000 }, { "epoch": 21.1, "learning_rate": 2.3675159235668792e-05, "loss": 2.1336, "step": 26500 }, { "epoch": 21.5, "learning_rate": 2.3555732484076436e-05, "loss": 2.3206, "step": 27000 }, { "epoch": 21.89, "learning_rate": 2.3436783439490447e-05, "loss": 2.0794, "step": 27500 }, { "epoch": 22.29, "learning_rate": 2.3317356687898087e-05, "loss": 2.0694, "step": 28000 }, { "epoch": 22.69, "learning_rate": 2.3197929936305734e-05, "loss": 2.0866, "step": 28500 }, { "epoch": 23.09, "learning_rate": 2.3078503184713377e-05, "loss": 2.2699, "step": 29000 }, { "epoch": 23.49, "learning_rate": 2.295907643312102e-05, "loss": 2.0621, "step": 29500 }, { "epoch": 23.89, "learning_rate": 2.283964968152866e-05, "loss": 2.1662, "step": 30000 }, { "epoch": 24.28, "learning_rate": 2.2720222929936307e-05, "loss": 1.9482, "step": 30500 }, { "epoch": 24.68, "learning_rate": 2.260079617834395e-05, "loss": 1.9606, "step": 31000 }, { "epoch": 25.08, "learning_rate": 2.2481369426751594e-05, "loss": 2.2075, "step": 31500 }, { "epoch": 25.48, "learning_rate": 2.2361942675159234e-05, "loss": 1.9507, "step": 32000 }, { "epoch": 25.88, "learning_rate": 2.224251592356688e-05, "loss": 1.99, "step": 32500 }, { "epoch": 26.27, "learning_rate": 2.2123089171974524e-05, "loss": 2.0022, "step": 33000 }, { "epoch": 26.67, "learning_rate": 2.2003662420382167e-05, "loss": 2.0491, "step": 33500 }, { "epoch": 27.07, "learning_rate": 2.1884235668789807e-05, "loss": 2.1034, "step": 34000 }, { "epoch": 27.47, "learning_rate": 2.1765047770700636e-05, "loss": 2.0613, "step": 34500 }, { "epoch": 27.87, "learning_rate": 2.164562101910828e-05, "loss": 2.0392, "step": 35000 }, { "epoch": 28.26, "learning_rate": 2.1526194267515926e-05, "loss": 1.9726, "step": 35500 }, { "epoch": 28.66, "learning_rate": 2.1406767515923566e-05, "loss": 1.6027, "step": 36000 }, { "epoch": 29.06, "learning_rate": 2.128734076433121e-05, "loss": 2.5043, "step": 36500 }, { "epoch": 29.46, "learning_rate": 2.1167914012738853e-05, "loss": 2.111, "step": 37000 }, { "epoch": 29.86, "learning_rate": 2.10484872611465e-05, "loss": 2.0965, "step": 37500 }, { "epoch": 30.25, "learning_rate": 2.092906050955414e-05, "loss": 1.9389, "step": 38000 }, { "epoch": 30.65, "learning_rate": 2.0809633757961783e-05, "loss": 2.1853, "step": 38500 }, { "epoch": 31.05, "learning_rate": 2.0690207006369427e-05, "loss": 1.946, "step": 39000 }, { "epoch": 31.45, "learning_rate": 2.0570780254777073e-05, "loss": 2.0071, "step": 39500 }, { "epoch": 31.85, "learning_rate": 2.0451831210191085e-05, "loss": 2.2209, "step": 40000 }, { "epoch": 32.25, "learning_rate": 2.0332404458598725e-05, "loss": 2.0641, "step": 40500 }, { "epoch": 32.64, "learning_rate": 2.021297770700637e-05, "loss": 1.9674, "step": 41000 }, { "epoch": 33.04, "learning_rate": 2.0093550955414015e-05, "loss": 2.0169, "step": 41500 }, { "epoch": 33.44, "learning_rate": 1.9974124203821658e-05, "loss": 2.0076, "step": 42000 }, { "epoch": 33.84, "learning_rate": 1.9854936305732484e-05, "loss": 2.2492, "step": 42500 }, { "epoch": 34.24, "learning_rate": 1.9735509554140127e-05, "loss": 1.7799, "step": 43000 }, { "epoch": 34.63, "learning_rate": 1.9616321656050956e-05, "loss": 2.1029, "step": 43500 }, { "epoch": 35.03, "learning_rate": 1.94968949044586e-05, "loss": 1.8489, "step": 44000 }, { "epoch": 35.43, "learning_rate": 1.937770700636943e-05, "loss": 1.6914, "step": 44500 }, { "epoch": 35.83, "learning_rate": 1.925828025477707e-05, "loss": 2.4834, "step": 45000 }, { "epoch": 36.23, "learning_rate": 1.9138853503184712e-05, "loss": 1.7828, "step": 45500 }, { "epoch": 36.62, "learning_rate": 1.901942675159236e-05, "loss": 1.7454, "step": 46000 }, { "epoch": 37.02, "learning_rate": 1.8900000000000002e-05, "loss": 2.2061, "step": 46500 }, { "epoch": 37.42, "learning_rate": 1.8780573248407642e-05, "loss": 1.9779, "step": 47000 }, { "epoch": 37.82, "learning_rate": 1.8661146496815285e-05, "loss": 1.9194, "step": 47500 }, { "epoch": 38.22, "learning_rate": 1.8541719745222932e-05, "loss": 1.9433, "step": 48000 }, { "epoch": 38.61, "learning_rate": 1.8422292993630575e-05, "loss": 1.9242, "step": 48500 }, { "epoch": 39.01, "learning_rate": 1.8302866242038215e-05, "loss": 1.9324, "step": 49000 }, { "epoch": 39.41, "learning_rate": 1.818343949044586e-05, "loss": 1.9326, "step": 49500 }, { "epoch": 39.81, "learning_rate": 1.8064012738853506e-05, "loss": 1.8975, "step": 50000 }, { "epoch": 40.21, "learning_rate": 1.794458598726115e-05, "loss": 2.0671, "step": 50500 }, { "epoch": 40.61, "learning_rate": 1.782515923566879e-05, "loss": 2.209, "step": 51000 }, { "epoch": 41.0, "learning_rate": 1.7705971337579618e-05, "loss": 1.9638, "step": 51500 }, { "epoch": 41.4, "learning_rate": 1.758654458598726e-05, "loss": 1.6372, "step": 52000 }, { "epoch": 41.8, "learning_rate": 1.7467117834394905e-05, "loss": 2.1012, "step": 52500 }, { "epoch": 42.2, "learning_rate": 1.7347691082802548e-05, "loss": 2.1207, "step": 53000 }, { "epoch": 42.6, "learning_rate": 1.722826433121019e-05, "loss": 1.8135, "step": 53500 }, { "epoch": 42.99, "learning_rate": 1.7108837579617835e-05, "loss": 2.1076, "step": 54000 }, { "epoch": 43.39, "learning_rate": 1.6989410828025478e-05, "loss": 1.8442, "step": 54500 }, { "epoch": 43.79, "learning_rate": 1.686998407643312e-05, "loss": 1.8598, "step": 55000 }, { "epoch": 44.19, "learning_rate": 1.6750557324840765e-05, "loss": 1.8612, "step": 55500 }, { "epoch": 44.59, "learning_rate": 1.6631369426751594e-05, "loss": 1.7965, "step": 56000 }, { "epoch": 44.98, "learning_rate": 1.6511942675159237e-05, "loss": 1.8743, "step": 56500 }, { "epoch": 45.38, "learning_rate": 1.6392515923566877e-05, "loss": 1.8581, "step": 57000 }, { "epoch": 45.78, "learning_rate": 1.627308917197452e-05, "loss": 1.8212, "step": 57500 }, { "epoch": 46.18, "learning_rate": 1.6153662420382167e-05, "loss": 1.7475, "step": 58000 }, { "epoch": 46.58, "learning_rate": 1.603423566878981e-05, "loss": 1.5722, "step": 58500 }, { "epoch": 46.97, "learning_rate": 1.591480891719745e-05, "loss": 1.7929, "step": 59000 }, { "epoch": 47.37, "learning_rate": 1.5795382165605094e-05, "loss": 1.7854, "step": 59500 }, { "epoch": 47.77, "learning_rate": 1.5676194267515923e-05, "loss": 1.6686, "step": 60000 }, { "epoch": 48.17, "learning_rate": 1.5556767515923566e-05, "loss": 1.8969, "step": 60500 }, { "epoch": 48.57, "learning_rate": 1.543734076433121e-05, "loss": 1.6989, "step": 61000 }, { "epoch": 48.96, "learning_rate": 1.5317914012738853e-05, "loss": 1.9204, "step": 61500 }, { "epoch": 49.36, "learning_rate": 1.5198726114649682e-05, "loss": 1.7297, "step": 62000 }, { "epoch": 49.76, "learning_rate": 1.5079299363057326e-05, "loss": 1.6225, "step": 62500 }, { "epoch": 50.16, "learning_rate": 1.4959872611464969e-05, "loss": 1.8492, "step": 63000 }, { "epoch": 50.56, "learning_rate": 1.4840445859872612e-05, "loss": 1.948, "step": 63500 }, { "epoch": 50.96, "learning_rate": 1.472125796178344e-05, "loss": 1.4926, "step": 64000 }, { "epoch": 51.35, "learning_rate": 1.4601831210191083e-05, "loss": 1.8296, "step": 64500 }, { "epoch": 51.75, "learning_rate": 1.4482404458598726e-05, "loss": 1.672, "step": 65000 }, { "epoch": 52.15, "learning_rate": 1.436297770700637e-05, "loss": 1.7793, "step": 65500 }, { "epoch": 52.55, "learning_rate": 1.4243789808917199e-05, "loss": 1.6569, "step": 66000 }, { "epoch": 52.95, "learning_rate": 1.412436305732484e-05, "loss": 2.2555, "step": 66500 }, { "epoch": 53.34, "learning_rate": 1.4004936305732486e-05, "loss": 1.7448, "step": 67000 }, { "epoch": 53.74, "learning_rate": 1.3885509554140127e-05, "loss": 1.8515, "step": 67500 }, { "epoch": 54.14, "learning_rate": 1.3766321656050956e-05, "loss": 1.5126, "step": 68000 }, { "epoch": 54.54, "learning_rate": 1.3646894904458598e-05, "loss": 1.8567, "step": 68500 }, { "epoch": 54.94, "learning_rate": 1.3527468152866243e-05, "loss": 1.8848, "step": 69000 }, { "epoch": 55.33, "learning_rate": 1.340828025477707e-05, "loss": 1.6216, "step": 69500 }, { "epoch": 55.73, "learning_rate": 1.3288853503184714e-05, "loss": 2.0117, "step": 70000 }, { "epoch": 56.13, "learning_rate": 1.3169426751592357e-05, "loss": 1.8113, "step": 70500 }, { "epoch": 56.53, "learning_rate": 1.305e-05, "loss": 1.7053, "step": 71000 }, { "epoch": 56.93, "learning_rate": 1.2930573248407644e-05, "loss": 1.7271, "step": 71500 }, { "epoch": 57.32, "learning_rate": 1.2811385350318471e-05, "loss": 1.6382, "step": 72000 }, { "epoch": 57.72, "learning_rate": 1.2691958598726116e-05, "loss": 1.5688, "step": 72500 }, { "epoch": 58.12, "learning_rate": 1.2572531847133758e-05, "loss": 1.6947, "step": 73000 }, { "epoch": 58.52, "learning_rate": 1.2453105095541403e-05, "loss": 1.5709, "step": 73500 }, { "epoch": 58.92, "learning_rate": 1.2333678343949045e-05, "loss": 1.8282, "step": 74000 }, { "epoch": 59.32, "learning_rate": 1.221425159235669e-05, "loss": 1.6447, "step": 74500 }, { "epoch": 59.71, "learning_rate": 1.2095063694267515e-05, "loss": 2.1066, "step": 75000 }, { "epoch": 60.11, "learning_rate": 1.197563694267516e-05, "loss": 1.7348, "step": 75500 }, { "epoch": 60.51, "learning_rate": 1.1856210191082802e-05, "loss": 1.5, "step": 76000 }, { "epoch": 60.91, "learning_rate": 1.1736783439490447e-05, "loss": 1.8146, "step": 76500 }, { "epoch": 61.31, "learning_rate": 1.1617356687898089e-05, "loss": 1.7271, "step": 77000 }, { "epoch": 61.7, "learning_rate": 1.1497929936305734e-05, "loss": 1.4879, "step": 77500 }, { "epoch": 62.1, "learning_rate": 1.1378503184713375e-05, "loss": 1.7706, "step": 78000 }, { "epoch": 62.5, "learning_rate": 1.125907643312102e-05, "loss": 1.7463, "step": 78500 }, { "epoch": 62.9, "learning_rate": 1.1139888535031846e-05, "loss": 1.4587, "step": 79000 }, { "epoch": 63.3, "learning_rate": 1.1020461783439491e-05, "loss": 1.754, "step": 79500 }, { "epoch": 63.69, "learning_rate": 1.0901035031847133e-05, "loss": 1.6684, "step": 80000 }, { "epoch": 64.09, "learning_rate": 1.0781847133757962e-05, "loss": 1.4427, "step": 80500 }, { "epoch": 64.49, "learning_rate": 1.0662420382165605e-05, "loss": 1.5719, "step": 81000 }, { "epoch": 64.89, "learning_rate": 1.0542993630573249e-05, "loss": 1.7179, "step": 81500 }, { "epoch": 65.29, "learning_rate": 1.0423566878980892e-05, "loss": 1.6181, "step": 82000 }, { "epoch": 65.68, "learning_rate": 1.0304140127388535e-05, "loss": 1.4319, "step": 82500 }, { "epoch": 66.08, "learning_rate": 1.0185191082802548e-05, "loss": 1.78, "step": 83000 }, { "epoch": 66.48, "learning_rate": 1.0065764331210192e-05, "loss": 1.5097, "step": 83500 }, { "epoch": 66.88, "learning_rate": 9.946337579617835e-06, "loss": 1.6796, "step": 84000 }, { "epoch": 67.28, "learning_rate": 9.826910828025479e-06, "loss": 1.7987, "step": 84500 }, { "epoch": 67.68, "learning_rate": 9.707484076433122e-06, "loss": 1.5529, "step": 85000 }, { "epoch": 68.07, "learning_rate": 9.588057324840764e-06, "loss": 1.7102, "step": 85500 }, { "epoch": 68.47, "learning_rate": 9.468630573248409e-06, "loss": 1.7089, "step": 86000 }, { "epoch": 68.87, "learning_rate": 9.34920382165605e-06, "loss": 1.5696, "step": 86500 }, { "epoch": 69.27, "learning_rate": 9.229777070063695e-06, "loss": 1.4092, "step": 87000 }, { "epoch": 69.67, "learning_rate": 9.110589171974523e-06, "loss": 1.7112, "step": 87500 }, { "epoch": 70.06, "learning_rate": 8.991162420382166e-06, "loss": 1.6769, "step": 88000 }, { "epoch": 70.46, "learning_rate": 8.87173566878981e-06, "loss": 1.7474, "step": 88500 }, { "epoch": 70.86, "learning_rate": 8.752547770700637e-06, "loss": 1.3906, "step": 89000 }, { "epoch": 71.26, "learning_rate": 8.63312101910828e-06, "loss": 1.6714, "step": 89500 }, { "epoch": 71.66, "learning_rate": 8.513694267515923e-06, "loss": 1.6018, "step": 90000 }, { "epoch": 72.05, "learning_rate": 8.394267515923567e-06, "loss": 1.4916, "step": 90500 }, { "epoch": 72.45, "learning_rate": 8.27484076433121e-06, "loss": 1.8035, "step": 91000 }, { "epoch": 72.85, "learning_rate": 8.155414012738854e-06, "loss": 1.3387, "step": 91500 }, { "epoch": 73.25, "learning_rate": 8.035987261146497e-06, "loss": 1.5553, "step": 92000 }, { "epoch": 73.65, "learning_rate": 7.91656050955414e-06, "loss": 1.5534, "step": 92500 }, { "epoch": 74.04, "learning_rate": 7.797133757961784e-06, "loss": 1.3926, "step": 93000 }, { "epoch": 74.44, "learning_rate": 7.677707006369427e-06, "loss": 1.5332, "step": 93500 }, { "epoch": 74.84, "learning_rate": 7.5582802547770704e-06, "loss": 1.5935, "step": 94000 }, { "epoch": 75.24, "learning_rate": 7.438853503184713e-06, "loss": 1.6671, "step": 94500 }, { "epoch": 75.64, "learning_rate": 7.319665605095542e-06, "loss": 1.662, "step": 95000 }, { "epoch": 76.04, "learning_rate": 7.200238853503185e-06, "loss": 1.6435, "step": 95500 }, { "epoch": 76.43, "learning_rate": 7.080812101910829e-06, "loss": 1.5282, "step": 96000 }, { "epoch": 76.83, "learning_rate": 6.961385350318472e-06, "loss": 1.6214, "step": 96500 }, { "epoch": 77.23, "learning_rate": 6.842197452229299e-06, "loss": 1.2999, "step": 97000 }, { "epoch": 77.63, "learning_rate": 6.722770700636943e-06, "loss": 1.4861, "step": 97500 }, { "epoch": 78.03, "learning_rate": 6.603343949044586e-06, "loss": 1.493, "step": 98000 }, { "epoch": 78.42, "learning_rate": 6.484156050955414e-06, "loss": 1.5782, "step": 98500 }, { "epoch": 78.82, "learning_rate": 6.364729299363058e-06, "loss": 1.6043, "step": 99000 }, { "epoch": 79.22, "learning_rate": 6.245302547770701e-06, "loss": 1.7383, "step": 99500 }, { "epoch": 79.62, "learning_rate": 6.1258757961783444e-06, "loss": 1.5453, "step": 100000 }, { "epoch": 80.02, "learning_rate": 6.006449044585987e-06, "loss": 1.6063, "step": 100500 }, { "epoch": 80.41, "learning_rate": 5.88702229299363e-06, "loss": 1.5471, "step": 101000 }, { "epoch": 80.81, "learning_rate": 5.767595541401274e-06, "loss": 1.6526, "step": 101500 }, { "epoch": 81.21, "learning_rate": 5.648168789808917e-06, "loss": 1.4817, "step": 102000 }, { "epoch": 81.61, "learning_rate": 5.528980891719745e-06, "loss": 1.3771, "step": 102500 }, { "epoch": 82.01, "learning_rate": 5.409554140127389e-06, "loss": 1.5052, "step": 103000 }, { "epoch": 82.4, "learning_rate": 5.290127388535032e-06, "loss": 1.7024, "step": 103500 }, { "epoch": 82.8, "learning_rate": 5.170700636942675e-06, "loss": 1.6985, "step": 104000 }, { "epoch": 83.2, "learning_rate": 5.051512738853503e-06, "loss": 1.5487, "step": 104500 }, { "epoch": 83.6, "learning_rate": 4.932085987261146e-06, "loss": 1.3429, "step": 105000 }, { "epoch": 84.0, "learning_rate": 4.812659235668789e-06, "loss": 1.6937, "step": 105500 }, { "epoch": 84.39, "learning_rate": 4.6934713375796184e-06, "loss": 1.5639, "step": 106000 }, { "epoch": 84.79, "learning_rate": 4.574044585987262e-06, "loss": 1.5466, "step": 106500 }, { "epoch": 85.19, "learning_rate": 4.454617834394905e-06, "loss": 1.4726, "step": 107000 }, { "epoch": 85.59, "learning_rate": 4.3351910828025485e-06, "loss": 1.5633, "step": 107500 }, { "epoch": 85.99, "learning_rate": 4.215764331210192e-06, "loss": 1.5687, "step": 108000 }, { "epoch": 86.39, "learning_rate": 4.096337579617834e-06, "loss": 1.2633, "step": 108500 }, { "epoch": 86.78, "learning_rate": 3.976910828025478e-06, "loss": 1.5035, "step": 109000 }, { "epoch": 87.18, "learning_rate": 3.857484076433121e-06, "loss": 1.6182, "step": 109500 }, { "epoch": 87.58, "learning_rate": 3.7380573248407645e-06, "loss": 1.6295, "step": 110000 }, { "epoch": 87.98, "learning_rate": 3.618630573248408e-06, "loss": 1.5837, "step": 110500 }, { "epoch": 88.38, "learning_rate": 3.4992038216560512e-06, "loss": 1.3664, "step": 111000 }, { "epoch": 88.77, "learning_rate": 3.3797770700636946e-06, "loss": 1.6751, "step": 111500 }, { "epoch": 89.17, "learning_rate": 3.2605891719745224e-06, "loss": 1.5701, "step": 112000 }, { "epoch": 89.57, "learning_rate": 3.1411624203821653e-06, "loss": 1.4553, "step": 112500 }, { "epoch": 89.97, "learning_rate": 3.0217356687898087e-06, "loss": 1.4953, "step": 113000 }, { "epoch": 90.37, "learning_rate": 2.902308917197452e-06, "loss": 1.6721, "step": 113500 }, { "epoch": 90.76, "learning_rate": 2.7831210191082802e-06, "loss": 1.4171, "step": 114000 }, { "epoch": 91.16, "learning_rate": 2.6636942675159236e-06, "loss": 1.8271, "step": 114500 }, { "epoch": 91.56, "learning_rate": 2.544267515923567e-06, "loss": 1.6508, "step": 115000 }, { "epoch": 91.96, "learning_rate": 2.4248407643312103e-06, "loss": 1.5092, "step": 115500 }, { "epoch": 92.36, "learning_rate": 2.305652866242038e-06, "loss": 1.5075, "step": 116000 }, { "epoch": 92.75, "learning_rate": 2.1864649681528663e-06, "loss": 1.5702, "step": 116500 }, { "epoch": 93.15, "learning_rate": 2.0670382165605097e-06, "loss": 1.5646, "step": 117000 }, { "epoch": 93.55, "learning_rate": 1.947611464968153e-06, "loss": 1.4956, "step": 117500 }, { "epoch": 93.95, "learning_rate": 1.8281847133757964e-06, "loss": 1.5299, "step": 118000 }, { "epoch": 94.35, "learning_rate": 1.7087579617834395e-06, "loss": 1.5027, "step": 118500 }, { "epoch": 94.75, "learning_rate": 1.5893312101910827e-06, "loss": 1.5207, "step": 119000 }, { "epoch": 95.14, "learning_rate": 1.469904458598726e-06, "loss": 1.3726, "step": 119500 }, { "epoch": 95.54, "learning_rate": 1.3504777070063694e-06, "loss": 1.3402, "step": 120000 }, { "epoch": 95.94, "learning_rate": 1.2312898089171974e-06, "loss": 1.7752, "step": 120500 }, { "epoch": 96.34, "learning_rate": 1.1121019108280256e-06, "loss": 1.6895, "step": 121000 }, { "epoch": 96.74, "learning_rate": 9.926751592356687e-07, "loss": 1.633, "step": 121500 }, { "epoch": 97.13, "learning_rate": 8.732484076433121e-07, "loss": 1.6033, "step": 122000 }, { "epoch": 97.53, "learning_rate": 7.538216560509554e-07, "loss": 1.3726, "step": 122500 }, { "epoch": 97.93, "learning_rate": 6.343949044585987e-07, "loss": 1.3725, "step": 123000 }, { "epoch": 98.33, "learning_rate": 5.149681528662421e-07, "loss": 1.5944, "step": 123500 }, { "epoch": 98.73, "learning_rate": 3.9554140127388536e-07, "loss": 1.466, "step": 124000 }, { "epoch": 99.12, "learning_rate": 2.7611464968152867e-07, "loss": 1.421, "step": 124500 }, { "epoch": 99.52, "learning_rate": 1.569267515923567e-07, "loss": 1.6345, "step": 125000 }, { "epoch": 99.92, "learning_rate": 3.7500000000000005e-08, "loss": 1.5021, "step": 125500 }, { "epoch": 100.0, "step": 125600, "total_flos": 5.249087478816768e+17, "train_loss": 1.9559951608499904, "train_runtime": 50817.2182, "train_samples_per_second": 14.83, "train_steps_per_second": 2.472 } ], "max_steps": 125600, "num_train_epochs": 100, "total_flos": 5.249087478816768e+17, "trial_name": null, "trial_params": null }