{ "best_metric": 1.3523887395858765, "best_model_checkpoint": "output/platina/checkpoint-435", "epoch": 5.0, "global_step": 435, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 1.1151037062646087e-06, "loss": 1.8191, "step": 5 }, { "epoch": 0.11, "learning_rate": 4.424162455503776e-06, "loss": 1.778, "step": 10 }, { "epoch": 0.17, "learning_rate": 9.819597714903368e-06, "loss": 1.7677, "step": 15 }, { "epoch": 0.23, "learning_rate": 1.7126002200070792e-05, "loss": 1.8796, "step": 20 }, { "epoch": 0.29, "learning_rate": 2.610584242060141e-05, "loss": 1.8322, "step": 25 }, { "epoch": 0.34, "learning_rate": 3.646718096799441e-05, "loss": 1.7206, "step": 30 }, { "epoch": 0.4, "learning_rate": 4.787316749192729e-05, "loss": 1.7349, "step": 35 }, { "epoch": 0.46, "learning_rate": 5.995298981028814e-05, "loss": 1.7768, "step": 40 }, { "epoch": 0.52, "learning_rate": 7.231392912895959e-05, "loss": 1.7442, "step": 45 }, { "epoch": 0.57, "learning_rate": 8.455412745239765e-05, "loss": 1.7635, "step": 50 }, { "epoch": 0.63, "learning_rate": 9.627565211250085e-05, "loss": 1.7607, "step": 55 }, { "epoch": 0.69, "learning_rate": 0.00010709743268385943, "loss": 1.8494, "step": 60 }, { "epoch": 0.75, "learning_rate": 0.0001166676497022313, "loss": 1.7513, "step": 65 }, { "epoch": 0.8, "learning_rate": 0.00012467517242513996, "loss": 1.6916, "step": 70 }, { "epoch": 0.86, "learning_rate": 0.00013085967378942764, "loss": 1.8089, "step": 75 }, { "epoch": 0.92, "learning_rate": 0.00013502009372533583, "loss": 1.7891, "step": 80 }, { "epoch": 0.98, "learning_rate": 0.00013702117568213136, "loss": 1.8074, "step": 85 }, { "epoch": 1.0, "eval_loss": 1.717990517616272, "eval_runtime": 5.9537, "eval_samples_per_second": 22.003, "eval_steps_per_second": 2.855, "step": 87 }, { "epoch": 1.03, "learning_rate": 0.00013679786386078908, "loss": 1.6816, "step": 90 }, { "epoch": 1.09, "learning_rate": 0.00013435741819832448, "loss": 1.6466, "step": 95 }, { "epoch": 1.15, "learning_rate": 0.00012977917834497524, "loss": 1.7089, "step": 100 }, { "epoch": 1.21, "learning_rate": 0.00012321198430740717, "loss": 1.6398, "step": 105 }, { "epoch": 1.26, "learning_rate": 0.00011486933761363929, "loss": 1.5449, "step": 110 }, { "epoch": 1.32, "learning_rate": 0.00010502246031172626, "loss": 1.6906, "step": 115 }, { "epoch": 1.38, "learning_rate": 9.399147745631814e-05, "loss": 1.5677, "step": 120 }, { "epoch": 1.44, "learning_rate": 8.213500974321104e-05, "loss": 1.6043, "step": 125 }, { "epoch": 1.49, "learning_rate": 6.983851463858842e-05, "loss": 1.658, "step": 130 }, { "epoch": 1.55, "learning_rate": 5.750175503648031e-05, "loss": 1.6382, "step": 135 }, { "epoch": 1.61, "learning_rate": 4.5525802842298056e-05, "loss": 1.5709, "step": 140 }, { "epoch": 1.67, "learning_rate": 3.4300000000000054e-05, "loss": 1.6331, "step": 145 }, { "epoch": 1.72, "learning_rate": 2.418930086396666e-05, "loss": 1.5709, "step": 150 }, { "epoch": 1.78, "learning_rate": 1.552240741902345e-05, "loss": 1.5756, "step": 155 }, { "epoch": 1.84, "learning_rate": 8.581083076314943e-06, "loss": 1.6289, "step": 160 }, { "epoch": 1.9, "learning_rate": 3.5909924568597513e-06, "loss": 1.5374, "step": 165 }, { "epoch": 1.95, "learning_rate": 7.143649642832784e-07, "loss": 1.5678, "step": 170 }, { "epoch": 2.0, "eval_loss": 1.5926672220230103, "eval_runtime": 5.7532, "eval_samples_per_second": 22.77, "eval_steps_per_second": 2.955, "step": 174 }, { "epoch": 2.01, "learning_rate": 4.4720656268161284e-08, "loss": 1.6048, "step": 175 }, { "epoch": 2.07, "learning_rate": 1.603829878288063e-06, "loss": 1.4474, "step": 180 }, { "epoch": 2.13, "learning_rate": 5.341005502881008e-06, "loss": 1.5889, "step": 185 }, { "epoch": 2.18, "learning_rate": 1.1134750783984455e-05, "loss": 1.4795, "step": 190 }, { "epoch": 2.24, "learning_rate": 1.87967092540732e-05, "loss": 1.5209, "step": 195 }, { "epoch": 2.3, "learning_rate": 2.8077788251711776e-05, "loss": 1.6414, "step": 200 }, { "epoch": 2.36, "learning_rate": 3.8676257001728066e-05, "loss": 1.5243, "step": 205 }, { "epoch": 2.41, "learning_rate": 5.0247555976895465e-05, "loss": 1.5045, "step": 210 }, { "epoch": 2.47, "learning_rate": 6.241549863571236e-05, "loss": 1.4689, "step": 215 }, { "epoch": 2.53, "learning_rate": 7.47845013642876e-05, "loss": 1.5789, "step": 220 }, { "epoch": 2.59, "learning_rate": 8.695244402310448e-05, "loss": 1.5342, "step": 225 }, { "epoch": 2.64, "learning_rate": 9.852374299827188e-05, "loss": 1.4686, "step": 230 }, { "epoch": 2.7, "learning_rate": 0.00010912221174828818, "loss": 1.5636, "step": 235 }, { "epoch": 2.76, "learning_rate": 0.00011840329074592676, "loss": 1.5472, "step": 240 }, { "epoch": 2.82, "learning_rate": 0.00012606524921601552, "loss": 1.5416, "step": 245 }, { "epoch": 2.87, "learning_rate": 0.00013185899449711896, "loss": 1.5624, "step": 250 }, { "epoch": 2.93, "learning_rate": 0.00013559617012171192, "loss": 1.5904, "step": 255 }, { "epoch": 2.99, "learning_rate": 0.00013715527934373185, "loss": 1.5604, "step": 260 }, { "epoch": 3.0, "eval_loss": 1.5875556468963623, "eval_runtime": 5.7815, "eval_samples_per_second": 22.659, "eval_steps_per_second": 2.94, "step": 261 }, { "epoch": 3.05, "learning_rate": 0.0001364856350357167, "loss": 1.5918, "step": 265 }, { "epoch": 3.1, "learning_rate": 0.0001336090075431402, "loss": 1.4197, "step": 270 }, { "epoch": 3.16, "learning_rate": 0.00012861891692368514, "loss": 1.5324, "step": 275 }, { "epoch": 3.22, "learning_rate": 0.00012167759258097666, "loss": 1.4255, "step": 280 }, { "epoch": 3.28, "learning_rate": 0.00011301069913603348, "loss": 1.5272, "step": 285 }, { "epoch": 3.33, "learning_rate": 0.00010290000000000009, "loss": 1.52, "step": 290 }, { "epoch": 3.39, "learning_rate": 9.167419715770211e-05, "loss": 1.4541, "step": 295 }, { "epoch": 3.45, "learning_rate": 7.969824496351975e-05, "loss": 1.4829, "step": 300 }, { "epoch": 3.51, "learning_rate": 6.736148536141163e-05, "loss": 1.4054, "step": 305 }, { "epoch": 3.56, "learning_rate": 5.506499025678901e-05, "loss": 1.4169, "step": 310 }, { "epoch": 3.62, "learning_rate": 4.320852254368191e-05, "loss": 1.4294, "step": 315 }, { "epoch": 3.68, "learning_rate": 3.217753968827379e-05, "loss": 1.4879, "step": 320 }, { "epoch": 3.74, "learning_rate": 2.2330662386360752e-05, "loss": 1.4319, "step": 325 }, { "epoch": 3.79, "learning_rate": 1.3988015692592839e-05, "loss": 1.4294, "step": 330 }, { "epoch": 3.85, "learning_rate": 7.420821655024771e-06, "loss": 1.4842, "step": 335 }, { "epoch": 3.91, "learning_rate": 2.8425818016755186e-06, "loss": 1.3857, "step": 340 }, { "epoch": 3.97, "learning_rate": 4.02136139210924e-07, "loss": 1.3751, "step": 345 }, { "epoch": 4.0, "eval_loss": 1.5137468576431274, "eval_runtime": 5.7841, "eval_samples_per_second": 22.648, "eval_steps_per_second": 2.939, "step": 348 }, { "epoch": 4.02, "learning_rate": 1.788243178686371e-07, "loss": 1.5412, "step": 350 }, { "epoch": 4.08, "learning_rate": 2.1799062746641418e-06, "loss": 1.4467, "step": 355 }, { "epoch": 4.14, "learning_rate": 6.340326210572388e-06, "loss": 1.5015, "step": 360 }, { "epoch": 4.2, "learning_rate": 1.2524827574860016e-05, "loss": 1.4381, "step": 365 }, { "epoch": 4.25, "learning_rate": 2.053235029776879e-05, "loss": 1.4247, "step": 370 }, { "epoch": 4.31, "learning_rate": 3.0102567316140582e-05, "loss": 1.414, "step": 375 }, { "epoch": 4.37, "learning_rate": 4.092434788749928e-05, "loss": 1.3577, "step": 380 }, { "epoch": 4.43, "learning_rate": 5.264587254760211e-05, "loss": 1.4768, "step": 385 }, { "epoch": 4.48, "learning_rate": 6.48860708710403e-05, "loss": 1.4088, "step": 390 }, { "epoch": 4.54, "learning_rate": 7.724701018971169e-05, "loss": 1.4472, "step": 395 }, { "epoch": 4.6, "learning_rate": 8.932683250807266e-05, "loss": 1.415, "step": 400 }, { "epoch": 4.66, "learning_rate": 0.00010073281903200549, "loss": 1.4059, "step": 405 }, { "epoch": 4.71, "learning_rate": 0.00011109415757939859, "loss": 1.4512, "step": 410 }, { "epoch": 4.77, "learning_rate": 0.00012007399779992913, "loss": 1.5409, "step": 415 }, { "epoch": 4.83, "learning_rate": 0.00012738040228509664, "loss": 1.3528, "step": 420 }, { "epoch": 4.89, "learning_rate": 0.0001327758375444962, "loss": 1.5104, "step": 425 }, { "epoch": 4.94, "learning_rate": 0.0001360848962937354, "loss": 1.4315, "step": 430 }, { "epoch": 5.0, "learning_rate": 0.0001372, "loss": 1.4081, "step": 435 }, { "epoch": 5.0, "eval_loss": 1.3523887395858765, "eval_runtime": 6.0439, "eval_samples_per_second": 22.006, "eval_steps_per_second": 2.813, "step": 435 } ], "max_steps": 435, "num_train_epochs": 5, "total_flos": 452296507392000.0, "trial_name": null, "trial_params": null }