{ "best_metric": 2.7749314308166504, "best_model_checkpoint": "output/drake/checkpoint-351", "epoch": 1.0, "global_step": 351, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.00013713131733182566, "loss": 2.8146, "step": 5 }, { "epoch": 0.03, "learning_rate": 0.00013692540685817455, "loss": 2.4938, "step": 10 }, { "epoch": 0.04, "learning_rate": 0.00013658268089626935, "loss": 2.6848, "step": 15 }, { "epoch": 0.06, "learning_rate": 0.000136103825724055, "loss": 2.4887, "step": 20 }, { "epoch": 0.07, "learning_rate": 0.0001354898002059893, "loss": 2.586, "step": 25 }, { "epoch": 0.09, "learning_rate": 0.00013474183387300238, "loss": 2.4906, "step": 30 }, { "epoch": 0.1, "learning_rate": 0.00013386142446047196, "loss": 2.4834, "step": 35 }, { "epoch": 0.11, "learning_rate": 0.00013285033490914263, "loss": 2.6758, "step": 40 }, { "epoch": 0.13, "learning_rate": 0.00013171058983499532, "loss": 2.4307, "step": 45 }, { "epoch": 0.14, "learning_rate": 0.00013044447147513605, "loss": 2.8348, "step": 50 }, { "epoch": 0.16, "learning_rate": 0.00012905451511782055, "loss": 2.7339, "step": 55 }, { "epoch": 0.17, "learning_rate": 0.0001275435040257681, "loss": 2.4359, "step": 60 }, { "epoch": 0.19, "learning_rate": 0.00012591446386292745, "loss": 2.7405, "step": 65 }, { "epoch": 0.2, "learning_rate": 0.00012417065663585696, "loss": 2.5262, "step": 70 }, { "epoch": 0.21, "learning_rate": 0.00012231557416184885, "loss": 2.5211, "step": 75 }, { "epoch": 0.23, "learning_rate": 0.0001203529310768803, "loss": 2.4883, "step": 80 }, { "epoch": 0.24, "learning_rate": 0.00011828665739738674, "loss": 2.6898, "step": 85 }, { "epoch": 0.26, "learning_rate": 0.00011612089065075855, "loss": 2.5956, "step": 90 }, { "epoch": 0.27, "learning_rate": 0.0001138599675903136, "loss": 2.4562, "step": 95 }, { "epoch": 0.28, "learning_rate": 0.00011150841551133841, "loss": 2.457, "step": 100 }, { "epoch": 0.3, "learning_rate": 0.00010907094318558638, "loss": 2.6934, "step": 105 }, { "epoch": 0.31, "learning_rate": 0.00010655243143238444, "loss": 2.6493, "step": 110 }, { "epoch": 0.33, "learning_rate": 0.00010395792334523303, "loss": 2.5976, "step": 115 }, { "epoch": 0.34, "learning_rate": 0.00010129261419346177, "loss": 2.7521, "step": 120 }, { "epoch": 0.36, "learning_rate": 9.856184101917151e-05, "loss": 2.5228, "step": 125 }, { "epoch": 0.37, "learning_rate": 9.577107195028614e-05, "loss": 2.555, "step": 130 }, { "epoch": 0.38, "learning_rate": 9.292589525111789e-05, "loss": 2.6695, "step": 135 }, { "epoch": 0.4, "learning_rate": 9.003200813237038e-05, "loss": 2.5893, "step": 140 }, { "epoch": 0.41, "learning_rate": 8.709520534298517e-05, "loss": 2.4968, "step": 145 }, { "epoch": 0.43, "learning_rate": 8.4121367566679e-05, "loss": 2.6097, "step": 150 }, { "epoch": 0.44, "learning_rate": 8.111644964640216e-05, "loss": 2.4365, "step": 155 }, { "epoch": 0.46, "learning_rate": 7.808646866030093e-05, "loss": 2.6697, "step": 160 }, { "epoch": 0.47, "learning_rate": 7.503749187305804e-05, "loss": 2.5317, "step": 165 }, { "epoch": 0.48, "learning_rate": 7.197562458674299e-05, "loss": 2.6247, "step": 170 }, { "epoch": 0.5, "learning_rate": 6.890699791548972e-05, "loss": 2.4144, "step": 175 }, { "epoch": 0.51, "learning_rate": 6.583775650849418e-05, "loss": 2.6802, "step": 180 }, { "epoch": 0.53, "learning_rate": 6.277404624590518e-05, "loss": 2.7703, "step": 185 }, { "epoch": 0.54, "learning_rate": 5.9722001932251785e-05, "loss": 2.8038, "step": 190 }, { "epoch": 0.56, "learning_rate": 5.6687735012048764e-05, "loss": 2.5009, "step": 195 }, { "epoch": 0.57, "learning_rate": 5.3677321332176824e-05, "loss": 2.7397, "step": 200 }, { "epoch": 0.58, "learning_rate": 5.069678897554777e-05, "loss": 2.2651, "step": 205 }, { "epoch": 0.6, "learning_rate": 4.775210619040673e-05, "loss": 2.6494, "step": 210 }, { "epoch": 0.61, "learning_rate": 4.4849169439453994e-05, "loss": 2.4319, "step": 215 }, { "epoch": 0.63, "learning_rate": 4.199379159270695e-05, "loss": 2.5083, "step": 220 }, { "epoch": 0.64, "learning_rate": 3.919169028775044e-05, "loss": 2.4238, "step": 225 }, { "epoch": 0.66, "learning_rate": 3.6448476480681456e-05, "loss": 2.7361, "step": 230 }, { "epoch": 0.67, "learning_rate": 3.3769643210672815e-05, "loss": 2.3582, "step": 235 }, { "epoch": 0.68, "learning_rate": 3.1160554600657045e-05, "loss": 2.5586, "step": 240 }, { "epoch": 0.7, "learning_rate": 2.862643511615132e-05, "loss": 2.6506, "step": 245 }, { "epoch": 0.71, "learning_rate": 2.6172359103734907e-05, "loss": 2.4642, "step": 250 }, { "epoch": 0.73, "learning_rate": 2.3803240630126026e-05, "loss": 2.4471, "step": 255 }, { "epoch": 0.74, "learning_rate": 2.152382364220499e-05, "loss": 2.3953, "step": 260 }, { "epoch": 0.75, "learning_rate": 1.9338672467686093e-05, "loss": 2.5485, "step": 265 }, { "epoch": 0.77, "learning_rate": 1.725216267546251e-05, "loss": 2.483, "step": 270 }, { "epoch": 0.78, "learning_rate": 1.5268472313922077e-05, "loss": 2.8104, "step": 275 }, { "epoch": 0.8, "learning_rate": 1.3391573544780878e-05, "loss": 2.4582, "step": 280 }, { "epoch": 0.81, "learning_rate": 1.1625224689186508e-05, "loss": 2.698, "step": 285 }, { "epoch": 0.83, "learning_rate": 9.97296270201696e-06, "loss": 2.3153, "step": 290 }, { "epoch": 0.84, "learning_rate": 8.438096089447732e-06, "loss": 2.4751, "step": 295 }, { "epoch": 0.85, "learning_rate": 7.023698283963675e-06, "loss": 2.7106, "step": 300 }, { "epoch": 0.87, "learning_rate": 5.73260149008795e-06, "loss": 2.6915, "step": 305 }, { "epoch": 0.88, "learning_rate": 4.567391013146352e-06, "loss": 2.8011, "step": 310 }, { "epoch": 0.9, "learning_rate": 3.530400082425805e-06, "loss": 2.65, "step": 315 }, { "epoch": 0.91, "learning_rate": 2.623705179092382e-06, "loss": 2.4158, "step": 320 }, { "epoch": 0.93, "learning_rate": 1.8491218782241098e-06, "loss": 2.3891, "step": 325 }, { "epoch": 0.94, "learning_rate": 1.2082012132854124e-06, "loss": 2.5672, "step": 330 }, { "epoch": 0.95, "learning_rate": 7.022265703216476e-07, "loss": 2.5956, "step": 335 }, { "epoch": 0.97, "learning_rate": 3.322111180939607e-07, "loss": 2.4543, "step": 340 }, { "epoch": 0.98, "learning_rate": 9.889577929986752e-08, "loss": 2.6378, "step": 345 }, { "epoch": 1.0, "learning_rate": 2.7477469421299584e-09, "loss": 2.6626, "step": 350 }, { "epoch": 1.0, "eval_loss": 2.7749314308166504, "eval_runtime": 12.2562, "eval_samples_per_second": 45.365, "eval_steps_per_second": 5.711, "step": 351 } ], "max_steps": 3510, "num_train_epochs": 10, "total_flos": 366592720896000.0, "trial_name": null, "trial_params": null }