{ "best_metric": null, "best_model_checkpoint": null, "epoch": 215.3846153846154, "eval_steps": 500, "global_step": 700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.08, "learning_rate": 0.0002, "loss": 1.1505, "step": 10 }, { "epoch": 6.15, "learning_rate": 0.0002, "loss": 0.753, "step": 20 }, { "epoch": 9.23, "learning_rate": 0.0002, "loss": 0.568, "step": 30 }, { "epoch": 12.31, "learning_rate": 0.0002, "loss": 0.3911, "step": 40 }, { "epoch": 15.38, "learning_rate": 0.0002, "loss": 0.2373, "step": 50 }, { "epoch": 18.46, "learning_rate": 0.0002, "loss": 0.1112, "step": 60 }, { "epoch": 21.54, "learning_rate": 0.0002, "loss": 0.0842, "step": 70 }, { "epoch": 24.62, "learning_rate": 0.0002, "loss": 0.0736, "step": 80 }, { "epoch": 27.69, "learning_rate": 0.0002, "loss": 0.0662, "step": 90 }, { "epoch": 30.77, "learning_rate": 0.0002, "loss": 0.0676, "step": 100 }, { "epoch": 33.85, "learning_rate": 0.0002, "loss": 0.0632, "step": 110 }, { "epoch": 36.92, "learning_rate": 0.0002, "loss": 0.0639, "step": 120 }, { "epoch": 40.0, "learning_rate": 0.0002, "loss": 0.0605, "step": 130 }, { "epoch": 43.08, "learning_rate": 0.0002, "loss": 0.0605, "step": 140 }, { "epoch": 46.15, "learning_rate": 0.0002, "loss": 0.0608, "step": 150 }, { "epoch": 49.23, "learning_rate": 0.0002, "loss": 0.0606, "step": 160 }, { "epoch": 52.31, "learning_rate": 0.0002, "loss": 0.0614, "step": 170 }, { "epoch": 55.38, "learning_rate": 0.0002, "loss": 0.0602, "step": 180 }, { "epoch": 58.46, "learning_rate": 0.0002, "loss": 0.0603, "step": 190 }, { "epoch": 61.54, "learning_rate": 0.0002, "loss": 0.06, "step": 200 }, { "epoch": 64.62, "learning_rate": 0.0002, "loss": 0.059, "step": 210 }, { "epoch": 67.69, "learning_rate": 0.0002, "loss": 0.061, "step": 220 }, { "epoch": 70.77, "learning_rate": 0.0002, "loss": 0.0612, "step": 230 }, { "epoch": 73.85, "learning_rate": 0.0002, "loss": 0.0624, "step": 240 }, { "epoch": 76.92, "learning_rate": 0.0002, "loss": 0.0595, "step": 250 }, { "epoch": 80.0, "learning_rate": 0.0002, "loss": 0.0601, "step": 260 }, { "epoch": 83.08, "learning_rate": 0.0002, "loss": 0.0593, "step": 270 }, { "epoch": 86.15, "learning_rate": 0.0002, "loss": 0.0622, "step": 280 }, { "epoch": 89.23, "learning_rate": 0.0002, "loss": 0.0607, "step": 290 }, { "epoch": 92.31, "learning_rate": 0.0002, "loss": 0.0592, "step": 300 }, { "epoch": 95.38, "learning_rate": 0.0002, "loss": 0.0592, "step": 310 }, { "epoch": 98.46, "learning_rate": 0.0002, "loss": 0.0597, "step": 320 }, { "epoch": 101.54, "learning_rate": 0.0002, "loss": 0.0599, "step": 330 }, { "epoch": 104.62, "learning_rate": 0.0002, "loss": 0.06, "step": 340 }, { "epoch": 107.69, "learning_rate": 0.0002, "loss": 0.0591, "step": 350 }, { "epoch": 110.77, "learning_rate": 0.0002, "loss": 0.0593, "step": 360 }, { "epoch": 113.85, "learning_rate": 0.0002, "loss": 0.0594, "step": 370 }, { "epoch": 116.92, "learning_rate": 0.0002, "loss": 0.0608, "step": 380 }, { "epoch": 120.0, "learning_rate": 0.0002, "loss": 0.0597, "step": 390 }, { "epoch": 123.08, "learning_rate": 0.0002, "loss": 0.06, "step": 400 }, { "epoch": 126.15, "learning_rate": 0.0002, "loss": 0.0591, "step": 410 }, { "epoch": 129.23, "learning_rate": 0.0002, "loss": 0.0606, "step": 420 }, { "epoch": 132.31, "learning_rate": 0.0002, "loss": 0.0609, "step": 430 }, { "epoch": 135.38, "learning_rate": 0.0002, "loss": 0.0585, "step": 440 }, { "epoch": 138.46, "learning_rate": 0.0002, "loss": 0.0597, "step": 450 }, { "epoch": 141.54, "learning_rate": 0.0002, "loss": 0.0609, "step": 460 }, { "epoch": 144.62, "learning_rate": 0.0002, "loss": 0.0609, "step": 470 }, { "epoch": 147.69, "learning_rate": 0.0002, "loss": 0.0598, "step": 480 }, { "epoch": 150.77, "learning_rate": 0.0002, "loss": 0.061, "step": 490 }, { "epoch": 153.85, "learning_rate": 0.0002, "loss": 0.0606, "step": 500 }, { "epoch": 156.92, "learning_rate": 0.0002, "loss": 0.0634, "step": 510 }, { "epoch": 160.0, "learning_rate": 0.0002, "loss": 0.0599, "step": 520 }, { "epoch": 163.08, "learning_rate": 0.0002, "loss": 0.0611, "step": 530 }, { "epoch": 166.15, "learning_rate": 0.0002, "loss": 0.0652, "step": 540 }, { "epoch": 169.23, "learning_rate": 0.0002, "loss": 0.0619, "step": 550 }, { "epoch": 172.31, "learning_rate": 0.0002, "loss": 0.0618, "step": 560 }, { "epoch": 175.38, "learning_rate": 0.0002, "loss": 0.0609, "step": 570 }, { "epoch": 178.46, "learning_rate": 0.0002, "loss": 0.0597, "step": 580 }, { "epoch": 181.54, "learning_rate": 0.0002, "loss": 0.0607, "step": 590 }, { "epoch": 184.62, "learning_rate": 0.0002, "loss": 0.06, "step": 600 }, { "epoch": 187.69, "learning_rate": 0.0002, "loss": 0.0587, "step": 610 }, { "epoch": 190.77, "learning_rate": 0.0002, "loss": 0.0595, "step": 620 }, { "epoch": 193.85, "learning_rate": 0.0002, "loss": 0.0592, "step": 630 }, { "epoch": 196.92, "learning_rate": 0.0002, "loss": 0.0605, "step": 640 }, { "epoch": 200.0, "learning_rate": 0.0002, "loss": 0.0592, "step": 650 }, { "epoch": 203.08, "learning_rate": 0.0002, "loss": 0.0616, "step": 660 }, { "epoch": 206.15, "learning_rate": 0.0002, "loss": 0.0596, "step": 670 }, { "epoch": 209.23, "learning_rate": 0.0002, "loss": 0.0587, "step": 680 }, { "epoch": 212.31, "learning_rate": 0.0002, "loss": 0.0614, "step": 690 }, { "epoch": 215.38, "learning_rate": 0.0002, "loss": 0.0591, "step": 700 } ], "logging_steps": 10, "max_steps": 10000, "num_train_epochs": 3334, "save_steps": 10, "total_flos": 1586920711709184.0, "trial_name": null, "trial_params": null }