{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 3000, "global_step": 556, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 0.00019640287769784174, "loss": 2.1468, "step": 10 }, { "epoch": 0.07, "learning_rate": 0.00019280575539568347, "loss": 1.9709, "step": 20 }, { "epoch": 0.11, "learning_rate": 0.00018920863309352518, "loss": 1.8468, "step": 30 }, { "epoch": 0.14, "learning_rate": 0.0001856115107913669, "loss": 1.6942, "step": 40 }, { "epoch": 0.18, "learning_rate": 0.00018201438848920864, "loss": 1.4989, "step": 50 }, { "epoch": 0.22, "learning_rate": 0.00017841726618705037, "loss": 1.3514, "step": 60 }, { "epoch": 0.25, "learning_rate": 0.0001748201438848921, "loss": 1.3165, "step": 70 }, { "epoch": 0.29, "learning_rate": 0.00017122302158273383, "loss": 1.2268, "step": 80 }, { "epoch": 0.32, "learning_rate": 0.00016762589928057554, "loss": 1.3164, "step": 90 }, { "epoch": 0.36, "learning_rate": 0.00016402877697841727, "loss": 1.3179, "step": 100 }, { "epoch": 0.4, "learning_rate": 0.000160431654676259, "loss": 1.2648, "step": 110 }, { "epoch": 0.43, "learning_rate": 0.00015683453237410073, "loss": 1.178, "step": 120 }, { "epoch": 0.47, "learning_rate": 0.00015323741007194246, "loss": 1.1558, "step": 130 }, { "epoch": 0.5, "learning_rate": 0.0001496402877697842, "loss": 1.0114, "step": 140 }, { "epoch": 0.54, "learning_rate": 0.0001460431654676259, "loss": 0.8844, "step": 150 }, { "epoch": 0.58, "learning_rate": 0.00014244604316546763, "loss": 0.9118, "step": 160 }, { "epoch": 0.61, "learning_rate": 0.00013884892086330936, "loss": 1.0269, "step": 170 }, { "epoch": 0.65, "learning_rate": 0.0001352517985611511, "loss": 0.9542, "step": 180 }, { "epoch": 0.68, "learning_rate": 0.00013165467625899283, "loss": 0.8281, "step": 190 }, { "epoch": 0.72, "learning_rate": 0.00012805755395683453, "loss": 0.8024, "step": 200 }, { "epoch": 0.76, "learning_rate": 0.00012446043165467626, "loss": 0.8185, "step": 210 }, { "epoch": 0.79, "learning_rate": 0.00012086330935251799, "loss": 1.099, "step": 220 }, { "epoch": 0.83, "learning_rate": 0.00011726618705035972, "loss": 0.8726, "step": 230 }, { "epoch": 0.86, "learning_rate": 0.00011366906474820144, "loss": 0.7907, "step": 240 }, { "epoch": 0.9, "learning_rate": 0.00011007194244604317, "loss": 0.9099, "step": 250 }, { "epoch": 0.94, "learning_rate": 0.0001064748201438849, "loss": 0.7301, "step": 260 }, { "epoch": 0.97, "learning_rate": 0.00010287769784172662, "loss": 0.772, "step": 270 }, { "epoch": 1.01, "learning_rate": 9.928057553956835e-05, "loss": 0.7936, "step": 280 }, { "epoch": 1.04, "learning_rate": 9.568345323741009e-05, "loss": 0.6623, "step": 290 }, { "epoch": 1.08, "learning_rate": 9.20863309352518e-05, "loss": 0.5091, "step": 300 }, { "epoch": 1.12, "learning_rate": 8.848920863309353e-05, "loss": 0.4996, "step": 310 }, { "epoch": 1.15, "learning_rate": 8.489208633093527e-05, "loss": 0.5529, "step": 320 }, { "epoch": 1.19, "learning_rate": 8.129496402877698e-05, "loss": 0.6094, "step": 330 }, { "epoch": 1.22, "learning_rate": 7.769784172661872e-05, "loss": 0.4422, "step": 340 }, { "epoch": 1.26, "learning_rate": 7.410071942446043e-05, "loss": 0.4468, "step": 350 }, { "epoch": 1.29, "learning_rate": 7.050359712230215e-05, "loss": 0.4673, "step": 360 }, { "epoch": 1.33, "learning_rate": 6.690647482014388e-05, "loss": 0.4746, "step": 370 }, { "epoch": 1.37, "learning_rate": 6.366906474820145e-05, "loss": 0.4601, "step": 380 }, { "epoch": 1.4, "learning_rate": 6.007194244604317e-05, "loss": 0.4793, "step": 390 }, { "epoch": 1.44, "learning_rate": 5.64748201438849e-05, "loss": 0.5919, "step": 400 }, { "epoch": 1.47, "learning_rate": 5.287769784172663e-05, "loss": 0.4313, "step": 410 }, { "epoch": 1.51, "learning_rate": 4.9280575539568345e-05, "loss": 0.4715, "step": 420 }, { "epoch": 1.55, "learning_rate": 4.5683453237410076e-05, "loss": 0.3263, "step": 430 }, { "epoch": 1.58, "learning_rate": 4.20863309352518e-05, "loss": 0.4526, "step": 440 }, { "epoch": 1.62, "learning_rate": 3.8489208633093525e-05, "loss": 0.4379, "step": 450 }, { "epoch": 1.65, "learning_rate": 3.489208633093525e-05, "loss": 0.5083, "step": 460 }, { "epoch": 1.69, "learning_rate": 3.129496402877698e-05, "loss": 0.5373, "step": 470 }, { "epoch": 1.73, "learning_rate": 2.7697841726618706e-05, "loss": 0.3303, "step": 480 }, { "epoch": 1.76, "learning_rate": 2.4100719424460434e-05, "loss": 0.3214, "step": 490 }, { "epoch": 1.8, "learning_rate": 2.050359712230216e-05, "loss": 0.4343, "step": 500 }, { "epoch": 1.83, "learning_rate": 1.6906474820143887e-05, "loss": 0.314, "step": 510 }, { "epoch": 1.87, "learning_rate": 1.3309352517985613e-05, "loss": 0.3727, "step": 520 }, { "epoch": 1.91, "learning_rate": 9.71223021582734e-06, "loss": 0.2798, "step": 530 }, { "epoch": 1.94, "learning_rate": 6.115107913669065e-06, "loss": 0.4031, "step": 540 }, { "epoch": 1.98, "learning_rate": 2.5179856115107916e-06, "loss": 0.3542, "step": 550 }, { "epoch": 2.0, "step": 556, "total_flos": 6.887981879958897e+17, "train_loss": 0.8073481788738168, "train_runtime": 211.1785, "train_samples_per_second": 42.088, "train_steps_per_second": 2.633 } ], "logging_steps": 10, "max_steps": 556, "num_train_epochs": 2, "save_steps": 3000, "total_flos": 6.887981879958897e+17, "trial_name": null, "trial_params": null }