{ "best_metric": 1.948645830154419, "best_model_checkpoint": "output/hyuna/checkpoint-290", "epoch": 5.0, "global_step": 290, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 0.00013469952948681868, "loss": 3.106, "step": 5 }, { "epoch": 0.17, "learning_rate": 0.0001273804022850966, "loss": 2.6073, "step": 10 }, { "epoch": 0.26, "learning_rate": 0.00011577618287734484, "loss": 2.406, "step": 15 }, { "epoch": 0.34, "learning_rate": 0.00010073281903200561, "loss": 2.6274, "step": 20 }, { "epoch": 0.43, "learning_rate": 8.334697219847626e-05, "loss": 2.4792, "step": 25 }, { "epoch": 0.52, "learning_rate": 6.488607087104036e-05, "loss": 2.3865, "step": 30 }, { "epoch": 0.6, "learning_rate": 4.6695915032671784e-05, "loss": 2.3743, "step": 35 }, { "epoch": 0.69, "learning_rate": 3.0102567316140575e-05, "loss": 2.5462, "step": 40 }, { "epoch": 0.78, "learning_rate": 1.6315683018244145e-05, "loss": 2.3818, "step": 45 }, { "epoch": 0.86, "learning_rate": 6.340326210572357e-06, "loss": 2.3368, "step": 50 }, { "epoch": 0.95, "learning_rate": 9.037005536513067e-07, "loss": 2.371, "step": 55 }, { "epoch": 1.0, "eval_loss": 2.4857912063598633, "eval_runtime": 0.9931, "eval_samples_per_second": 75.519, "eval_steps_per_second": 10.069, "step": 58 }, { "epoch": 1.03, "learning_rate": 4.0213613921093164e-07, "loss": 2.3666, "step": 60 }, { "epoch": 1.12, "learning_rate": 4.8721970205680935e-06, "loss": 2.4303, "step": 65 }, { "epoch": 1.21, "learning_rate": 1.3988015692592823e-05, "loss": 2.3856, "step": 70 }, { "epoch": 1.29, "learning_rate": 2.708504883770769e-05, "loss": 2.1811, "step": 75 }, { "epoch": 1.38, "learning_rate": 4.320852254368187e-05, "loss": 2.4049, "step": 80 }, { "epoch": 1.47, "learning_rate": 6.118303533611755e-05, "loss": 2.2865, "step": 85 }, { "epoch": 1.55, "learning_rate": 7.969824496351964e-05, "loss": 2.1265, "step": 90 }, { "epoch": 1.64, "learning_rate": 9.740439236703416e-05, "loss": 2.1689, "step": 95 }, { "epoch": 1.72, "learning_rate": 0.00011301069913603334, "loss": 2.4769, "step": 100 }, { "epoch": 1.81, "learning_rate": 0.00012537946527356269, "loss": 2.282, "step": 105 }, { "epoch": 1.9, "learning_rate": 0.00013360900754314024, "loss": 2.2972, "step": 110 }, { "epoch": 1.98, "learning_rate": 0.0001370993921901871, "loss": 2.3702, "step": 115 }, { "epoch": 2.0, "eval_loss": 2.0102086067199707, "eval_runtime": 1.0576, "eval_samples_per_second": 75.643, "eval_steps_per_second": 9.455, "step": 116 }, { "epoch": 2.07, "learning_rate": 0.00013559617012171197, "loss": 2.0597, "step": 120 }, { "epoch": 2.16, "learning_rate": 0.00012920892624899717, "loss": 2.1937, "step": 125 }, { "epoch": 2.24, "learning_rate": 0.00011840329074592684, "loss": 2.1953, "step": 130 }, { "epoch": 2.33, "learning_rate": 0.00010396699460234374, "loss": 2.0115, "step": 135 }, { "epoch": 2.41, "learning_rate": 8.69524440231046e-05, "loss": 2.0135, "step": 140 }, { "epoch": 2.5, "learning_rate": 6.860000000000001e-05, "loss": 1.9796, "step": 145 }, { "epoch": 2.59, "learning_rate": 5.024755597689551e-05, "loss": 2.0685, "step": 150 }, { "epoch": 2.67, "learning_rate": 3.3233005397656285e-05, "loss": 2.075, "step": 155 }, { "epoch": 2.76, "learning_rate": 1.8796709254073232e-05, "loss": 2.0578, "step": 160 }, { "epoch": 2.84, "learning_rate": 7.99107375100285e-06, "loss": 1.9644, "step": 165 }, { "epoch": 2.93, "learning_rate": 1.6038298782880706e-06, "loss": 2.0836, "step": 170 }, { "epoch": 3.0, "eval_loss": 1.9711263179779053, "eval_runtime": 1.056, "eval_samples_per_second": 75.756, "eval_steps_per_second": 9.469, "step": 174 }, { "epoch": 3.02, "learning_rate": 1.0060780981290602e-07, "loss": 2.0553, "step": 175 }, { "epoch": 3.1, "learning_rate": 3.5909924568597365e-06, "loss": 1.9319, "step": 180 }, { "epoch": 3.19, "learning_rate": 1.182053472643733e-05, "loss": 2.0587, "step": 185 }, { "epoch": 3.28, "learning_rate": 2.418930086396662e-05, "loss": 2.1358, "step": 190 }, { "epoch": 3.36, "learning_rate": 3.9795607632965815e-05, "loss": 1.9452, "step": 195 }, { "epoch": 3.45, "learning_rate": 5.750175503648027e-05, "loss": 1.9591, "step": 200 }, { "epoch": 3.53, "learning_rate": 7.601696466388229e-05, "loss": 1.8235, "step": 205 }, { "epoch": 3.62, "learning_rate": 9.39914774563181e-05, "loss": 1.8749, "step": 210 }, { "epoch": 3.71, "learning_rate": 0.00011011495116229225, "loss": 2.0178, "step": 215 }, { "epoch": 3.79, "learning_rate": 0.00012321198430740717, "loss": 1.9038, "step": 220 }, { "epoch": 3.88, "learning_rate": 0.0001323278029794319, "loss": 2.1059, "step": 225 }, { "epoch": 3.97, "learning_rate": 0.00013679786386078908, "loss": 1.946, "step": 230 }, { "epoch": 4.0, "eval_loss": 1.9748159646987915, "eval_runtime": 1.0572, "eval_samples_per_second": 75.673, "eval_steps_per_second": 9.459, "step": 232 }, { "epoch": 4.05, "learning_rate": 0.0001362962994463487, "loss": 1.8662, "step": 235 }, { "epoch": 4.14, "learning_rate": 0.00013085967378942767, "loss": 1.9053, "step": 240 }, { "epoch": 4.22, "learning_rate": 0.00012088431698175582, "loss": 1.8573, "step": 245 }, { "epoch": 4.31, "learning_rate": 0.00010709743268385941, "loss": 1.9869, "step": 250 }, { "epoch": 4.4, "learning_rate": 9.050408496732835e-05, "loss": 1.7389, "step": 255 }, { "epoch": 4.48, "learning_rate": 7.231392912895982e-05, "loss": 2.0663, "step": 260 }, { "epoch": 4.57, "learning_rate": 5.385302780152384e-05, "loss": 1.8779, "step": 265 }, { "epoch": 4.66, "learning_rate": 3.646718096799452e-05, "loss": 1.7718, "step": 270 }, { "epoch": 4.74, "learning_rate": 2.14238171226552e-05, "loss": 1.7325, "step": 275 }, { "epoch": 4.83, "learning_rate": 9.819597714903422e-06, "loss": 1.7267, "step": 280 }, { "epoch": 4.91, "learning_rate": 2.5004705131813084e-06, "loss": 1.8411, "step": 285 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 1.6194, "step": 290 }, { "epoch": 5.0, "eval_loss": 1.948645830154419, "eval_runtime": 1.0612, "eval_samples_per_second": 75.387, "eval_steps_per_second": 9.423, "step": 290 } ], "max_steps": 1218, "num_train_epochs": 21, "total_flos": 299832606720000.0, "trial_name": null, "trial_params": null }