{ "best_metric": 1.798671007156372, "best_model_checkpoint": "output/kasta/checkpoint-246", "epoch": 1.0, "global_step": 246, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.00013706019712792517, "loss": 2.7165, "step": 5 }, { "epoch": 0.04, "learning_rate": 0.00013664135833219747, "loss": 2.3564, "step": 10 }, { "epoch": 0.06, "learning_rate": 0.00013594519075178427, "loss": 2.2801, "step": 15 }, { "epoch": 0.08, "learning_rate": 0.00013497453188602036, "loss": 2.095, "step": 20 }, { "epoch": 0.1, "learning_rate": 0.00013373333802928601, "loss": 2.1713, "step": 25 }, { "epoch": 0.12, "learning_rate": 0.0001322266681456037, "loss": 2.0746, "step": 30 }, { "epoch": 0.14, "learning_rate": 0.00013046066324888032, "loss": 2.1268, "step": 35 }, { "epoch": 0.16, "learning_rate": 0.00012844252137283782, "loss": 2.1756, "step": 40 }, { "epoch": 0.18, "learning_rate": 0.00012618046823265178, "loss": 2.0614, "step": 45 }, { "epoch": 0.2, "learning_rate": 0.00012368372369787862, "loss": 1.9928, "step": 50 }, { "epoch": 0.22, "learning_rate": 0.00012096246421332296, "loss": 2.0832, "step": 55 }, { "epoch": 0.24, "learning_rate": 0.00011802778132101399, "loss": 1.9764, "step": 60 }, { "epoch": 0.26, "learning_rate": 0.00011489163645235038, "loss": 2.0674, "step": 65 }, { "epoch": 0.28, "learning_rate": 0.00011156681217467561, "loss": 2.0935, "step": 70 }, { "epoch": 0.3, "learning_rate": 0.00010806686009099738, "loss": 1.9101, "step": 75 }, { "epoch": 0.33, "learning_rate": 0.00010440604560520553, "loss": 1.9785, "step": 80 }, { "epoch": 0.35, "learning_rate": 0.00010059928977791948, "loss": 1.9156, "step": 85 }, { "epoch": 0.37, "learning_rate": 9.666210850995393e-05, "loss": 2.0267, "step": 90 }, { "epoch": 0.39, "learning_rate": 9.261054930128376e-05, "loss": 1.8933, "step": 95 }, { "epoch": 0.41, "learning_rate": 8.846112584327212e-05, "loss": 1.9379, "step": 100 }, { "epoch": 0.43, "learning_rate": 8.423075071075525e-05, "loss": 2.0375, "step": 105 }, { "epoch": 0.45, "learning_rate": 7.993666642832438e-05, "loss": 2.014, "step": 110 }, { "epoch": 0.47, "learning_rate": 7.55963751917701e-05, "loss": 1.9119, "step": 115 }, { "epoch": 0.49, "learning_rate": 7.122756753113643e-05, "loss": 1.9155, "step": 120 }, { "epoch": 0.51, "learning_rate": 6.684805020614639e-05, "loss": 1.9652, "step": 125 }, { "epoch": 0.53, "learning_rate": 6.247567362788848e-05, "loss": 1.8941, "step": 130 }, { "epoch": 0.55, "learning_rate": 5.81282591025852e-05, "loss": 1.8734, "step": 135 }, { "epoch": 0.57, "learning_rate": 5.382352619398988e-05, "loss": 1.866, "step": 140 }, { "epoch": 0.59, "learning_rate": 4.957902050047381e-05, "loss": 1.9107, "step": 145 }, { "epoch": 0.61, "learning_rate": 4.541204214117672e-05, "loss": 1.8795, "step": 150 }, { "epoch": 0.63, "learning_rate": 4.1339575242702164e-05, "loss": 1.9314, "step": 155 }, { "epoch": 0.65, "learning_rate": 3.7378218713762616e-05, "loss": 1.8708, "step": 160 }, { "epoch": 0.67, "learning_rate": 3.354411858992822e-05, "loss": 1.9518, "step": 165 }, { "epoch": 0.69, "learning_rate": 2.985290222423505e-05, "loss": 1.9226, "step": 170 }, { "epoch": 0.71, "learning_rate": 2.6319614591883445e-05, "loss": 1.9602, "step": 175 }, { "epoch": 0.73, "learning_rate": 2.2958656968642224e-05, "loss": 1.8641, "step": 180 }, { "epoch": 0.75, "learning_rate": 1.9783728232897674e-05, "loss": 1.8891, "step": 185 }, { "epoch": 0.77, "learning_rate": 1.6807769030594122e-05, "loss": 1.7877, "step": 190 }, { "epoch": 0.79, "learning_rate": 1.4042909030642942e-05, "loss": 1.8873, "step": 195 }, { "epoch": 0.81, "learning_rate": 1.1500417485781092e-05, "loss": 1.9452, "step": 200 }, { "epoch": 0.83, "learning_rate": 9.190657300387505e-06, "loss": 1.8256, "step": 205 }, { "epoch": 0.85, "learning_rate": 7.123042792471594e-06, "loss": 1.817, "step": 210 }, { "epoch": 0.87, "learning_rate": 5.306001321991061e-06, "loss": 1.9063, "step": 215 }, { "epoch": 0.89, "learning_rate": 3.7469389418978793e-06, "loss": 1.9345, "step": 220 }, { "epoch": 0.91, "learning_rate": 2.4522102119145282e-06, "loss": 1.8102, "step": 225 }, { "epoch": 0.93, "learning_rate": 1.42709229807627e-06, "loss": 1.8263, "step": 230 }, { "epoch": 0.96, "learning_rate": 6.757634636067098e-07, "loss": 1.9225, "step": 235 }, { "epoch": 0.98, "learning_rate": 2.0128603879540573e-07, "loss": 1.8347, "step": 240 }, { "epoch": 1.0, "learning_rate": 5.593939290255423e-09, "loss": 1.8634, "step": 245 }, { "epoch": 1.0, "eval_loss": 1.798671007156372, "eval_runtime": 14.7214, "eval_samples_per_second": 21.058, "eval_steps_per_second": 2.649, "step": 246 } ], "max_steps": 246, "num_train_epochs": 1, "total_flos": 256588775424000.0, "trial_name": null, "trial_params": null }