{ "best_metric": 2.7318851947784424, "best_model_checkpoint": "output/doja-cat/checkpoint-335", "epoch": 5.0, "global_step": 335, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.00013532330330528217, "loss": 3.5814, "step": 5 }, { "epoch": 0.15, "learning_rate": 0.00012979589515943672, "loss": 3.3945, "step": 10 }, { "epoch": 0.22, "learning_rate": 0.0001209202032183722, "loss": 3.258, "step": 15 }, { "epoch": 0.3, "learning_rate": 0.00010918185377156764, "loss": 3.0648, "step": 20 }, { "epoch": 0.37, "learning_rate": 9.522310109331633e-05, "loss": 3.0572, "step": 25 }, { "epoch": 0.45, "learning_rate": 7.980768702075116e-05, "loss": 2.9453, "step": 30 }, { "epoch": 0.52, "learning_rate": 6.377905343835293e-05, "loss": 2.8372, "step": 35 }, { "epoch": 0.6, "learning_rate": 4.8014194038976637e-05, "loss": 2.8698, "step": 40 }, { "epoch": 0.67, "learning_rate": 3.337567032488183e-05, "loss": 2.9529, "step": 45 }, { "epoch": 0.75, "learning_rate": 2.0664417254371463e-05, "loss": 2.9508, "step": 50 }, { "epoch": 0.82, "learning_rate": 1.0575920734825916e-05, "loss": 2.9673, "step": 55 }, { "epoch": 0.9, "learning_rate": 3.66216467507622e-06, "loss": 3.0803, "step": 60 }, { "epoch": 0.97, "learning_rate": 3.014296332977907e-07, "loss": 2.8364, "step": 65 }, { "epoch": 1.0, "eval_loss": 2.8956077098846436, "eval_runtime": 3.3695, "eval_samples_per_second": 21.368, "eval_steps_per_second": 2.671, "step": 67 }, { "epoch": 1.04, "learning_rate": 6.775955029229377e-07, "loss": 2.9377, "step": 70 }, { "epoch": 1.12, "learning_rate": 4.7700806733338495e-06, "loss": 2.722, "step": 75 }, { "epoch": 1.19, "learning_rate": 1.235496813600306e-05, "loss": 2.6699, "step": 80 }, { "epoch": 1.27, "learning_rate": 2.3017256922096845e-05, "loss": 2.8672, "step": 85 }, { "epoch": 1.34, "learning_rate": 3.6173568544064456e-05, "loss": 2.812, "step": 90 }, { "epoch": 1.42, "learning_rate": 5.110406607666898e-05, "loss": 2.6143, "step": 95 }, { "epoch": 1.49, "learning_rate": 6.699183945081632e-05, "loss": 2.6499, "step": 100 }, { "epoch": 1.57, "learning_rate": 8.296760202575751e-05, "loss": 2.832, "step": 105 }, { "epoch": 1.64, "learning_rate": 9.815725290295059e-05, "loss": 2.6562, "step": 110 }, { "epoch": 1.72, "learning_rate": 0.00011172970264813468, "loss": 2.8987, "step": 115 }, { "epoch": 1.79, "learning_rate": 0.00012294234567600346, "loss": 2.705, "step": 120 }, { "epoch": 1.87, "learning_rate": 0.00013118169131281192, "loss": 2.5747, "step": 125 }, { "epoch": 1.94, "learning_rate": 0.00013599693044126453, "loss": 2.6942, "step": 130 }, { "epoch": 2.0, "eval_loss": 2.8365488052368164, "eval_runtime": 3.3573, "eval_samples_per_second": 21.446, "eval_steps_per_second": 2.681, "step": 134 }, { "epoch": 2.01, "learning_rate": 0.0001371246011559198, "loss": 2.804, "step": 135 }, { "epoch": 2.09, "learning_rate": 0.00013450300386777127, "loss": 2.4535, "step": 140 }, { "epoch": 2.16, "learning_rate": 0.00012827557714724304, "loss": 2.3801, "step": 145 }, { "epoch": 2.24, "learning_rate": 0.00011878304959908774, "loss": 2.5012, "step": 150 }, { "epoch": 2.31, "learning_rate": 0.00010654479717298873, "loss": 2.2923, "step": 155 }, { "epoch": 2.39, "learning_rate": 9.223042592950526e-05, "loss": 2.3414, "step": 160 }, { "epoch": 2.46, "learning_rate": 7.662313508728495e-05, "loss": 2.5064, "step": 165 }, { "epoch": 2.54, "learning_rate": 6.0576864912715095e-05, "loss": 2.5332, "step": 170 }, { "epoch": 2.61, "learning_rate": 4.496957407049471e-05, "loss": 2.4362, "step": 175 }, { "epoch": 2.69, "learning_rate": 3.065520282701126e-05, "loss": 2.4042, "step": 180 }, { "epoch": 2.76, "learning_rate": 1.8416950400912332e-05, "loss": 2.3006, "step": 185 }, { "epoch": 2.84, "learning_rate": 8.924422852757e-06, "loss": 2.3545, "step": 190 }, { "epoch": 2.91, "learning_rate": 2.6969961322287634e-06, "loss": 2.4371, "step": 195 }, { "epoch": 2.99, "learning_rate": 7.53988440801922e-08, "loss": 2.2676, "step": 200 }, { "epoch": 3.0, "eval_loss": 2.7877070903778076, "eval_runtime": 3.3652, "eval_samples_per_second": 21.396, "eval_steps_per_second": 2.674, "step": 201 }, { "epoch": 3.06, "learning_rate": 1.2030695587354792e-06, "loss": 2.4161, "step": 205 }, { "epoch": 3.13, "learning_rate": 6.018308687188092e-06, "loss": 2.3933, "step": 210 }, { "epoch": 3.21, "learning_rate": 1.4257654323996543e-05, "loss": 2.2934, "step": 215 }, { "epoch": 3.28, "learning_rate": 2.5470297351865334e-05, "loss": 2.2132, "step": 220 }, { "epoch": 3.36, "learning_rate": 3.9042747097049484e-05, "loss": 1.9654, "step": 225 }, { "epoch": 3.43, "learning_rate": 5.423239797424245e-05, "loss": 2.2451, "step": 230 }, { "epoch": 3.51, "learning_rate": 7.020816054918364e-05, "loss": 2.36, "step": 235 }, { "epoch": 3.58, "learning_rate": 8.609593392333099e-05, "loss": 2.3897, "step": 240 }, { "epoch": 3.66, "learning_rate": 0.00010102643145593556, "loss": 2.1583, "step": 245 }, { "epoch": 3.73, "learning_rate": 0.00011418274307790318, "loss": 2.3118, "step": 250 }, { "epoch": 3.81, "learning_rate": 0.00012484503186399693, "loss": 2.2451, "step": 255 }, { "epoch": 3.88, "learning_rate": 0.00013242991932666616, "loss": 2.131, "step": 260 }, { "epoch": 3.96, "learning_rate": 0.00013652240449707706, "loss": 2.0297, "step": 265 }, { "epoch": 4.0, "eval_loss": 2.8032355308532715, "eval_runtime": 3.3675, "eval_samples_per_second": 21.381, "eval_steps_per_second": 2.673, "step": 268 }, { "epoch": 4.03, "learning_rate": 0.00013689857036670224, "loss": 1.9968, "step": 270 }, { "epoch": 4.1, "learning_rate": 0.0001335378353249238, "loss": 2.1578, "step": 275 }, { "epoch": 4.18, "learning_rate": 0.00012662407926517416, "loss": 2.1444, "step": 280 }, { "epoch": 4.25, "learning_rate": 0.00011653558274562858, "loss": 1.9796, "step": 285 }, { "epoch": 4.33, "learning_rate": 0.00010382432967511827, "loss": 1.8756, "step": 290 }, { "epoch": 4.4, "learning_rate": 8.918580596102339e-05, "loss": 1.9379, "step": 295 }, { "epoch": 4.48, "learning_rate": 7.342094656164722e-05, "loss": 2.1349, "step": 300 }, { "epoch": 4.55, "learning_rate": 5.739231297924882e-05, "loss": 2.048, "step": 305 }, { "epoch": 4.63, "learning_rate": 4.197689890668377e-05, "loss": 1.8569, "step": 310 }, { "epoch": 4.7, "learning_rate": 2.8018146228432437e-05, "loss": 1.8989, "step": 315 }, { "epoch": 4.78, "learning_rate": 1.6279796781627843e-05, "loss": 2.258, "step": 320 }, { "epoch": 4.85, "learning_rate": 7.404104840563317e-06, "loss": 2.0248, "step": 325 }, { "epoch": 4.93, "learning_rate": 1.8766966947178655e-06, "loss": 1.8796, "step": 330 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 1.8645, "step": 335 }, { "epoch": 5.0, "eval_loss": 2.7318851947784424, "eval_runtime": 3.3458, "eval_samples_per_second": 21.519, "eval_steps_per_second": 2.69, "step": 335 } ], "max_steps": 670, "num_train_epochs": 10, "total_flos": 348171632640000.0, "trial_name": null, "trial_params": null }