{ "best_metric": 1.6997302770614624, "best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved_llamaprosocial_dialog/checkpoint-297", "epoch": 2.57212449255751, "global_step": 297, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "eval_loss": 1.9915313720703125, "eval_runtime": 22.7483, "eval_samples_per_second": 87.919, "eval_steps_per_second": 1.407, "step": 11 }, { "epoch": 0.17, "learning_rate": 0.000292814371257485, "loss": 2.0439, "step": 20 }, { "epoch": 0.19, "eval_loss": 1.8029985427856445, "eval_runtime": 22.7395, "eval_samples_per_second": 87.953, "eval_steps_per_second": 1.407, "step": 22 }, { "epoch": 0.29, "eval_loss": 1.7677525281906128, "eval_runtime": 22.7246, "eval_samples_per_second": 88.01, "eval_steps_per_second": 1.408, "step": 33 }, { "epoch": 0.35, "learning_rate": 0.00027485029940119756, "loss": 1.7686, "step": 40 }, { "epoch": 0.38, "eval_loss": 1.7513495683670044, "eval_runtime": 22.772, "eval_samples_per_second": 87.827, "eval_steps_per_second": 1.405, "step": 44 }, { "epoch": 0.48, "eval_loss": 1.7416645288467407, "eval_runtime": 22.7234, "eval_samples_per_second": 88.015, "eval_steps_per_second": 1.408, "step": 55 }, { "epoch": 0.52, "learning_rate": 0.00025688622754491017, "loss": 1.737, "step": 60 }, { "epoch": 0.57, "eval_loss": 1.734699010848999, "eval_runtime": 22.7372, "eval_samples_per_second": 87.962, "eval_steps_per_second": 1.407, "step": 66 }, { "epoch": 0.67, "eval_loss": 1.72977614402771, "eval_runtime": 22.74, "eval_samples_per_second": 87.951, "eval_steps_per_second": 1.407, "step": 77 }, { "epoch": 0.69, "learning_rate": 0.00023892215568862272, "loss": 1.7289, "step": 80 }, { "epoch": 0.76, "eval_loss": 1.725810170173645, "eval_runtime": 22.7407, "eval_samples_per_second": 87.948, "eval_steps_per_second": 1.407, "step": 88 }, { "epoch": 0.86, "eval_loss": 1.7222310304641724, "eval_runtime": 22.733, "eval_samples_per_second": 87.978, "eval_steps_per_second": 1.408, "step": 99 }, { "epoch": 0.87, "learning_rate": 0.00022095808383233533, "loss": 1.7172, "step": 100 }, { "epoch": 0.95, "eval_loss": 1.7189042568206787, "eval_runtime": 22.7271, "eval_samples_per_second": 88.001, "eval_steps_per_second": 1.408, "step": 110 }, { "epoch": 1.04, "learning_rate": 0.00020299401197604788, "loss": 1.7127, "step": 120 }, { "epoch": 1.05, "eval_loss": 1.7177015542984009, "eval_runtime": 22.7303, "eval_samples_per_second": 87.988, "eval_steps_per_second": 1.408, "step": 121 }, { "epoch": 1.14, "eval_loss": 1.7150254249572754, "eval_runtime": 22.7281, "eval_samples_per_second": 87.997, "eval_steps_per_second": 1.408, "step": 132 }, { "epoch": 1.21, "learning_rate": 0.00018502994011976046, "loss": 1.7017, "step": 140 }, { "epoch": 1.24, "eval_loss": 1.7136248350143433, "eval_runtime": 22.732, "eval_samples_per_second": 87.982, "eval_steps_per_second": 1.408, "step": 143 }, { "epoch": 1.33, "eval_loss": 1.7119090557098389, "eval_runtime": 22.7069, "eval_samples_per_second": 88.079, "eval_steps_per_second": 1.409, "step": 154 }, { "epoch": 1.39, "learning_rate": 0.00016706586826347302, "loss": 1.7003, "step": 160 }, { "epoch": 1.43, "eval_loss": 1.7102028131484985, "eval_runtime": 22.9016, "eval_samples_per_second": 87.33, "eval_steps_per_second": 1.397, "step": 165 }, { "epoch": 1.52, "eval_loss": 1.708575963973999, "eval_runtime": 22.7175, "eval_samples_per_second": 88.038, "eval_steps_per_second": 1.409, "step": 176 }, { "epoch": 1.56, "learning_rate": 0.0001491017964071856, "loss": 1.7025, "step": 180 }, { "epoch": 1.62, "eval_loss": 1.7076679468154907, "eval_runtime": 22.792, "eval_samples_per_second": 87.75, "eval_steps_per_second": 1.404, "step": 187 }, { "epoch": 1.71, "eval_loss": 1.7062067985534668, "eval_runtime": 22.7054, "eval_samples_per_second": 88.085, "eval_steps_per_second": 1.409, "step": 198 }, { "epoch": 1.73, "learning_rate": 0.0001311377245508982, "loss": 1.6976, "step": 200 }, { "epoch": 1.81, "eval_loss": 1.7049460411071777, "eval_runtime": 22.765, "eval_samples_per_second": 87.854, "eval_steps_per_second": 1.406, "step": 209 }, { "epoch": 1.91, "learning_rate": 0.00011317365269461076, "loss": 1.6955, "step": 220 }, { "epoch": 1.91, "eval_loss": 1.704172134399414, "eval_runtime": 22.8464, "eval_samples_per_second": 87.541, "eval_steps_per_second": 1.401, "step": 220 }, { "epoch": 2.0, "eval_loss": 1.7033213376998901, "eval_runtime": 22.7378, "eval_samples_per_second": 87.959, "eval_steps_per_second": 1.407, "step": 231 }, { "epoch": 2.08, "learning_rate": 9.520958083832335e-05, "loss": 1.6893, "step": 240 }, { "epoch": 2.1, "eval_loss": 1.7023844718933105, "eval_runtime": 22.7421, "eval_samples_per_second": 87.942, "eval_steps_per_second": 1.407, "step": 242 }, { "epoch": 2.19, "eval_loss": 1.7013720273971558, "eval_runtime": 22.7212, "eval_samples_per_second": 88.023, "eval_steps_per_second": 1.408, "step": 253 }, { "epoch": 2.25, "learning_rate": 7.724550898203592e-05, "loss": 1.6853, "step": 260 }, { "epoch": 2.29, "eval_loss": 1.7013109922409058, "eval_runtime": 22.7306, "eval_samples_per_second": 87.987, "eval_steps_per_second": 1.408, "step": 264 }, { "epoch": 2.38, "eval_loss": 1.7006632089614868, "eval_runtime": 22.748, "eval_samples_per_second": 87.92, "eval_steps_per_second": 1.407, "step": 275 }, { "epoch": 2.42, "learning_rate": 5.92814371257485e-05, "loss": 1.6877, "step": 280 }, { "epoch": 2.48, "eval_loss": 1.7002646923065186, "eval_runtime": 22.7267, "eval_samples_per_second": 88.002, "eval_steps_per_second": 1.408, "step": 286 }, { "epoch": 2.57, "eval_loss": 1.6997302770614624, "eval_runtime": 22.7256, "eval_samples_per_second": 88.006, "eval_steps_per_second": 1.408, "step": 297 } ], "max_steps": 345, "num_train_epochs": 3, "total_flos": 2.268056979475792e+18, "trial_name": null, "trial_params": null }