{ "best_metric": 1.0294359922409058, "best_model_checkpoint": "output/Baichuan-13B-Chat_lora_wqs_jiansuo/checkpoint-400", "epoch": 1.9939795304033714, "global_step": 414, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 4.9928054992195985e-05, "loss": 1.3488, "step": 10 }, { "epoch": 0.1, "learning_rate": 4.971263405551576e-05, "loss": 1.2097, "step": 20 }, { "epoch": 0.14, "learning_rate": 4.9354977066836986e-05, "loss": 1.1719, "step": 30 }, { "epoch": 0.19, "learning_rate": 4.885714255694698e-05, "loss": 1.145, "step": 40 }, { "epoch": 0.24, "learning_rate": 4.822199586246168e-05, "loss": 1.1203, "step": 50 }, { "epoch": 0.24, "eval_loss": 1.11279296875, "eval_runtime": 10.7965, "eval_samples_per_second": 18.71, "eval_steps_per_second": 1.575, "step": 50 }, { "epoch": 0.29, "learning_rate": 4.74531926340924e-05, "loss": 1.1036, "step": 60 }, { "epoch": 0.34, "learning_rate": 4.6555157796180335e-05, "loss": 1.0904, "step": 70 }, { "epoch": 0.39, "learning_rate": 4.5533060078599226e-05, "loss": 1.0818, "step": 80 }, { "epoch": 0.43, "learning_rate": 4.43927822676105e-05, "loss": 1.0829, "step": 90 }, { "epoch": 0.48, "learning_rate": 4.3140887346894974e-05, "loss": 1.0744, "step": 100 }, { "epoch": 0.48, "eval_loss": 1.065435767173767, "eval_runtime": 7.9424, "eval_samples_per_second": 25.433, "eval_steps_per_second": 2.14, "step": 100 }, { "epoch": 0.53, "learning_rate": 4.1784580723639923e-05, "loss": 1.0629, "step": 110 }, { "epoch": 0.58, "learning_rate": 4.033166875709291e-05, "loss": 1.0659, "step": 120 }, { "epoch": 0.63, "learning_rate": 3.8790513828275683e-05, "loss": 1.0523, "step": 130 }, { "epoch": 0.67, "learning_rate": 3.716998620945871e-05, "loss": 1.0397, "step": 140 }, { "epoch": 0.72, "learning_rate": 3.547941301041661e-05, "loss": 1.0435, "step": 150 }, { "epoch": 0.72, "eval_loss": 1.048127293586731, "eval_runtime": 8.0034, "eval_samples_per_second": 25.239, "eval_steps_per_second": 2.124, "step": 150 }, { "epoch": 0.77, "learning_rate": 3.372852449530922e-05, "loss": 1.0563, "step": 160 }, { "epoch": 0.82, "learning_rate": 3.1927398079167226e-05, "loss": 1.0376, "step": 170 }, { "epoch": 0.87, "learning_rate": 3.008640032631585e-05, "loss": 1.0436, "step": 180 }, { "epoch": 0.92, "learning_rate": 2.821612728457078e-05, "loss": 1.0301, "step": 190 }, { "epoch": 0.96, "learning_rate": 2.632734349861874e-05, "loss": 1.0338, "step": 200 }, { "epoch": 0.96, "eval_loss": 1.0388035774230957, "eval_runtime": 7.9339, "eval_samples_per_second": 25.46, "eval_steps_per_second": 2.143, "step": 200 }, { "epoch": 1.01, "learning_rate": 2.4430920053597356e-05, "loss": 1.033, "step": 210 }, { "epoch": 1.06, "learning_rate": 2.2537772005470782e-05, "loss": 1.0399, "step": 220 }, { "epoch": 1.11, "learning_rate": 2.0658795558326743e-05, "loss": 1.0277, "step": 230 }, { "epoch": 1.16, "learning_rate": 1.8804805350177505e-05, "loss": 1.0325, "step": 240 }, { "epoch": 1.2, "learning_rate": 1.6986472208222576e-05, "loss": 1.0322, "step": 250 }, { "epoch": 1.2, "eval_loss": 1.0336002111434937, "eval_runtime": 7.9742, "eval_samples_per_second": 25.332, "eval_steps_per_second": 2.132, "step": 250 }, { "epoch": 1.25, "learning_rate": 1.5214261731829022e-05, "loss": 1.0248, "step": 260 }, { "epoch": 1.3, "learning_rate": 1.3498374056721197e-05, "loss": 1.0235, "step": 270 }, { "epoch": 1.35, "learning_rate": 1.1848685147073222e-05, "loss": 1.0446, "step": 280 }, { "epoch": 1.4, "learning_rate": 1.0274689953403407e-05, "loss": 1.0271, "step": 290 }, { "epoch": 1.44, "learning_rate": 8.785447763431101e-06, "loss": 1.0351, "step": 300 }, { "epoch": 1.44, "eval_loss": 1.031198263168335, "eval_runtime": 7.9378, "eval_samples_per_second": 25.448, "eval_steps_per_second": 2.142, "step": 300 }, { "epoch": 1.49, "learning_rate": 7.389530060434696e-06, "loss": 1.0341, "step": 310 }, { "epoch": 1.54, "learning_rate": 6.094971189217042e-06, "loss": 1.0267, "step": 320 }, { "epoch": 1.59, "learning_rate": 4.9092221136255444e-06, "loss": 1.0122, "step": 330 }, { "epoch": 1.64, "learning_rate": 3.839107531779978e-06, "loss": 1.0197, "step": 340 }, { "epoch": 1.69, "learning_rate": 2.890786595835693e-06, "loss": 1.0193, "step": 350 }, { "epoch": 1.69, "eval_loss": 1.0297337770462036, "eval_runtime": 7.9869, "eval_samples_per_second": 25.292, "eval_steps_per_second": 2.128, "step": 350 }, { "epoch": 1.73, "learning_rate": 2.0697174623636794e-06, "loss": 1.0231, "step": 360 }, { "epoch": 1.78, "learning_rate": 1.3806258773811476e-06, "loss": 1.0233, "step": 370 }, { "epoch": 1.83, "learning_rate": 8.274779768448482e-07, "loss": 1.0224, "step": 380 }, { "epoch": 1.88, "learning_rate": 4.134574591564494e-07, "loss": 1.0181, "step": 390 }, { "epoch": 1.93, "learning_rate": 1.4094726106603505e-07, "loss": 1.0189, "step": 400 }, { "epoch": 1.93, "eval_loss": 1.0294359922409058, "eval_runtime": 7.9102, "eval_samples_per_second": 25.537, "eval_steps_per_second": 2.149, "step": 400 }, { "epoch": 1.97, "learning_rate": 1.1515842439871472e-08, "loss": 1.0244, "step": 410 }, { "epoch": 1.99, "step": 414, "total_flos": 3.076881712207102e+18, "train_loss": 1.0595260426618052, "train_runtime": 5379.5583, "train_samples_per_second": 7.407, "train_steps_per_second": 0.077 } ], "max_steps": 414, "num_train_epochs": 2, "total_flos": 3.076881712207102e+18, "trial_name": null, "trial_params": null }