|
{ |
|
"best_metric": 1.0294359922409058, |
|
"best_model_checkpoint": "output/Baichuan-13B-Chat_lora_wqs_jiansuo/checkpoint-400", |
|
"epoch": 1.9939795304033714, |
|
"global_step": 414, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9928054992195985e-05, |
|
"loss": 1.3488, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.971263405551576e-05, |
|
"loss": 1.2097, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9354977066836986e-05, |
|
"loss": 1.1719, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.885714255694698e-05, |
|
"loss": 1.145, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.822199586246168e-05, |
|
"loss": 1.1203, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.11279296875, |
|
"eval_runtime": 10.7965, |
|
"eval_samples_per_second": 18.71, |
|
"eval_steps_per_second": 1.575, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.74531926340924e-05, |
|
"loss": 1.1036, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.6555157796180335e-05, |
|
"loss": 1.0904, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.5533060078599226e-05, |
|
"loss": 1.0818, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.43927822676105e-05, |
|
"loss": 1.0829, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.3140887346894974e-05, |
|
"loss": 1.0744, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.065435767173767, |
|
"eval_runtime": 7.9424, |
|
"eval_samples_per_second": 25.433, |
|
"eval_steps_per_second": 2.14, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1784580723639923e-05, |
|
"loss": 1.0629, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.033166875709291e-05, |
|
"loss": 1.0659, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.8790513828275683e-05, |
|
"loss": 1.0523, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.716998620945871e-05, |
|
"loss": 1.0397, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.547941301041661e-05, |
|
"loss": 1.0435, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.048127293586731, |
|
"eval_runtime": 8.0034, |
|
"eval_samples_per_second": 25.239, |
|
"eval_steps_per_second": 2.124, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.372852449530922e-05, |
|
"loss": 1.0563, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.1927398079167226e-05, |
|
"loss": 1.0376, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.008640032631585e-05, |
|
"loss": 1.0436, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.821612728457078e-05, |
|
"loss": 1.0301, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.632734349861874e-05, |
|
"loss": 1.0338, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 1.0388035774230957, |
|
"eval_runtime": 7.9339, |
|
"eval_samples_per_second": 25.46, |
|
"eval_steps_per_second": 2.143, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.4430920053597356e-05, |
|
"loss": 1.033, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.2537772005470782e-05, |
|
"loss": 1.0399, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.0658795558326743e-05, |
|
"loss": 1.0277, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.8804805350177505e-05, |
|
"loss": 1.0325, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.6986472208222576e-05, |
|
"loss": 1.0322, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 1.0336002111434937, |
|
"eval_runtime": 7.9742, |
|
"eval_samples_per_second": 25.332, |
|
"eval_steps_per_second": 2.132, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.5214261731829022e-05, |
|
"loss": 1.0248, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.3498374056721197e-05, |
|
"loss": 1.0235, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.1848685147073222e-05, |
|
"loss": 1.0446, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.0274689953403407e-05, |
|
"loss": 1.0271, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 8.785447763431101e-06, |
|
"loss": 1.0351, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 1.031198263168335, |
|
"eval_runtime": 7.9378, |
|
"eval_samples_per_second": 25.448, |
|
"eval_steps_per_second": 2.142, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 7.389530060434696e-06, |
|
"loss": 1.0341, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 6.094971189217042e-06, |
|
"loss": 1.0267, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.9092221136255444e-06, |
|
"loss": 1.0122, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.839107531779978e-06, |
|
"loss": 1.0197, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.890786595835693e-06, |
|
"loss": 1.0193, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_loss": 1.0297337770462036, |
|
"eval_runtime": 7.9869, |
|
"eval_samples_per_second": 25.292, |
|
"eval_steps_per_second": 2.128, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.0697174623636794e-06, |
|
"loss": 1.0231, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.3806258773811476e-06, |
|
"loss": 1.0233, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 8.274779768448482e-07, |
|
"loss": 1.0224, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 4.134574591564494e-07, |
|
"loss": 1.0181, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.4094726106603505e-07, |
|
"loss": 1.0189, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_loss": 1.0294359922409058, |
|
"eval_runtime": 7.9102, |
|
"eval_samples_per_second": 25.537, |
|
"eval_steps_per_second": 2.149, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.1515842439871472e-08, |
|
"loss": 1.0244, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"step": 414, |
|
"total_flos": 3.076881712207102e+18, |
|
"train_loss": 1.0595260426618052, |
|
"train_runtime": 5379.5583, |
|
"train_samples_per_second": 7.407, |
|
"train_steps_per_second": 0.077 |
|
} |
|
], |
|
"max_steps": 414, |
|
"num_train_epochs": 2, |
|
"total_flos": 3.076881712207102e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|