Go4miii's picture
commit from root
0a2afe7
{
"best_metric": 1.0294359922409058,
"best_model_checkpoint": "output/Baichuan-13B-Chat_lora_wqs_jiansuo/checkpoint-400",
"epoch": 1.9939795304033714,
"global_step": 414,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 4.9928054992195985e-05,
"loss": 1.3488,
"step": 10
},
{
"epoch": 0.1,
"learning_rate": 4.971263405551576e-05,
"loss": 1.2097,
"step": 20
},
{
"epoch": 0.14,
"learning_rate": 4.9354977066836986e-05,
"loss": 1.1719,
"step": 30
},
{
"epoch": 0.19,
"learning_rate": 4.885714255694698e-05,
"loss": 1.145,
"step": 40
},
{
"epoch": 0.24,
"learning_rate": 4.822199586246168e-05,
"loss": 1.1203,
"step": 50
},
{
"epoch": 0.24,
"eval_loss": 1.11279296875,
"eval_runtime": 10.7965,
"eval_samples_per_second": 18.71,
"eval_steps_per_second": 1.575,
"step": 50
},
{
"epoch": 0.29,
"learning_rate": 4.74531926340924e-05,
"loss": 1.1036,
"step": 60
},
{
"epoch": 0.34,
"learning_rate": 4.6555157796180335e-05,
"loss": 1.0904,
"step": 70
},
{
"epoch": 0.39,
"learning_rate": 4.5533060078599226e-05,
"loss": 1.0818,
"step": 80
},
{
"epoch": 0.43,
"learning_rate": 4.43927822676105e-05,
"loss": 1.0829,
"step": 90
},
{
"epoch": 0.48,
"learning_rate": 4.3140887346894974e-05,
"loss": 1.0744,
"step": 100
},
{
"epoch": 0.48,
"eval_loss": 1.065435767173767,
"eval_runtime": 7.9424,
"eval_samples_per_second": 25.433,
"eval_steps_per_second": 2.14,
"step": 100
},
{
"epoch": 0.53,
"learning_rate": 4.1784580723639923e-05,
"loss": 1.0629,
"step": 110
},
{
"epoch": 0.58,
"learning_rate": 4.033166875709291e-05,
"loss": 1.0659,
"step": 120
},
{
"epoch": 0.63,
"learning_rate": 3.8790513828275683e-05,
"loss": 1.0523,
"step": 130
},
{
"epoch": 0.67,
"learning_rate": 3.716998620945871e-05,
"loss": 1.0397,
"step": 140
},
{
"epoch": 0.72,
"learning_rate": 3.547941301041661e-05,
"loss": 1.0435,
"step": 150
},
{
"epoch": 0.72,
"eval_loss": 1.048127293586731,
"eval_runtime": 8.0034,
"eval_samples_per_second": 25.239,
"eval_steps_per_second": 2.124,
"step": 150
},
{
"epoch": 0.77,
"learning_rate": 3.372852449530922e-05,
"loss": 1.0563,
"step": 160
},
{
"epoch": 0.82,
"learning_rate": 3.1927398079167226e-05,
"loss": 1.0376,
"step": 170
},
{
"epoch": 0.87,
"learning_rate": 3.008640032631585e-05,
"loss": 1.0436,
"step": 180
},
{
"epoch": 0.92,
"learning_rate": 2.821612728457078e-05,
"loss": 1.0301,
"step": 190
},
{
"epoch": 0.96,
"learning_rate": 2.632734349861874e-05,
"loss": 1.0338,
"step": 200
},
{
"epoch": 0.96,
"eval_loss": 1.0388035774230957,
"eval_runtime": 7.9339,
"eval_samples_per_second": 25.46,
"eval_steps_per_second": 2.143,
"step": 200
},
{
"epoch": 1.01,
"learning_rate": 2.4430920053597356e-05,
"loss": 1.033,
"step": 210
},
{
"epoch": 1.06,
"learning_rate": 2.2537772005470782e-05,
"loss": 1.0399,
"step": 220
},
{
"epoch": 1.11,
"learning_rate": 2.0658795558326743e-05,
"loss": 1.0277,
"step": 230
},
{
"epoch": 1.16,
"learning_rate": 1.8804805350177505e-05,
"loss": 1.0325,
"step": 240
},
{
"epoch": 1.2,
"learning_rate": 1.6986472208222576e-05,
"loss": 1.0322,
"step": 250
},
{
"epoch": 1.2,
"eval_loss": 1.0336002111434937,
"eval_runtime": 7.9742,
"eval_samples_per_second": 25.332,
"eval_steps_per_second": 2.132,
"step": 250
},
{
"epoch": 1.25,
"learning_rate": 1.5214261731829022e-05,
"loss": 1.0248,
"step": 260
},
{
"epoch": 1.3,
"learning_rate": 1.3498374056721197e-05,
"loss": 1.0235,
"step": 270
},
{
"epoch": 1.35,
"learning_rate": 1.1848685147073222e-05,
"loss": 1.0446,
"step": 280
},
{
"epoch": 1.4,
"learning_rate": 1.0274689953403407e-05,
"loss": 1.0271,
"step": 290
},
{
"epoch": 1.44,
"learning_rate": 8.785447763431101e-06,
"loss": 1.0351,
"step": 300
},
{
"epoch": 1.44,
"eval_loss": 1.031198263168335,
"eval_runtime": 7.9378,
"eval_samples_per_second": 25.448,
"eval_steps_per_second": 2.142,
"step": 300
},
{
"epoch": 1.49,
"learning_rate": 7.389530060434696e-06,
"loss": 1.0341,
"step": 310
},
{
"epoch": 1.54,
"learning_rate": 6.094971189217042e-06,
"loss": 1.0267,
"step": 320
},
{
"epoch": 1.59,
"learning_rate": 4.9092221136255444e-06,
"loss": 1.0122,
"step": 330
},
{
"epoch": 1.64,
"learning_rate": 3.839107531779978e-06,
"loss": 1.0197,
"step": 340
},
{
"epoch": 1.69,
"learning_rate": 2.890786595835693e-06,
"loss": 1.0193,
"step": 350
},
{
"epoch": 1.69,
"eval_loss": 1.0297337770462036,
"eval_runtime": 7.9869,
"eval_samples_per_second": 25.292,
"eval_steps_per_second": 2.128,
"step": 350
},
{
"epoch": 1.73,
"learning_rate": 2.0697174623636794e-06,
"loss": 1.0231,
"step": 360
},
{
"epoch": 1.78,
"learning_rate": 1.3806258773811476e-06,
"loss": 1.0233,
"step": 370
},
{
"epoch": 1.83,
"learning_rate": 8.274779768448482e-07,
"loss": 1.0224,
"step": 380
},
{
"epoch": 1.88,
"learning_rate": 4.134574591564494e-07,
"loss": 1.0181,
"step": 390
},
{
"epoch": 1.93,
"learning_rate": 1.4094726106603505e-07,
"loss": 1.0189,
"step": 400
},
{
"epoch": 1.93,
"eval_loss": 1.0294359922409058,
"eval_runtime": 7.9102,
"eval_samples_per_second": 25.537,
"eval_steps_per_second": 2.149,
"step": 400
},
{
"epoch": 1.97,
"learning_rate": 1.1515842439871472e-08,
"loss": 1.0244,
"step": 410
},
{
"epoch": 1.99,
"step": 414,
"total_flos": 3.076881712207102e+18,
"train_loss": 1.0595260426618052,
"train_runtime": 5379.5583,
"train_samples_per_second": 7.407,
"train_steps_per_second": 0.077
}
],
"max_steps": 414,
"num_train_epochs": 2,
"total_flos": 3.076881712207102e+18,
"trial_name": null,
"trial_params": null
}