insft50e_llama7b / checkpoint-500 /trainer_state.json
sallywww's picture
commit
c71674f
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 30.76923076923077,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.23,
"learning_rate": 0.000975,
"loss": 1.1621,
"step": 20
},
{
"epoch": 2.46,
"learning_rate": 0.00095,
"loss": 0.7214,
"step": 40
},
{
"epoch": 3.69,
"learning_rate": 0.000925,
"loss": 0.5522,
"step": 60
},
{
"epoch": 4.92,
"learning_rate": 0.0009000000000000001,
"loss": 0.4059,
"step": 80
},
{
"epoch": 6.15,
"learning_rate": 0.000875,
"loss": 0.3237,
"step": 100
},
{
"epoch": 7.38,
"learning_rate": 0.00085,
"loss": 0.249,
"step": 120
},
{
"epoch": 8.62,
"learning_rate": 0.000825,
"loss": 0.1859,
"step": 140
},
{
"epoch": 9.85,
"learning_rate": 0.0008,
"loss": 0.1988,
"step": 160
},
{
"epoch": 11.08,
"learning_rate": 0.0007750000000000001,
"loss": 0.1458,
"step": 180
},
{
"epoch": 12.31,
"learning_rate": 0.00075,
"loss": 0.1279,
"step": 200
},
{
"epoch": 13.54,
"learning_rate": 0.000725,
"loss": 0.1002,
"step": 220
},
{
"epoch": 14.77,
"learning_rate": 0.0007,
"loss": 0.1024,
"step": 240
},
{
"epoch": 16.0,
"learning_rate": 0.000675,
"loss": 0.0808,
"step": 260
},
{
"epoch": 17.23,
"learning_rate": 0.0006500000000000001,
"loss": 0.0743,
"step": 280
},
{
"epoch": 18.46,
"learning_rate": 0.000625,
"loss": 0.0638,
"step": 300
},
{
"epoch": 19.69,
"learning_rate": 0.0006,
"loss": 0.0444,
"step": 320
},
{
"epoch": 20.92,
"learning_rate": 0.000575,
"loss": 0.0441,
"step": 340
},
{
"epoch": 22.15,
"learning_rate": 0.00055,
"loss": 0.035,
"step": 360
},
{
"epoch": 23.38,
"learning_rate": 0.0005250000000000001,
"loss": 0.0305,
"step": 380
},
{
"epoch": 24.62,
"learning_rate": 0.0005,
"loss": 0.0272,
"step": 400
},
{
"epoch": 25.85,
"learning_rate": 0.000475,
"loss": 0.0252,
"step": 420
},
{
"epoch": 27.08,
"learning_rate": 0.00045000000000000004,
"loss": 0.021,
"step": 440
},
{
"epoch": 28.31,
"learning_rate": 0.000425,
"loss": 0.0188,
"step": 460
},
{
"epoch": 29.54,
"learning_rate": 0.0004,
"loss": 0.0186,
"step": 480
},
{
"epoch": 30.77,
"learning_rate": 0.000375,
"loss": 0.0178,
"step": 500
}
],
"max_steps": 800,
"num_train_epochs": 50,
"total_flos": 6.497819467776e+17,
"trial_name": null,
"trial_params": null
}