phi-2-oasst1 / checkpoint-500 /trainer_state.json
TharunSivamani's picture
final commit
6a33f9a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.06364562118126273,
"eval_steps": 500,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.0002,
"loss": 1.3744,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 0.0002,
"loss": 1.4719,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 0.0002,
"loss": 1.8097,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 0.0002,
"loss": 1.868,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 0.0002,
"loss": 2.0557,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 0.0002,
"loss": 1.4231,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 0.0002,
"loss": 1.3828,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 0.0002,
"loss": 1.7284,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 0.0002,
"loss": 2.14,
"step": 90
},
{
"epoch": 0.01,
"learning_rate": 0.0002,
"loss": 2.3983,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 0.0002,
"loss": 1.8301,
"step": 110
},
{
"epoch": 0.02,
"learning_rate": 0.0002,
"loss": 1.2601,
"step": 120
},
{
"epoch": 0.02,
"learning_rate": 0.0002,
"loss": 1.4198,
"step": 130
},
{
"epoch": 0.02,
"learning_rate": 0.0002,
"loss": 1.8985,
"step": 140
},
{
"epoch": 0.02,
"learning_rate": 0.0002,
"loss": 1.7728,
"step": 150
},
{
"epoch": 0.02,
"learning_rate": 0.0002,
"loss": 1.1191,
"step": 160
},
{
"epoch": 0.02,
"learning_rate": 0.0002,
"loss": 1.3876,
"step": 170
},
{
"epoch": 0.02,
"learning_rate": 0.0002,
"loss": 1.528,
"step": 180
},
{
"epoch": 0.02,
"learning_rate": 0.0002,
"loss": 2.067,
"step": 190
},
{
"epoch": 0.03,
"learning_rate": 0.0002,
"loss": 1.8507,
"step": 200
},
{
"epoch": 0.03,
"learning_rate": 0.0002,
"loss": 1.4472,
"step": 210
},
{
"epoch": 0.03,
"learning_rate": 0.0002,
"loss": 1.5308,
"step": 220
},
{
"epoch": 0.03,
"learning_rate": 0.0002,
"loss": 1.2405,
"step": 230
},
{
"epoch": 0.03,
"learning_rate": 0.0002,
"loss": 1.8404,
"step": 240
},
{
"epoch": 0.03,
"learning_rate": 0.0002,
"loss": 2.0961,
"step": 250
},
{
"epoch": 0.03,
"learning_rate": 0.0002,
"loss": 1.1615,
"step": 260
},
{
"epoch": 0.03,
"learning_rate": 0.0002,
"loss": 1.1319,
"step": 270
},
{
"epoch": 0.04,
"learning_rate": 0.0002,
"loss": 1.3976,
"step": 280
},
{
"epoch": 0.04,
"learning_rate": 0.0002,
"loss": 1.8732,
"step": 290
},
{
"epoch": 0.04,
"learning_rate": 0.0002,
"loss": 2.1415,
"step": 300
},
{
"epoch": 0.04,
"learning_rate": 0.0002,
"loss": 1.7423,
"step": 310
},
{
"epoch": 0.04,
"learning_rate": 0.0002,
"loss": 1.5519,
"step": 320
},
{
"epoch": 0.04,
"learning_rate": 0.0002,
"loss": 1.4975,
"step": 330
},
{
"epoch": 0.04,
"learning_rate": 0.0002,
"loss": 2.2549,
"step": 340
},
{
"epoch": 0.04,
"learning_rate": 0.0002,
"loss": 2.3298,
"step": 350
},
{
"epoch": 0.05,
"learning_rate": 0.0002,
"loss": 1.1939,
"step": 360
},
{
"epoch": 0.05,
"learning_rate": 0.0002,
"loss": 1.7077,
"step": 370
},
{
"epoch": 0.05,
"learning_rate": 0.0002,
"loss": 1.9593,
"step": 380
},
{
"epoch": 0.05,
"learning_rate": 0.0002,
"loss": 1.9703,
"step": 390
},
{
"epoch": 0.05,
"learning_rate": 0.0002,
"loss": 1.958,
"step": 400
},
{
"epoch": 0.05,
"learning_rate": 0.0002,
"loss": 1.3889,
"step": 410
},
{
"epoch": 0.05,
"learning_rate": 0.0002,
"loss": 1.4975,
"step": 420
},
{
"epoch": 0.05,
"learning_rate": 0.0002,
"loss": 1.9328,
"step": 430
},
{
"epoch": 0.06,
"learning_rate": 0.0002,
"loss": 2.1274,
"step": 440
},
{
"epoch": 0.06,
"learning_rate": 0.0002,
"loss": 2.0546,
"step": 450
},
{
"epoch": 0.06,
"learning_rate": 0.0002,
"loss": 1.2364,
"step": 460
},
{
"epoch": 0.06,
"learning_rate": 0.0002,
"loss": 1.3099,
"step": 470
},
{
"epoch": 0.06,
"learning_rate": 0.0002,
"loss": 1.2687,
"step": 480
},
{
"epoch": 0.06,
"learning_rate": 0.0002,
"loss": 1.8152,
"step": 490
},
{
"epoch": 0.06,
"learning_rate": 0.0002,
"loss": 2.1546,
"step": 500
}
],
"logging_steps": 10,
"max_steps": 500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 1971890728980480.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}