test_mistral_tool_1536ctx / trainer_state.json
hoang14's picture
Upload 12 files
4102053
raw
history blame contribute delete
No virus
5.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.170731707317073,
"eval_steps": 500,
"global_step": 900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 8e-05,
"loss": 0.5555,
"step": 20
},
{
"epoch": 0.05,
"learning_rate": 8e-05,
"loss": 0.4856,
"step": 40
},
{
"epoch": 0.08,
"learning_rate": 8e-05,
"loss": 0.427,
"step": 60
},
{
"epoch": 0.1,
"learning_rate": 8e-05,
"loss": 0.4319,
"step": 80
},
{
"epoch": 0.13,
"learning_rate": 8e-05,
"loss": 0.4089,
"step": 100
},
{
"epoch": 0.16,
"learning_rate": 8e-05,
"loss": 0.3895,
"step": 120
},
{
"epoch": 0.18,
"learning_rate": 8e-05,
"loss": 0.3859,
"step": 140
},
{
"epoch": 0.21,
"learning_rate": 8e-05,
"loss": 0.3714,
"step": 160
},
{
"epoch": 0.23,
"learning_rate": 8e-05,
"loss": 0.3563,
"step": 180
},
{
"epoch": 0.26,
"learning_rate": 8e-05,
"loss": 0.3631,
"step": 200
},
{
"epoch": 0.29,
"learning_rate": 8e-05,
"loss": 0.3691,
"step": 220
},
{
"epoch": 0.31,
"learning_rate": 8e-05,
"loss": 0.3657,
"step": 240
},
{
"epoch": 0.34,
"learning_rate": 8e-05,
"loss": 0.3652,
"step": 260
},
{
"epoch": 0.36,
"learning_rate": 8e-05,
"loss": 0.3474,
"step": 280
},
{
"epoch": 0.39,
"learning_rate": 8e-05,
"loss": 0.3553,
"step": 300
},
{
"epoch": 0.42,
"learning_rate": 8e-05,
"loss": 0.3438,
"step": 320
},
{
"epoch": 0.44,
"learning_rate": 8e-05,
"loss": 0.3713,
"step": 340
},
{
"epoch": 0.47,
"learning_rate": 8e-05,
"loss": 0.3454,
"step": 360
},
{
"epoch": 0.49,
"learning_rate": 8e-05,
"loss": 0.3594,
"step": 380
},
{
"epoch": 0.52,
"learning_rate": 8e-05,
"loss": 0.3613,
"step": 400
},
{
"epoch": 0.55,
"learning_rate": 8e-05,
"loss": 0.3329,
"step": 420
},
{
"epoch": 0.57,
"learning_rate": 8e-05,
"loss": 0.3466,
"step": 440
},
{
"epoch": 0.6,
"learning_rate": 8e-05,
"loss": 0.3408,
"step": 460
},
{
"epoch": 0.62,
"learning_rate": 8e-05,
"loss": 0.3393,
"step": 480
},
{
"epoch": 0.65,
"learning_rate": 8e-05,
"loss": 0.338,
"step": 500
},
{
"epoch": 0.68,
"learning_rate": 8e-05,
"loss": 0.328,
"step": 520
},
{
"epoch": 0.7,
"learning_rate": 8e-05,
"loss": 0.3292,
"step": 540
},
{
"epoch": 0.73,
"learning_rate": 8e-05,
"loss": 0.3321,
"step": 560
},
{
"epoch": 0.75,
"learning_rate": 8e-05,
"loss": 0.3296,
"step": 580
},
{
"epoch": 0.78,
"learning_rate": 8e-05,
"loss": 0.3328,
"step": 600
},
{
"epoch": 0.81,
"learning_rate": 8e-05,
"loss": 0.3197,
"step": 620
},
{
"epoch": 0.83,
"learning_rate": 8e-05,
"loss": 0.3401,
"step": 640
},
{
"epoch": 0.86,
"learning_rate": 8e-05,
"loss": 0.3211,
"step": 660
},
{
"epoch": 0.88,
"learning_rate": 8e-05,
"loss": 0.3383,
"step": 680
},
{
"epoch": 0.91,
"learning_rate": 8e-05,
"loss": 0.3053,
"step": 700
},
{
"epoch": 0.94,
"learning_rate": 8e-05,
"loss": 0.3252,
"step": 720
},
{
"epoch": 0.96,
"learning_rate": 8e-05,
"loss": 0.3228,
"step": 740
},
{
"epoch": 0.99,
"learning_rate": 8e-05,
"loss": 0.333,
"step": 760
},
{
"epoch": 1.01,
"learning_rate": 8e-05,
"loss": 0.3095,
"step": 780
},
{
"epoch": 1.04,
"learning_rate": 8e-05,
"loss": 0.3179,
"step": 800
},
{
"epoch": 1.07,
"learning_rate": 8e-05,
"loss": 0.2828,
"step": 820
},
{
"epoch": 1.09,
"learning_rate": 8e-05,
"loss": 0.2855,
"step": 840
},
{
"epoch": 1.12,
"learning_rate": 8e-05,
"loss": 0.3027,
"step": 860
},
{
"epoch": 1.14,
"learning_rate": 8e-05,
"loss": 0.2984,
"step": 880
},
{
"epoch": 1.17,
"learning_rate": 8e-05,
"loss": 0.2772,
"step": 900
}
],
"logging_steps": 20,
"max_steps": 1536,
"num_train_epochs": 2,
"save_steps": 150,
"total_flos": 2.4621551714304e+17,
"trial_name": null,
"trial_params": null
}