Llama-7B-SFT / trainer_state.json
girrajjangid's picture
End of training
558c77c
raw
history blame contribute delete
No virus
4.99 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 732,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 1.9963183634476757e-05,
"loss": 1.9326,
"step": 20
},
{
"epoch": 0.11,
"learning_rate": 1.985300562686109e-05,
"loss": 1.8276,
"step": 40
},
{
"epoch": 0.16,
"learning_rate": 1.9670277247913205e-05,
"loss": 1.8392,
"step": 60
},
{
"epoch": 0.22,
"learning_rate": 1.941634397659126e-05,
"loss": 1.3092,
"step": 80
},
{
"epoch": 0.27,
"learning_rate": 1.909307559292236e-05,
"loss": 1.1181,
"step": 100
},
{
"epoch": 0.33,
"learning_rate": 1.8702852410301556e-05,
"loss": 1.2467,
"step": 120
},
{
"epoch": 0.38,
"learning_rate": 1.8248547748594246e-05,
"loss": 1.0989,
"step": 140
},
{
"epoch": 0.44,
"learning_rate": 1.77335067770973e-05,
"loss": 1.1427,
"step": 160
},
{
"epoch": 0.49,
"learning_rate": 1.7161521883143936e-05,
"loss": 1.1778,
"step": 180
},
{
"epoch": 0.55,
"learning_rate": 1.653680474772006e-05,
"loss": 1.0765,
"step": 200
},
{
"epoch": 0.6,
"learning_rate": 1.586395533370696e-05,
"loss": 1.0176,
"step": 220
},
{
"epoch": 0.66,
"learning_rate": 1.5147928015098309e-05,
"loss": 1.1858,
"step": 240
},
{
"epoch": 0.71,
"learning_rate": 1.4393995096591415e-05,
"loss": 1.1799,
"step": 260
},
{
"epoch": 0.77,
"learning_rate": 1.3607707992167836e-05,
"loss": 1.1227,
"step": 280
},
{
"epoch": 0.82,
"learning_rate": 1.2794856348516095e-05,
"loss": 1.0963,
"step": 300
},
{
"epoch": 0.87,
"learning_rate": 1.196142541428197e-05,
"loss": 1.0744,
"step": 320
},
{
"epoch": 0.93,
"learning_rate": 1.1113551969048088e-05,
"loss": 1.1303,
"step": 340
},
{
"epoch": 0.98,
"learning_rate": 1.0257479136549889e-05,
"loss": 1.1179,
"step": 360
},
{
"epoch": 1.04,
"learning_rate": 9.399510414850518e-06,
"loss": 1.0918,
"step": 380
},
{
"epoch": 1.09,
"learning_rate": 8.545963261963102e-06,
"loss": 1.0483,
"step": 400
},
{
"epoch": 1.15,
"learning_rate": 7.703122578682047e-06,
"loss": 1.2234,
"step": 420
},
{
"epoch": 1.2,
"learning_rate": 6.877194431142055e-06,
"loss": 1.1311,
"step": 440
},
{
"epoch": 1.26,
"learning_rate": 6.074260353858283e-06,
"loss": 1.1261,
"step": 460
},
{
"epoch": 1.31,
"learning_rate": 5.300232569726805e-06,
"loss": 1.1854,
"step": 480
},
{
"epoch": 1.37,
"learning_rate": 4.560810456712754e-06,
"loss": 1.1388,
"step": 500
},
{
"epoch": 1.42,
"learning_rate": 3.86143858177388e-06,
"loss": 1.0478,
"step": 520
},
{
"epoch": 1.48,
"learning_rate": 3.207266611027069e-06,
"loss": 1.1229,
"step": 540
},
{
"epoch": 1.53,
"learning_rate": 2.6031113913503337e-06,
"loss": 0.9668,
"step": 560
},
{
"epoch": 1.58,
"learning_rate": 2.0534214826237486e-06,
"loss": 1.1179,
"step": 580
},
{
"epoch": 1.64,
"learning_rate": 1.5622444017681438e-06,
"loss": 1.0696,
"step": 600
},
{
"epoch": 1.69,
"learning_rate": 1.1331968197725985e-06,
"loss": 1.055,
"step": 620
},
{
"epoch": 1.75,
"learning_rate": 7.694379311582401e-07,
"loss": 1.058,
"step": 640
},
{
"epoch": 1.8,
"learning_rate": 4.73646191966175e-07,
"loss": 1.0865,
"step": 660
},
{
"epoch": 1.86,
"learning_rate": 2.479995975541749e-07,
"loss": 0.9857,
"step": 680
},
{
"epoch": 1.91,
"learning_rate": 9.415964542203059e-08,
"loss": 1.1078,
"step": 700
},
{
"epoch": 1.97,
"learning_rate": 1.325910115169471e-08,
"loss": 1.1117,
"step": 720
},
{
"epoch": 2.0,
"step": 732,
"total_flos": 2.256724490932224e+16,
"train_loss": 1.1766422537506604,
"train_runtime": 7172.2389,
"train_samples_per_second": 0.204,
"train_steps_per_second": 0.102
}
],
"logging_steps": 20,
"max_steps": 732,
"num_train_epochs": 2,
"save_steps": 20,
"total_flos": 2.256724490932224e+16,
"trial_name": null,
"trial_params": null
}