trained_llama_stanford_format / trainer_state.json
sallywww's picture
add
ca724f4
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 27.692307692307693,
"global_step": 90,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.31,
"learning_rate": 6.666666666666667e-06,
"loss": 3.768,
"step": 1
},
{
"epoch": 0.62,
"learning_rate": 1.3333333333333333e-05,
"loss": 12.368,
"step": 2
},
{
"epoch": 0.92,
"learning_rate": 2e-05,
"loss": 11.537,
"step": 3
},
{
"epoch": 1.23,
"learning_rate": 1.999348095389677e-05,
"loss": 7.8117,
"step": 4
},
{
"epoch": 1.54,
"learning_rate": 1.9973932315179502e-05,
"loss": 5.6659,
"step": 5
},
{
"epoch": 1.85,
"learning_rate": 1.9941379571543597e-05,
"loss": 4.2924,
"step": 6
},
{
"epoch": 2.15,
"learning_rate": 1.9895865165556375e-05,
"loss": 2.1566,
"step": 7
},
{
"epoch": 2.46,
"learning_rate": 1.9837448439320027e-05,
"loss": 2.607,
"step": 8
},
{
"epoch": 2.77,
"learning_rate": 1.976620555710087e-05,
"loss": 2.6508,
"step": 9
},
{
"epoch": 3.08,
"learning_rate": 1.9682229406025635e-05,
"loss": 3.4377,
"step": 10
},
{
"epoch": 3.38,
"learning_rate": 1.9585629474974413e-05,
"loss": 2.3918,
"step": 11
},
{
"epoch": 3.69,
"learning_rate": 1.9476531711828027e-05,
"loss": 0.9541,
"step": 12
},
{
"epoch": 4.0,
"learning_rate": 1.935507835925601e-05,
"loss": 0.2939,
"step": 13
},
{
"epoch": 4.31,
"learning_rate": 1.9221427769259333e-05,
"loss": 0.717,
"step": 14
},
{
"epoch": 4.62,
"learning_rate": 1.9075754196709574e-05,
"loss": 0.3357,
"step": 15
},
{
"epoch": 4.92,
"learning_rate": 1.8918247572153822e-05,
"loss": 0.2568,
"step": 16
},
{
"epoch": 5.23,
"learning_rate": 1.8749113254181498e-05,
"loss": 0.1306,
"step": 17
},
{
"epoch": 5.54,
"learning_rate": 1.8568571761675893e-05,
"loss": 0.3485,
"step": 18
},
{
"epoch": 5.85,
"learning_rate": 1.837685848629965e-05,
"loss": 0.1659,
"step": 19
},
{
"epoch": 6.15,
"learning_rate": 1.817422338558892e-05,
"loss": 0.6525,
"step": 20
},
{
"epoch": 6.46,
"learning_rate": 1.796093065705644e-05,
"loss": 0.1003,
"step": 21
},
{
"epoch": 6.77,
"learning_rate": 1.7737258393728363e-05,
"loss": 0.3485,
"step": 22
},
{
"epoch": 7.08,
"learning_rate": 1.7503498221564026e-05,
"loss": 0.2097,
"step": 23
},
{
"epoch": 7.38,
"learning_rate": 1.725995491923131e-05,
"loss": 0.1424,
"step": 24
},
{
"epoch": 7.69,
"learning_rate": 1.7006946020733426e-05,
"loss": 0.1512,
"step": 25
},
{
"epoch": 8.0,
"learning_rate": 1.6744801401405138e-05,
"loss": 0.0815,
"step": 26
},
{
"epoch": 8.31,
"learning_rate": 1.647386284781828e-05,
"loss": 0.0202,
"step": 27
},
{
"epoch": 8.62,
"learning_rate": 1.6194483612157232e-05,
"loss": 0.0909,
"step": 28
},
{
"epoch": 8.92,
"learning_rate": 1.590702795164551e-05,
"loss": 0.1048,
"step": 29
},
{
"epoch": 9.23,
"learning_rate": 1.5611870653623826e-05,
"loss": 0.1186,
"step": 30
},
{
"epoch": 9.54,
"learning_rate": 1.530939654689887e-05,
"loss": 0.0219,
"step": 31
},
{
"epoch": 9.85,
"learning_rate": 1.5000000000000002e-05,
"loss": 0.0718,
"step": 32
},
{
"epoch": 10.15,
"learning_rate": 1.4684084406997903e-05,
"loss": 0.0788,
"step": 33
},
{
"epoch": 10.46,
"learning_rate": 1.4362061661555675e-05,
"loss": 0.0534,
"step": 34
},
{
"epoch": 10.77,
"learning_rate": 1.4034351619898088e-05,
"loss": 0.0225,
"step": 35
},
{
"epoch": 11.08,
"learning_rate": 1.3701381553399147e-05,
"loss": 0.0345,
"step": 36
},
{
"epoch": 11.38,
"learning_rate": 1.3363585591501751e-05,
"loss": 0.0125,
"step": 37
},
{
"epoch": 11.69,
"learning_rate": 1.3021404155695728e-05,
"loss": 0.0211,
"step": 38
},
{
"epoch": 12.0,
"learning_rate": 1.2675283385292212e-05,
"loss": 0.0998,
"step": 39
},
{
"epoch": 12.31,
"learning_rate": 1.2325674555743106e-05,
"loss": 0.0139,
"step": 40
},
{
"epoch": 12.62,
"learning_rate": 1.1973033490264e-05,
"loss": 0.0052,
"step": 41
},
{
"epoch": 12.92,
"learning_rate": 1.161781996552765e-05,
"loss": 0.0069,
"step": 42
},
{
"epoch": 13.23,
"learning_rate": 1.1260497112202895e-05,
"loss": 0.1117,
"step": 43
},
{
"epoch": 13.54,
"learning_rate": 1.0901530811120655e-05,
"loss": 0.0081,
"step": 44
},
{
"epoch": 13.85,
"learning_rate": 1.0541389085854177e-05,
"loss": 0.009,
"step": 45
},
{
"epoch": 14.15,
"learning_rate": 1.0180541492505605e-05,
"loss": 0.0019,
"step": 46
},
{
"epoch": 14.46,
"learning_rate": 9.819458507494395e-06,
"loss": 0.0309,
"step": 47
},
{
"epoch": 14.77,
"learning_rate": 9.458610914145826e-06,
"loss": 0.0016,
"step": 48
},
{
"epoch": 15.08,
"learning_rate": 9.098469188879348e-06,
"loss": 0.0158,
"step": 49
},
{
"epoch": 15.38,
"learning_rate": 8.739502887797108e-06,
"loss": 0.0013,
"step": 50
},
{
"epoch": 15.69,
"learning_rate": 8.382180034472353e-06,
"loss": 0.0056,
"step": 51
},
{
"epoch": 16.0,
"learning_rate": 8.026966509736001e-06,
"loss": 0.0311,
"step": 52
},
{
"epoch": 16.31,
"learning_rate": 7.674325444256899e-06,
"loss": 0.0198,
"step": 53
},
{
"epoch": 16.62,
"learning_rate": 7.324716614707794e-06,
"loss": 0.0032,
"step": 54
},
{
"epoch": 16.92,
"learning_rate": 6.978595844304272e-06,
"loss": 0.0011,
"step": 55
},
{
"epoch": 17.23,
"learning_rate": 6.636414408498249e-06,
"loss": 0.0038,
"step": 56
},
{
"epoch": 17.54,
"learning_rate": 6.298618446600856e-06,
"loss": 0.011,
"step": 57
},
{
"epoch": 17.85,
"learning_rate": 5.965648380101916e-06,
"loss": 0.023,
"step": 58
},
{
"epoch": 18.15,
"learning_rate": 5.637938338444325e-06,
"loss": 0.0005,
"step": 59
},
{
"epoch": 18.46,
"learning_rate": 5.3159155930021e-06,
"loss": 0.0007,
"step": 60
},
{
"epoch": 18.77,
"learning_rate": 5.000000000000003e-06,
"loss": 0.0063,
"step": 61
},
{
"epoch": 19.08,
"learning_rate": 4.690603453101134e-06,
"loss": 0.0009,
"step": 62
},
{
"epoch": 19.38,
"learning_rate": 4.388129346376177e-06,
"loss": 0.0021,
"step": 63
},
{
"epoch": 19.69,
"learning_rate": 4.092972048354491e-06,
"loss": 0.02,
"step": 64
},
{
"epoch": 20.0,
"learning_rate": 3.8055163878427703e-06,
"loss": 0.001,
"step": 65
},
{
"epoch": 20.31,
"learning_rate": 3.5261371521817247e-06,
"loss": 0.0008,
"step": 66
},
{
"epoch": 20.62,
"learning_rate": 3.255198598594862e-06,
"loss": 0.0098,
"step": 67
},
{
"epoch": 20.92,
"learning_rate": 2.9930539792665767e-06,
"loss": 0.0014,
"step": 68
},
{
"epoch": 21.23,
"learning_rate": 2.740045080768694e-06,
"loss": 0.0015,
"step": 69
},
{
"epoch": 21.54,
"learning_rate": 2.496501778435977e-06,
"loss": 0.0006,
"step": 70
},
{
"epoch": 21.85,
"learning_rate": 2.2627416062716366e-06,
"loss": 0.0064,
"step": 71
},
{
"epoch": 22.15,
"learning_rate": 2.0390693429435626e-06,
"loss": 0.0007,
"step": 72
},
{
"epoch": 22.46,
"learning_rate": 1.8257766144110823e-06,
"loss": 0.0015,
"step": 73
},
{
"epoch": 22.77,
"learning_rate": 1.6231415137003536e-06,
"loss": 0.0089,
"step": 74
},
{
"epoch": 23.08,
"learning_rate": 1.4314282383241097e-06,
"loss": 0.0022,
"step": 75
},
{
"epoch": 23.38,
"learning_rate": 1.2508867458185037e-06,
"loss": 0.0076,
"step": 76
},
{
"epoch": 23.69,
"learning_rate": 1.0817524278461777e-06,
"loss": 0.0004,
"step": 77
},
{
"epoch": 24.0,
"learning_rate": 9.242458032904311e-07,
"loss": 0.0005,
"step": 78
},
{
"epoch": 24.31,
"learning_rate": 7.785722307406685e-07,
"loss": 0.0004,
"step": 79
},
{
"epoch": 24.62,
"learning_rate": 6.449216407439906e-07,
"loss": 0.0049,
"step": 80
},
{
"epoch": 24.92,
"learning_rate": 5.234682881719766e-07,
"loss": 0.004,
"step": 81
},
{
"epoch": 25.23,
"learning_rate": 4.1437052502558693e-07,
"loss": 0.0037,
"step": 82
},
{
"epoch": 25.54,
"learning_rate": 3.1777059397436693e-07,
"loss": 0.0099,
"step": 83
},
{
"epoch": 25.85,
"learning_rate": 2.3379444289913344e-07,
"loss": 0.001,
"step": 84
},
{
"epoch": 26.15,
"learning_rate": 1.6255156067997325e-07,
"loss": 0.0004,
"step": 85
},
{
"epoch": 26.46,
"learning_rate": 1.041348344436277e-07,
"loss": 0.0021,
"step": 86
},
{
"epoch": 26.77,
"learning_rate": 5.862042845640403e-08,
"loss": 0.0052,
"step": 87
},
{
"epoch": 27.08,
"learning_rate": 2.606768482050215e-08,
"loss": 0.0047,
"step": 88
},
{
"epoch": 27.38,
"learning_rate": 6.5190461032305085e-09,
"loss": 0.0004,
"step": 89
},
{
"epoch": 27.69,
"learning_rate": 0.0,
"loss": 0.0003,
"step": 90
},
{
"epoch": 27.69,
"step": 90,
"total_flos": 9963642317438976.0,
"train_loss": 0.7193527906240585,
"train_runtime": 2724.9841,
"train_samples_per_second": 1.134,
"train_steps_per_second": 0.033
}
],
"max_steps": 90,
"num_train_epochs": 30,
"total_flos": 9963642317438976.0,
"trial_name": null,
"trial_params": null
}