bloom_560_squad / trainer_state.json
avaassadi's picture
Upload 7 files
6a8d46b
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.07289804249572203,
"global_step": 9500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.00099,
"loss": 3.0632,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 0.0009901010101010103,
"loss": 2.9421,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 0.00098,
"loss": 2.9549,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 0.00096989898989899,
"loss": 2.9156,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 0.0009597979797979798,
"loss": 2.9743,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 0.0009496969696969697,
"loss": 2.9801,
"step": 600
},
{
"epoch": 0.01,
"learning_rate": 0.0009395959595959597,
"loss": 2.9492,
"step": 700
},
{
"epoch": 0.01,
"learning_rate": 0.0009294949494949495,
"loss": 3.043,
"step": 800
},
{
"epoch": 0.01,
"learning_rate": 0.0009193939393939395,
"loss": 2.9186,
"step": 900
},
{
"epoch": 0.01,
"learning_rate": 0.0009092929292929293,
"loss": 2.8856,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 0.0008992929292929293,
"loss": 2.947,
"step": 1100
},
{
"epoch": 0.01,
"learning_rate": 0.0008891919191919192,
"loss": 2.9986,
"step": 1200
},
{
"epoch": 0.01,
"learning_rate": 0.0008790909090909091,
"loss": 2.9525,
"step": 1300
},
{
"epoch": 0.01,
"learning_rate": 0.000868989898989899,
"loss": 2.9203,
"step": 1400
},
{
"epoch": 0.01,
"learning_rate": 0.000858888888888889,
"loss": 2.9795,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 0.0008487878787878788,
"loss": 2.9549,
"step": 1600
},
{
"epoch": 0.01,
"learning_rate": 0.0008386868686868687,
"loss": 2.9033,
"step": 1700
},
{
"epoch": 0.01,
"learning_rate": 0.0008285858585858585,
"loss": 2.9117,
"step": 1800
},
{
"epoch": 0.01,
"learning_rate": 0.0008185858585858587,
"loss": 2.9674,
"step": 1900
},
{
"epoch": 0.02,
"learning_rate": 0.0008085858585858587,
"loss": 2.9492,
"step": 2000
},
{
"epoch": 0.02,
"learning_rate": 0.0007984848484848485,
"loss": 2.9526,
"step": 2100
},
{
"epoch": 0.02,
"learning_rate": 0.0007883838383838384,
"loss": 2.9687,
"step": 2200
},
{
"epoch": 0.02,
"learning_rate": 0.0007782828282828282,
"loss": 2.9783,
"step": 2300
},
{
"epoch": 0.02,
"learning_rate": 0.0007681818181818182,
"loss": 2.9889,
"step": 2400
},
{
"epoch": 0.02,
"learning_rate": 0.0007581818181818182,
"loss": 2.9141,
"step": 2500
},
{
"epoch": 0.02,
"learning_rate": 0.0007480808080808081,
"loss": 2.9131,
"step": 2600
},
{
"epoch": 0.02,
"learning_rate": 0.000737979797979798,
"loss": 2.9087,
"step": 2700
},
{
"epoch": 0.02,
"learning_rate": 0.0007278787878787879,
"loss": 2.9485,
"step": 2800
},
{
"epoch": 0.02,
"learning_rate": 0.0007177777777777778,
"loss": 2.8469,
"step": 2900
},
{
"epoch": 0.02,
"learning_rate": 0.0007076767676767677,
"loss": 2.8715,
"step": 3000
},
{
"epoch": 0.02,
"learning_rate": 0.0006975757575757576,
"loss": 2.9388,
"step": 3100
},
{
"epoch": 0.02,
"learning_rate": 0.0006874747474747474,
"loss": 2.9958,
"step": 3200
},
{
"epoch": 0.03,
"learning_rate": 0.0006773737373737374,
"loss": 2.9552,
"step": 3300
},
{
"epoch": 0.03,
"learning_rate": 0.0006672727272727273,
"loss": 2.9126,
"step": 3400
},
{
"epoch": 0.03,
"learning_rate": 0.0006571717171717172,
"loss": 3.0015,
"step": 3500
},
{
"epoch": 0.03,
"learning_rate": 0.000647070707070707,
"loss": 2.9406,
"step": 3600
},
{
"epoch": 0.03,
"learning_rate": 0.0006369696969696969,
"loss": 2.8848,
"step": 3700
},
{
"epoch": 0.03,
"learning_rate": 0.000626868686868687,
"loss": 2.9598,
"step": 3800
},
{
"epoch": 0.03,
"learning_rate": 0.0006167676767676768,
"loss": 2.8795,
"step": 3900
},
{
"epoch": 0.03,
"learning_rate": 0.0006066666666666667,
"loss": 2.9366,
"step": 4000
},
{
"epoch": 0.03,
"learning_rate": 0.0005965656565656565,
"loss": 2.9357,
"step": 4100
},
{
"epoch": 0.03,
"learning_rate": 0.0005864646464646464,
"loss": 2.8924,
"step": 4200
},
{
"epoch": 0.03,
"learning_rate": 0.0005763636363636365,
"loss": 2.8939,
"step": 4300
},
{
"epoch": 0.03,
"learning_rate": 0.0005662626262626263,
"loss": 2.8453,
"step": 4400
},
{
"epoch": 0.03,
"learning_rate": 0.0005561616161616162,
"loss": 2.8551,
"step": 4500
},
{
"epoch": 0.04,
"learning_rate": 0.000546060606060606,
"loss": 2.9476,
"step": 4600
},
{
"epoch": 0.04,
"learning_rate": 0.0005359595959595959,
"loss": 2.9256,
"step": 4700
},
{
"epoch": 0.04,
"learning_rate": 0.000525858585858586,
"loss": 2.963,
"step": 4800
},
{
"epoch": 0.04,
"learning_rate": 0.0005157575757575758,
"loss": 2.8954,
"step": 4900
},
{
"epoch": 0.04,
"learning_rate": 0.0005056565656565657,
"loss": 2.8978,
"step": 5000
},
{
"epoch": 0.04,
"learning_rate": 0.0004955555555555556,
"loss": 2.8789,
"step": 5100
},
{
"epoch": 0.04,
"learning_rate": 0.0004854545454545455,
"loss": 2.8757,
"step": 5200
},
{
"epoch": 0.04,
"learning_rate": 0.00047545454545454545,
"loss": 2.9107,
"step": 5300
},
{
"epoch": 0.04,
"learning_rate": 0.0004653535353535354,
"loss": 2.8769,
"step": 5400
},
{
"epoch": 0.04,
"learning_rate": 0.00045525252525252524,
"loss": 2.8659,
"step": 5500
},
{
"epoch": 0.04,
"learning_rate": 0.00044515151515151516,
"loss": 2.8624,
"step": 5600
},
{
"epoch": 0.04,
"learning_rate": 0.0004350505050505051,
"loss": 2.8672,
"step": 5700
},
{
"epoch": 0.04,
"learning_rate": 0.00042494949494949495,
"loss": 2.8286,
"step": 5800
},
{
"epoch": 0.05,
"learning_rate": 0.00041484848484848487,
"loss": 2.9685,
"step": 5900
},
{
"epoch": 0.05,
"learning_rate": 0.00040474747474747474,
"loss": 2.9659,
"step": 6000
},
{
"epoch": 0.05,
"learning_rate": 0.00039464646464646466,
"loss": 2.8937,
"step": 6100
},
{
"epoch": 0.05,
"learning_rate": 0.0003845454545454545,
"loss": 2.8521,
"step": 6200
},
{
"epoch": 0.05,
"learning_rate": 0.0003744444444444445,
"loss": 2.8721,
"step": 6300
},
{
"epoch": 0.05,
"learning_rate": 0.00036434343434343437,
"loss": 2.8664,
"step": 6400
},
{
"epoch": 0.05,
"learning_rate": 0.00035424242424242423,
"loss": 2.8925,
"step": 6500
},
{
"epoch": 0.05,
"learning_rate": 0.00034414141414141415,
"loss": 2.8477,
"step": 6600
},
{
"epoch": 0.05,
"learning_rate": 0.000334040404040404,
"loss": 2.8804,
"step": 6700
},
{
"epoch": 0.05,
"learning_rate": 0.00032393939393939394,
"loss": 2.8661,
"step": 6800
},
{
"epoch": 0.05,
"learning_rate": 0.00031383838383838386,
"loss": 2.8934,
"step": 6900
},
{
"epoch": 0.05,
"learning_rate": 0.0003037373737373738,
"loss": 2.9107,
"step": 7000
},
{
"epoch": 0.05,
"learning_rate": 0.00029363636363636365,
"loss": 2.8775,
"step": 7100
},
{
"epoch": 0.06,
"learning_rate": 0.0002835353535353535,
"loss": 2.8849,
"step": 7200
},
{
"epoch": 0.06,
"learning_rate": 0.00027343434343434343,
"loss": 2.845,
"step": 7300
},
{
"epoch": 0.06,
"learning_rate": 0.0002633333333333333,
"loss": 2.8441,
"step": 7400
},
{
"epoch": 0.06,
"learning_rate": 0.0002532323232323233,
"loss": 2.8455,
"step": 7500
},
{
"epoch": 0.06,
"learning_rate": 0.00024313131313131314,
"loss": 2.8027,
"step": 7600
},
{
"epoch": 0.06,
"learning_rate": 0.00023303030303030304,
"loss": 2.8793,
"step": 7700
},
{
"epoch": 0.06,
"learning_rate": 0.00022292929292929293,
"loss": 2.8763,
"step": 7800
},
{
"epoch": 0.06,
"learning_rate": 0.00021282828282828285,
"loss": 2.8456,
"step": 7900
},
{
"epoch": 0.06,
"learning_rate": 0.00020272727272727274,
"loss": 2.8217,
"step": 8000
},
{
"epoch": 0.06,
"learning_rate": 0.0001926262626262626,
"loss": 2.8026,
"step": 8100
},
{
"epoch": 0.06,
"learning_rate": 0.00018252525252525253,
"loss": 2.8486,
"step": 8200
},
{
"epoch": 0.06,
"learning_rate": 0.00017242424242424242,
"loss": 2.8023,
"step": 8300
},
{
"epoch": 0.06,
"learning_rate": 0.00016232323232323232,
"loss": 2.8711,
"step": 8400
},
{
"epoch": 0.07,
"learning_rate": 0.00015222222222222224,
"loss": 2.9234,
"step": 8500
},
{
"epoch": 0.07,
"learning_rate": 0.00014212121212121213,
"loss": 2.8615,
"step": 8600
},
{
"epoch": 0.07,
"learning_rate": 0.00013202020202020203,
"loss": 2.8765,
"step": 8700
},
{
"epoch": 0.07,
"learning_rate": 0.00012191919191919192,
"loss": 2.891,
"step": 8800
},
{
"epoch": 0.07,
"learning_rate": 0.00011181818181818181,
"loss": 2.8735,
"step": 8900
},
{
"epoch": 0.07,
"learning_rate": 0.00010171717171717172,
"loss": 2.7965,
"step": 9000
},
{
"epoch": 0.07,
"learning_rate": 9.161616161616161e-05,
"loss": 2.7891,
"step": 9100
},
{
"epoch": 0.07,
"learning_rate": 8.151515151515152e-05,
"loss": 2.8796,
"step": 9200
},
{
"epoch": 0.07,
"learning_rate": 7.141414141414141e-05,
"loss": 2.8947,
"step": 9300
},
{
"epoch": 0.07,
"learning_rate": 6.141414141414142e-05,
"loss": 2.8773,
"step": 9400
},
{
"epoch": 0.07,
"learning_rate": 5.131313131313131e-05,
"loss": 2.8241,
"step": 9500
}
],
"max_steps": 10000,
"num_train_epochs": 1,
"total_flos": 3247836551823360.0,
"trial_name": null,
"trial_params": null
}