roberta-base-squad / trainer_state.json
End of training
f8d558b
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"global_step": 690,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 3.468208092485549e-07,
"loss": 6.0,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 6.936416184971098e-07,
"loss": 6.0195,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 1.0404624277456647e-06,
"loss": 6.0156,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 1.3872832369942195e-06,
"loss": 6.0352,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 1.7341040462427746e-06,
"loss": 6.0273,
"step": 5
},
{
"epoch": 0.02,
"learning_rate": 2.0809248554913294e-06,
"loss": 5.9805,
"step": 6
},
{
"epoch": 0.02,
"learning_rate": 2.4277456647398847e-06,
"loss": 6.0,
"step": 7
},
{
"epoch": 0.02,
"learning_rate": 2.774566473988439e-06,
"loss": 5.9375,
"step": 8
},
{
"epoch": 0.03,
"learning_rate": 3.1213872832369943e-06,
"loss": 5.9023,
"step": 9
},
{
"epoch": 0.03,
"learning_rate": 3.468208092485549e-06,
"loss": 5.9219,
"step": 10
},
{
"epoch": 0.03,
"learning_rate": 3.8150289017341036e-06,
"loss": 5.9531,
"step": 11
},
{
"epoch": 0.03,
"learning_rate": 4.161849710982659e-06,
"loss": 5.9766,
"step": 12
},
{
"epoch": 0.04,
"learning_rate": 4.508670520231214e-06,
"loss": 5.9141,
"step": 13
},
{
"epoch": 0.04,
"learning_rate": 4.855491329479769e-06,
"loss": 5.8828,
"step": 14
},
{
"epoch": 0.04,
"learning_rate": 5.202312138728324e-06,
"loss": 5.9219,
"step": 15
},
{
"epoch": 0.05,
"learning_rate": 5.549132947976878e-06,
"loss": 5.8633,
"step": 16
},
{
"epoch": 0.05,
"learning_rate": 5.895953757225434e-06,
"loss": 5.9062,
"step": 17
},
{
"epoch": 0.05,
"learning_rate": 6.242774566473989e-06,
"loss": 5.8672,
"step": 18
},
{
"epoch": 0.06,
"learning_rate": 6.589595375722544e-06,
"loss": 5.7695,
"step": 19
},
{
"epoch": 0.06,
"learning_rate": 6.936416184971098e-06,
"loss": 5.8008,
"step": 20
},
{
"epoch": 0.06,
"learning_rate": 7.283236994219653e-06,
"loss": 5.75,
"step": 21
},
{
"epoch": 0.06,
"learning_rate": 7.630057803468207e-06,
"loss": 5.6953,
"step": 22
},
{
"epoch": 0.07,
"learning_rate": 7.976878612716762e-06,
"loss": 5.6758,
"step": 23
},
{
"epoch": 0.07,
"learning_rate": 8.323699421965318e-06,
"loss": 5.6055,
"step": 24
},
{
"epoch": 0.07,
"learning_rate": 8.670520231213873e-06,
"loss": 5.5859,
"step": 25
},
{
"epoch": 0.08,
"learning_rate": 9.017341040462428e-06,
"loss": 5.4531,
"step": 26
},
{
"epoch": 0.08,
"learning_rate": 9.364161849710983e-06,
"loss": 5.5078,
"step": 27
},
{
"epoch": 0.08,
"learning_rate": 9.710982658959539e-06,
"loss": 5.3711,
"step": 28
},
{
"epoch": 0.08,
"learning_rate": 1.0057803468208092e-05,
"loss": 5.1953,
"step": 29
},
{
"epoch": 0.09,
"learning_rate": 1.0404624277456647e-05,
"loss": 5.2578,
"step": 30
},
{
"epoch": 0.09,
"learning_rate": 1.0751445086705203e-05,
"loss": 5.0273,
"step": 31
},
{
"epoch": 0.09,
"learning_rate": 1.1098265895953756e-05,
"loss": 4.9766,
"step": 32
},
{
"epoch": 0.1,
"learning_rate": 1.1445086705202312e-05,
"loss": 4.7852,
"step": 33
},
{
"epoch": 0.1,
"learning_rate": 1.1791907514450869e-05,
"loss": 4.8984,
"step": 34
},
{
"epoch": 0.1,
"learning_rate": 1.2138728323699422e-05,
"loss": 4.625,
"step": 35
},
{
"epoch": 0.1,
"learning_rate": 1.2485549132947977e-05,
"loss": 4.5156,
"step": 36
},
{
"epoch": 0.11,
"learning_rate": 1.2832369942196533e-05,
"loss": 4.5508,
"step": 37
},
{
"epoch": 0.11,
"learning_rate": 1.3179190751445088e-05,
"loss": 4.3281,
"step": 38
},
{
"epoch": 0.11,
"learning_rate": 1.3526011560693641e-05,
"loss": 4.5938,
"step": 39
},
{
"epoch": 0.12,
"learning_rate": 1.3872832369942197e-05,
"loss": 4.1172,
"step": 40
},
{
"epoch": 0.12,
"learning_rate": 1.4219653179190752e-05,
"loss": 4.2188,
"step": 41
},
{
"epoch": 0.12,
"learning_rate": 1.4566473988439305e-05,
"loss": 4.0156,
"step": 42
},
{
"epoch": 0.12,
"learning_rate": 1.491329479768786e-05,
"loss": 4.0234,
"step": 43
},
{
"epoch": 0.13,
"learning_rate": 1.5260115606936414e-05,
"loss": 3.9141,
"step": 44
},
{
"epoch": 0.13,
"learning_rate": 1.560693641618497e-05,
"loss": 3.6562,
"step": 45
},
{
"epoch": 0.13,
"learning_rate": 1.5953757225433525e-05,
"loss": 3.6133,
"step": 46
},
{
"epoch": 0.14,
"learning_rate": 1.630057803468208e-05,
"loss": 3.1582,
"step": 47
},
{
"epoch": 0.14,
"learning_rate": 1.6647398843930635e-05,
"loss": 3.248,
"step": 48
},
{
"epoch": 0.14,
"learning_rate": 1.6994219653179194e-05,
"loss": 3.0996,
"step": 49
},
{
"epoch": 0.14,
"learning_rate": 1.7341040462427746e-05,
"loss": 3.1875,
"step": 50
},
{
"epoch": 0.15,
"learning_rate": 1.76878612716763e-05,
"loss": 2.9941,
"step": 51
},
{
"epoch": 0.15,
"learning_rate": 1.8034682080924856e-05,
"loss": 2.9297,
"step": 52
},
{
"epoch": 0.15,
"learning_rate": 1.838150289017341e-05,
"loss": 3.1094,
"step": 53
},
{
"epoch": 0.16,
"learning_rate": 1.8728323699421967e-05,
"loss": 2.7812,
"step": 54
},
{
"epoch": 0.16,
"learning_rate": 1.9075144508670522e-05,
"loss": 1.9785,
"step": 55
},
{
"epoch": 0.16,
"learning_rate": 1.9421965317919077e-05,
"loss": 3.4531,
"step": 56
},
{
"epoch": 0.17,
"learning_rate": 1.976878612716763e-05,
"loss": 2.1875,
"step": 57
},
{
"epoch": 0.17,
"learning_rate": 2.0115606936416184e-05,
"loss": 1.9697,
"step": 58
},
{
"epoch": 0.17,
"learning_rate": 2.046242774566474e-05,
"loss": 2.1777,
"step": 59
},
{
"epoch": 0.17,
"learning_rate": 2.0809248554913295e-05,
"loss": 1.8594,
"step": 60
},
{
"epoch": 0.18,
"learning_rate": 2.115606936416185e-05,
"loss": 2.2441,
"step": 61
},
{
"epoch": 0.18,
"learning_rate": 2.1502890173410405e-05,
"loss": 2.3242,
"step": 62
},
{
"epoch": 0.18,
"learning_rate": 2.184971098265896e-05,
"loss": 1.9541,
"step": 63
},
{
"epoch": 0.19,
"learning_rate": 2.2196531791907513e-05,
"loss": 1.7412,
"step": 64
},
{
"epoch": 0.19,
"learning_rate": 2.2543352601156068e-05,
"loss": 2.2969,
"step": 65
},
{
"epoch": 0.19,
"learning_rate": 2.2890173410404623e-05,
"loss": 1.3818,
"step": 66
},
{
"epoch": 0.19,
"learning_rate": 2.323699421965318e-05,
"loss": 1.9668,
"step": 67
},
{
"epoch": 0.2,
"learning_rate": 2.3583815028901737e-05,
"loss": 2.4727,
"step": 68
},
{
"epoch": 0.2,
"learning_rate": 2.3930635838150292e-05,
"loss": 2.0645,
"step": 69
},
{
"epoch": 0.2,
"learning_rate": 2.4277456647398844e-05,
"loss": 2.0312,
"step": 70
},
{
"epoch": 0.21,
"learning_rate": 2.46242774566474e-05,
"loss": 2.0547,
"step": 71
},
{
"epoch": 0.21,
"learning_rate": 2.4971098265895955e-05,
"loss": 1.7764,
"step": 72
},
{
"epoch": 0.21,
"learning_rate": 2.531791907514451e-05,
"loss": 1.6074,
"step": 73
},
{
"epoch": 0.21,
"learning_rate": 2.5664739884393065e-05,
"loss": 1.2148,
"step": 74
},
{
"epoch": 0.22,
"learning_rate": 2.601156069364162e-05,
"loss": 1.3525,
"step": 75
},
{
"epoch": 0.22,
"learning_rate": 2.6358381502890176e-05,
"loss": 1.6963,
"step": 76
},
{
"epoch": 0.22,
"learning_rate": 2.6705202312138728e-05,
"loss": 1.4746,
"step": 77
},
{
"epoch": 0.23,
"learning_rate": 2.7052023121387283e-05,
"loss": 1.8105,
"step": 78
},
{
"epoch": 0.23,
"learning_rate": 2.7398843930635838e-05,
"loss": 1.5195,
"step": 79
},
{
"epoch": 0.23,
"learning_rate": 2.7745664739884393e-05,
"loss": 1.1855,
"step": 80
},
{
"epoch": 0.23,
"learning_rate": 2.809248554913295e-05,
"loss": 1.3115,
"step": 81
},
{
"epoch": 0.24,
"learning_rate": 2.8439306358381504e-05,
"loss": 1.6318,
"step": 82
},
{
"epoch": 0.24,
"learning_rate": 2.878612716763006e-05,
"loss": 2.2656,
"step": 83
},
{
"epoch": 0.24,
"learning_rate": 2.913294797687861e-05,
"loss": 1.8789,
"step": 84
},
{
"epoch": 0.25,
"learning_rate": 2.9479768786127166e-05,
"loss": 1.3555,
"step": 85
},
{
"epoch": 0.25,
"learning_rate": 2.982658959537572e-05,
"loss": 1.1211,
"step": 86
},