liuhaotian's picture
Upload model checkpoints
430340d
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"global_step": 625,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.0526315789473684e-05,
"loss": 1.6094,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 2.105263157894737e-05,
"loss": 1.6328,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 3.157894736842105e-05,
"loss": 1.6055,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 4.210526315789474e-05,
"loss": 1.6562,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 5.2631578947368424e-05,
"loss": 1.5625,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 6.31578947368421e-05,
"loss": 1.4961,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 7.368421052631579e-05,
"loss": 1.457,
"step": 7
},
{
"epoch": 0.01,
"learning_rate": 8.421052631578948e-05,
"loss": 1.3867,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 9.473684210526316e-05,
"loss": 1.3438,
"step": 9
},
{
"epoch": 0.02,
"learning_rate": 0.00010526315789473685,
"loss": 1.3203,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 0.00011578947368421053,
"loss": 1.2617,
"step": 11
},
{
"epoch": 0.02,
"learning_rate": 0.0001263157894736842,
"loss": 1.2266,
"step": 12
},
{
"epoch": 0.02,
"learning_rate": 0.0001368421052631579,
"loss": 1.2227,
"step": 13
},
{
"epoch": 0.02,
"learning_rate": 0.00014736842105263158,
"loss": 1.2109,
"step": 14
},
{
"epoch": 0.02,
"learning_rate": 0.00015789473684210527,
"loss": 1.1953,
"step": 15
},
{
"epoch": 0.03,
"learning_rate": 0.00016842105263157895,
"loss": 1.2148,
"step": 16
},
{
"epoch": 0.03,
"learning_rate": 0.00017894736842105264,
"loss": 1.1914,
"step": 17
},
{
"epoch": 0.03,
"learning_rate": 0.00018947368421052632,
"loss": 1.1914,
"step": 18
},
{
"epoch": 0.03,
"learning_rate": 0.0002,
"loss": 1.1875,
"step": 19
},
{
"epoch": 0.03,
"learning_rate": 0.00019999865623437013,
"loss": 1.207,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 0.00019999462497359466,
"loss": 1.1758,
"step": 21
},
{
"epoch": 0.04,
"learning_rate": 0.00019998790632601496,
"loss": 1.1758,
"step": 22
},
{
"epoch": 0.04,
"learning_rate": 0.0001999785004721968,
"loss": 1.1758,
"step": 23
},
{
"epoch": 0.04,
"learning_rate": 0.00019996640766492543,
"loss": 1.168,
"step": 24
},
{
"epoch": 0.04,
"learning_rate": 0.00019995162822919883,
"loss": 1.1797,
"step": 25
},
{
"epoch": 0.04,
"learning_rate": 0.00019993416256221895,
"loss": 1.1445,
"step": 26
},
{
"epoch": 0.04,
"learning_rate": 0.00019991401113338104,
"loss": 1.1367,
"step": 27
},
{
"epoch": 0.04,
"learning_rate": 0.00019989117448426108,
"loss": 1.1367,
"step": 28
},
{
"epoch": 0.05,
"learning_rate": 0.00019986565322860115,
"loss": 1.1484,
"step": 29
},
{
"epoch": 0.05,
"learning_rate": 0.00019983744805229296,
"loss": 1.1406,
"step": 30
},
{
"epoch": 0.05,
"learning_rate": 0.00019980655971335945,
"loss": 1.1367,
"step": 31
},
{
"epoch": 0.05,
"learning_rate": 0.00019977298904193437,
"loss": 1.1523,
"step": 32
},
{
"epoch": 0.05,
"learning_rate": 0.00019973673694024,
"loss": 1.1328,
"step": 33
},
{
"epoch": 0.05,
"learning_rate": 0.00019969780438256293,
"loss": 1.1367,
"step": 34
},
{
"epoch": 0.06,
"learning_rate": 0.0001996561924152278,
"loss": 1.1445,
"step": 35
},
{
"epoch": 0.06,
"learning_rate": 0.0001996119021565693,
"loss": 1.1133,
"step": 36
},
{
"epoch": 0.06,
"learning_rate": 0.0001995649347969019,
"loss": 1.1367,
"step": 37
},
{
"epoch": 0.06,
"learning_rate": 0.00019951529159848805,
"loss": 1.1367,
"step": 38
},
{
"epoch": 0.06,
"learning_rate": 0.00019946297389550433,
"loss": 1.1211,
"step": 39
},
{
"epoch": 0.06,
"learning_rate": 0.00019940798309400526,
"loss": 1.1094,
"step": 40
},
{
"epoch": 0.07,
"learning_rate": 0.0001993503206718859,
"loss": 1.1445,
"step": 41
},
{
"epoch": 0.07,
"learning_rate": 0.00019928998817884182,
"loss": 1.1289,
"step": 42
},
{
"epoch": 0.07,
"learning_rate": 0.00019922698723632767,
"loss": 1.1289,
"step": 43
},
{
"epoch": 0.07,
"learning_rate": 0.00019916131953751342,
"loss": 1.1328,
"step": 44
},
{
"epoch": 0.07,
"learning_rate": 0.00019909298684723904,
"loss": 1.1289,
"step": 45
},
{
"epoch": 0.07,
"learning_rate": 0.00019902199100196697,
"loss": 1.1094,
"step": 46
},
{
"epoch": 0.08,
"learning_rate": 0.00019894833390973266,
"loss": 1.1094,
"step": 47
},
{
"epoch": 0.08,
"learning_rate": 0.00019887201755009357,
"loss": 1.1016,
"step": 48
},
{
"epoch": 0.08,
"learning_rate": 0.0001987930439740757,
"loss": 1.1172,
"step": 49
},
{
"epoch": 0.08,
"learning_rate": 0.00019871141530411853,
"loss": 1.1094,
"step": 50
},
{
"epoch": 0.08,
"learning_rate": 0.0001986271337340182,
"loss": 1.0781,
"step": 51
},
{
"epoch": 0.08,
"learning_rate": 0.00019854020152886814,
"loss": 1.082,
"step": 52
},
{
"epoch": 0.08,
"learning_rate": 0.0001984506210249986,
"loss": 1.1172,
"step": 53
},
{
"epoch": 0.09,
"learning_rate": 0.00019835839462991361,
"loss": 1.0898,
"step": 54
},
{
"epoch": 0.09,
"learning_rate": 0.00019826352482222638,
"loss": 1.0898,
"step": 55
},
{
"epoch": 0.09,
"learning_rate": 0.00019816601415159263,
"loss": 1.1172,
"step": 56
},
{
"epoch": 0.09,
"learning_rate": 0.0001980658652386421,
"loss": 1.1445,
"step": 57
},
{
"epoch": 0.09,
"learning_rate": 0.00019796308077490817,
"loss": 1.1211,
"step": 58
},
{
"epoch": 0.09,
"learning_rate": 0.00019785766352275542,
"loss": 1.0703,
"step": 59
},
{
"epoch": 0.1,
"learning_rate": 0.00019774961631530545,
"loss": 1.1016,
"step": 60
},
{
"epoch": 0.1,
"learning_rate": 0.00019763894205636072,
"loss": 1.0898,
"step": 61
},
{
"epoch": 0.1,
"learning_rate": 0.00019752564372032657,
"loss": 1.0742,
"step": 62
},
{
"epoch": 0.1,
"learning_rate": 0.00019740972435213115,
"loss": 1.0898,
"step": 63
},
{
"epoch": 0.1,
"learning_rate": 0.00019729118706714375,
"loss": 1.1172,
"step": 64
},
{
"epoch": 0.1,
"learning_rate": 0.00019717003505109095,
"loss": 1.1094,
"step": 65
},
{
"epoch": 0.11,
"learning_rate": 0.00019704627155997108,
"loss": 1.0742,
"step": 66
},
{
"epoch": 0.11,
"learning_rate": 0.00019691989991996663,
"loss": 1.082,
"step": 67
},
{
"epoch": 0.11,
"learning_rate": 0.0001967909235273549,
"loss": 1.0977,
"step": 68
},
{
"epoch": 0.11,
"learning_rate": 0.00019665934584841682,
"loss": 1.0469,
"step": 69
},
{
"epoch": 0.11,
"learning_rate": 0.00019652517041934356,
"loss": 1.0625,
"step": 70
},
{
"epoch": 0.11,
"learning_rate": 0.00019638840084614182,
"loss": 1.0703,
"step": 71
},
{
"epoch": 0.12,
"learning_rate": 0.00019624904080453655,
"loss": 1.0742,
"step": 72
},
{
"epoch": 0.12,
"learning_rate": 0.00019610709403987246,
"loss": 1.1133,
"step": 73
},
{
"epoch": 0.12,
"learning_rate": 0.00019596256436701324,
"loss": 1.1016,
"step": 74
},
{
"epoch": 0.12,
"learning_rate": 0.000195815455670239,
"loss": 1.0664,
"step": 75
},
{
"epoch": 0.12,
"learning_rate": 0.00019566577190314197,
"loss": 1.0781,
"step": 76
},
{
"epoch": 0.12,
"learning_rate": 0.0001955135170885202,
"loss": 1.0625,
"step": 77
},
{
"epoch": 0.12,
"learning_rate": 0.00019535869531826937,
"loss": 1.0742,
"step": 78
},
{
"epoch": 0.13,
"learning_rate": 0.00019520131075327298,
"loss": 1.0742,
"step": 79
},
{
"epoch": 0.13,
"learning_rate": 0.00019504136762329047,
"loss": 1.0859,
"step": 80
},
{
"epoch": 0.13,
"learning_rate": 0.00019487887022684336,
"loss": 1.0938,
"step": 81
},
{
"epoch": 0.13,
"learning_rate": 0.00019471382293110003,
"loss": 1.0625,
"step": 82
},
{
"epoch": 0.13,
"learning_rate": 0.00019454623017175812,
"loss": 1.0977,
"step": 83
},
{
"epoch": 0.13,
"learning_rate": 0.00019437609645292546,
"loss": 1.0547,
"step": 84
},
{
"epoch": 0.14,
"learning_rate": 0.0001942034263469989,
"loss": 1.0781,
"step": 85
},
{
"epoch": 0.14,
"learning_rate": 0.00019402822449454153,
"loss": 1.0938,
"step": 86
},
{
"epoch": 0.14,
"learning_rate": 0.00019385049560415794,
"loss": 1.0742,
"step": 87
},
{
"epoch": 0.14,
"learning_rate": 0.00019367024445236754,
"loss": 1.043,
"step": 88
},
{
"epoch": 0.14,
"learning_rate": 0.00019348747588347637,
"loss": 1.0742,
"step": 89
},
{
"epoch": 0.14,
"learning_rate": 0.00019330219480944694,
"loss": 1.0625,
"step": 90
},
{
"epoch": 0.15,
"learning_rate": 0.00019311440620976597,
"loss": 1.1055,
"step": 91
},
{
"epoch": 0.15,
"learning_rate": 0.0001929241151313108,
"loss": 1.125,
"step": 92
},
{
"epoch": 0.15,
"learning_rate": 0.00019273132668821364,
"loss": 1.0938,
"step": 93
},
{
"epoch": 0.15,
"learning_rate": 0.00019253604606172417,
"loss": 1.0859,
"step": 94
},
{
"epoch": 0.15,
"learning_rate": 0.00019233827850007027,
"loss": 1.0508,
"step": 95
},
{
"epoch": 0.15,
"learning_rate": 0.00019213802931831696,
"loss": 1.082,
"step": 96
},
{
"epoch": 0.16,
"learning_rate": 0.00019193530389822363,
"loss": 1.0938,
"step": 97
},
{
"epoch": 0.16,
"learning_rate": 0.00019173010768809933,
"loss": 1.0781,
"step": 98
},
{
"epoch": 0.16,
"learning_rate": 0.0001915224462026563,
"loss": 1.043,
"step": 99
},
{
"epoch": 0.16,
"learning_rate": 0.00019131232502286188,
"loss": 1.0938,
"step": 100
},
{
"epoch": 0.16,
"learning_rate": 0.0001910997497957885,
"loss": 1.0742,
"step": 101
},
{
"epoch": 0.16,
"learning_rate": 0.00019088472623446183,
"loss": 1.0781,
"step": 102
},
{
"epoch": 0.16,
"learning_rate": 0.00019066726011770726,
"loss": 1.0742,
"step": 103
},
{
"epoch": 0.17,
"learning_rate": 0.0001904473572899947,
"loss": 1.0508,
"step": 104
},
{
"epoch": 0.17,
"learning_rate": 0.00019022502366128135,
"loss": 1.0664,
"step": 105
},
{
"epoch": 0.17,
"learning_rate": 0.00019000026520685302,
"loss": 1.0508,
"step": 106
},
{
"epoch": 0.17,
"learning_rate": 0.0001897730879671634,
"loss": 1.0586,
"step": 107
},
{
"epoch": 0.17,
"learning_rate": 0.00018954349804767184,
"loss": 1.0703,
"step": 108
},
{
"epoch": 0.17,
"learning_rate": 0.00018931150161867916,
"loss": 1.0625,
"step": 109
},
{
"epoch": 0.18,
"learning_rate": 0.00018907710491516199,
"loss": 1.0664,
"step": 110
},
{
"epoch": 0.18,
"learning_rate": 0.0001888403142366049,
"loss": 1.0547,
"step": 111
},
{
"epoch": 0.18,
"learning_rate": 0.00018860113594683148,
"loss": 1.0703,
"step": 112
},
{
"epoch": 0.18,
"learning_rate": 0.00018835957647383303,
"loss": 1.043,
"step": 113
},
{
"epoch": 0.18,
"learning_rate": 0.00018811564230959588,
"loss": 1.0547,
"step": 114
},
{
"epoch": 0.18,
"learning_rate": 0.00018786934000992688,
"loss": 1.082,
"step": 115
},
{
"epoch": 0.19,
"learning_rate": 0.00018762067619427746,
"loss": 1.0664,
"step": 116
},
{
"epoch": 0.19,
"learning_rate": 0.00018736965754556528,
"loss": 1.0625,
"step": 117
},
{
"epoch": 0.19,
"learning_rate": 0.00018711629080999504,
"loss": 1.0547,
"step": 118
},
{
"epoch": 0.19,
"learning_rate": 0.00018686058279687698,
"loss": 1.0586,
"step": 119
},
{
"epoch": 0.19,
"learning_rate": 0.00018660254037844388,
"loss": 1.043,
"step": 120
},
{
"epoch": 0.19,
"learning_rate": 0.00018634217048966637,
"loss": 1.0859,
"step": 121
},
{
"epoch": 0.2,
"learning_rate": 0.0001860794801280666,
"loss": 1.0781,
"step": 122
},
{
"epoch": 0.2,
"learning_rate": 0.0001858144763535302,
"loss": 1.0664,
"step": 123
},
{
"epoch": 0.2,
"learning_rate": 0.0001855471662881164,
"loss": 1.0625,
"step": 124
},
{
"epoch": 0.2,
"learning_rate": 0.00018527755711586678,
"loss": 1.0391,
"step": 125
},
{
"epoch": 0.2,
"learning_rate": 0.00018500565608261214,
"loss": 1.0938,
"step": 126
},
{
"epoch": 0.2,
"learning_rate": 0.00018473147049577774,
"loss": 1.0586,
"step": 127
},
{
"epoch": 0.2,
"learning_rate": 0.00018445500772418697,
"loss": 1.0547,
"step": 128
},
{
"epoch": 0.21,
"learning_rate": 0.00018417627519786315,
"loss": 1.0547,
"step": 129
},
{
"epoch": 0.21,
"learning_rate": 0.00018389528040783012,
"loss": 1.082,
"step": 130
},
{
"epoch": 0.21,
"learning_rate": 0.00018361203090591071,
"loss": 1.0977,
"step": 131
},
{
"epoch": 0.21,
"learning_rate": 0.00018332653430452376,
"loss": 1.0781,
"step": 132
},
{
"epoch": 0.21,
"learning_rate": 0.00018303879827647975,
"loss": 1.0742,
"step": 133
},
{
"epoch": 0.21,
"learning_rate": 0.00018274883055477436,
"loss": 1.0625,
"step": 134
},
{
"epoch": 0.22,
"learning_rate": 0.00018245663893238075,
"loss": 1.0625,
"step": 135
},
{
"epoch": 0.22,
"learning_rate": 0.00018216223126204007,
"loss": 1.0469,
"step": 136
},
{
"epoch": 0.22,
"learning_rate": 0.00018186561545605054,
"loss": 1.0781,
"step": 137
},
{
"epoch": 0.22,
"learning_rate": 0.00018156679948605467,
"loss": 1.0898,
"step": 138
},
{
"epoch": 0.22,
"learning_rate": 0.00018126579138282503,
"loss": 1.043,
"step": 139
},
{
"epoch": 0.22,
"learning_rate": 0.0001809625992360485,
"loss": 1.1016,
"step": 140
},
{
"epoch": 0.23,
"learning_rate": 0.00018065723119410884,
"loss": 1.0547,
"step": 141
},
{
"epoch": 0.23,
"learning_rate": 0.00018034969546386757,
"loss": 1.0273,
"step": 142
},
{
"epoch": 0.23,
"learning_rate": 0.0001800400003104436,
"loss": 1.0703,
"step": 143
},
{
"epoch": 0.23,
"learning_rate": 0.00017972815405699103,
"loss": 1.0859,
"step": 144
},
{
"epoch": 0.23,
"learning_rate": 0.00017941416508447536,
"loss": 1.0664,
"step": 145
},
{
"epoch": 0.23,
"learning_rate": 0.0001790980418314484,
"loss": 1.0781,
"step": 146
},
{
"epoch": 0.24,
"learning_rate": 0.00017877979279382135,
"loss": 1.0703,
"step": 147
},
{
"epoch": 0.24,
"learning_rate": 0.0001784594265246366,
"loss": 1.043,
"step": 148
},
{
"epoch": 0.24,
"learning_rate": 0.0001781369516338378,
"loss": 1.0352,
"step": 149
},
{
"epoch": 0.24,
"learning_rate": 0.00017781237678803847,
"loss": 1.0547,
"step": 150
},
{
"epoch": 0.24,
"learning_rate": 0.000177485710710289,
"loss": 1.0703,
"step": 151
},
{
"epoch": 0.24,
"learning_rate": 0.00017715696217984235,
"loss": 1.0391,
"step": 152
},
{
"epoch": 0.24,
"learning_rate": 0.00017682614003191807,
"loss": 1.0547,
"step": 153
},
{
"epoch": 0.25,
"learning_rate": 0.00017649325315746478,
"loss": 1.0664,
"step": 154
},
{
"epoch": 0.25,
"learning_rate": 0.0001761583105029213,
"loss": 1.0586,
"step": 155
},
{
"epoch": 0.25,
"learning_rate": 0.00017582132106997616,
"loss": 1.0547,
"step": 156
},
{
"epoch": 0.25,
"learning_rate": 0.00017548229391532572,
"loss": 1.0391,
"step": 157
},
{
"epoch": 0.25,
"learning_rate": 0.00017514123815043074,
"loss": 1.043,
"step": 158
},
{
"epoch": 0.25,
"learning_rate": 0.00017479816294127152,
"loss": 1.0312,
"step": 159
},
{
"epoch": 0.26,
"learning_rate": 0.0001744530775081015,
"loss": 1.0508,
"step": 160
},
{
"epoch": 0.26,
"learning_rate": 0.0001741059911251997,
"loss": 1.0508,
"step": 161
},
{
"epoch": 0.26,
"learning_rate": 0.000173756913120621,
"loss": 1.0352,
"step": 162
},
{
"epoch": 0.26,
"learning_rate": 0.00017340585287594604,
"loss": 1.0664,
"step": 163
},
{
"epoch": 0.26,
"learning_rate": 0.0001730528198260285,
"loss": 1.0469,
"step": 164
},
{
"epoch": 0.26,
"learning_rate": 0.00017269782345874203,
"loss": 1.0273,
"step": 165
},
{
"epoch": 0.27,
"learning_rate": 0.00017234087331472497,
"loss": 1.0195,
"step": 166
},
{
"epoch": 0.27,
"learning_rate": 0.00017198197898712404,
"loss": 1.0586,
"step": 167
},
{
"epoch": 0.27,
"learning_rate": 0.00017162115012133643,
"loss": 1.0703,
"step": 168
},
{
"epoch": 0.27,
"learning_rate": 0.00017125839641475072,
"loss": 1.0391,
"step": 169
},
{
"epoch": 0.27,
"learning_rate": 0.00017089372761648616,
"loss": 1.0391,
"step": 170
},
{
"epoch": 0.27,
"learning_rate": 0.00017052715352713075,
"loss": 1.0195,
"step": 171
},
{
"epoch": 0.28,
"learning_rate": 0.00017015868399847768,
"loss": 1.0547,
"step": 172
},
{
"epoch": 0.28,
"learning_rate": 0.00016978832893326074,
"loss": 1.0664,
"step": 173
},
{
"epoch": 0.28,
"learning_rate": 0.00016941609828488807,
"loss": 1.043,
"step": 174
},
{
"epoch": 0.28,
"learning_rate": 0.0001690420020571747,
"loss": 1.043,
"step": 175
},
{
"epoch": 0.28,
"learning_rate": 0.0001686660503040737,
"loss": 1.0547,
"step": 176
},
{
"epoch": 0.28,
"learning_rate": 0.00016828825312940592,
"loss": 1.0625,
"step": 177
},
{
"epoch": 0.28,
"learning_rate": 0.0001679086206865886,
"loss": 1.0586,
"step": 178
},
{
"epoch": 0.29,
"learning_rate": 0.00016752716317836229,
"loss": 1.0586,
"step": 179
},
{
"epoch": 0.29,
"learning_rate": 0.0001671438908565167,
"loss": 1.0391,
"step": 180
},
{
"epoch": 0.29,
"learning_rate": 0.00016675881402161536,
"loss": 1.0508,
"step": 181
},
{
"epoch": 0.29,
"learning_rate": 0.0001663719430227186,
"loss": 1.0352,
"step": 182
},
{
"epoch": 0.29,
"learning_rate": 0.00016598328825710533,
"loss": 1.0234,
"step": 183
},
{
"epoch": 0.29,
"learning_rate": 0.000165592860169994,
"loss": 1.0781,
"step": 184
},
{
"epoch": 0.3,
"learning_rate": 0.00016520066925426144,
"loss": 1.0547,
"step": 185
},
{
"epoch": 0.3,
"learning_rate": 0.0001648067260501611,
"loss": 1.0273,
"step": 186
},
{
"epoch": 0.3,
"learning_rate": 0.0001644110411450398,
"loss": 1.0273,
"step": 187
},
{
"epoch": 0.3,
"learning_rate": 0.00016401362517305296,
"loss": 1.043,
"step": 188
},
{
"epoch": 0.3,
"learning_rate": 0.00016361448881487914,
"loss": 1.0312,
"step": 189
},
{
"epoch": 0.3,
"learning_rate": 0.00016321364279743266,
"loss": 1.0137,
"step": 190
},
{
"epoch": 0.31,
"learning_rate": 0.0001628110978935756,
"loss": 1.0664,
"step": 191
},
{
"epoch": 0.31,
"learning_rate": 0.00016240686492182804,
"loss": 1.0469,
"step": 192
},
{
"epoch": 0.31,
"learning_rate": 0.00016200095474607753,
"loss": 1.0469,
"step": 193
},
{
"epoch": 0.31,
"learning_rate": 0.00016159337827528685,
"loss": 1.0371,
"step": 194
},
{
"epoch": 0.31,
"learning_rate": 0.0001611841464632011,
"loss": 1.043,
"step": 195
},
{
"epoch": 0.31,
"learning_rate": 0.0001607732703080532,
"loss": 1.0156,
"step": 196
},
{
"epoch": 0.32,
"learning_rate": 0.00016036076085226814,
"loss": 1.0508,
"step": 197
},
{
"epoch": 0.32,
"learning_rate": 0.0001599466291821666,
"loss": 1.0391,
"step": 198
},
{
"epoch": 0.32,
"learning_rate": 0.0001595308864276666,
"loss": 1.0312,
"step": 199
},
{
"epoch": 0.32,
"learning_rate": 0.0001591135437619847,
"loss": 1.0352,
"step": 200
},
{
"epoch": 0.32,
"learning_rate": 0.0001586946124013354,
"loss": 1.0664,
"step": 201
},
{
"epoch": 0.32,
"learning_rate": 0.0001582741036046301,
"loss": 1.0352,
"step": 202
},
{
"epoch": 0.32,
"learning_rate": 0.00015785202867317407,
"loss": 1.0586,
"step": 203
},
{
"epoch": 0.33,
"learning_rate": 0.00015742839895036305,
"loss": 1.0352,
"step": 204
},
{
"epoch": 0.33,
"learning_rate": 0.00015700322582137827,
"loss": 1.0195,
"step": 205
},
{
"epoch": 0.33,
"learning_rate": 0.0001565765207128805,
"loss": 1.0508,
"step": 206
},
{
"epoch": 0.33,
"learning_rate": 0.0001561482950927029,
"loss": 1.0508,
"step": 207
},
{
"epoch": 0.33,
"learning_rate": 0.00015571856046954285,
"loss": 1.0469,
"step": 208
},
{
"epoch": 0.33,
"learning_rate": 0.00015528732839265272,
"loss": 1.0547,
"step": 209
},
{
"epoch": 0.34,
"learning_rate": 0.0001548546104515294,
"loss": 1.0352,
"step": 210
},
{
"epoch": 0.34,
"learning_rate": 0.00015442041827560274,
"loss": 1.0312,
"step": 211
},
{
"epoch": 0.34,
"learning_rate": 0.00015398476353392323,
"loss": 1.0469,
"step": 212
},
{
"epoch": 0.34,
"learning_rate": 0.00015354765793484834,
"loss": 1.0547,
"step": 213
},
{
"epoch": 0.34,
"learning_rate": 0.00015310911322572753,
"loss": 1.0508,
"step": 214
},
{
"epoch": 0.34,
"learning_rate": 0.000152669141192587,
"loss": 1.0703,
"step": 215
},
{
"epoch": 0.35,
"learning_rate": 0.00015222775365981273,
"loss": 1.0273,
"step": 216
},
{
"epoch": 0.35,
"learning_rate": 0.00015178496248983254,
"loss": 1.0508,
"step": 217
},
{
"epoch": 0.35,
"learning_rate": 0.00015134077958279765,
"loss": 1.043,
"step": 218
},
{
"epoch": 0.35,
"learning_rate": 0.00015089521687626243,
"loss": 1.0625,
"step": 219
},
{
"epoch": 0.35,
"learning_rate": 0.000150448286344864,
"loss": 1.0664,
"step": 220
},
{
"epoch": 0.35,
"learning_rate": 0.00015000000000000001,
"loss": 1.0508,
"step": 221
},
{
"epoch": 0.36,
"learning_rate": 0.00014955036988950618,
"loss": 1.0547,
"step": 222
},
{
"epoch": 0.36,
"learning_rate": 0.00014909940809733222,
"loss": 1.0586,
"step": 223
},
{
"epoch": 0.36,
"learning_rate": 0.00014864712674321734,
"loss": 1.0234,
"step": 224
},
{
"epoch": 0.36,
"learning_rate": 0.00014819353798236427,
"loss": 1.0312,
"step": 225
},
{
"epoch": 0.36,
"learning_rate": 0.00014773865400511272,
"loss": 1.0391,
"step": 226
},
{
"epoch": 0.36,
"learning_rate": 0.00014728248703661182,
"loss": 1.0273,
"step": 227
},
{
"epoch": 0.36,
"learning_rate": 0.00014682504933649144,
"loss": 1.0703,
"step": 228
},
{
"epoch": 0.37,
"learning_rate": 0.00014636635319853275,
"loss": 1.043,
"step": 229
},
{
"epoch": 0.37,
"learning_rate": 0.00014590641095033787,
"loss": 1.0234,
"step": 230
},
{
"epoch": 0.37,
"learning_rate": 0.00014544523495299842,
"loss": 1.0234,
"step": 231
},
{
"epoch": 0.37,
"learning_rate": 0.0001449828376007636,
"loss": 1.0273,
"step": 232
},
{
"epoch": 0.37,
"learning_rate": 0.0001445192313207067,
"loss": 1.0234,
"step": 233
},
{
"epoch": 0.37,
"learning_rate": 0.0001440544285723915,
"loss": 1.043,
"step": 234
},
{
"epoch": 0.38,
"learning_rate": 0.00014358844184753712,
"loss": 1.0352,
"step": 235
},
{
"epoch": 0.38,
"learning_rate": 0.00014312128366968243,
"loss": 1.0625,
"step": 236
},
{
"epoch": 0.38,
"learning_rate": 0.00014265296659384956,
"loss": 1.043,
"step": 237
},
{
"epoch": 0.38,
"learning_rate": 0.00014218350320620624,
"loss": 1.0664,
"step": 238
},
{
"epoch": 0.38,
"learning_rate": 0.0001417129061237278,
"loss": 1.0195,
"step": 239
},
{
"epoch": 0.38,
"learning_rate": 0.00014124118799385796,
"loss": 1.0508,
"step": 240
},
{
"epoch": 0.39,
"learning_rate": 0.00014076836149416887,
"loss": 1.043,
"step": 241
},
{
"epoch": 0.39,
"learning_rate": 0.0001402944393320206,
"loss": 1.0234,
"step": 242
},
{
"epoch": 0.39,
"learning_rate": 0.00013981943424421932,
"loss": 1.0391,
"step": 243
},
{
"epoch": 0.39,
"learning_rate": 0.00013934335899667527,
"loss": 1.0352,
"step": 244
},
{
"epoch": 0.39,
"learning_rate": 0.00013886622638405952,
"loss": 1.0352,
"step": 245
},
{
"epoch": 0.39,
"learning_rate": 0.00013838804922946027,
"loss": 1.043,
"step": 246
},
{
"epoch": 0.4,
"learning_rate": 0.00013790884038403795,
"loss": 1.0195,
"step": 247
},
{
"epoch": 0.4,
"learning_rate": 0.00013742861272668012,
"loss": 1.0391,
"step": 248
},
{
"epoch": 0.4,
"learning_rate": 0.00013694737916365517,
"loss": 1.0391,
"step": 249
},
{
"epoch": 0.4,
"learning_rate": 0.00013646515262826552,
"loss": 1.0547,
"step": 250
},
{
"epoch": 0.4,
"learning_rate": 0.0001359819460805001,
"loss": 1.0352,
"step": 251
},
{
"epoch": 0.4,
"learning_rate": 0.0001354977725066859,
"loss": 1.0703,
"step": 252
},
{
"epoch": 0.4,
"learning_rate": 0.00013501264491913906,
"loss": 1.0469,
"step": 253
},
{
"epoch": 0.41,
"learning_rate": 0.0001345265763558152,
"loss": 1.0391,
"step": 254
},
{
"epoch": 0.41,
"learning_rate": 0.00013403957987995882,
"loss": 1.0469,
"step": 255
},
{
"epoch": 0.41,
"learning_rate": 0.0001335516685797525,
"loss": 1.0312,
"step": 256
},
{
"epoch": 0.41,
"learning_rate": 0.00013306285556796495,
"loss": 1.0391,
"step": 257
},
{
"epoch": 0.41,
"learning_rate": 0.00013257315398159864,
"loss": 1.002,
"step": 258
},
{
"epoch": 0.41,
"learning_rate": 0.00013208257698153677,
"loss": 1.0508,
"step": 259
},
{
"epoch": 0.42,
"learning_rate": 0.00013159113775218964,
"loss": 1.0195,
"step": 260
},
{
"epoch": 0.42,
"learning_rate": 0.00013109884950114007,
"loss": 0.9902,
"step": 261
},
{
"epoch": 0.42,
"learning_rate": 0.00013060572545878875,
"loss": 1.0352,
"step": 262
},
{
"epoch": 0.42,
"learning_rate": 0.00013011177887799845,
"loss": 1.0078,
"step": 263
},
{
"epoch": 0.42,
"learning_rate": 0.00012961702303373795,
"loss": 1.0273,
"step": 264
},
{
"epoch": 0.42,
"learning_rate": 0.00012912147122272523,
"loss": 1.0312,
"step": 265
},
{
"epoch": 0.43,
"learning_rate": 0.00012862513676307008,
"loss": 1.0391,
"step": 266
},
{
"epoch": 0.43,
"learning_rate": 0.00012812803299391628,
"loss": 1.0,
"step": 267
},
{
"epoch": 0.43,
"learning_rate": 0.00012763017327508305,
"loss": 1.0469,
"step": 268
},
{
"epoch": 0.43,
"learning_rate": 0.0001271315709867059,
"loss": 1.043,
"step": 269
},
{
"epoch": 0.43,
"learning_rate": 0.00012663223952887723,
"loss": 1.0586,
"step": 270
},
{
"epoch": 0.43,
"learning_rate": 0.00012613219232128608,
"loss": 1.0234,
"step": 271
},
{
"epoch": 0.44,
"learning_rate": 0.00012563144280285741,
"loss": 1.0176,
"step": 272
},
{
"epoch": 0.44,
"learning_rate": 0.00012513000443139112,
"loss": 1.0156,
"step": 273
},
{
"epoch": 0.44,
"learning_rate": 0.00012462789068320017,
"loss": 1.0547,
"step": 274
},
{
"epoch": 0.44,
"learning_rate": 0.00012412511505274844,
"loss": 1.0234,
"step": 275
},
{
"epoch": 0.44,
"learning_rate": 0.00012362169105228826,
"loss": 1.0117,
"step": 276
},
{
"epoch": 0.44,
"learning_rate": 0.000123117632211497,
"loss": 1.0352,
"step": 277
},
{
"epoch": 0.44,
"learning_rate": 0.00012261295207711346,
"loss": 1.0117,
"step": 278
},
{
"epoch": 0.45,
"learning_rate": 0.0001221076642125742,
"loss": 1.0059,
"step": 279
},
{
"epoch": 0.45,
"learning_rate": 0.00012160178219764837,
"loss": 1.0664,
"step": 280
},
{
"epoch": 0.45,
"learning_rate": 0.00012109531962807332,
"loss": 1.0312,
"step": 281
},
{
"epoch": 0.45,
"learning_rate": 0.00012058829011518896,
"loss": 1.0273,
"step": 282
},
{
"epoch": 0.45,
"learning_rate": 0.00012008070728557186,
"loss": 1.0078,
"step": 283
},
{
"epoch": 0.45,
"learning_rate": 0.00011957258478066931,
"loss": 1.0273,
"step": 284
},
{
"epoch": 0.46,
"learning_rate": 0.00011906393625643244,
"loss": 1.0195,
"step": 285
},
{
"epoch": 0.46,
"learning_rate": 0.00011855477538294935,
"loss": 1.043,
"step": 286
},
{
"epoch": 0.46,
"learning_rate": 0.00011804511584407763,
"loss": 1.0117,
"step": 287
},
{
"epoch": 0.46,
"learning_rate": 0.00011753497133707679,
"loss": 1.0078,
"step": 288
},
{
"epoch": 0.46,
"learning_rate": 0.00011702435557223987,
"loss": 1.0703,
"step": 289
},
{
"epoch": 0.46,
"learning_rate": 0.00011651328227252517,
"loss": 1.0352,
"step": 290
},
{
"epoch": 0.47,
"learning_rate": 0.00011600176517318741,
"loss": 1.0469,
"step": 291
},
{
"epoch": 0.47,
"learning_rate": 0.00011548981802140848,
"loss": 1.0664,
"step": 292
},
{
"epoch": 0.47,
"learning_rate": 0.00011497745457592816,
"loss": 1.0312,
"step": 293
},
{
"epoch": 0.47,
"learning_rate": 0.00011446468860667421,
"loss": 1.0078,
"step": 294
},
{
"epoch": 0.47,
"learning_rate": 0.00011395153389439233,
"loss": 1.0312,
"step": 295
},
{
"epoch": 0.47,
"learning_rate": 0.00011343800423027582,
"loss": 1.0703,
"step": 296
},
{
"epoch": 0.48,
"learning_rate": 0.0001129241134155949,
"loss": 1.0625,
"step": 297
},
{
"epoch": 0.48,
"learning_rate": 0.00011240987526132594,
"loss": 1.0117,
"step": 298
},
{
"epoch": 0.48,
"learning_rate": 0.00011189530358778005,
"loss": 1.0234,
"step": 299
},
{
"epoch": 0.48,
"learning_rate": 0.00011138041222423177,
"loss": 1.0078,
"step": 300
},
{
"epoch": 0.48,
"learning_rate": 0.00011086521500854745,
"loss": 1.0117,
"step": 301
},
{
"epoch": 0.48,
"learning_rate": 0.00011034972578681338,
"loss": 0.9941,
"step": 302
},
{
"epoch": 0.48,
"learning_rate": 0.00010983395841296348,
"loss": 1.0273,
"step": 303
},
{
"epoch": 0.49,
"learning_rate": 0.00010931792674840718,
"loss": 1.0312,
"step": 304
},
{
"epoch": 0.49,
"learning_rate": 0.00010880164466165674,
"loss": 1.0195,
"step": 305
},
{
"epoch": 0.49,
"learning_rate": 0.00010828512602795462,
"loss": 1.0195,
"step": 306
},
{
"epoch": 0.49,
"learning_rate": 0.00010776838472890065,
"loss": 1.0195,
"step": 307
},
{
"epoch": 0.49,
"learning_rate": 0.00010725143465207867,
"loss": 1.0156,
"step": 308
},
{
"epoch": 0.49,
"learning_rate": 0.00010673428969068364,
"loss": 0.9941,
"step": 309
},
{
"epoch": 0.5,
"learning_rate": 0.00010621696374314807,
"loss": 1.0156,
"step": 310
},
{
"epoch": 0.5,
"learning_rate": 0.00010569947071276847,
"loss": 1.0312,
"step": 311
},
{
"epoch": 0.5,
"learning_rate": 0.00010518182450733186,
"loss": 1.0176,
"step": 312
},
{
"epoch": 0.5,
"learning_rate": 0.00010466403903874176,
"loss": 1.0391,
"step": 313
},
{
"epoch": 0.5,
"learning_rate": 0.00010414612822264455,
"loss": 1.0352,
"step": 314
},
{
"epoch": 0.5,
"learning_rate": 0.00010362810597805526,
"loss": 1.0391,
"step": 315
},
{
"epoch": 0.51,
"learning_rate": 0.0001031099862269837,
"loss": 1.0234,
"step": 316
},
{
"epoch": 0.51,
"learning_rate": 0.00010259178289406011,
"loss": 1.0332,
"step": 317
},
{
"epoch": 0.51,
"learning_rate": 0.00010207350990616107,
"loss": 1.0,
"step": 318
},
{
"epoch": 0.51,
"learning_rate": 0.0001015551811920351,
"loss": 1.0312,
"step": 319
},
{
"epoch": 0.51,
"learning_rate": 0.00010103681068192845,
"loss": 1.0273,
"step": 320
},
{
"epoch": 0.51,
"learning_rate": 0.00010051841230721065,
"loss": 1.043,
"step": 321
},
{
"epoch": 0.52,
"learning_rate": 0.0001,
"loss": 1.0352,
"step": 322
},
{
"epoch": 0.52,
"learning_rate": 9.948158769278939e-05,
"loss": 1.0508,
"step": 323
},
{
"epoch": 0.52,
"learning_rate": 9.896318931807155e-05,
"loss": 1.043,
"step": 324
},
{
"epoch": 0.52,
"learning_rate": 9.844481880796491e-05,
"loss": 1.043,
"step": 325
},
{
"epoch": 0.52,
"learning_rate": 9.792649009383899e-05,
"loss": 0.9844,
"step": 326
},
{
"epoch": 0.52,
"learning_rate": 9.740821710593989e-05,
"loss": 1.0195,
"step": 327
},
{
"epoch": 0.52,
"learning_rate": 9.689001377301633e-05,
"loss": 1.0352,
"step": 328
},
{
"epoch": 0.53,
"learning_rate": 9.637189402194476e-05,
"loss": 0.9922,
"step": 329
},
{
"epoch": 0.53,
"learning_rate": 9.585387177735547e-05,
"loss": 1.0078,
"step": 330
},
{
"epoch": 0.53,
"learning_rate": 9.533596096125825e-05,
"loss": 1.0195,
"step": 331
},
{
"epoch": 0.53,
"learning_rate": 9.481817549266817e-05,
"loss": 1.0273,
"step": 332
},
{
"epoch": 0.53,
"learning_rate": 9.430052928723153e-05,
"loss": 1.0273,
"step": 333
},
{
"epoch": 0.53,
"learning_rate": 9.378303625685195e-05,
"loss": 1.0234,
"step": 334
},
{
"epoch": 0.54,
"learning_rate": 9.326571030931637e-05,
"loss": 1.0312,
"step": 335
},
{
"epoch": 0.54,
"learning_rate": 9.274856534792138e-05,
"loss": 1.0078,
"step": 336
},
{
"epoch": 0.54,
"learning_rate": 9.223161527109937e-05,
"loss": 1.0117,
"step": 337
},
{
"epoch": 0.54,
"learning_rate": 9.171487397204539e-05,
"loss": 1.0156,
"step": 338
},
{
"epoch": 0.54,
"learning_rate": 9.119835533834331e-05,
"loss": 1.0273,
"step": 339
},
{
"epoch": 0.54,
"learning_rate": 9.068207325159284e-05,
"loss": 1.0059,
"step": 340
},
{
"epoch": 0.55,
"learning_rate": 9.016604158703654e-05,
"loss": 1.0234,
"step": 341
},
{
"epoch": 0.55,
"learning_rate": 8.965027421318665e-05,
"loss": 1.0352,
"step": 342
},
{
"epoch": 0.55,
"learning_rate": 8.913478499145254e-05,
"loss": 1.043,
"step": 343
},
{
"epoch": 0.55,
"learning_rate": 8.861958777576827e-05,
"loss": 0.9961,
"step": 344
},
{
"epoch": 0.55,
"learning_rate": 8.810469641222001e-05,
"loss": 1.0195,
"step": 345
},
{
"epoch": 0.55,
"learning_rate": 8.759012473867407e-05,
"loss": 0.9922,
"step": 346
},
{
"epoch": 0.56,
"learning_rate": 8.707588658440511e-05,
"loss": 1.0547,
"step": 347
},
{
"epoch": 0.56,
"learning_rate": 8.656199576972423e-05,
"loss": 1.0078,
"step": 348
},
{
"epoch": 0.56,
"learning_rate": 8.604846610560771e-05,
"loss": 1.0352,
"step": 349
},
{
"epoch": 0.56,
"learning_rate": 8.553531139332582e-05,
"loss": 1.0195,
"step": 350
},
{
"epoch": 0.56,
"learning_rate": 8.502254542407186e-05,
"loss": 1.0195,
"step": 351
},
{
"epoch": 0.56,
"learning_rate": 8.451018197859153e-05,
"loss": 1.0234,
"step": 352
},
{
"epoch": 0.56,
"learning_rate": 8.399823482681262e-05,
"loss": 1.002,
"step": 353
},
{
"epoch": 0.57,
"learning_rate": 8.348671772747487e-05,
"loss": 1.0273,
"step": 354
},
{
"epoch": 0.57,
"learning_rate": 8.297564442776014e-05,
"loss": 1.0039,
"step": 355
},
{
"epoch": 0.57,
"learning_rate": 8.246502866292324e-05,
"loss": 1.0352,
"step": 356
},
{
"epoch": 0.57,
"learning_rate": 8.195488415592238e-05,
"loss": 1.0215,
"step": 357
},
{
"epoch": 0.57,
"learning_rate": 8.144522461705067e-05,
"loss": 1.0215,
"step": 358
},
{
"epoch": 0.57,
"learning_rate": 8.093606374356759e-05,
"loss": 1.0117,
"step": 359
},
{
"epoch": 0.58,
"learning_rate": 8.042741521933071e-05,
"loss": 1.0156,
"step": 360
},
{
"epoch": 0.58,
"learning_rate": 7.991929271442817e-05,
"loss": 1.0098,
"step": 361
},
{
"epoch": 0.58,
"learning_rate": 7.941170988481108e-05,
"loss": 0.9922,
"step": 362
},
{
"epoch": 0.58,
"learning_rate": 7.89046803719267e-05,
"loss": 1.0234,
"step": 363
},
{
"epoch": 0.58,
"learning_rate": 7.839821780235168e-05,
"loss": 1.043,
"step": 364
},
{
"epoch": 0.58,
"learning_rate": 7.789233578742582e-05,
"loss": 1.0391,
"step": 365
},
{
"epoch": 0.59,
"learning_rate": 7.738704792288655e-05,
"loss": 1.0039,
"step": 366
},
{
"epoch": 0.59,
"learning_rate": 7.688236778850306e-05,
"loss": 1.0137,
"step": 367
},
{
"epoch": 0.59,
"learning_rate": 7.637830894771175e-05,
"loss": 0.9941,
"step": 368
},
{
"epoch": 0.59,
"learning_rate": 7.587488494725157e-05,
"loss": 0.9961,
"step": 369
},
{
"epoch": 0.59,
"learning_rate": 7.537210931679987e-05,
"loss": 0.998,
"step": 370
},
{
"epoch": 0.59,
"learning_rate": 7.48699955686089e-05,
"loss": 1.0195,
"step": 371
},
{
"epoch": 0.6,
"learning_rate": 7.43685571971426e-05,
"loss": 0.9902,
"step": 372
},
{
"epoch": 0.6,
"learning_rate": 7.386780767871397e-05,
"loss": 1.0176,
"step": 373
},
{
"epoch": 0.6,
"learning_rate": 7.336776047112276e-05,
"loss": 1.0137,
"step": 374
},
{
"epoch": 0.6,
"learning_rate": 7.286842901329412e-05,
"loss": 1.0078,
"step": 375
},
{
"epoch": 0.6,
"learning_rate": 7.236982672491698e-05,
"loss": 1.0391,
"step": 376
},
{
"epoch": 0.6,
"learning_rate": 7.187196700608373e-05,
"loss": 1.0195,
"step": 377
},
{
"epoch": 0.6,
"learning_rate": 7.137486323692995e-05,
"loss": 1.0312,
"step": 378
},
{
"epoch": 0.61,
"learning_rate": 7.087852877727481e-05,
"loss": 0.9902,
"step": 379
},
{
"epoch": 0.61,
"learning_rate": 7.038297696626206e-05,
"loss": 1.0234,
"step": 380
},
{
"epoch": 0.61,
"learning_rate": 6.988822112200156e-05,
"loss": 1.0234,
"step": 381
},
{
"epoch": 0.61,
"learning_rate": 6.939427454121128e-05,
"loss": 0.9961,
"step": 382
},
{
"epoch": 0.61,
"learning_rate": 6.890115049885994e-05,
"loss": 0.9863,
"step": 383
},
{
"epoch": 0.61,
"learning_rate": 6.84088622478104e-05,
"loss": 1.0078,
"step": 384
},
{
"epoch": 0.62,
"learning_rate": 6.791742301846326e-05,
"loss": 1.0156,
"step": 385
},
{
"epoch": 0.62,
"learning_rate": 6.742684601840141e-05,
"loss": 0.9941,
"step": 386
},
{
"epoch": 0.62,
"learning_rate": 6.693714443203507e-05,
"loss": 1.043,
"step": 387
},
{
"epoch": 0.62,
"learning_rate": 6.644833142024751e-05,
"loss": 1.0156,
"step": 388
},
{
"epoch": 0.62,
"learning_rate": 6.59604201200412e-05,
"loss": 1.0156,
"step": 389
},
{
"epoch": 0.62,
"learning_rate": 6.547342364418481e-05,
"loss": 0.9844,
"step": 390
},
{
"epoch": 0.63,
"learning_rate": 6.498735508086093e-05,
"loss": 1.0234,
"step": 391
},
{
"epoch": 0.63,
"learning_rate": 6.450222749331414e-05,
"loss": 1.002,
"step": 392
},
{
"epoch": 0.63,
"learning_rate": 6.40180539194999e-05,
"loss": 0.9785,
"step": 393
},
{
"epoch": 0.63,
"learning_rate": 6.35348473717345e-05,
"loss": 1.0312,
"step": 394
},
{
"epoch": 0.63,
"learning_rate": 6.305262083634488e-05,
"loss": 1.0176,
"step": 395
},
{
"epoch": 0.63,
"learning_rate": 6.25713872733199e-05,
"loss": 1.0273,
"step": 396
},
{
"epoch": 0.64,
"learning_rate": 6.209115961596208e-05,
"loss": 1.0273,
"step": 397
},
{
"epoch": 0.64,
"learning_rate": 6.161195077053976e-05,
"loss": 1.0273,
"step": 398
},
{
"epoch": 0.64,
"learning_rate": 6.113377361594049e-05,
"loss": 1.0195,
"step": 399
},
{
"epoch": 0.64,
"learning_rate": 6.065664100332478e-05,
"loss": 1.0234,
"step": 400
},
{
"epoch": 0.64,
"learning_rate": 6.018056575578075e-05,
"loss": 1.0156,
"step": 401
},
{
"epoch": 0.64,
"learning_rate": 5.970556066797941e-05,
"loss": 1.0078,
"step": 402
},
{
"epoch": 0.64,
"learning_rate": 5.923163850583113e-05,
"loss": 0.998,
"step": 403
},
{
"epoch": 0.65,
"learning_rate": 5.875881200614207e-05,
"loss": 0.9922,
"step": 404
},
{
"epoch": 0.65,
"learning_rate": 5.828709387627218e-05,
"loss": 1.0078,
"step": 405
},
{
"epoch": 0.65,
"learning_rate": 5.781649679379378e-05,
"loss": 1.0273,
"step": 406
},
{
"epoch": 0.65,
"learning_rate": 5.73470334061505e-05,
"loss": 1.002,
"step": 407
},
{
"epoch": 0.65,
"learning_rate": 5.687871633031754e-05,
"loss": 0.998,
"step": 408
},
{
"epoch": 0.65,
"learning_rate": 5.6411558152462894e-05,
"loss": 1.0117,
"step": 409
},
{
"epoch": 0.66,
"learning_rate": 5.5945571427608526e-05,
"loss": 1.0352,
"step": 410
},
{
"epoch": 0.66,
"learning_rate": 5.54807686792933e-05,
"loss": 1.0254,
"step": 411
},
{
"epoch": 0.66,
"learning_rate": 5.501716239923642e-05,
"loss": 1.0293,
"step": 412
},
{
"epoch": 0.66,
"learning_rate": 5.4554765047001613e-05,
"loss": 1.0195,
"step": 413
},
{
"epoch": 0.66,
"learning_rate": 5.4093589049662175e-05,
"loss": 1.0,
"step": 414
},
{
"epoch": 0.66,
"learning_rate": 5.363364680146725e-05,
"loss": 1.0234,
"step": 415
},
{
"epoch": 0.67,
"learning_rate": 5.31749506635086e-05,
"loss": 1.0234,
"step": 416
},
{
"epoch": 0.67,
"learning_rate": 5.271751296338823e-05,
"loss": 1.0234,
"step": 417
},
{
"epoch": 0.67,
"learning_rate": 5.226134599488728e-05,
"loss": 1.0312,
"step": 418
},
{
"epoch": 0.67,
"learning_rate": 5.180646201763577e-05,
"loss": 1.0234,
"step": 419
},
{
"epoch": 0.67,
"learning_rate": 5.135287325678271e-05,
"loss": 1.0117,
"step": 420
},
{
"epoch": 0.67,
"learning_rate": 5.090059190266779e-05,
"loss": 1.0078,
"step": 421
},
{
"epoch": 0.68,
"learning_rate": 5.0449630110493836e-05,
"loss": 1.0312,
"step": 422
},
{
"epoch": 0.68,
"learning_rate": 5.000000000000002e-05,
"loss": 1.0234,
"step": 423
},
{
"epoch": 0.68,
"learning_rate": 4.955171365513603e-05,
"loss": 1.0039,
"step": 424
},
{
"epoch": 0.68,
"learning_rate": 4.9104783123737566e-05,
"loss": 1.0098,
"step": 425
},
{
"epoch": 0.68,
"learning_rate": 4.865922041720239e-05,
"loss": 1.0391,
"step": 426
},
{
"epoch": 0.68,
"learning_rate": 4.821503751016746e-05,
"loss": 0.998,
"step": 427
},
{
"epoch": 0.68,
"learning_rate": 4.777224634018732e-05,
"loss": 0.9961,
"step": 428
},
{
"epoch": 0.69,
"learning_rate": 4.733085880741301e-05,
"loss": 1.0195,
"step": 429
},
{
"epoch": 0.69,
"learning_rate": 4.689088677427249e-05,
"loss": 0.9941,
"step": 430
},
{
"epoch": 0.69,
"learning_rate": 4.645234206515171e-05,
"loss": 0.9609,
"step": 431
},
{
"epoch": 0.69,
"learning_rate": 4.6015236466076747e-05,
"loss": 0.9902,
"step": 432
},
{
"epoch": 0.69,
"learning_rate": 4.5579581724397255e-05,
"loss": 1.0234,
"step": 433
},
{
"epoch": 0.69,
"learning_rate": 4.514538954847064e-05,
"loss": 1.0352,
"step": 434
},
{
"epoch": 0.7,
"learning_rate": 4.471267160734731e-05,
"loss": 1.0078,
"step": 435
},
{
"epoch": 0.7,
"learning_rate": 4.428143953045717e-05,
"loss": 0.998,
"step": 436
},
{
"epoch": 0.7,
"learning_rate": 4.385170490729712e-05,
"loss": 0.9961,
"step": 437
},
{
"epoch": 0.7,
"learning_rate": 4.342347928711953e-05,
"loss": 0.9961,
"step": 438
},
{
"epoch": 0.7,
"learning_rate": 4.2996774178621736e-05,
"loss": 0.998,
"step": 439
},
{
"epoch": 0.7,
"learning_rate": 4.257160104963696e-05,
"loss": 1.0078,
"step": 440
},
{
"epoch": 0.71,
"learning_rate": 4.2147971326825966e-05,
"loss": 1.0156,
"step": 441
},
{
"epoch": 0.71,
"learning_rate": 4.172589639536991e-05,
"loss": 1.002,
"step": 442
},
{
"epoch": 0.71,
"learning_rate": 4.130538759866457e-05,
"loss": 0.9824,
"step": 443
},
{
"epoch": 0.71,
"learning_rate": 4.088645623801534e-05,
"loss": 1.0078,
"step": 444
},
{
"epoch": 0.71,
"learning_rate": 4.046911357233343e-05,
"loss": 0.9902,
"step": 445
},
{
"epoch": 0.71,
"learning_rate": 4.00533708178334e-05,
"loss": 1.0078,
"step": 446
},
{
"epoch": 0.72,
"learning_rate": 3.963923914773187e-05,
"loss": 1.0117,
"step": 447
},
{
"epoch": 0.72,
"learning_rate": 3.922672969194686e-05,
"loss": 1.0059,
"step": 448
},
{
"epoch": 0.72,
"learning_rate": 3.8815853536798904e-05,
"loss": 0.9961,
"step": 449
},
{
"epoch": 0.72,
"learning_rate": 3.840662172471315e-05,
"loss": 1.0195,
"step": 450
},
{
"epoch": 0.72,
"learning_rate": 3.79990452539225e-05,
"loss": 1.002,
"step": 451
},
{
"epoch": 0.72,
"learning_rate": 3.759313507817196e-05,
"loss": 1.0156,
"step": 452
},
{
"epoch": 0.72,
"learning_rate": 3.7188902106424416e-05,
"loss": 1.0898,
"step": 453
},
{
"epoch": 0.73,
"learning_rate": 3.678635720256737e-05,
"loss": 1.0117,
"step": 454
},
{
"epoch": 0.73,
"learning_rate": 3.638551118512089e-05,
"loss": 0.9805,
"step": 455
},
{
"epoch": 0.73,
"learning_rate": 3.5986374826947066e-05,
"loss": 1.0352,
"step": 456
},
{
"epoch": 0.73,
"learning_rate": 3.558895885496023e-05,
"loss": 0.9961,
"step": 457
},
{
"epoch": 0.73,
"learning_rate": 3.519327394983888e-05,
"loss": 1.0059,
"step": 458
},
{
"epoch": 0.73,
"learning_rate": 3.479933074573858e-05,
"loss": 0.9961,
"step": 459
},
{
"epoch": 0.74,
"learning_rate": 3.440713983000601e-05,
"loss": 1.0039,
"step": 460
},
{
"epoch": 0.74,
"learning_rate": 3.401671174289469e-05,
"loss": 1.0078,
"step": 461
},
{
"epoch": 0.74,
"learning_rate": 3.362805697728145e-05,
"loss": 1.0,
"step": 462
},
{
"epoch": 0.74,
"learning_rate": 3.324118597838464e-05,
"loss": 1.0312,
"step": 463
},
{
"epoch": 0.74,
"learning_rate": 3.285610914348332e-05,
"loss": 1.0078,
"step": 464
},
{
"epoch": 0.74,
"learning_rate": 3.2472836821637744e-05,
"loss": 1.0,
"step": 465
},
{
"epoch": 0.75,
"learning_rate": 3.209137931341143e-05,
"loss": 1.0195,
"step": 466
},
{
"epoch": 0.75,
"learning_rate": 3.1711746870594086e-05,
"loss": 0.998,
"step": 467
},
{
"epoch": 0.75,
"learning_rate": 3.1333949695926324e-05,
"loss": 1.0137,
"step": 468
},
{
"epoch": 0.75,
"learning_rate": 3.0957997942825336e-05,
"loss": 0.9922,
"step": 469
},
{
"epoch": 0.75,
"learning_rate": 3.058390171511196e-05,
"loss": 0.998,
"step": 470
},
{
"epoch": 0.75,
"learning_rate": 3.021167106673928e-05,
"loss": 1.0137,
"step": 471
},
{
"epoch": 0.76,
"learning_rate": 2.9841316001522347e-05,
"loss": 1.0312,
"step": 472
},
{
"epoch": 0.76,
"learning_rate": 2.9472846472869298e-05,
"loss": 0.9922,
"step": 473
},
{
"epoch": 0.76,
"learning_rate": 2.9106272383513835e-05,
"loss": 1.0273,
"step": 474
},
{
"epoch": 0.76,
"learning_rate": 2.874160358524931e-05,
"loss": 0.9902,
"step": 475
},
{
"epoch": 0.76,
"learning_rate": 2.8378849878663628e-05,
"loss": 0.9902,
"step": 476
},
{
"epoch": 0.76,
"learning_rate": 2.8018021012875994e-05,
"loss": 1.0078,
"step": 477
},
{
"epoch": 0.76,
"learning_rate": 2.7659126685275027e-05,
"loss": 1.0156,
"step": 478
},
{
"epoch": 0.77,
"learning_rate": 2.7302176541257986e-05,
"loss": 1.0234,
"step": 479
},
{
"epoch": 0.77,
"learning_rate": 2.6947180173971508e-05,
"loss": 1.0195,
"step": 480
},
{
"epoch": 0.77,
"learning_rate": 2.659414712405398e-05,
"loss": 1.0234,
"step": 481
},
{
"epoch": 0.77,
"learning_rate": 2.6243086879379e-05,
"loss": 0.998,
"step": 482
},
{
"epoch": 0.77,
"learning_rate": 2.5894008874800325e-05,
"loss": 0.9668,
"step": 483
},
{
"epoch": 0.77,
"learning_rate": 2.5546922491898495e-05,
"loss": 1.0,
"step": 484
},
{
"epoch": 0.78,
"learning_rate": 2.5201837058728505e-05,
"loss": 1.0234,
"step": 485
},
{
"epoch": 0.78,
"learning_rate": 2.485876184956928e-05,
"loss": 1.0059,
"step": 486
},
{
"epoch": 0.78,
"learning_rate": 2.451770608467432e-05,
"loss": 1.0117,
"step": 487
},
{
"epoch": 0.78,
"learning_rate": 2.417867893002387e-05,
"loss": 1.0117,
"step": 488
},
{
"epoch": 0.78,
"learning_rate": 2.3841689497078746e-05,
"loss": 0.9902,
"step": 489
},
{
"epoch": 0.78,
"learning_rate": 2.3506746842535242e-05,
"loss": 1.0039,
"step": 490
},
{
"epoch": 0.79,
"learning_rate": 2.3173859968081944e-05,
"loss": 1.0,
"step": 491
},
{
"epoch": 0.79,
"learning_rate": 2.2843037820157675e-05,
"loss": 1.0391,
"step": 492
},
{
"epoch": 0.79,
"learning_rate": 2.251428928971102e-05,
"loss": 1.0039,
"step": 493
},
{
"epoch": 0.79,
"learning_rate": 2.2187623211961562e-05,
"loss": 1.0312,
"step": 494
},
{
"epoch": 0.79,
"learning_rate": 2.1863048366162208e-05,
"loss": 1.0,
"step": 495
},
{
"epoch": 0.79,
"learning_rate": 2.1540573475363402e-05,
"loss": 1.002,
"step": 496
},
{
"epoch": 0.8,
"learning_rate": 2.1220207206178688e-05,
"loss": 1.0312,
"step": 497
},
{
"epoch": 0.8,
"learning_rate": 2.0901958168551638e-05,
"loss": 0.9785,
"step": 498
},
{
"epoch": 0.8,
"learning_rate": 2.058583491552465e-05,
"loss": 0.9805,
"step": 499
},
{
"epoch": 0.8,
"learning_rate": 2.027184594300898e-05,
"loss": 1.0137,
"step": 500
},
{
"epoch": 0.8,
"learning_rate": 1.995999968955641e-05,
"loss": 0.9883,
"step": 501
},
{
"epoch": 0.8,
"learning_rate": 1.9650304536132426e-05,
"loss": 0.9844,
"step": 502
},
{
"epoch": 0.8,
"learning_rate": 1.9342768805891178e-05,
"loss": 1.0195,
"step": 503
},
{
"epoch": 0.81,
"learning_rate": 1.903740076395151e-05,
"loss": 0.9746,
"step": 504
},
{
"epoch": 0.81,
"learning_rate": 1.8734208617174988e-05,
"loss": 0.9902,
"step": 505
},
{
"epoch": 0.81,
"learning_rate": 1.8433200513945337e-05,
"loss": 0.9922,
"step": 506
},
{
"epoch": 0.81,
"learning_rate": 1.8134384543949478e-05,
"loss": 1.0234,
"step": 507
},
{
"epoch": 0.81,
"learning_rate": 1.783776873795994e-05,
"loss": 1.0156,
"step": 508
},
{
"epoch": 0.81,
"learning_rate": 1.754336106761927e-05,
"loss": 1.002,
"step": 509
},
{
"epoch": 0.82,
"learning_rate": 1.7251169445225657e-05,
"loss": 0.9766,
"step": 510
},
{
"epoch": 0.82,
"learning_rate": 1.696120172352025e-05,
"loss": 0.9688,
"step": 511
},
{
"epoch": 0.82,
"learning_rate": 1.6673465695476232e-05,
"loss": 1.0234,
"step": 512
},
{
"epoch": 0.82,
"learning_rate": 1.6387969094089316e-05,
"loss": 1.0098,
"step": 513
},
{
"epoch": 0.82,
"learning_rate": 1.6104719592169902e-05,
"loss": 1.0078,
"step": 514
},
{
"epoch": 0.82,
"learning_rate": 1.5823724802136865e-05,
"loss": 1.0352,
"step": 515
},
{
"epoch": 0.83,
"learning_rate": 1.5544992275813053e-05,
"loss": 1.0,
"step": 516
},
{
"epoch": 0.83,
"learning_rate": 1.526852950422226e-05,
"loss": 1.0098,
"step": 517
},
{
"epoch": 0.83,
"learning_rate": 1.4994343917387854e-05,
"loss": 0.9863,
"step": 518
},
{
"epoch": 0.83,
"learning_rate": 1.4722442884133214e-05,
"loss": 1.0117,
"step": 519
},
{
"epoch": 0.83,
"learning_rate": 1.4452833711883628e-05,
"loss": 1.0117,
"step": 520
},
{
"epoch": 0.83,
"learning_rate": 1.4185523646469822e-05,
"loss": 1.0078,
"step": 521
},
{
"epoch": 0.84,
"learning_rate": 1.3920519871933424e-05,
"loss": 1.0215,
"step": 522
},
{
"epoch": 0.84,
"learning_rate": 1.3657829510333654e-05,
"loss": 0.9727,
"step": 523
},
{
"epoch": 0.84,
"learning_rate": 1.339745962155613e-05,
"loss": 1.0312,
"step": 524
},
{
"epoch": 0.84,
"learning_rate": 1.3139417203123027e-05,
"loss": 0.9922,
"step": 525
},
{
"epoch": 0.84,
"learning_rate": 1.2883709190004955e-05,
"loss": 1.0039,
"step": 526
},
{
"epoch": 0.84,
"learning_rate": 1.263034245443473e-05,
"loss": 0.9863,
"step": 527
},
{
"epoch": 0.84,
"learning_rate": 1.2379323805722576e-05,
"loss": 0.9961,
"step": 528
},
{
"epoch": 0.85,
"learning_rate": 1.2130659990073146e-05,
"loss": 0.9941,
"step": 529
},
{
"epoch": 0.85,
"learning_rate": 1.1884357690404158e-05,
"loss": 0.9922,
"step": 530
},
{
"epoch": 0.85,
"learning_rate": 1.1640423526166988e-05,
"loss": 0.9941,
"step": 531
},
{
"epoch": 0.85,
"learning_rate": 1.1398864053168534e-05,
"loss": 1.0273,
"step": 532
},
{
"epoch": 0.85,
"learning_rate": 1.1159685763395111e-05,
"loss": 1.002,
"step": 533
},
{
"epoch": 0.85,
"learning_rate": 1.0922895084838037e-05,
"loss": 1.0137,
"step": 534
},
{
"epoch": 0.86,
"learning_rate": 1.0688498381320855e-05,
"loss": 0.9863,
"step": 535
},
{
"epoch": 0.86,
"learning_rate": 1.045650195232819e-05,
"loss": 0.9961,
"step": 536
},
{
"epoch": 0.86,
"learning_rate": 1.0226912032836611e-05,
"loss": 1.0234,
"step": 537
},
{
"epoch": 0.86,
"learning_rate": 9.999734793146998e-06,
"loss": 0.9941,
"step": 538
},
{
"epoch": 0.86,
"learning_rate": 9.774976338718677e-06,
"loss": 1.0137,
"step": 539
},
{
"epoch": 0.86,
"learning_rate": 9.552642710005299e-06,
"loss": 0.9941,
"step": 540
},
{
"epoch": 0.87,
"learning_rate": 9.332739882292752e-06,
"loss": 0.9922,
"step": 541
},
{
"epoch": 0.87,
"learning_rate": 9.115273765538202e-06,
"loss": 1.0508,
"step": 542
},
{
"epoch": 0.87,
"learning_rate": 8.900250204211514e-06,
"loss": 0.9727,
"step": 543
},
{
"epoch": 0.87,
"learning_rate": 8.687674977138116e-06,
"loss": 1.0117,
"step": 544
},
{
"epoch": 0.87,
"learning_rate": 8.47755379734373e-06,
"loss": 0.9961,
"step": 545
},
{
"epoch": 0.87,
"learning_rate": 8.269892311900696e-06,
"loss": 1.0352,
"step": 546
},
{
"epoch": 0.88,
"learning_rate": 8.064696101776358e-06,
"loss": 1.0,
"step": 547
},
{
"epoch": 0.88,
"learning_rate": 7.861970681683051e-06,
"loss": 0.9668,
"step": 548
},
{
"epoch": 0.88,
"learning_rate": 7.661721499929753e-06,
"loss": 0.9961,
"step": 549
},
{
"epoch": 0.88,
"learning_rate": 7.463953938275858e-06,
"loss": 0.9844,
"step": 550
},
{
"epoch": 0.88,
"learning_rate": 7.2686733117863784e-06,
"loss": 1.0039,
"step": 551
},
{
"epoch": 0.88,
"learning_rate": 7.07588486868922e-06,
"loss": 0.9863,
"step": 552
},
{
"epoch": 0.88,
"learning_rate": 6.8855937902340576e-06,
"loss": 0.9883,
"step": 553
},
{
"epoch": 0.89,
"learning_rate": 6.6978051905530855e-06,
"loss": 1.0234,
"step": 554
},
{
"epoch": 0.89,
"learning_rate": 6.512524116523633e-06,
"loss": 0.9922,
"step": 555
},
{
"epoch": 0.89,
"learning_rate": 6.329755547632499e-06,
"loss": 0.9922,
"step": 556
},
{
"epoch": 0.89,
"learning_rate": 6.149504395842087e-06,
"loss": 1.0352,
"step": 557
},
{
"epoch": 0.89,
"learning_rate": 5.971775505458444e-06,
"loss": 1.0234,
"step": 558
},
{
"epoch": 0.89,
"learning_rate": 5.7965736530010916e-06,
"loss": 0.9805,
"step": 559
},
{
"epoch": 0.9,
"learning_rate": 5.623903547074549e-06,
"loss": 1.0117,
"step": 560
},
{
"epoch": 0.9,
"learning_rate": 5.453769828241872e-06,
"loss": 0.9863,
"step": 561
},
{
"epoch": 0.9,
"learning_rate": 5.286177068899989e-06,
"loss": 1.0059,
"step": 562
},
{
"epoch": 0.9,
"learning_rate": 5.121129773156663e-06,
"loss": 1.0078,
"step": 563
},
{
"epoch": 0.9,
"learning_rate": 4.95863237670956e-06,
"loss": 0.9922,
"step": 564
},
{
"epoch": 0.9,
"learning_rate": 4.798689246727006e-06,
"loss": 1.0156,
"step": 565
},
{
"epoch": 0.91,
"learning_rate": 4.641304681730641e-06,
"loss": 1.0273,
"step": 566
},
{
"epoch": 0.91,
"learning_rate": 4.486482911479839e-06,
"loss": 0.9941,
"step": 567
},
{
"epoch": 0.91,
"learning_rate": 4.3342280968580285e-06,
"loss": 1.002,
"step": 568
},
{
"epoch": 0.91,
"learning_rate": 4.184544329761009e-06,
"loss": 1.0195,
"step": 569
},
{
"epoch": 0.91,
"learning_rate": 4.037435632986786e-06,
"loss": 0.9805,
"step": 570
},
{
"epoch": 0.91,
"learning_rate": 3.892905960127546e-06,
"loss": 1.0,
"step": 571
},
{
"epoch": 0.92,
"learning_rate": 3.750959195463466e-06,
"loss": 1.0156,
"step": 572
},
{
"epoch": 0.92,
"learning_rate": 3.611599153858214e-06,
"loss": 1.0234,
"step": 573
},
{
"epoch": 0.92,
"learning_rate": 3.4748295806564356e-06,
"loss": 1.0234,
"step": 574
},
{
"epoch": 0.92,
"learning_rate": 3.3406541515832003e-06,
"loss": 1.0312,
"step": 575
},
{
"epoch": 0.92,
"learning_rate": 3.209076472645112e-06,
"loss": 0.9766,
"step": 576
},
{
"epoch": 0.92,
"learning_rate": 3.0801000800333877e-06,
"loss": 1.0078,
"step": 577
},
{
"epoch": 0.92,
"learning_rate": 2.9537284400289355e-06,
"loss": 1.0312,
"step": 578
},
{
"epoch": 0.93,
"learning_rate": 2.8299649489090475e-06,
"loss": 1.0098,
"step": 579
},
{
"epoch": 0.93,
"learning_rate": 2.708812932856253e-06,
"loss": 0.9883,
"step": 580
},
{
"epoch": 0.93,
"learning_rate": 2.590275647868867e-06,
"loss": 1.0195,
"step": 581
},
{
"epoch": 0.93,
"learning_rate": 2.4743562796734622e-06,
"loss": 1.002,
"step": 582
},
{
"epoch": 0.93,
"learning_rate": 2.3610579436393e-06,
"loss": 0.9648,
"step": 583
},
{
"epoch": 0.93,
"learning_rate": 2.250383684694579e-06,
"loss": 1.0352,
"step": 584
},
{
"epoch": 0.94,
"learning_rate": 2.1423364772445887e-06,
"loss": 1.0,
"step": 585
},
{
"epoch": 0.94,
"learning_rate": 2.036919225091827e-06,
"loss": 1.0098,
"step": 586
},
{
"epoch": 0.94,
"learning_rate": 1.9341347613579087e-06,
"loss": 0.9961,
"step": 587
},
{
"epoch": 0.94,
"learning_rate": 1.8339858484073935e-06,
"loss": 0.9961,
"step": 588
},
{
"epoch": 0.94,
"learning_rate": 1.7364751777736332e-06,
"loss": 1.0039,
"step": 589
},
{
"epoch": 0.94,
"learning_rate": 1.6416053700863964e-06,
"loss": 1.0059,
"step": 590
},
{
"epoch": 0.95,
"learning_rate": 1.5493789750014031e-06,
"loss": 0.9863,
"step": 591
},
{
"epoch": 0.95,
"learning_rate": 1.459798471131868e-06,
"loss": 0.9453,
"step": 592
},
{
"epoch": 0.95,
"learning_rate": 1.3728662659818204e-06,
"loss": 0.9727,
"step": 593
},
{
"epoch": 0.95,
"learning_rate": 1.2885846958814673e-06,
"loss": 0.9805,
"step": 594
},
{
"epoch": 0.95,
"learning_rate": 1.2069560259243328e-06,
"loss": 1.0195,
"step": 595
},
{
"epoch": 0.95,
"learning_rate": 1.1279824499064396e-06,
"loss": 0.9902,
"step": 596
},
{
"epoch": 0.96,
"learning_rate": 1.0516660902673448e-06,
"loss": 0.998,
"step": 597
},
{
"epoch": 0.96,
"learning_rate": 9.780089980330642e-07,
"loss": 1.0156,
"step": 598
},
{
"epoch": 0.96,
"learning_rate": 9.070131527609604e-07,
"loss": 1.0391,
"step": 599
},
{
"epoch": 0.96,
"learning_rate": 8.386804624865851e-07,
"loss": 1.0195,
"step": 600
},
{
"epoch": 0.96,
"learning_rate": 7.730127636723539e-07,
"loss": 1.0156,
"step": 601
},
{
"epoch": 0.96,
"learning_rate": 7.100118211581852e-07,
"loss": 1.0059,
"step": 602
},
{
"epoch": 0.96,
"learning_rate": 6.496793281141056e-07,
"loss": 0.9902,
"step": 603
},
{
"epoch": 0.97,
"learning_rate": 5.920169059947411e-07,
"loss": 1.0039,
"step": 604
},
{
"epoch": 0.97,
"learning_rate": 5.370261044956971e-07,
"loss": 0.9766,
"step": 605
},
{
"epoch": 0.97,
"learning_rate": 4.847084015119574e-07,
"loss": 1.0117,
"step": 606
},
{
"epoch": 0.97,
"learning_rate": 4.3506520309813947e-07,
"loss": 1.0039,
"step": 607
},
{
"epoch": 0.97,
"learning_rate": 3.8809784343072366e-07,
"loss": 1.0195,
"step": 608
},
{
"epoch": 0.97,
"learning_rate": 3.4380758477219333e-07,
"loss": 1.0059,
"step": 609
},
{
"epoch": 0.98,
"learning_rate": 3.0219561743707326e-07,
"loss": 1.0391,
"step": 610
},
{
"epoch": 0.98,
"learning_rate": 2.6326305976001055e-07,
"loss": 1.0391,
"step": 611
},
{
"epoch": 0.98,
"learning_rate": 2.2701095806565432e-07,
"loss": 1.0059,
"step": 612
},
{
"epoch": 0.98,
"learning_rate": 1.9344028664056713e-07,
"loss": 1.0391,
"step": 613
},
{
"epoch": 0.98,
"learning_rate": 1.6255194770704586e-07,
"loss": 1.0234,
"step": 614
},
{
"epoch": 0.98,
"learning_rate": 1.3434677139885222e-07,
"loss": 0.9902,
"step": 615
},
{
"epoch": 0.99,
"learning_rate": 1.0882551573891953e-07,
"loss": 1.0312,
"step": 616
},
{
"epoch": 0.99,
"learning_rate": 8.598886661895788e-08,
"loss": 0.9883,
"step": 617
},
{
"epoch": 0.99,
"learning_rate": 6.583743778106887e-08,
"loss": 0.9824,
"step": 618
},
{
"epoch": 0.99,
"learning_rate": 4.837177080119215e-08,
"loss": 1.0078,
"step": 619
},
{
"epoch": 0.99,
"learning_rate": 3.359233507459481e-08,
"loss": 0.9883,
"step": 620
},
{
"epoch": 0.99,
"learning_rate": 2.1499527803214846e-08,
"loss": 1.0117,
"step": 621
},
{
"epoch": 1.0,
"learning_rate": 1.209367398504746e-08,
"loss": 0.9805,
"step": 622
},
{
"epoch": 1.0,
"learning_rate": 5.375026405352035e-09,
"loss": 1.0098,
"step": 623
},
{
"epoch": 1.0,
"learning_rate": 1.3437656298687097e-09,
"loss": 1.0273,
"step": 624
},
{
"epoch": 1.0,
"learning_rate": 0.0,
"loss": 0.9883,
"step": 625
},
{
"epoch": 1.0,
"step": 625,
"total_flos": 2364286311071744.0,
"train_loss": 1.044653125,
"train_runtime": 13892.2986,
"train_samples_per_second": 5.759,
"train_steps_per_second": 0.045
}
],
"max_steps": 625,
"num_train_epochs": 1,
"total_flos": 2364286311071744.0,
"trial_name": null,
"trial_params": null
}