ch-mi-7 / trainer_state.json
CJ-gyuwonpark's picture
Upload 6 files
b244b0e
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.998639455782313,
"eval_steps": 20,
"global_step": 367,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2e-05,
"loss": 1.264,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 4e-05,
"loss": 0.9796,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 6e-05,
"loss": 0.9343,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 8e-05,
"loss": 1.165,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 0.0001,
"loss": 1.1233,
"step": 5
},
{
"epoch": 0.02,
"learning_rate": 0.00012,
"loss": 0.9889,
"step": 6
},
{
"epoch": 0.02,
"learning_rate": 0.00014,
"loss": 0.9492,
"step": 7
},
{
"epoch": 0.02,
"learning_rate": 0.00016,
"loss": 1.1725,
"step": 8
},
{
"epoch": 0.02,
"learning_rate": 0.00018,
"loss": 0.9987,
"step": 9
},
{
"epoch": 0.03,
"learning_rate": 0.0002,
"loss": 1.0273,
"step": 10
},
{
"epoch": 0.03,
"learning_rate": 0.0001999961280430958,
"loss": 0.9883,
"step": 11
},
{
"epoch": 0.03,
"learning_rate": 0.00019998451247222416,
"loss": 0.8587,
"step": 12
},
{
"epoch": 0.04,
"learning_rate": 0.0001999651541868849,
"loss": 0.9096,
"step": 13
},
{
"epoch": 0.04,
"learning_rate": 0.00019993805468616693,
"loss": 1.0186,
"step": 14
},
{
"epoch": 0.04,
"learning_rate": 0.00019990321606863225,
"loss": 0.9402,
"step": 15
},
{
"epoch": 0.04,
"learning_rate": 0.00019986064103215339,
"loss": 0.927,
"step": 16
},
{
"epoch": 0.05,
"learning_rate": 0.00019981033287370443,
"loss": 0.935,
"step": 17
},
{
"epoch": 0.05,
"learning_rate": 0.00019975229548910582,
"loss": 0.9498,
"step": 18
},
{
"epoch": 0.05,
"learning_rate": 0.00019968653337272261,
"loss": 1.0335,
"step": 19
},
{
"epoch": 0.05,
"learning_rate": 0.0001996130516171164,
"loss": 0.8899,
"step": 20
},
{
"epoch": 0.05,
"eval_loss": 1.3891513347625732,
"eval_runtime": 119.0073,
"eval_samples_per_second": 3.706,
"eval_steps_per_second": 1.857,
"step": 20
},
{
"epoch": 0.06,
"learning_rate": 0.00019953185591265103,
"loss": 0.8179,
"step": 21
},
{
"epoch": 0.06,
"learning_rate": 0.00019944295254705185,
"loss": 1.1169,
"step": 22
},
{
"epoch": 0.06,
"learning_rate": 0.00019934634840491886,
"loss": 0.9125,
"step": 23
},
{
"epoch": 0.07,
"learning_rate": 0.0001992420509671936,
"loss": 0.9177,
"step": 24
},
{
"epoch": 0.07,
"learning_rate": 0.00019913006831057969,
"loss": 0.8608,
"step": 25
},
{
"epoch": 0.07,
"learning_rate": 0.0001990104091069176,
"loss": 0.7755,
"step": 26
},
{
"epoch": 0.07,
"learning_rate": 0.00019888308262251285,
"loss": 1.0967,
"step": 27
},
{
"epoch": 0.08,
"learning_rate": 0.00019874809871741876,
"loss": 0.9759,
"step": 28
},
{
"epoch": 0.08,
"learning_rate": 0.00019860546784467248,
"loss": 0.9605,
"step": 29
},
{
"epoch": 0.08,
"learning_rate": 0.00019845520104948592,
"loss": 1.0065,
"step": 30
},
{
"epoch": 0.08,
"learning_rate": 0.0001982973099683902,
"loss": 0.9643,
"step": 31
},
{
"epoch": 0.09,
"learning_rate": 0.00019813180682833447,
"loss": 0.9065,
"step": 32
},
{
"epoch": 0.09,
"learning_rate": 0.00019795870444573935,
"loss": 1.1212,
"step": 33
},
{
"epoch": 0.09,
"learning_rate": 0.00019777801622550408,
"loss": 0.8759,
"step": 34
},
{
"epoch": 0.1,
"learning_rate": 0.00019758975615996873,
"loss": 0.8474,
"step": 35
},
{
"epoch": 0.1,
"learning_rate": 0.00019739393882783047,
"loss": 0.9243,
"step": 36
},
{
"epoch": 0.1,
"learning_rate": 0.00019719057939301477,
"loss": 0.9369,
"step": 37
},
{
"epoch": 0.1,
"learning_rate": 0.00019697969360350098,
"loss": 1.0376,
"step": 38
},
{
"epoch": 0.11,
"learning_rate": 0.00019676129779010282,
"loss": 0.8972,
"step": 39
},
{
"epoch": 0.11,
"learning_rate": 0.00019653540886520386,
"loss": 0.8402,
"step": 40
},
{
"epoch": 0.11,
"eval_loss": 1.3678525686264038,
"eval_runtime": 119.0464,
"eval_samples_per_second": 3.704,
"eval_steps_per_second": 1.856,
"step": 40
},
{
"epoch": 0.11,
"learning_rate": 0.0001963020443214478,
"loss": 0.8425,
"step": 41
},
{
"epoch": 0.11,
"learning_rate": 0.00019606122223038376,
"loss": 0.9518,
"step": 42
},
{
"epoch": 0.12,
"learning_rate": 0.0001958129612410668,
"loss": 1.0741,
"step": 43
},
{
"epoch": 0.12,
"learning_rate": 0.0001955572805786141,
"loss": 0.9275,
"step": 44
},
{
"epoch": 0.12,
"learning_rate": 0.00019529420004271567,
"loss": 1.049,
"step": 45
},
{
"epoch": 0.13,
"learning_rate": 0.00019502374000610151,
"loss": 0.974,
"step": 46
},
{
"epoch": 0.13,
"learning_rate": 0.00019474592141296372,
"loss": 0.856,
"step": 47
},
{
"epoch": 0.13,
"learning_rate": 0.00019446076577733475,
"loss": 1.1511,
"step": 48
},
{
"epoch": 0.13,
"learning_rate": 0.00019416829518142118,
"loss": 0.8645,
"step": 49
},
{
"epoch": 0.14,
"learning_rate": 0.0001938685322738939,
"loss": 0.9042,
"step": 50
},
{
"epoch": 0.14,
"learning_rate": 0.00019356150026813405,
"loss": 0.914,
"step": 51
},
{
"epoch": 0.14,
"learning_rate": 0.00019324722294043558,
"loss": 1.1324,
"step": 52
},
{
"epoch": 0.14,
"learning_rate": 0.00019292572462816388,
"loss": 0.8499,
"step": 53
},
{
"epoch": 0.15,
"learning_rate": 0.0001925970302278711,
"loss": 1.0545,
"step": 54
},
{
"epoch": 0.15,
"learning_rate": 0.0001922611651933683,
"loss": 0.8526,
"step": 55
},
{
"epoch": 0.15,
"learning_rate": 0.00019191815553375427,
"loss": 0.909,
"step": 56
},
{
"epoch": 0.16,
"learning_rate": 0.0001915680278114014,
"loss": 1.0598,
"step": 57
},
{
"epoch": 0.16,
"learning_rate": 0.0001912108091398988,
"loss": 0.8763,
"step": 58
},
{
"epoch": 0.16,
"learning_rate": 0.00019084652718195238,
"loss": 0.905,
"step": 59
},
{
"epoch": 0.16,
"learning_rate": 0.00019047521014724304,
"loss": 0.9795,
"step": 60
},
{
"epoch": 0.16,
"eval_loss": 1.3594976663589478,
"eval_runtime": 119.047,
"eval_samples_per_second": 3.704,
"eval_steps_per_second": 1.856,
"step": 60
},
{
"epoch": 0.17,
"learning_rate": 0.0001900968867902419,
"loss": 0.8955,
"step": 61
},
{
"epoch": 0.17,
"learning_rate": 0.00018971158640798368,
"loss": 0.9148,
"step": 62
},
{
"epoch": 0.17,
"learning_rate": 0.00018931933883779785,
"loss": 1.0147,
"step": 63
},
{
"epoch": 0.17,
"learning_rate": 0.0001889201744549981,
"loss": 0.9123,
"step": 64
},
{
"epoch": 0.18,
"learning_rate": 0.0001885141241705303,
"loss": 0.7681,
"step": 65
},
{
"epoch": 0.18,
"learning_rate": 0.00018810121942857845,
"loss": 0.8203,
"step": 66
},
{
"epoch": 0.18,
"learning_rate": 0.0001876814922041299,
"loss": 0.9618,
"step": 67
},
{
"epoch": 0.19,
"learning_rate": 0.00018725497500049907,
"loss": 0.9399,
"step": 68
},
{
"epoch": 0.19,
"learning_rate": 0.00018682170084681065,
"loss": 1.0265,
"step": 69
},
{
"epoch": 0.19,
"learning_rate": 0.00018638170329544164,
"loss": 1.0596,
"step": 70
},
{
"epoch": 0.19,
"learning_rate": 0.00018593501641942317,
"loss": 1.2212,
"step": 71
},
{
"epoch": 0.2,
"learning_rate": 0.00018548167480980193,
"loss": 1.0959,
"step": 72
},
{
"epoch": 0.2,
"learning_rate": 0.00018502171357296144,
"loss": 0.7914,
"step": 73
},
{
"epoch": 0.2,
"learning_rate": 0.00018455516832790338,
"loss": 0.9506,
"step": 74
},
{
"epoch": 0.2,
"learning_rate": 0.00018408207520348942,
"loss": 0.8409,
"step": 75
},
{
"epoch": 0.21,
"learning_rate": 0.00018360247083564342,
"loss": 0.9833,
"step": 76
},
{
"epoch": 0.21,
"learning_rate": 0.00018311639236451416,
"loss": 1.0708,
"step": 77
},
{
"epoch": 0.21,
"learning_rate": 0.0001826238774315995,
"loss": 0.9121,
"step": 78
},
{
"epoch": 0.21,
"learning_rate": 0.00018212496417683137,
"loss": 0.9433,
"step": 79
},
{
"epoch": 0.22,
"learning_rate": 0.0001816196912356222,
"loss": 0.8529,
"step": 80
},
{
"epoch": 0.22,
"eval_loss": 1.3516823053359985,
"eval_runtime": 119.0215,
"eval_samples_per_second": 3.705,
"eval_steps_per_second": 1.857,
"step": 80
},
{
"epoch": 0.22,
"learning_rate": 0.000181108097735873,
"loss": 0.9438,
"step": 81
},
{
"epoch": 0.22,
"learning_rate": 0.0001805902232949435,
"loss": 0.9375,
"step": 82
},
{
"epoch": 0.23,
"learning_rate": 0.000180066108016584,
"loss": 0.9696,
"step": 83
},
{
"epoch": 0.23,
"learning_rate": 0.00017953579248782995,
"loss": 0.8329,
"step": 84
},
{
"epoch": 0.23,
"learning_rate": 0.00017899931777585882,
"loss": 0.9701,
"step": 85
},
{
"epoch": 0.23,
"learning_rate": 0.00017845672542480984,
"loss": 0.9774,
"step": 86
},
{
"epoch": 0.24,
"learning_rate": 0.00017790805745256704,
"loss": 0.8261,
"step": 87
},
{
"epoch": 0.24,
"learning_rate": 0.00017735335634750532,
"loss": 1.021,
"step": 88
},
{
"epoch": 0.24,
"learning_rate": 0.00017679266506520012,
"loss": 1.0482,
"step": 89
},
{
"epoch": 0.24,
"learning_rate": 0.00017622602702510105,
"loss": 1.0097,
"step": 90
},
{
"epoch": 0.25,
"learning_rate": 0.0001756534861071696,
"loss": 0.9386,
"step": 91
},
{
"epoch": 0.25,
"learning_rate": 0.00017507508664848094,
"loss": 0.7935,
"step": 92
},
{
"epoch": 0.25,
"learning_rate": 0.0001744908734397906,
"loss": 0.9367,
"step": 93
},
{
"epoch": 0.26,
"learning_rate": 0.00017390089172206592,
"loss": 0.7777,
"step": 94
},
{
"epoch": 0.26,
"learning_rate": 0.00017330518718298264,
"loss": 0.8662,
"step": 95
},
{
"epoch": 0.26,
"learning_rate": 0.0001727038059533868,
"loss": 0.805,
"step": 96
},
{
"epoch": 0.26,
"learning_rate": 0.0001720967946037225,
"loss": 1.0421,
"step": 97
},
{
"epoch": 0.27,
"learning_rate": 0.0001714842001404254,
"loss": 1.0119,
"step": 98
},
{
"epoch": 0.27,
"learning_rate": 0.00017086607000228282,
"loss": 0.9288,
"step": 99
},
{
"epoch": 0.27,
"learning_rate": 0.00017024245205675986,
"loss": 1.0169,
"step": 100
},
{
"epoch": 0.27,
"eval_loss": 1.344921588897705,
"eval_runtime": 119.0551,
"eval_samples_per_second": 3.704,
"eval_steps_per_second": 1.856,
"step": 100
},
{
"epoch": 0.27,
"learning_rate": 0.0001696133945962927,
"loss": 0.9399,
"step": 101
},
{
"epoch": 0.28,
"learning_rate": 0.00016897894633454886,
"loss": 0.9817,
"step": 102
},
{
"epoch": 0.28,
"learning_rate": 0.00016833915640265484,
"loss": 0.9042,
"step": 103
},
{
"epoch": 0.28,
"learning_rate": 0.00016769407434539168,
"loss": 0.8495,
"step": 104
},
{
"epoch": 0.29,
"learning_rate": 0.0001670437501173578,
"loss": 0.8376,
"step": 105
},
{
"epoch": 0.29,
"learning_rate": 0.00016638823407910084,
"loss": 0.9603,
"step": 106
},
{
"epoch": 0.29,
"learning_rate": 0.00016572757699321791,
"loss": 0.924,
"step": 107
},
{
"epoch": 0.29,
"learning_rate": 0.0001650618300204242,
"loss": 0.7669,
"step": 108
},
{
"epoch": 0.3,
"learning_rate": 0.00016439104471559156,
"loss": 0.8822,
"step": 109
},
{
"epoch": 0.3,
"learning_rate": 0.0001637152730237558,
"loss": 0.9815,
"step": 110
},
{
"epoch": 0.3,
"learning_rate": 0.0001630345672760943,
"loss": 1.0067,
"step": 111
},
{
"epoch": 0.3,
"learning_rate": 0.00016234898018587337,
"loss": 0.9053,
"step": 112
},
{
"epoch": 0.31,
"learning_rate": 0.00016165856484436645,
"loss": 0.8396,
"step": 113
},
{
"epoch": 0.31,
"learning_rate": 0.00016096337471674241,
"loss": 0.9769,
"step": 114
},
{
"epoch": 0.31,
"learning_rate": 0.00016026346363792567,
"loss": 0.8927,
"step": 115
},
{
"epoch": 0.32,
"learning_rate": 0.0001595588858084268,
"loss": 0.9105,
"step": 116
},
{
"epoch": 0.32,
"learning_rate": 0.00015884969579014566,
"loss": 0.8848,
"step": 117
},
{
"epoch": 0.32,
"learning_rate": 0.000158135948502146,
"loss": 0.8636,
"step": 118
},
{
"epoch": 0.32,
"learning_rate": 0.0001574176992164026,
"loss": 0.8177,
"step": 119
},
{
"epoch": 0.33,
"learning_rate": 0.00015669500355352116,
"loss": 0.8777,
"step": 120
},
{
"epoch": 0.33,
"eval_loss": 1.3403385877609253,
"eval_runtime": 118.9836,
"eval_samples_per_second": 3.706,
"eval_steps_per_second": 1.857,
"step": 120
},
{
"epoch": 0.33,
"learning_rate": 0.0001559679174784308,
"loss": 1.0621,
"step": 121
},
{
"epoch": 0.33,
"learning_rate": 0.0001552364972960506,
"loss": 0.9647,
"step": 122
},
{
"epoch": 0.33,
"learning_rate": 0.00015450079964692896,
"loss": 0.9974,
"step": 123
},
{
"epoch": 0.34,
"learning_rate": 0.00015376088150285773,
"loss": 0.942,
"step": 124
},
{
"epoch": 0.34,
"learning_rate": 0.00015301680016246028,
"loss": 0.927,
"step": 125
},
{
"epoch": 0.34,
"learning_rate": 0.0001522686132467543,
"loss": 0.9022,
"step": 126
},
{
"epoch": 0.35,
"learning_rate": 0.0001515163786946896,
"loss": 0.9001,
"step": 127
},
{
"epoch": 0.35,
"learning_rate": 0.0001507601547586616,
"loss": 0.9224,
"step": 128
},
{
"epoch": 0.35,
"learning_rate": 0.00015000000000000001,
"loss": 0.9607,
"step": 129
},
{
"epoch": 0.35,
"learning_rate": 0.00014923597328443422,
"loss": 0.8044,
"step": 130
},
{
"epoch": 0.36,
"learning_rate": 0.00014846813377753456,
"loss": 0.7944,
"step": 131
},
{
"epoch": 0.36,
"learning_rate": 0.00014769654094013058,
"loss": 1.0154,
"step": 132
},
{
"epoch": 0.36,
"learning_rate": 0.00014692125452370663,
"loss": 0.9003,
"step": 133
},
{
"epoch": 0.36,
"learning_rate": 0.00014614233456577454,
"loss": 0.9419,
"step": 134
},
{
"epoch": 0.37,
"learning_rate": 0.00014535984138522442,
"loss": 0.8798,
"step": 135
},
{
"epoch": 0.37,
"learning_rate": 0.00014457383557765386,
"loss": 1.0318,
"step": 136
},
{
"epoch": 0.37,
"learning_rate": 0.000143784378010675,
"loss": 0.9086,
"step": 137
},
{
"epoch": 0.38,
"learning_rate": 0.00014299152981920145,
"loss": 0.9947,
"step": 138
},
{
"epoch": 0.38,
"learning_rate": 0.00014219535240071377,
"loss": 0.9156,
"step": 139
},
{
"epoch": 0.38,
"learning_rate": 0.00014139590741050502,
"loss": 0.8692,
"step": 140
},
{
"epoch": 0.38,
"eval_loss": 1.3370156288146973,
"eval_runtime": 119.006,
"eval_samples_per_second": 3.706,
"eval_steps_per_second": 1.857,
"step": 140
},
{
"epoch": 0.38,
"learning_rate": 0.0001405932567569062,
"loss": 0.9393,
"step": 141
},
{
"epoch": 0.39,
"learning_rate": 0.00013978746259649209,
"loss": 0.8703,
"step": 142
},
{
"epoch": 0.39,
"learning_rate": 0.00013897858732926793,
"loss": 0.9319,
"step": 143
},
{
"epoch": 0.39,
"learning_rate": 0.00013816669359383726,
"loss": 1.0655,
"step": 144
},
{
"epoch": 0.39,
"learning_rate": 0.00013735184426255117,
"loss": 0.9344,
"step": 145
},
{
"epoch": 0.4,
"learning_rate": 0.00013653410243663952,
"loss": 1.0119,
"step": 146
},
{
"epoch": 0.4,
"learning_rate": 0.0001357135314413245,
"loss": 0.9353,
"step": 147
},
{
"epoch": 0.4,
"learning_rate": 0.0001348901948209167,
"loss": 0.9807,
"step": 148
},
{
"epoch": 0.41,
"learning_rate": 0.00013406415633389438,
"loss": 0.9972,
"step": 149
},
{
"epoch": 0.41,
"learning_rate": 0.00013323547994796597,
"loss": 0.8184,
"step": 150
},
{
"epoch": 0.41,
"learning_rate": 0.0001324042298351166,
"loss": 0.9096,
"step": 151
},
{
"epoch": 0.41,
"learning_rate": 0.00013157047036663853,
"loss": 0.7666,
"step": 152
},
{
"epoch": 0.42,
"learning_rate": 0.0001307342661081463,
"loss": 0.9412,
"step": 153
},
{
"epoch": 0.42,
"learning_rate": 0.00012989568181457704,
"loss": 0.7595,
"step": 154
},
{
"epoch": 0.42,
"learning_rate": 0.00012905478242517562,
"loss": 0.8968,
"step": 155
},
{
"epoch": 0.42,
"learning_rate": 0.00012821163305846596,
"loss": 0.8976,
"step": 156
},
{
"epoch": 0.43,
"learning_rate": 0.0001273662990072083,
"loss": 0.8512,
"step": 157
},
{
"epoch": 0.43,
"learning_rate": 0.00012651884573334297,
"loss": 0.9215,
"step": 158
},
{
"epoch": 0.43,
"learning_rate": 0.00012566933886292106,
"loss": 0.828,
"step": 159
},
{
"epoch": 0.44,
"learning_rate": 0.00012481784418102242,
"loss": 0.8962,
"step": 160
},
{
"epoch": 0.44,
"eval_loss": 1.3367716073989868,
"eval_runtime": 118.9758,
"eval_samples_per_second": 3.707,
"eval_steps_per_second": 1.858,
"step": 160
},
{
"epoch": 0.44,
"learning_rate": 0.00012396442762666128,
"loss": 0.8564,
"step": 161
},
{
"epoch": 0.44,
"learning_rate": 0.00012310915528768,
"loss": 0.8648,
"step": 162
},
{
"epoch": 0.44,
"learning_rate": 0.00012225209339563145,
"loss": 0.9122,
"step": 163
},
{
"epoch": 0.45,
"learning_rate": 0.00012139330832064974,
"loss": 0.8912,
"step": 164
},
{
"epoch": 0.45,
"learning_rate": 0.00012053286656631093,
"loss": 0.8872,
"step": 165
},
{
"epoch": 0.45,
"learning_rate": 0.00011967083476448282,
"loss": 0.8292,
"step": 166
},
{
"epoch": 0.45,
"learning_rate": 0.00011880727967016514,
"loss": 0.858,
"step": 167
},
{
"epoch": 0.46,
"learning_rate": 0.00011794226815632012,
"loss": 0.8548,
"step": 168
},
{
"epoch": 0.46,
"learning_rate": 0.00011707586720869374,
"loss": 0.8587,
"step": 169
},
{
"epoch": 0.46,
"learning_rate": 0.00011620814392062873,
"loss": 1.0234,
"step": 170
},
{
"epoch": 0.47,
"learning_rate": 0.00011533916548786857,
"loss": 0.9115,
"step": 171
},
{
"epoch": 0.47,
"learning_rate": 0.00011446899920335405,
"loss": 1.0819,
"step": 172
},
{
"epoch": 0.47,
"learning_rate": 0.00011359771245201232,
"loss": 0.8818,
"step": 173
},
{
"epoch": 0.47,
"learning_rate": 0.00011272537270553836,
"loss": 0.8352,
"step": 174
},
{
"epoch": 0.48,
"learning_rate": 0.00011185204751717029,
"loss": 0.8977,
"step": 175
},
{
"epoch": 0.48,
"learning_rate": 0.00011097780451645792,
"loss": 0.855,
"step": 176
},
{
"epoch": 0.48,
"learning_rate": 0.00011010271140402579,
"loss": 0.7944,
"step": 177
},
{
"epoch": 0.48,
"learning_rate": 0.00010922683594633021,
"loss": 0.9017,
"step": 178
},
{
"epoch": 0.49,
"learning_rate": 0.0001083502459704117,
"loss": 0.7825,
"step": 179
},
{
"epoch": 0.49,
"learning_rate": 0.00010747300935864243,
"loss": 0.7996,
"step": 180
},
{
"epoch": 0.49,
"eval_loss": 1.3361833095550537,
"eval_runtime": 119.0783,
"eval_samples_per_second": 3.703,
"eval_steps_per_second": 1.856,
"step": 180
},
{
"epoch": 0.49,
"learning_rate": 0.00010659519404346954,
"loss": 0.9062,
"step": 181
},
{
"epoch": 0.5,
"learning_rate": 0.00010571686800215444,
"loss": 0.7366,
"step": 182
},
{
"epoch": 0.5,
"learning_rate": 0.00010483809925150869,
"loss": 0.7322,
"step": 183
},
{
"epoch": 0.5,
"learning_rate": 0.00010395895584262696,
"loss": 0.7953,
"step": 184
},
{
"epoch": 0.5,
"learning_rate": 0.00010307950585561706,
"loss": 0.848,
"step": 185
},
{
"epoch": 0.51,
"learning_rate": 0.00010219981739432795,
"loss": 1.0637,
"step": 186
},
{
"epoch": 0.51,
"learning_rate": 0.00010131995858107591,
"loss": 0.8968,
"step": 187
},
{
"epoch": 0.51,
"learning_rate": 0.00010043999755136904,
"loss": 1.0545,
"step": 188
},
{
"epoch": 0.51,
"learning_rate": 9.9560002448631e-05,
"loss": 0.9912,
"step": 189
},
{
"epoch": 0.52,
"learning_rate": 9.868004141892411e-05,
"loss": 0.8248,
"step": 190
},
{
"epoch": 0.52,
"learning_rate": 9.780018260567207e-05,
"loss": 0.9459,
"step": 191
},
{
"epoch": 0.52,
"learning_rate": 9.692049414438299e-05,
"loss": 0.8605,
"step": 192
},
{
"epoch": 0.53,
"learning_rate": 9.604104415737308e-05,
"loss": 0.8436,
"step": 193
},
{
"epoch": 0.53,
"learning_rate": 9.516190074849134e-05,
"loss": 0.9136,
"step": 194
},
{
"epoch": 0.53,
"learning_rate": 9.428313199784556e-05,
"loss": 1.0565,
"step": 195
},
{
"epoch": 0.53,
"learning_rate": 9.340480595653047e-05,
"loss": 0.9397,
"step": 196
},
{
"epoch": 0.54,
"learning_rate": 9.252699064135758e-05,
"loss": 0.9039,
"step": 197
},
{
"epoch": 0.54,
"learning_rate": 9.164975402958834e-05,
"loss": 0.9025,
"step": 198
},
{
"epoch": 0.54,
"learning_rate": 9.077316405366981e-05,
"loss": 1.0392,
"step": 199
},
{
"epoch": 0.54,
"learning_rate": 8.989728859597424e-05,
"loss": 0.8475,
"step": 200
},
{
"epoch": 0.54,
"eval_loss": 1.3337310552597046,
"eval_runtime": 119.0825,
"eval_samples_per_second": 3.703,
"eval_steps_per_second": 1.856,
"step": 200
},
{
"epoch": 0.55,
"learning_rate": 8.902219548354209e-05,
"loss": 1.0189,
"step": 201
},
{
"epoch": 0.55,
"learning_rate": 8.814795248282974e-05,
"loss": 0.9149,
"step": 202
},
{
"epoch": 0.55,
"learning_rate": 8.727462729446167e-05,
"loss": 0.8856,
"step": 203
},
{
"epoch": 0.56,
"learning_rate": 8.640228754798773e-05,
"loss": 0.8605,
"step": 204
},
{
"epoch": 0.56,
"learning_rate": 8.553100079664598e-05,
"loss": 0.9093,
"step": 205
},
{
"epoch": 0.56,
"learning_rate": 8.466083451213144e-05,
"loss": 0.8779,
"step": 206
},
{
"epoch": 0.56,
"learning_rate": 8.379185607937126e-05,
"loss": 0.9265,
"step": 207
},
{
"epoch": 0.57,
"learning_rate": 8.292413279130624e-05,
"loss": 1.049,
"step": 208
},
{
"epoch": 0.57,
"learning_rate": 8.205773184367991e-05,
"loss": 0.8172,
"step": 209
},
{
"epoch": 0.57,
"learning_rate": 8.119272032983487e-05,
"loss": 0.927,
"step": 210
},
{
"epoch": 0.57,
"learning_rate": 8.03291652355172e-05,
"loss": 1.0608,
"step": 211
},
{
"epoch": 0.58,
"learning_rate": 7.94671334336891e-05,
"loss": 0.8668,
"step": 212
},
{
"epoch": 0.58,
"learning_rate": 7.860669167935028e-05,
"loss": 0.9103,
"step": 213
},
{
"epoch": 0.58,
"learning_rate": 7.774790660436858e-05,
"loss": 0.8594,
"step": 214
},
{
"epoch": 0.59,
"learning_rate": 7.689084471232001e-05,
"loss": 0.8572,
"step": 215
},
{
"epoch": 0.59,
"learning_rate": 7.603557237333877e-05,
"loss": 0.956,
"step": 216
},
{
"epoch": 0.59,
"learning_rate": 7.518215581897763e-05,
"loss": 0.8796,
"step": 217
},
{
"epoch": 0.59,
"learning_rate": 7.433066113707896e-05,
"loss": 1.0585,
"step": 218
},
{
"epoch": 0.6,
"learning_rate": 7.348115426665705e-05,
"loss": 0.7626,
"step": 219
},
{
"epoch": 0.6,
"learning_rate": 7.263370099279172e-05,
"loss": 0.8566,
"step": 220
},
{
"epoch": 0.6,
"eval_loss": 1.3307039737701416,
"eval_runtime": 119.0797,
"eval_samples_per_second": 3.703,
"eval_steps_per_second": 1.856,
"step": 220
},
{
"epoch": 0.6,
"learning_rate": 7.178836694153405e-05,
"loss": 0.8049,
"step": 221
},
{
"epoch": 0.6,
"learning_rate": 7.09452175748244e-05,
"loss": 1.0561,
"step": 222
},
{
"epoch": 0.61,
"learning_rate": 7.010431818542297e-05,
"loss": 0.7266,
"step": 223
},
{
"epoch": 0.61,
"learning_rate": 6.926573389185371e-05,
"loss": 0.8773,
"step": 224
},
{
"epoch": 0.61,
"learning_rate": 6.842952963336153e-05,
"loss": 0.9103,
"step": 225
},
{
"epoch": 0.61,
"learning_rate": 6.759577016488343e-05,
"loss": 0.6977,
"step": 226
},
{
"epoch": 0.62,
"learning_rate": 6.676452005203406e-05,
"loss": 0.8085,
"step": 227
},
{
"epoch": 0.62,
"learning_rate": 6.593584366610566e-05,
"loss": 0.8944,
"step": 228
},
{
"epoch": 0.62,
"learning_rate": 6.510980517908334e-05,
"loss": 0.7846,
"step": 229
},
{
"epoch": 0.63,
"learning_rate": 6.428646855867553e-05,
"loss": 0.9049,
"step": 230
},
{
"epoch": 0.63,
"learning_rate": 6.34658975633605e-05,
"loss": 1.0148,
"step": 231
},
{
"epoch": 0.63,
"learning_rate": 6.264815573744884e-05,
"loss": 0.8781,
"step": 232
},
{
"epoch": 0.63,
"learning_rate": 6.183330640616273e-05,
"loss": 0.9242,
"step": 233
},
{
"epoch": 0.64,
"learning_rate": 6.102141267073207e-05,
"loss": 0.8635,
"step": 234
},
{
"epoch": 0.64,
"learning_rate": 6.021253740350793e-05,
"loss": 1.0117,
"step": 235
},
{
"epoch": 0.64,
"learning_rate": 5.9406743243093807e-05,
"loss": 0.9206,
"step": 236
},
{
"epoch": 0.64,
"learning_rate": 5.8604092589494994e-05,
"loss": 1.0006,
"step": 237
},
{
"epoch": 0.65,
"learning_rate": 5.780464759928623e-05,
"loss": 0.8629,
"step": 238
},
{
"epoch": 0.65,
"learning_rate": 5.700847018079856e-05,
"loss": 0.9166,
"step": 239
},
{
"epoch": 0.65,
"learning_rate": 5.6215621989325e-05,
"loss": 0.8463,
"step": 240
},
{
"epoch": 0.65,
"eval_loss": 1.3288977146148682,
"eval_runtime": 118.9998,
"eval_samples_per_second": 3.706,
"eval_steps_per_second": 1.857,
"step": 240
},
{
"epoch": 0.66,
"learning_rate": 5.542616442234618e-05,
"loss": 1.0178,
"step": 241
},
{
"epoch": 0.66,
"learning_rate": 5.464015861477557e-05,
"loss": 0.8584,
"step": 242
},
{
"epoch": 0.66,
"learning_rate": 5.385766543422551e-05,
"loss": 0.8194,
"step": 243
},
{
"epoch": 0.66,
"learning_rate": 5.307874547629339e-05,
"loss": 0.8743,
"step": 244
},
{
"epoch": 0.67,
"learning_rate": 5.230345905986944e-05,
"loss": 0.9445,
"step": 245
},
{
"epoch": 0.67,
"learning_rate": 5.1531866222465466e-05,
"loss": 1.0006,
"step": 246
},
{
"epoch": 0.67,
"learning_rate": 5.0764026715565785e-05,
"loss": 0.7993,
"step": 247
},
{
"epoch": 0.67,
"learning_rate": 5.000000000000002e-05,
"loss": 0.9364,
"step": 248
},
{
"epoch": 0.68,
"learning_rate": 4.9239845241338435e-05,
"loss": 0.8884,
"step": 249
},
{
"epoch": 0.68,
"learning_rate": 4.848362130531039e-05,
"loss": 0.9054,
"step": 250
},
{
"epoch": 0.68,
"learning_rate": 4.7731386753245675e-05,
"loss": 1.0429,
"step": 251
},
{
"epoch": 0.69,
"learning_rate": 4.6983199837539705e-05,
"loss": 0.8428,
"step": 252
},
{
"epoch": 0.69,
"learning_rate": 4.6239118497142256e-05,
"loss": 0.7732,
"step": 253
},
{
"epoch": 0.69,
"learning_rate": 4.549920035307107e-05,
"loss": 1.0696,
"step": 254
},
{
"epoch": 0.69,
"learning_rate": 4.476350270394942e-05,
"loss": 0.8644,
"step": 255
},
{
"epoch": 0.7,
"learning_rate": 4.403208252156921e-05,
"loss": 0.8967,
"step": 256
},
{
"epoch": 0.7,
"learning_rate": 4.3304996446478854e-05,
"loss": 0.7205,
"step": 257
},
{
"epoch": 0.7,
"learning_rate": 4.2582300783597404e-05,
"loss": 1.0014,
"step": 258
},
{
"epoch": 0.7,
"learning_rate": 4.186405149785403e-05,
"loss": 0.7683,
"step": 259
},
{
"epoch": 0.71,
"learning_rate": 4.115030420985437e-05,
"loss": 0.8781,
"step": 260
},
{
"epoch": 0.71,
"eval_loss": 1.3258484601974487,
"eval_runtime": 119.0026,
"eval_samples_per_second": 3.706,
"eval_steps_per_second": 1.857,
"step": 260
},
{
"epoch": 0.71,
"learning_rate": 4.044111419157326e-05,
"loss": 0.962,
"step": 261
},
{
"epoch": 0.71,
"learning_rate": 3.973653636207437e-05,
"loss": 0.8061,
"step": 262
},
{
"epoch": 0.72,
"learning_rate": 3.903662528325759e-05,
"loss": 0.9114,
"step": 263
},
{
"epoch": 0.72,
"learning_rate": 3.834143515563358e-05,
"loss": 0.9192,
"step": 264
},
{
"epoch": 0.72,
"learning_rate": 3.7651019814126654e-05,
"loss": 0.9397,
"step": 265
},
{
"epoch": 0.72,
"learning_rate": 3.6965432723905735e-05,
"loss": 0.9167,
"step": 266
},
{
"epoch": 0.73,
"learning_rate": 3.628472697624422e-05,
"loss": 0.9877,
"step": 267
},
{
"epoch": 0.73,
"learning_rate": 3.5608955284408443e-05,
"loss": 1.1038,
"step": 268
},
{
"epoch": 0.73,
"learning_rate": 3.493816997957582e-05,
"loss": 0.868,
"step": 269
},
{
"epoch": 0.73,
"learning_rate": 3.427242300678213e-05,
"loss": 0.7829,
"step": 270
},
{
"epoch": 0.74,
"learning_rate": 3.361176592089919e-05,
"loss": 0.8135,
"step": 271
},
{
"epoch": 0.74,
"learning_rate": 3.295624988264224e-05,
"loss": 0.8382,
"step": 272
},
{
"epoch": 0.74,
"learning_rate": 3.2305925654608326e-05,
"loss": 1.0243,
"step": 273
},
{
"epoch": 0.75,
"learning_rate": 3.1660843597345135e-05,
"loss": 0.7312,
"step": 274
},
{
"epoch": 0.75,
"learning_rate": 3.1021053665451206e-05,
"loss": 0.9365,
"step": 275
},
{
"epoch": 0.75,
"learning_rate": 3.0386605403707346e-05,
"loss": 0.7903,
"step": 276
},
{
"epoch": 0.75,
"learning_rate": 2.975754794324015e-05,
"loss": 0.8536,
"step": 277
},
{
"epoch": 0.76,
"learning_rate": 2.913392999771718e-05,
"loss": 0.993,
"step": 278
},
{
"epoch": 0.76,
"learning_rate": 2.8515799859574588e-05,
"loss": 0.7764,
"step": 279
},
{
"epoch": 0.76,
"learning_rate": 2.7903205396277542e-05,
"loss": 0.9057,
"step": 280
},
{
"epoch": 0.76,
"eval_loss": 1.3229844570159912,
"eval_runtime": 119.0121,
"eval_samples_per_second": 3.706,
"eval_steps_per_second": 1.857,
"step": 280
},
{
"epoch": 0.76,
"learning_rate": 2.729619404661321e-05,
"loss": 0.8637,
"step": 281
},
{
"epoch": 0.77,
"learning_rate": 2.669481281701739e-05,
"loss": 0.8564,
"step": 282
},
{
"epoch": 0.77,
"learning_rate": 2.6099108277934103e-05,
"loss": 0.9834,
"step": 283
},
{
"epoch": 0.77,
"learning_rate": 2.5509126560209428e-05,
"loss": 0.8363,
"step": 284
},
{
"epoch": 0.78,
"learning_rate": 2.4924913351519084e-05,
"loss": 0.8882,
"step": 285
},
{
"epoch": 0.78,
"learning_rate": 2.4346513892830423e-05,
"loss": 0.8547,
"step": 286
},
{
"epoch": 0.78,
"learning_rate": 2.377397297489895e-05,
"loss": 0.9304,
"step": 287
},
{
"epoch": 0.78,
"learning_rate": 2.320733493479992e-05,
"loss": 0.9409,
"step": 288
},
{
"epoch": 0.79,
"learning_rate": 2.2646643652494692e-05,
"loss": 0.9414,
"step": 289
},
{
"epoch": 0.79,
"learning_rate": 2.2091942547432955e-05,
"loss": 0.8788,
"step": 290
},
{
"epoch": 0.79,
"learning_rate": 2.1543274575190188e-05,
"loss": 0.8338,
"step": 291
},
{
"epoch": 0.79,
"learning_rate": 2.100068222414121e-05,
"loss": 0.7986,
"step": 292
},
{
"epoch": 0.8,
"learning_rate": 2.0464207512170065e-05,
"loss": 1.0324,
"step": 293
},
{
"epoch": 0.8,
"learning_rate": 1.993389198341601e-05,
"loss": 0.8743,
"step": 294
},
{
"epoch": 0.8,
"learning_rate": 1.9409776705056516e-05,
"loss": 0.8304,
"step": 295
},
{
"epoch": 0.81,
"learning_rate": 1.8891902264127004e-05,
"loss": 0.8021,
"step": 296
},
{
"epoch": 0.81,
"learning_rate": 1.8380308764377842e-05,
"loss": 0.9101,
"step": 297
},
{
"epoch": 0.81,
"learning_rate": 1.787503582316864e-05,
"loss": 0.8156,
"step": 298
},
{
"epoch": 0.81,
"learning_rate": 1.7376122568400532e-05,
"loss": 0.7932,
"step": 299
},
{
"epoch": 0.82,
"learning_rate": 1.6883607635485877e-05,
"loss": 0.9364,
"step": 300
},
{
"epoch": 0.82,
"eval_loss": 1.3215992450714111,
"eval_runtime": 118.9898,
"eval_samples_per_second": 3.706,
"eval_steps_per_second": 1.857,
"step": 300
},
{
"epoch": 0.82,
"learning_rate": 1.6397529164356606e-05,
"loss": 0.9039,
"step": 301
},
{
"epoch": 0.82,
"learning_rate": 1.5917924796510587e-05,
"loss": 0.9557,
"step": 302
},
{
"epoch": 0.82,
"learning_rate": 1.544483167209664e-05,
"loss": 0.8927,
"step": 303
},
{
"epoch": 0.83,
"learning_rate": 1.4978286427038601e-05,
"loss": 0.9724,
"step": 304
},
{
"epoch": 0.83,
"learning_rate": 1.4518325190198078e-05,
"loss": 0.9405,
"step": 305
},
{
"epoch": 0.83,
"learning_rate": 1.406498358057683e-05,
"loss": 0.9551,
"step": 306
},
{
"epoch": 0.84,
"learning_rate": 1.3618296704558364e-05,
"loss": 0.8297,
"step": 307
},
{
"epoch": 0.84,
"learning_rate": 1.3178299153189366e-05,
"loss": 0.8766,
"step": 308
},
{
"epoch": 0.84,
"learning_rate": 1.2745024999500943e-05,
"loss": 0.8472,
"step": 309
},
{
"epoch": 0.84,
"learning_rate": 1.2318507795870138e-05,
"loss": 0.7698,
"step": 310
},
{
"epoch": 0.85,
"learning_rate": 1.1898780571421552e-05,
"loss": 0.933,
"step": 311
},
{
"epoch": 0.85,
"learning_rate": 1.1485875829469705e-05,
"loss": 0.9372,
"step": 312
},
{
"epoch": 0.85,
"learning_rate": 1.1079825545001888e-05,
"loss": 0.6934,
"step": 313
},
{
"epoch": 0.85,
"learning_rate": 1.0680661162202177e-05,
"loss": 0.9019,
"step": 314
},
{
"epoch": 0.86,
"learning_rate": 1.0288413592016343e-05,
"loss": 1.0839,
"step": 315
},
{
"epoch": 0.86,
"learning_rate": 9.903113209758096e-06,
"loss": 0.9194,
"step": 316
},
{
"epoch": 0.86,
"learning_rate": 9.524789852756954e-06,
"loss": 0.8442,
"step": 317
},
{
"epoch": 0.87,
"learning_rate": 9.153472818047625e-06,
"loss": 0.8134,
"step": 318
},
{
"epoch": 0.87,
"learning_rate": 8.789190860101225e-06,
"loss": 0.8297,
"step": 319
},
{
"epoch": 0.87,
"learning_rate": 8.43197218859858e-06,
"loss": 0.9635,
"step": 320
},
{
"epoch": 0.87,
"eval_loss": 1.3210258483886719,
"eval_runtime": 119.0221,
"eval_samples_per_second": 3.705,
"eval_steps_per_second": 1.857,
"step": 320
},
{
"epoch": 0.87,
"learning_rate": 8.081844466245737e-06,
"loss": 0.7029,
"step": 321
},
{
"epoch": 0.88,
"learning_rate": 7.738834806631711e-06,
"loss": 0.898,
"step": 322
},
{
"epoch": 0.88,
"learning_rate": 7.402969772128931e-06,
"loss": 0.8886,
"step": 323
},
{
"epoch": 0.88,
"learning_rate": 7.074275371836148e-06,
"loss": 0.7764,
"step": 324
},
{
"epoch": 0.88,
"learning_rate": 6.75277705956443e-06,
"loss": 0.8431,
"step": 325
},
{
"epoch": 0.89,
"learning_rate": 6.438499731865966e-06,
"loss": 0.8315,
"step": 326
},
{
"epoch": 0.89,
"learning_rate": 6.131467726106144e-06,
"loss": 0.8022,
"step": 327
},
{
"epoch": 0.89,
"learning_rate": 5.831704818578843e-06,
"loss": 1.0034,
"step": 328
},
{
"epoch": 0.9,
"learning_rate": 5.539234222665279e-06,
"loss": 0.9289,
"step": 329
},
{
"epoch": 0.9,
"learning_rate": 5.2540785870362815e-06,
"loss": 1.1203,
"step": 330
},
{
"epoch": 0.9,
"learning_rate": 4.976259993898502e-06,
"loss": 0.8166,
"step": 331
},
{
"epoch": 0.9,
"learning_rate": 4.705799957284351e-06,
"loss": 0.872,
"step": 332
},
{
"epoch": 0.91,
"learning_rate": 4.442719421385922e-06,
"loss": 0.914,
"step": 333
},
{
"epoch": 0.91,
"learning_rate": 4.187038758933204e-06,
"loss": 0.9243,
"step": 334
},
{
"epoch": 0.91,
"learning_rate": 3.938777769616275e-06,
"loss": 0.7485,
"step": 335
},
{
"epoch": 0.91,
"learning_rate": 3.6979556785522116e-06,
"loss": 0.835,
"step": 336
},
{
"epoch": 0.92,
"learning_rate": 3.4645911347961357e-06,
"loss": 0.8814,
"step": 337
},
{
"epoch": 0.92,
"learning_rate": 3.2387022098972153e-06,
"loss": 0.7235,
"step": 338
},
{
"epoch": 0.92,
"learning_rate": 3.0203063964990617e-06,
"loss": 0.9117,
"step": 339
},
{
"epoch": 0.93,
"learning_rate": 2.809420606985236e-06,
"loss": 1.0049,
"step": 340
},
{
"epoch": 0.93,
"eval_loss": 1.3204991817474365,
"eval_runtime": 119.0009,
"eval_samples_per_second": 3.706,
"eval_steps_per_second": 1.857,
"step": 340
},
{
"epoch": 0.93,
"learning_rate": 2.606061172169527e-06,
"loss": 0.8719,
"step": 341
},
{
"epoch": 0.93,
"learning_rate": 2.410243840031279e-06,
"loss": 0.9375,
"step": 342
},
{
"epoch": 0.93,
"learning_rate": 2.2219837744959283e-06,
"loss": 0.8642,
"step": 343
},
{
"epoch": 0.94,
"learning_rate": 2.0412955542606473e-06,
"loss": 0.9535,
"step": 344
},
{
"epoch": 0.94,
"learning_rate": 1.8681931716655221e-06,
"loss": 0.8538,
"step": 345
},
{
"epoch": 0.94,
"learning_rate": 1.7026900316098215e-06,
"loss": 0.736,
"step": 346
},
{
"epoch": 0.94,
"learning_rate": 1.5447989505140925e-06,
"loss": 0.8917,
"step": 347
},
{
"epoch": 0.95,
"learning_rate": 1.3945321553275326e-06,
"loss": 0.7953,
"step": 348
},
{
"epoch": 0.95,
"learning_rate": 1.2519012825812804e-06,
"loss": 0.9073,
"step": 349
},
{
"epoch": 0.95,
"learning_rate": 1.1169173774871478e-06,
"loss": 0.8242,
"step": 350
},
{
"epoch": 0.96,
"learning_rate": 9.89590893082426e-07,
"loss": 1.0244,
"step": 351
},
{
"epoch": 0.96,
"learning_rate": 8.699316894203224e-07,
"loss": 0.8013,
"step": 352
},
{
"epoch": 0.96,
"learning_rate": 7.579490328064265e-07,
"loss": 0.8989,
"step": 353
},
{
"epoch": 0.96,
"learning_rate": 6.536515950811395e-07,
"loss": 1.013,
"step": 354
},
{
"epoch": 0.97,
"learning_rate": 5.570474529481562e-07,
"loss": 0.8283,
"step": 355
},
{
"epoch": 0.97,
"learning_rate": 4.681440873489762e-07,
"loss": 0.819,
"step": 356
},
{
"epoch": 0.97,
"learning_rate": 3.869483828836007e-07,
"loss": 1.0522,
"step": 357
},
{
"epoch": 0.97,
"learning_rate": 3.134666272774034e-07,
"loss": 1.0221,
"step": 358
},
{
"epoch": 0.98,
"learning_rate": 2.477045108941978e-07,
"loss": 0.93,
"step": 359
},
{
"epoch": 0.98,
"learning_rate": 1.8966712629558957e-07,
"loss": 0.901,
"step": 360
},
{
"epoch": 0.98,
"eval_loss": 1.3202154636383057,
"eval_runtime": 118.9855,
"eval_samples_per_second": 3.706,
"eval_steps_per_second": 1.857,
"step": 360
},
{
"epoch": 0.98,
"learning_rate": 1.393589678466367e-07,
"loss": 1.0046,
"step": 361
},
{
"epoch": 0.99,
"learning_rate": 9.678393136776098e-08,
"loss": 0.9131,
"step": 362
},
{
"epoch": 0.99,
"learning_rate": 6.194531383307833e-08,
"loss": 1.0747,
"step": 363
},
{
"epoch": 0.99,
"learning_rate": 3.484581311511414e-08,
"loss": 0.994,
"step": 364
},
{
"epoch": 0.99,
"learning_rate": 1.5487527775848164e-08,
"loss": 0.8788,
"step": 365
},
{
"epoch": 1.0,
"learning_rate": 3.87195690421116e-09,
"loss": 0.8329,
"step": 366
},
{
"epoch": 1.0,
"learning_rate": 0.0,
"loss": 0.963,
"step": 367
}
],
"logging_steps": 1,
"max_steps": 367,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 1.0382469155751199e+18,
"trial_name": null,
"trial_params": null
}