{
"best_metric": 0.9652652915549957,
"best_model_checkpoint": "checkpoints/BEE-spoke-data-bert-plus-L8-v1.0-allNLI_matryoshka-synthetic-text-similarity-Mar-21_10-51/checkpoint-1800",
"epoch": 0.2894472361809045,
"eval_steps": 300,
"global_step": 1800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 3.4826388359069824,
"learning_rate": 1.2861736334405146e-07,
"loss": 0.0905,
"step": 4
},
{
"epoch": 0.0,
"grad_norm": 2.3191146850585938,
"learning_rate": 2.572347266881029e-07,
"loss": 0.1138,
"step": 8
},
{
"epoch": 0.0,
"grad_norm": 2.398422956466675,
"learning_rate": 3.858520900321544e-07,
"loss": 0.1055,
"step": 12
},
{
"epoch": 0.0,
"grad_norm": 3.034379720687866,
"learning_rate": 5.144694533762058e-07,
"loss": 0.1113,
"step": 16
},
{
"epoch": 0.0,
"grad_norm": 5.541696071624756,
"learning_rate": 6.430868167202573e-07,
"loss": 0.0875,
"step": 20
},
{
"epoch": 0.0,
"grad_norm": 3.2232377529144287,
"learning_rate": 7.717041800643088e-07,
"loss": 0.1163,
"step": 24
},
{
"epoch": 0.0,
"grad_norm": 1.692831039428711,
"learning_rate": 9.003215434083602e-07,
"loss": 0.0746,
"step": 28
},
{
"epoch": 0.01,
"grad_norm": 0.9550053477287292,
"learning_rate": 1.0289389067524116e-06,
"loss": 0.0697,
"step": 32
},
{
"epoch": 0.01,
"grad_norm": 2.5329389572143555,
"learning_rate": 1.157556270096463e-06,
"loss": 0.0716,
"step": 36
},
{
"epoch": 0.01,
"grad_norm": 2.0605404376983643,
"learning_rate": 1.2861736334405146e-06,
"loss": 0.0478,
"step": 40
},
{
"epoch": 0.01,
"grad_norm": 3.4411487579345703,
"learning_rate": 1.4147909967845661e-06,
"loss": 0.0971,
"step": 44
},
{
"epoch": 0.01,
"grad_norm": 1.9422132968902588,
"learning_rate": 1.5434083601286177e-06,
"loss": 0.0567,
"step": 48
},
{
"epoch": 0.01,
"grad_norm": 2.2038466930389404,
"learning_rate": 1.6720257234726688e-06,
"loss": 0.0477,
"step": 52
},
{
"epoch": 0.01,
"grad_norm": 1.7133512496948242,
"learning_rate": 1.8006430868167204e-06,
"loss": 0.0528,
"step": 56
},
{
"epoch": 0.01,
"grad_norm": 1.754583716392517,
"learning_rate": 1.9292604501607717e-06,
"loss": 0.048,
"step": 60
},
{
"epoch": 0.01,
"grad_norm": 0.8963572382926941,
"learning_rate": 2.0578778135048233e-06,
"loss": 0.0508,
"step": 64
},
{
"epoch": 0.01,
"grad_norm": 1.0214054584503174,
"learning_rate": 2.186495176848875e-06,
"loss": 0.0485,
"step": 68
},
{
"epoch": 0.01,
"grad_norm": 1.9729621410369873,
"learning_rate": 2.315112540192926e-06,
"loss": 0.051,
"step": 72
},
{
"epoch": 0.01,
"grad_norm": 1.5615208148956299,
"learning_rate": 2.4437299035369775e-06,
"loss": 0.0604,
"step": 76
},
{
"epoch": 0.01,
"grad_norm": 1.6368948221206665,
"learning_rate": 2.572347266881029e-06,
"loss": 0.0442,
"step": 80
},
{
"epoch": 0.01,
"grad_norm": 2.133140802383423,
"learning_rate": 2.7009646302250807e-06,
"loss": 0.0302,
"step": 84
},
{
"epoch": 0.01,
"grad_norm": 1.2055103778839111,
"learning_rate": 2.8295819935691322e-06,
"loss": 0.0432,
"step": 88
},
{
"epoch": 0.01,
"grad_norm": 1.5411338806152344,
"learning_rate": 2.9581993569131834e-06,
"loss": 0.0319,
"step": 92
},
{
"epoch": 0.02,
"grad_norm": 0.75223708152771,
"learning_rate": 3.0868167202572353e-06,
"loss": 0.0364,
"step": 96
},
{
"epoch": 0.02,
"grad_norm": 1.4433358907699585,
"learning_rate": 3.2154340836012865e-06,
"loss": 0.0661,
"step": 100
},
{
"epoch": 0.02,
"grad_norm": 1.5212054252624512,
"learning_rate": 3.3440514469453376e-06,
"loss": 0.0609,
"step": 104
},
{
"epoch": 0.02,
"grad_norm": 2.2315409183502197,
"learning_rate": 3.4726688102893896e-06,
"loss": 0.0546,
"step": 108
},
{
"epoch": 0.02,
"grad_norm": 1.073073148727417,
"learning_rate": 3.6012861736334407e-06,
"loss": 0.0686,
"step": 112
},
{
"epoch": 0.02,
"grad_norm": 1.4473097324371338,
"learning_rate": 3.7299035369774923e-06,
"loss": 0.0363,
"step": 116
},
{
"epoch": 0.02,
"grad_norm": 2.4474904537200928,
"learning_rate": 3.8585209003215434e-06,
"loss": 0.0425,
"step": 120
},
{
"epoch": 0.02,
"grad_norm": 2.410780429840088,
"learning_rate": 3.987138263665595e-06,
"loss": 0.057,
"step": 124
},
{
"epoch": 0.02,
"grad_norm": 1.380175232887268,
"learning_rate": 4.1157556270096466e-06,
"loss": 0.045,
"step": 128
},
{
"epoch": 0.02,
"grad_norm": 1.0906972885131836,
"learning_rate": 4.244372990353698e-06,
"loss": 0.0338,
"step": 132
},
{
"epoch": 0.02,
"grad_norm": 1.6216851472854614,
"learning_rate": 4.37299035369775e-06,
"loss": 0.036,
"step": 136
},
{
"epoch": 0.02,
"grad_norm": 0.806192934513092,
"learning_rate": 4.501607717041801e-06,
"loss": 0.037,
"step": 140
},
{
"epoch": 0.02,
"grad_norm": 0.959051251411438,
"learning_rate": 4.630225080385852e-06,
"loss": 0.0473,
"step": 144
},
{
"epoch": 0.02,
"grad_norm": 0.4807804226875305,
"learning_rate": 4.758842443729904e-06,
"loss": 0.022,
"step": 148
},
{
"epoch": 0.02,
"grad_norm": 1.982123613357544,
"learning_rate": 4.887459807073955e-06,
"loss": 0.04,
"step": 152
},
{
"epoch": 0.03,
"grad_norm": 1.4609640836715698,
"learning_rate": 5.016077170418007e-06,
"loss": 0.0266,
"step": 156
},
{
"epoch": 0.03,
"grad_norm": 1.4000838994979858,
"learning_rate": 5.144694533762058e-06,
"loss": 0.0211,
"step": 160
},
{
"epoch": 0.03,
"grad_norm": 0.828529953956604,
"learning_rate": 5.273311897106109e-06,
"loss": 0.0239,
"step": 164
},
{
"epoch": 0.03,
"grad_norm": 0.8723558187484741,
"learning_rate": 5.401929260450161e-06,
"loss": 0.0273,
"step": 168
},
{
"epoch": 0.03,
"grad_norm": 1.4310917854309082,
"learning_rate": 5.530546623794213e-06,
"loss": 0.0383,
"step": 172
},
{
"epoch": 0.03,
"grad_norm": 1.0429316759109497,
"learning_rate": 5.6591639871382644e-06,
"loss": 0.0348,
"step": 176
},
{
"epoch": 0.03,
"grad_norm": 0.7097664475440979,
"learning_rate": 5.787781350482315e-06,
"loss": 0.0199,
"step": 180
},
{
"epoch": 0.03,
"grad_norm": 1.269923448562622,
"learning_rate": 5.916398713826367e-06,
"loss": 0.0274,
"step": 184
},
{
"epoch": 0.03,
"grad_norm": 1.1587966680526733,
"learning_rate": 6.045016077170418e-06,
"loss": 0.0347,
"step": 188
},
{
"epoch": 0.03,
"grad_norm": 1.1199533939361572,
"learning_rate": 6.173633440514471e-06,
"loss": 0.0294,
"step": 192
},
{
"epoch": 0.03,
"grad_norm": 0.9978614449501038,
"learning_rate": 6.302250803858521e-06,
"loss": 0.0237,
"step": 196
},
{
"epoch": 0.03,
"grad_norm": 1.0913878679275513,
"learning_rate": 6.430868167202573e-06,
"loss": 0.039,
"step": 200
},
{
"epoch": 0.03,
"grad_norm": 0.7430211901664734,
"learning_rate": 6.5594855305466245e-06,
"loss": 0.0328,
"step": 204
},
{
"epoch": 0.03,
"grad_norm": 1.597890019416809,
"learning_rate": 6.688102893890675e-06,
"loss": 0.033,
"step": 208
},
{
"epoch": 0.03,
"grad_norm": 0.8224192261695862,
"learning_rate": 6.816720257234727e-06,
"loss": 0.0207,
"step": 212
},
{
"epoch": 0.03,
"grad_norm": 2.1937623023986816,
"learning_rate": 6.945337620578779e-06,
"loss": 0.0341,
"step": 216
},
{
"epoch": 0.04,
"grad_norm": 1.337793231010437,
"learning_rate": 7.073954983922831e-06,
"loss": 0.0359,
"step": 220
},
{
"epoch": 0.04,
"grad_norm": 1.3222864866256714,
"learning_rate": 7.2025723472668815e-06,
"loss": 0.0341,
"step": 224
},
{
"epoch": 0.04,
"grad_norm": 1.6585116386413574,
"learning_rate": 7.331189710610933e-06,
"loss": 0.0319,
"step": 228
},
{
"epoch": 0.04,
"grad_norm": 1.9420249462127686,
"learning_rate": 7.459807073954985e-06,
"loss": 0.0458,
"step": 232
},
{
"epoch": 0.04,
"grad_norm": 2.1558046340942383,
"learning_rate": 7.588424437299035e-06,
"loss": 0.0309,
"step": 236
},
{
"epoch": 0.04,
"grad_norm": 2.778165340423584,
"learning_rate": 7.717041800643087e-06,
"loss": 0.0532,
"step": 240
},
{
"epoch": 0.04,
"grad_norm": 1.3744961023330688,
"learning_rate": 7.84565916398714e-06,
"loss": 0.0306,
"step": 244
},
{
"epoch": 0.04,
"grad_norm": 2.4764065742492676,
"learning_rate": 7.97427652733119e-06,
"loss": 0.0398,
"step": 248
},
{
"epoch": 0.04,
"grad_norm": 2.0220906734466553,
"learning_rate": 8.102893890675242e-06,
"loss": 0.0552,
"step": 252
},
{
"epoch": 0.04,
"grad_norm": 3.4686410427093506,
"learning_rate": 8.231511254019293e-06,
"loss": 0.0599,
"step": 256
},
{
"epoch": 0.04,
"grad_norm": 2.0917580127716064,
"learning_rate": 8.360128617363345e-06,
"loss": 0.036,
"step": 260
},
{
"epoch": 0.04,
"grad_norm": 1.3227124214172363,
"learning_rate": 8.488745980707396e-06,
"loss": 0.0337,
"step": 264
},
{
"epoch": 0.04,
"grad_norm": 1.853574275970459,
"learning_rate": 8.617363344051448e-06,
"loss": 0.0356,
"step": 268
},
{
"epoch": 0.04,
"grad_norm": 1.250444769859314,
"learning_rate": 8.7459807073955e-06,
"loss": 0.0625,
"step": 272
},
{
"epoch": 0.04,
"grad_norm": 1.1477417945861816,
"learning_rate": 8.874598070739551e-06,
"loss": 0.0325,
"step": 276
},
{
"epoch": 0.05,
"grad_norm": 1.7810678482055664,
"learning_rate": 9.003215434083602e-06,
"loss": 0.028,
"step": 280
},
{
"epoch": 0.05,
"grad_norm": 1.5006675720214844,
"learning_rate": 9.131832797427654e-06,
"loss": 0.046,
"step": 284
},
{
"epoch": 0.05,
"grad_norm": 0.8945916295051575,
"learning_rate": 9.260450160771704e-06,
"loss": 0.0318,
"step": 288
},
{
"epoch": 0.05,
"grad_norm": 1.4942963123321533,
"learning_rate": 9.389067524115757e-06,
"loss": 0.0254,
"step": 292
},
{
"epoch": 0.05,
"grad_norm": 0.9653424620628357,
"learning_rate": 9.517684887459809e-06,
"loss": 0.0286,
"step": 296
},
{
"epoch": 0.05,
"grad_norm": 1.740561604499817,
"learning_rate": 9.64630225080386e-06,
"loss": 0.032,
"step": 300
},
{
"epoch": 0.05,
"eval_loss": 0.03372881934046745,
"eval_pearson_cosine": 0.9332293835600463,
"eval_pearson_dot": 0.9301330880973007,
"eval_pearson_euclidean": 0.9260017674080668,
"eval_pearson_manhattan": 0.9271288221546901,
"eval_pearson_max": 0.9332293835600463,
"eval_runtime": 249.5828,
"eval_samples_per_second": 2.003,
"eval_spearman_cosine": 0.947022185388043,
"eval_spearman_dot": 0.9435553217732889,
"eval_spearman_euclidean": 0.940413083620648,
"eval_spearman_manhattan": 0.9413909920251474,
"eval_spearman_max": 0.947022185388043,
"eval_steps_per_second": 2.003,
"step": 300
},
{
"epoch": 0.05,
"grad_norm": 0.6091077923774719,
"learning_rate": 9.77491961414791e-06,
"loss": 0.0197,
"step": 304
},
{
"epoch": 0.05,
"grad_norm": 1.2511804103851318,
"learning_rate": 9.903536977491962e-06,
"loss": 0.0306,
"step": 308
},
{
"epoch": 0.05,
"grad_norm": 1.3721988201141357,
"learning_rate": 1.0032154340836013e-05,
"loss": 0.0187,
"step": 312
},
{
"epoch": 0.05,
"grad_norm": 0.7939477562904358,
"learning_rate": 1.0160771704180067e-05,
"loss": 0.0318,
"step": 316
},
{
"epoch": 0.05,
"grad_norm": 0.6942979097366333,
"learning_rate": 1.0289389067524116e-05,
"loss": 0.0264,
"step": 320
},
{
"epoch": 0.05,
"grad_norm": 1.3466440439224243,
"learning_rate": 1.0418006430868168e-05,
"loss": 0.0316,
"step": 324
},
{
"epoch": 0.05,
"grad_norm": 0.7388295531272888,
"learning_rate": 1.0546623794212218e-05,
"loss": 0.0322,
"step": 328
},
{
"epoch": 0.05,
"grad_norm": 1.094037413597107,
"learning_rate": 1.0675241157556271e-05,
"loss": 0.029,
"step": 332
},
{
"epoch": 0.05,
"grad_norm": 0.9884235262870789,
"learning_rate": 1.0803858520900323e-05,
"loss": 0.0424,
"step": 336
},
{
"epoch": 0.05,
"grad_norm": 1.2799253463745117,
"learning_rate": 1.0932475884244374e-05,
"loss": 0.0221,
"step": 340
},
{
"epoch": 0.06,
"grad_norm": 0.830769419670105,
"learning_rate": 1.1061093247588426e-05,
"loss": 0.0255,
"step": 344
},
{
"epoch": 0.06,
"grad_norm": 1.4065282344818115,
"learning_rate": 1.1189710610932476e-05,
"loss": 0.0257,
"step": 348
},
{
"epoch": 0.06,
"grad_norm": 1.4823110103607178,
"learning_rate": 1.1318327974276529e-05,
"loss": 0.0277,
"step": 352
},
{
"epoch": 0.06,
"grad_norm": 0.5865018367767334,
"learning_rate": 1.144694533762058e-05,
"loss": 0.0272,
"step": 356
},
{
"epoch": 0.06,
"grad_norm": 1.3011387586593628,
"learning_rate": 1.157556270096463e-05,
"loss": 0.0249,
"step": 360
},
{
"epoch": 0.06,
"grad_norm": 1.0222949981689453,
"learning_rate": 1.1704180064308684e-05,
"loss": 0.025,
"step": 364
},
{
"epoch": 0.06,
"grad_norm": 1.650606393814087,
"learning_rate": 1.1832797427652733e-05,
"loss": 0.0427,
"step": 368
},
{
"epoch": 0.06,
"grad_norm": 7.919157981872559,
"learning_rate": 1.1961414790996787e-05,
"loss": 0.0432,
"step": 372
},
{
"epoch": 0.06,
"grad_norm": 1.2726035118103027,
"learning_rate": 1.2090032154340837e-05,
"loss": 0.0244,
"step": 376
},
{
"epoch": 0.06,
"grad_norm": 1.1458909511566162,
"learning_rate": 1.2218649517684888e-05,
"loss": 0.026,
"step": 380
},
{
"epoch": 0.06,
"grad_norm": 1.4866142272949219,
"learning_rate": 1.2347266881028941e-05,
"loss": 0.0315,
"step": 384
},
{
"epoch": 0.06,
"grad_norm": 0.7649275064468384,
"learning_rate": 1.2475884244372991e-05,
"loss": 0.0253,
"step": 388
},
{
"epoch": 0.06,
"grad_norm": 0.5921647548675537,
"learning_rate": 1.2604501607717043e-05,
"loss": 0.0229,
"step": 392
},
{
"epoch": 0.06,
"grad_norm": 0.977344810962677,
"learning_rate": 1.2733118971061094e-05,
"loss": 0.0225,
"step": 396
},
{
"epoch": 0.06,
"grad_norm": 1.8201347589492798,
"learning_rate": 1.2861736334405146e-05,
"loss": 0.0219,
"step": 400
},
{
"epoch": 0.06,
"grad_norm": 0.8397660255432129,
"learning_rate": 1.2990353697749196e-05,
"loss": 0.0281,
"step": 404
},
{
"epoch": 0.07,
"grad_norm": 0.5883716940879822,
"learning_rate": 1.3118971061093249e-05,
"loss": 0.0256,
"step": 408
},
{
"epoch": 0.07,
"grad_norm": 1.352879524230957,
"learning_rate": 1.32475884244373e-05,
"loss": 0.0323,
"step": 412
},
{
"epoch": 0.07,
"grad_norm": 2.8051841259002686,
"learning_rate": 1.337620578778135e-05,
"loss": 0.0389,
"step": 416
},
{
"epoch": 0.07,
"grad_norm": 0.8029029965400696,
"learning_rate": 1.3504823151125404e-05,
"loss": 0.0198,
"step": 420
},
{
"epoch": 0.07,
"grad_norm": 2.1253607273101807,
"learning_rate": 1.3633440514469454e-05,
"loss": 0.0303,
"step": 424
},
{
"epoch": 0.07,
"grad_norm": 1.313376784324646,
"learning_rate": 1.3762057877813507e-05,
"loss": 0.0415,
"step": 428
},
{
"epoch": 0.07,
"grad_norm": 0.5886867046356201,
"learning_rate": 1.3890675241157558e-05,
"loss": 0.028,
"step": 432
},
{
"epoch": 0.07,
"grad_norm": 1.150387167930603,
"learning_rate": 1.4019292604501608e-05,
"loss": 0.0317,
"step": 436
},
{
"epoch": 0.07,
"grad_norm": 0.9141702651977539,
"learning_rate": 1.4147909967845662e-05,
"loss": 0.0375,
"step": 440
},
{
"epoch": 0.07,
"grad_norm": 1.5728639364242554,
"learning_rate": 1.4276527331189711e-05,
"loss": 0.035,
"step": 444
},
{
"epoch": 0.07,
"grad_norm": 0.9280940294265747,
"learning_rate": 1.4405144694533763e-05,
"loss": 0.0254,
"step": 448
},
{
"epoch": 0.07,
"grad_norm": 1.4746482372283936,
"learning_rate": 1.4533762057877815e-05,
"loss": 0.0449,
"step": 452
},
{
"epoch": 0.07,
"grad_norm": 0.9735682606697083,
"learning_rate": 1.4662379421221866e-05,
"loss": 0.0305,
"step": 456
},
{
"epoch": 0.07,
"grad_norm": 1.2252638339996338,
"learning_rate": 1.479099678456592e-05,
"loss": 0.0286,
"step": 460
},
{
"epoch": 0.07,
"grad_norm": 2.308204412460327,
"learning_rate": 1.491961414790997e-05,
"loss": 0.0376,
"step": 464
},
{
"epoch": 0.08,
"grad_norm": 1.1166713237762451,
"learning_rate": 1.504823151125402e-05,
"loss": 0.0309,
"step": 468
},
{
"epoch": 0.08,
"grad_norm": 1.1420267820358276,
"learning_rate": 1.517684887459807e-05,
"loss": 0.0243,
"step": 472
},
{
"epoch": 0.08,
"grad_norm": 2.7457785606384277,
"learning_rate": 1.5305466237942124e-05,
"loss": 0.0358,
"step": 476
},
{
"epoch": 0.08,
"grad_norm": 1.1351250410079956,
"learning_rate": 1.5434083601286174e-05,
"loss": 0.0381,
"step": 480
},
{
"epoch": 0.08,
"grad_norm": 0.669029951095581,
"learning_rate": 1.5562700964630227e-05,
"loss": 0.0233,
"step": 484
},
{
"epoch": 0.08,
"grad_norm": 1.1537421941757202,
"learning_rate": 1.569131832797428e-05,
"loss": 0.0319,
"step": 488
},
{
"epoch": 0.08,
"grad_norm": 1.1718066930770874,
"learning_rate": 1.581993569131833e-05,
"loss": 0.0366,
"step": 492
},
{
"epoch": 0.08,
"grad_norm": 0.6619621515274048,
"learning_rate": 1.594855305466238e-05,
"loss": 0.0433,
"step": 496
},
{
"epoch": 0.08,
"grad_norm": 0.9330528378486633,
"learning_rate": 1.607717041800643e-05,
"loss": 0.0336,
"step": 500
},
{
"epoch": 0.08,
"grad_norm": 1.4244128465652466,
"learning_rate": 1.6205787781350483e-05,
"loss": 0.0279,
"step": 504
},
{
"epoch": 0.08,
"grad_norm": 0.719264566898346,
"learning_rate": 1.6334405144694536e-05,
"loss": 0.026,
"step": 508
},
{
"epoch": 0.08,
"grad_norm": 0.5247528553009033,
"learning_rate": 1.6463022508038586e-05,
"loss": 0.0204,
"step": 512
},
{
"epoch": 0.08,
"grad_norm": 1.9459376335144043,
"learning_rate": 1.659163987138264e-05,
"loss": 0.0457,
"step": 516
},
{
"epoch": 0.08,
"grad_norm": 0.6106892824172974,
"learning_rate": 1.672025723472669e-05,
"loss": 0.024,
"step": 520
},
{
"epoch": 0.08,
"grad_norm": 1.9702305793762207,
"learning_rate": 1.6848874598070743e-05,
"loss": 0.0424,
"step": 524
},
{
"epoch": 0.08,
"grad_norm": 0.8634403347969055,
"learning_rate": 1.6977491961414792e-05,
"loss": 0.0377,
"step": 528
},
{
"epoch": 0.09,
"grad_norm": 1.0177710056304932,
"learning_rate": 1.7106109324758842e-05,
"loss": 0.0299,
"step": 532
},
{
"epoch": 0.09,
"grad_norm": 0.7187432646751404,
"learning_rate": 1.7234726688102896e-05,
"loss": 0.03,
"step": 536
},
{
"epoch": 0.09,
"grad_norm": 1.3794456720352173,
"learning_rate": 1.7363344051446945e-05,
"loss": 0.0461,
"step": 540
},
{
"epoch": 0.09,
"grad_norm": 2.011230707168579,
"learning_rate": 1.7491961414791e-05,
"loss": 0.0423,
"step": 544
},
{
"epoch": 0.09,
"grad_norm": 0.9847524166107178,
"learning_rate": 1.762057877813505e-05,
"loss": 0.0379,
"step": 548
},
{
"epoch": 0.09,
"grad_norm": 1.4435635805130005,
"learning_rate": 1.7749196141479102e-05,
"loss": 0.0407,
"step": 552
},
{
"epoch": 0.09,
"grad_norm": 0.6206502318382263,
"learning_rate": 1.7877813504823152e-05,
"loss": 0.0293,
"step": 556
},
{
"epoch": 0.09,
"grad_norm": 0.6152936816215515,
"learning_rate": 1.8006430868167205e-05,
"loss": 0.0239,
"step": 560
},
{
"epoch": 0.09,
"grad_norm": 0.9495165944099426,
"learning_rate": 1.8135048231511255e-05,
"loss": 0.0279,
"step": 564
},
{
"epoch": 0.09,
"grad_norm": 1.1117522716522217,
"learning_rate": 1.8263665594855308e-05,
"loss": 0.0282,
"step": 568
},
{
"epoch": 0.09,
"grad_norm": 0.9599608182907104,
"learning_rate": 1.8392282958199358e-05,
"loss": 0.0241,
"step": 572
},
{
"epoch": 0.09,
"grad_norm": 0.665824294090271,
"learning_rate": 1.8520900321543408e-05,
"loss": 0.032,
"step": 576
},
{
"epoch": 0.09,
"grad_norm": 0.9927297830581665,
"learning_rate": 1.864951768488746e-05,
"loss": 0.0386,
"step": 580
},
{
"epoch": 0.09,
"grad_norm": 0.8864579200744629,
"learning_rate": 1.8778135048231514e-05,
"loss": 0.0424,
"step": 584
},
{
"epoch": 0.09,
"grad_norm": 0.8934502601623535,
"learning_rate": 1.8906752411575564e-05,
"loss": 0.0303,
"step": 588
},
{
"epoch": 0.1,
"grad_norm": 0.7153275609016418,
"learning_rate": 1.9035369774919617e-05,
"loss": 0.0351,
"step": 592
},
{
"epoch": 0.1,
"grad_norm": 0.8853140473365784,
"learning_rate": 1.9163987138263667e-05,
"loss": 0.0251,
"step": 596
},
{
"epoch": 0.1,
"grad_norm": 1.535198450088501,
"learning_rate": 1.929260450160772e-05,
"loss": 0.039,
"step": 600
},
{
"epoch": 0.1,
"eval_loss": 0.03215770050883293,
"eval_pearson_cosine": 0.9396208600788933,
"eval_pearson_dot": 0.9361396977412548,
"eval_pearson_euclidean": 0.9366702713808518,
"eval_pearson_manhattan": 0.935916312626658,
"eval_pearson_max": 0.9396208600788933,
"eval_runtime": 249.4161,
"eval_samples_per_second": 2.005,
"eval_spearman_cosine": 0.9531478289779426,
"eval_spearman_dot": 0.9484099676386145,
"eval_spearman_euclidean": 0.9506823938736226,
"eval_spearman_manhattan": 0.9494599723678361,
"eval_spearman_max": 0.9531478289779426,
"eval_steps_per_second": 2.005,
"step": 600
},
{
"epoch": 0.1,
"grad_norm": 0.6895681023597717,
"learning_rate": 1.942122186495177e-05,
"loss": 0.0243,
"step": 604
},
{
"epoch": 0.1,
"grad_norm": 1.0323344469070435,
"learning_rate": 1.954983922829582e-05,
"loss": 0.0366,
"step": 608
},
{
"epoch": 0.1,
"grad_norm": 0.6633996367454529,
"learning_rate": 1.9678456591639874e-05,
"loss": 0.0472,
"step": 612
},
{
"epoch": 0.1,
"grad_norm": 1.3444079160690308,
"learning_rate": 1.9807073954983923e-05,
"loss": 0.0323,
"step": 616
},
{
"epoch": 0.1,
"grad_norm": 0.9151845574378967,
"learning_rate": 1.9935691318327977e-05,
"loss": 0.0263,
"step": 620
},
{
"epoch": 0.1,
"grad_norm": 1.1424955129623413,
"learning_rate": 1.9992852037169407e-05,
"loss": 0.0333,
"step": 624
},
{
"epoch": 0.1,
"grad_norm": 1.0085678100585938,
"learning_rate": 1.9978556111508223e-05,
"loss": 0.0375,
"step": 628
},
{
"epoch": 0.1,
"grad_norm": 1.071134328842163,
"learning_rate": 1.9964260185847037e-05,
"loss": 0.0299,
"step": 632
},
{
"epoch": 0.1,
"grad_norm": 1.2681633234024048,
"learning_rate": 1.9949964260185847e-05,
"loss": 0.0317,
"step": 636
},
{
"epoch": 0.1,
"grad_norm": 0.9661902189254761,
"learning_rate": 1.9935668334524663e-05,
"loss": 0.0358,
"step": 640
},
{
"epoch": 0.1,
"grad_norm": 1.77792489528656,
"learning_rate": 1.9921372408863477e-05,
"loss": 0.0318,
"step": 644
},
{
"epoch": 0.1,
"grad_norm": 0.4316706955432892,
"learning_rate": 1.990707648320229e-05,
"loss": 0.0288,
"step": 648
},
{
"epoch": 0.1,
"grad_norm": 0.73268061876297,
"learning_rate": 1.9892780557541103e-05,
"loss": 0.0373,
"step": 652
},
{
"epoch": 0.11,
"grad_norm": 1.6720423698425293,
"learning_rate": 1.9878484631879917e-05,
"loss": 0.0478,
"step": 656
},
{
"epoch": 0.11,
"grad_norm": 0.5472896695137024,
"learning_rate": 1.986418870621873e-05,
"loss": 0.0169,
"step": 660
},
{
"epoch": 0.11,
"grad_norm": 1.016210675239563,
"learning_rate": 1.9849892780557543e-05,
"loss": 0.0279,
"step": 664
},
{
"epoch": 0.11,
"grad_norm": 0.6803948879241943,
"learning_rate": 1.9835596854896357e-05,
"loss": 0.0323,
"step": 668
},
{
"epoch": 0.11,
"grad_norm": 0.570452868938446,
"learning_rate": 1.982130092923517e-05,
"loss": 0.0189,
"step": 672
},
{
"epoch": 0.11,
"grad_norm": 1.1386935710906982,
"learning_rate": 1.9807005003573983e-05,
"loss": 0.0253,
"step": 676
},
{
"epoch": 0.11,
"grad_norm": 1.8023974895477295,
"learning_rate": 1.9792709077912797e-05,
"loss": 0.0251,
"step": 680
},
{
"epoch": 0.11,
"grad_norm": 1.1469191312789917,
"learning_rate": 1.977841315225161e-05,
"loss": 0.0247,
"step": 684
},
{
"epoch": 0.11,
"grad_norm": 1.133340835571289,
"learning_rate": 1.9764117226590423e-05,
"loss": 0.0363,
"step": 688
},
{
"epoch": 0.11,
"grad_norm": 1.3125261068344116,
"learning_rate": 1.9749821300929237e-05,
"loss": 0.022,
"step": 692
},
{
"epoch": 0.11,
"grad_norm": 0.689136266708374,
"learning_rate": 1.973552537526805e-05,
"loss": 0.0332,
"step": 696
},
{
"epoch": 0.11,
"grad_norm": 0.9812389016151428,
"learning_rate": 1.9721229449606863e-05,
"loss": 0.0309,
"step": 700
},
{
"epoch": 0.11,
"grad_norm": 1.1285403966903687,
"learning_rate": 1.9706933523945677e-05,
"loss": 0.0295,
"step": 704
},
{
"epoch": 0.11,
"grad_norm": 0.7089455723762512,
"learning_rate": 1.969263759828449e-05,
"loss": 0.0253,
"step": 708
},
{
"epoch": 0.11,
"grad_norm": 0.8068435788154602,
"learning_rate": 1.9678341672623303e-05,
"loss": 0.0239,
"step": 712
},
{
"epoch": 0.12,
"grad_norm": 1.2158197164535522,
"learning_rate": 1.9664045746962117e-05,
"loss": 0.0348,
"step": 716
},
{
"epoch": 0.12,
"grad_norm": 1.1878087520599365,
"learning_rate": 1.9649749821300933e-05,
"loss": 0.0352,
"step": 720
},
{
"epoch": 0.12,
"grad_norm": 1.019784927368164,
"learning_rate": 1.9635453895639743e-05,
"loss": 0.0468,
"step": 724
},
{
"epoch": 0.12,
"grad_norm": 1.391402244567871,
"learning_rate": 1.9621157969978557e-05,
"loss": 0.0243,
"step": 728
},
{
"epoch": 0.12,
"grad_norm": 1.5212937593460083,
"learning_rate": 1.9606862044317373e-05,
"loss": 0.0454,
"step": 732
},
{
"epoch": 0.12,
"grad_norm": 0.7201647758483887,
"learning_rate": 1.9592566118656183e-05,
"loss": 0.0436,
"step": 736
},
{
"epoch": 0.12,
"grad_norm": 1.0111092329025269,
"learning_rate": 1.9578270192994997e-05,
"loss": 0.0245,
"step": 740
},
{
"epoch": 0.12,
"grad_norm": 0.8695981502532959,
"learning_rate": 1.9563974267333813e-05,
"loss": 0.0369,
"step": 744
},
{
"epoch": 0.12,
"grad_norm": 0.9623269438743591,
"learning_rate": 1.9549678341672623e-05,
"loss": 0.0222,
"step": 748
},
{
"epoch": 0.12,
"grad_norm": 0.6350664496421814,
"learning_rate": 1.953538241601144e-05,
"loss": 0.0211,
"step": 752
},
{
"epoch": 0.12,
"grad_norm": 0.9464645981788635,
"learning_rate": 1.9521086490350253e-05,
"loss": 0.0416,
"step": 756
},
{
"epoch": 0.12,
"grad_norm": 0.5733113288879395,
"learning_rate": 1.9506790564689063e-05,
"loss": 0.0217,
"step": 760
},
{
"epoch": 0.12,
"grad_norm": 0.8460751175880432,
"learning_rate": 1.949249463902788e-05,
"loss": 0.0318,
"step": 764
},
{
"epoch": 0.12,
"grad_norm": 1.7062273025512695,
"learning_rate": 1.9478198713366693e-05,
"loss": 0.0428,
"step": 768
},
{
"epoch": 0.12,
"grad_norm": 0.8697042465209961,
"learning_rate": 1.9463902787705507e-05,
"loss": 0.0414,
"step": 772
},
{
"epoch": 0.12,
"grad_norm": 1.0291727781295776,
"learning_rate": 1.944960686204432e-05,
"loss": 0.0288,
"step": 776
},
{
"epoch": 0.13,
"grad_norm": 0.8506454229354858,
"learning_rate": 1.9435310936383133e-05,
"loss": 0.047,
"step": 780
},
{
"epoch": 0.13,
"grad_norm": 0.9701406955718994,
"learning_rate": 1.9421015010721947e-05,
"loss": 0.0276,
"step": 784
},
{
"epoch": 0.13,
"grad_norm": 2.1087899208068848,
"learning_rate": 1.940671908506076e-05,
"loss": 0.0448,
"step": 788
},
{
"epoch": 0.13,
"grad_norm": 0.8718247413635254,
"learning_rate": 1.9392423159399573e-05,
"loss": 0.0241,
"step": 792
},
{
"epoch": 0.13,
"grad_norm": 1.3904443979263306,
"learning_rate": 1.9378127233738387e-05,
"loss": 0.0451,
"step": 796
},
{
"epoch": 0.13,
"grad_norm": 0.7410137057304382,
"learning_rate": 1.93638313080772e-05,
"loss": 0.0244,
"step": 800
},
{
"epoch": 0.13,
"grad_norm": 0.9150621294975281,
"learning_rate": 1.9349535382416013e-05,
"loss": 0.0272,
"step": 804
},
{
"epoch": 0.13,
"grad_norm": 0.7707653045654297,
"learning_rate": 1.9335239456754827e-05,
"loss": 0.036,
"step": 808
},
{
"epoch": 0.13,
"grad_norm": 0.9467148184776306,
"learning_rate": 1.932094353109364e-05,
"loss": 0.0363,
"step": 812
},
{
"epoch": 0.13,
"grad_norm": 1.2180854082107544,
"learning_rate": 1.9306647605432453e-05,
"loss": 0.039,
"step": 816
},
{
"epoch": 0.13,
"grad_norm": 0.9589481949806213,
"learning_rate": 1.9292351679771267e-05,
"loss": 0.0346,
"step": 820
},
{
"epoch": 0.13,
"grad_norm": 1.086827039718628,
"learning_rate": 1.927805575411008e-05,
"loss": 0.0352,
"step": 824
},
{
"epoch": 0.13,
"grad_norm": 1.0733484029769897,
"learning_rate": 1.9263759828448893e-05,
"loss": 0.0308,
"step": 828
},
{
"epoch": 0.13,
"grad_norm": 0.7703049182891846,
"learning_rate": 1.9249463902787707e-05,
"loss": 0.0271,
"step": 832
},
{
"epoch": 0.13,
"grad_norm": 1.1060117483139038,
"learning_rate": 1.923516797712652e-05,
"loss": 0.0281,
"step": 836
},
{
"epoch": 0.14,
"grad_norm": 0.7313349843025208,
"learning_rate": 1.9220872051465333e-05,
"loss": 0.0245,
"step": 840
},
{
"epoch": 0.14,
"grad_norm": 0.7229084372520447,
"learning_rate": 1.920657612580415e-05,
"loss": 0.0249,
"step": 844
},
{
"epoch": 0.14,
"grad_norm": 1.4665247201919556,
"learning_rate": 1.919228020014296e-05,
"loss": 0.0412,
"step": 848
},
{
"epoch": 0.14,
"grad_norm": 0.8117924928665161,
"learning_rate": 1.9177984274481773e-05,
"loss": 0.0391,
"step": 852
},
{
"epoch": 0.14,
"grad_norm": 0.9442553520202637,
"learning_rate": 1.916368834882059e-05,
"loss": 0.0328,
"step": 856
},
{
"epoch": 0.14,
"grad_norm": 0.7885982394218445,
"learning_rate": 1.91493924231594e-05,
"loss": 0.0182,
"step": 860
},
{
"epoch": 0.14,
"grad_norm": 1.2472141981124878,
"learning_rate": 1.9135096497498217e-05,
"loss": 0.0342,
"step": 864
},
{
"epoch": 0.14,
"grad_norm": 0.8535823225975037,
"learning_rate": 1.9120800571837027e-05,
"loss": 0.0223,
"step": 868
},
{
"epoch": 0.14,
"grad_norm": 1.84065842628479,
"learning_rate": 1.910650464617584e-05,
"loss": 0.0359,
"step": 872
},
{
"epoch": 0.14,
"grad_norm": 1.0987244844436646,
"learning_rate": 1.9092208720514657e-05,
"loss": 0.0343,
"step": 876
},
{
"epoch": 0.14,
"grad_norm": 0.7632778286933899,
"learning_rate": 1.9077912794853467e-05,
"loss": 0.0323,
"step": 880
},
{
"epoch": 0.14,
"grad_norm": 1.1485552787780762,
"learning_rate": 1.906361686919228e-05,
"loss": 0.0337,
"step": 884
},
{
"epoch": 0.14,
"grad_norm": 2.6315150260925293,
"learning_rate": 1.9049320943531097e-05,
"loss": 0.0515,
"step": 888
},
{
"epoch": 0.14,
"grad_norm": 1.0435465574264526,
"learning_rate": 1.9035025017869907e-05,
"loss": 0.0458,
"step": 892
},
{
"epoch": 0.14,
"grad_norm": 1.001620888710022,
"learning_rate": 1.9020729092208723e-05,
"loss": 0.0357,
"step": 896
},
{
"epoch": 0.14,
"grad_norm": 0.8570252060890198,
"learning_rate": 1.9006433166547537e-05,
"loss": 0.037,
"step": 900
},
{
"epoch": 0.14,
"eval_loss": 0.04232440143823624,
"eval_pearson_cosine": 0.9451831743059702,
"eval_pearson_dot": 0.9441977144080326,
"eval_pearson_euclidean": 0.9401266241811703,
"eval_pearson_manhattan": 0.940026238812949,
"eval_pearson_max": 0.9451831743059702,
"eval_runtime": 249.3889,
"eval_samples_per_second": 2.005,
"eval_spearman_cosine": 0.9564561958788276,
"eval_spearman_dot": 0.9527831233353083,
"eval_spearman_euclidean": 0.9533796700221547,
"eval_spearman_manhattan": 0.9531501329883199,
"eval_spearman_max": 0.9564561958788276,
"eval_steps_per_second": 2.005,
"step": 900
},
{
"epoch": 0.15,
"grad_norm": 1.118642807006836,
"learning_rate": 1.8992137240886347e-05,
"loss": 0.0424,
"step": 904
},
{
"epoch": 0.15,
"grad_norm": 1.344506025314331,
"learning_rate": 1.8977841315225163e-05,
"loss": 0.03,
"step": 908
},
{
"epoch": 0.15,
"grad_norm": 0.7447184920310974,
"learning_rate": 1.8963545389563977e-05,
"loss": 0.0308,
"step": 912
},
{
"epoch": 0.15,
"grad_norm": 0.6138767004013062,
"learning_rate": 1.894924946390279e-05,
"loss": 0.0314,
"step": 916
},
{
"epoch": 0.15,
"grad_norm": 0.7356119751930237,
"learning_rate": 1.8934953538241603e-05,
"loss": 0.0283,
"step": 920
},
{
"epoch": 0.15,
"grad_norm": 1.263514518737793,
"learning_rate": 1.8920657612580417e-05,
"loss": 0.0293,
"step": 924
},
{
"epoch": 0.15,
"grad_norm": 0.6265131235122681,
"learning_rate": 1.890636168691923e-05,
"loss": 0.0183,
"step": 928
},
{
"epoch": 0.15,
"grad_norm": 1.7762928009033203,
"learning_rate": 1.8892065761258043e-05,
"loss": 0.0449,
"step": 932
},
{
"epoch": 0.15,
"grad_norm": 1.0329370498657227,
"learning_rate": 1.8877769835596857e-05,
"loss": 0.0397,
"step": 936
},
{
"epoch": 0.15,
"grad_norm": 1.2647181749343872,
"learning_rate": 1.886347390993567e-05,
"loss": 0.0329,
"step": 940
},
{
"epoch": 0.15,
"grad_norm": 0.7577453851699829,
"learning_rate": 1.8849177984274483e-05,
"loss": 0.043,
"step": 944
},
{
"epoch": 0.15,
"grad_norm": 1.1184148788452148,
"learning_rate": 1.8834882058613297e-05,
"loss": 0.0419,
"step": 948
},
{
"epoch": 0.15,
"grad_norm": 0.7198161482810974,
"learning_rate": 1.882058613295211e-05,
"loss": 0.0292,
"step": 952
},
{
"epoch": 0.15,
"grad_norm": 1.0045865774154663,
"learning_rate": 1.8806290207290923e-05,
"loss": 0.0333,
"step": 956
},
{
"epoch": 0.15,
"grad_norm": 0.5983513593673706,
"learning_rate": 1.8791994281629737e-05,
"loss": 0.022,
"step": 960
},
{
"epoch": 0.16,
"grad_norm": 1.0982924699783325,
"learning_rate": 1.877769835596855e-05,
"loss": 0.0264,
"step": 964
},
{
"epoch": 0.16,
"grad_norm": 0.6120471358299255,
"learning_rate": 1.8763402430307363e-05,
"loss": 0.0308,
"step": 968
},
{
"epoch": 0.16,
"grad_norm": 1.5346020460128784,
"learning_rate": 1.8749106504646177e-05,
"loss": 0.0355,
"step": 972
},
{
"epoch": 0.16,
"grad_norm": 0.42815306782722473,
"learning_rate": 1.873481057898499e-05,
"loss": 0.0236,
"step": 976
},
{
"epoch": 0.16,
"grad_norm": 0.7016882300376892,
"learning_rate": 1.8720514653323803e-05,
"loss": 0.0278,
"step": 980
},
{
"epoch": 0.16,
"grad_norm": 0.6955506801605225,
"learning_rate": 1.8706218727662617e-05,
"loss": 0.0208,
"step": 984
},
{
"epoch": 0.16,
"grad_norm": 0.7579104900360107,
"learning_rate": 1.8691922802001433e-05,
"loss": 0.0303,
"step": 988
},
{
"epoch": 0.16,
"grad_norm": 0.8700461983680725,
"learning_rate": 1.8677626876340243e-05,
"loss": 0.0212,
"step": 992
},
{
"epoch": 0.16,
"grad_norm": 1.532637357711792,
"learning_rate": 1.8663330950679057e-05,
"loss": 0.0499,
"step": 996
},
{
"epoch": 0.16,
"grad_norm": 1.3528228998184204,
"learning_rate": 1.8649035025017873e-05,
"loss": 0.0349,
"step": 1000
},
{
"epoch": 0.16,
"grad_norm": 0.7273157238960266,
"learning_rate": 1.8634739099356683e-05,
"loss": 0.0202,
"step": 1004
},
{
"epoch": 0.16,
"grad_norm": 0.952356219291687,
"learning_rate": 1.86204431736955e-05,
"loss": 0.0315,
"step": 1008
},
{
"epoch": 0.16,
"grad_norm": 0.6654635071754456,
"learning_rate": 1.8606147248034313e-05,
"loss": 0.0248,
"step": 1012
},
{
"epoch": 0.16,
"grad_norm": 0.578262984752655,
"learning_rate": 1.8591851322373123e-05,
"loss": 0.037,
"step": 1016
},
{
"epoch": 0.16,
"grad_norm": 0.6199663877487183,
"learning_rate": 1.857755539671194e-05,
"loss": 0.0245,
"step": 1020
},
{
"epoch": 0.16,
"grad_norm": 1.0458414554595947,
"learning_rate": 1.8563259471050753e-05,
"loss": 0.0264,
"step": 1024
},
{
"epoch": 0.17,
"grad_norm": 0.5218886733055115,
"learning_rate": 1.8548963545389563e-05,
"loss": 0.0254,
"step": 1028
},
{
"epoch": 0.17,
"grad_norm": 0.4931807518005371,
"learning_rate": 1.853466761972838e-05,
"loss": 0.0163,
"step": 1032
},
{
"epoch": 0.17,
"grad_norm": 0.7079238891601562,
"learning_rate": 1.8520371694067193e-05,
"loss": 0.027,
"step": 1036
},
{
"epoch": 0.17,
"grad_norm": 0.6107800006866455,
"learning_rate": 1.8506075768406007e-05,
"loss": 0.0353,
"step": 1040
},
{
"epoch": 0.17,
"grad_norm": 1.5169730186462402,
"learning_rate": 1.849177984274482e-05,
"loss": 0.0353,
"step": 1044
},
{
"epoch": 0.17,
"grad_norm": 0.6394426822662354,
"learning_rate": 1.8477483917083633e-05,
"loss": 0.0175,
"step": 1048
},
{
"epoch": 0.17,
"grad_norm": 0.5369437336921692,
"learning_rate": 1.8463187991422447e-05,
"loss": 0.0456,
"step": 1052
},
{
"epoch": 0.17,
"grad_norm": 2.259932041168213,
"learning_rate": 1.844889206576126e-05,
"loss": 0.0573,
"step": 1056
},
{
"epoch": 0.17,
"grad_norm": 0.5754424333572388,
"learning_rate": 1.8434596140100073e-05,
"loss": 0.0208,
"step": 1060
},
{
"epoch": 0.17,
"grad_norm": 0.48612886667251587,
"learning_rate": 1.8420300214438887e-05,
"loss": 0.0181,
"step": 1064
},
{
"epoch": 0.17,
"grad_norm": 0.8483503460884094,
"learning_rate": 1.84060042887777e-05,
"loss": 0.0212,
"step": 1068
},
{
"epoch": 0.17,
"grad_norm": 0.9669589996337891,
"learning_rate": 1.8391708363116513e-05,
"loss": 0.0355,
"step": 1072
},
{
"epoch": 0.17,
"grad_norm": 0.6289377212524414,
"learning_rate": 1.8377412437455326e-05,
"loss": 0.0288,
"step": 1076
},
{
"epoch": 0.17,
"grad_norm": 1.055770993232727,
"learning_rate": 1.836311651179414e-05,
"loss": 0.0321,
"step": 1080
},
{
"epoch": 0.17,
"grad_norm": 0.5941286683082581,
"learning_rate": 1.8348820586132953e-05,
"loss": 0.0215,
"step": 1084
},
{
"epoch": 0.17,
"grad_norm": 0.7000011801719666,
"learning_rate": 1.8334524660471766e-05,
"loss": 0.0295,
"step": 1088
},
{
"epoch": 0.18,
"grad_norm": 0.5845941305160522,
"learning_rate": 1.832022873481058e-05,
"loss": 0.0257,
"step": 1092
},
{
"epoch": 0.18,
"grad_norm": 1.1016316413879395,
"learning_rate": 1.8305932809149393e-05,
"loss": 0.0276,
"step": 1096
},
{
"epoch": 0.18,
"grad_norm": 0.8285301327705383,
"learning_rate": 1.8291636883488206e-05,
"loss": 0.0241,
"step": 1100
},
{
"epoch": 0.18,
"grad_norm": 0.8674764037132263,
"learning_rate": 1.827734095782702e-05,
"loss": 0.0274,
"step": 1104
},
{
"epoch": 0.18,
"grad_norm": 0.654329240322113,
"learning_rate": 1.8263045032165833e-05,
"loss": 0.0213,
"step": 1108
},
{
"epoch": 0.18,
"grad_norm": 0.6802071928977966,
"learning_rate": 1.824874910650465e-05,
"loss": 0.0321,
"step": 1112
},
{
"epoch": 0.18,
"grad_norm": 0.7723608016967773,
"learning_rate": 1.823445318084346e-05,
"loss": 0.0289,
"step": 1116
},
{
"epoch": 0.18,
"grad_norm": 0.9586684703826904,
"learning_rate": 1.8220157255182273e-05,
"loss": 0.0264,
"step": 1120
},
{
"epoch": 0.18,
"grad_norm": 0.758579432964325,
"learning_rate": 1.820586132952109e-05,
"loss": 0.0278,
"step": 1124
},
{
"epoch": 0.18,
"grad_norm": 1.0153886079788208,
"learning_rate": 1.81915654038599e-05,
"loss": 0.0305,
"step": 1128
},
{
"epoch": 0.18,
"grad_norm": 1.1771838665008545,
"learning_rate": 1.8177269478198717e-05,
"loss": 0.0512,
"step": 1132
},
{
"epoch": 0.18,
"grad_norm": 1.2490832805633545,
"learning_rate": 1.816297355253753e-05,
"loss": 0.0295,
"step": 1136
},
{
"epoch": 0.18,
"grad_norm": 0.5716216564178467,
"learning_rate": 1.814867762687634e-05,
"loss": 0.0246,
"step": 1140
},
{
"epoch": 0.18,
"grad_norm": 0.7177873253822327,
"learning_rate": 1.8134381701215157e-05,
"loss": 0.0293,
"step": 1144
},
{
"epoch": 0.18,
"grad_norm": 0.5648506879806519,
"learning_rate": 1.812008577555397e-05,
"loss": 0.0225,
"step": 1148
},
{
"epoch": 0.19,
"grad_norm": 0.721409261226654,
"learning_rate": 1.8105789849892783e-05,
"loss": 0.0242,
"step": 1152
},
{
"epoch": 0.19,
"grad_norm": 0.6926946640014648,
"learning_rate": 1.8091493924231596e-05,
"loss": 0.0276,
"step": 1156
},
{
"epoch": 0.19,
"grad_norm": 0.6999531388282776,
"learning_rate": 1.807719799857041e-05,
"loss": 0.0187,
"step": 1160
},
{
"epoch": 0.19,
"grad_norm": 0.7933658957481384,
"learning_rate": 1.8062902072909223e-05,
"loss": 0.0244,
"step": 1164
},
{
"epoch": 0.19,
"grad_norm": 0.4972588121891022,
"learning_rate": 1.8048606147248036e-05,
"loss": 0.0418,
"step": 1168
},
{
"epoch": 0.19,
"grad_norm": 1.0972784757614136,
"learning_rate": 1.803431022158685e-05,
"loss": 0.0319,
"step": 1172
},
{
"epoch": 0.19,
"grad_norm": 1.2647324800491333,
"learning_rate": 1.8020014295925663e-05,
"loss": 0.0325,
"step": 1176
},
{
"epoch": 0.19,
"grad_norm": 0.8409667015075684,
"learning_rate": 1.8005718370264476e-05,
"loss": 0.0236,
"step": 1180
},
{
"epoch": 0.19,
"grad_norm": 0.6574044227600098,
"learning_rate": 1.799142244460329e-05,
"loss": 0.032,
"step": 1184
},
{
"epoch": 0.19,
"grad_norm": 0.5404456257820129,
"learning_rate": 1.7977126518942103e-05,
"loss": 0.0313,
"step": 1188
},
{
"epoch": 0.19,
"grad_norm": 0.7715393900871277,
"learning_rate": 1.7962830593280916e-05,
"loss": 0.0252,
"step": 1192
},
{
"epoch": 0.19,
"grad_norm": 1.3372646570205688,
"learning_rate": 1.794853466761973e-05,
"loss": 0.0268,
"step": 1196
},
{
"epoch": 0.19,
"grad_norm": 0.5368986129760742,
"learning_rate": 1.7934238741958543e-05,
"loss": 0.0234,
"step": 1200
},
{
"epoch": 0.19,
"eval_loss": 0.026826824992895126,
"eval_pearson_cosine": 0.9529420998918104,
"eval_pearson_dot": 0.951139070288119,
"eval_pearson_euclidean": 0.945750278447336,
"eval_pearson_manhattan": 0.945116677167893,
"eval_pearson_max": 0.9529420998918104,
"eval_runtime": 249.7054,
"eval_samples_per_second": 2.002,
"eval_spearman_cosine": 0.9627865603907902,
"eval_spearman_dot": 0.9601426124824484,
"eval_spearman_euclidean": 0.9601416524781244,
"eval_spearman_manhattan": 0.9589273910090907,
"eval_spearman_max": 0.9627865603907902,
"eval_steps_per_second": 2.002,
"step": 1200
},
{
"epoch": 0.19,
"grad_norm": 0.749082624912262,
"learning_rate": 1.7919942816297356e-05,
"loss": 0.0222,
"step": 1204
},
{
"epoch": 0.19,
"grad_norm": 0.8751634955406189,
"learning_rate": 1.790564689063617e-05,
"loss": 0.0267,
"step": 1208
},
{
"epoch": 0.19,
"grad_norm": 0.622231125831604,
"learning_rate": 1.7891350964974983e-05,
"loss": 0.02,
"step": 1212
},
{
"epoch": 0.2,
"grad_norm": 1.1356748342514038,
"learning_rate": 1.7877055039313796e-05,
"loss": 0.0286,
"step": 1216
},
{
"epoch": 0.2,
"grad_norm": 0.6375044584274292,
"learning_rate": 1.786275911365261e-05,
"loss": 0.024,
"step": 1220
},
{
"epoch": 0.2,
"grad_norm": 0.9166258573532104,
"learning_rate": 1.7848463187991427e-05,
"loss": 0.0285,
"step": 1224
},
{
"epoch": 0.2,
"grad_norm": 0.7663798332214355,
"learning_rate": 1.7834167262330236e-05,
"loss": 0.0188,
"step": 1228
},
{
"epoch": 0.2,
"grad_norm": 1.0997885465621948,
"learning_rate": 1.781987133666905e-05,
"loss": 0.0345,
"step": 1232
},
{
"epoch": 0.2,
"grad_norm": 0.8155802488327026,
"learning_rate": 1.7805575411007866e-05,
"loss": 0.0291,
"step": 1236
},
{
"epoch": 0.2,
"grad_norm": 0.5071162581443787,
"learning_rate": 1.7791279485346676e-05,
"loss": 0.0225,
"step": 1240
},
{
"epoch": 0.2,
"grad_norm": 0.4950205981731415,
"learning_rate": 1.777698355968549e-05,
"loss": 0.0255,
"step": 1244
},
{
"epoch": 0.2,
"grad_norm": 0.6503371000289917,
"learning_rate": 1.7762687634024306e-05,
"loss": 0.0353,
"step": 1248
},
{
"epoch": 0.2,
"grad_norm": 0.4491406977176666,
"learning_rate": 1.7748391708363116e-05,
"loss": 0.0287,
"step": 1252
},
{
"epoch": 0.2,
"grad_norm": 0.5856379270553589,
"learning_rate": 1.7734095782701933e-05,
"loss": 0.029,
"step": 1256
},
{
"epoch": 0.2,
"grad_norm": 0.9508484601974487,
"learning_rate": 1.7719799857040746e-05,
"loss": 0.02,
"step": 1260
},
{
"epoch": 0.2,
"grad_norm": 1.2820254564285278,
"learning_rate": 1.7705503931379556e-05,
"loss": 0.0323,
"step": 1264
},
{
"epoch": 0.2,
"grad_norm": 0.885857105255127,
"learning_rate": 1.7691208005718373e-05,
"loss": 0.0241,
"step": 1268
},
{
"epoch": 0.2,
"grad_norm": 0.5690828561782837,
"learning_rate": 1.7676912080057183e-05,
"loss": 0.0205,
"step": 1272
},
{
"epoch": 0.21,
"grad_norm": 0.41895803809165955,
"learning_rate": 1.7662616154396e-05,
"loss": 0.0188,
"step": 1276
},
{
"epoch": 0.21,
"grad_norm": 0.8826864361763,
"learning_rate": 1.7648320228734813e-05,
"loss": 0.026,
"step": 1280
},
{
"epoch": 0.21,
"grad_norm": 0.9115862250328064,
"learning_rate": 1.7634024303073623e-05,
"loss": 0.0328,
"step": 1284
},
{
"epoch": 0.21,
"grad_norm": 1.5499801635742188,
"learning_rate": 1.761972837741244e-05,
"loss": 0.0385,
"step": 1288
},
{
"epoch": 0.21,
"grad_norm": 1.07961106300354,
"learning_rate": 1.7605432451751253e-05,
"loss": 0.0276,
"step": 1292
},
{
"epoch": 0.21,
"grad_norm": 1.7965257167816162,
"learning_rate": 1.7591136526090066e-05,
"loss": 0.0305,
"step": 1296
},
{
"epoch": 0.21,
"grad_norm": 0.47339317202568054,
"learning_rate": 1.757684060042888e-05,
"loss": 0.0181,
"step": 1300
},
{
"epoch": 0.21,
"grad_norm": 0.6750252842903137,
"learning_rate": 1.7562544674767693e-05,
"loss": 0.0208,
"step": 1304
},
{
"epoch": 0.21,
"grad_norm": 0.9396247267723083,
"learning_rate": 1.7548248749106506e-05,
"loss": 0.0361,
"step": 1308
},
{
"epoch": 0.21,
"grad_norm": 1.0881524085998535,
"learning_rate": 1.753395282344532e-05,
"loss": 0.0331,
"step": 1312
},
{
"epoch": 0.21,
"grad_norm": 0.7517051100730896,
"learning_rate": 1.7519656897784133e-05,
"loss": 0.0281,
"step": 1316
},
{
"epoch": 0.21,
"grad_norm": 0.7083280682563782,
"learning_rate": 1.7505360972122946e-05,
"loss": 0.0209,
"step": 1320
},
{
"epoch": 0.21,
"grad_norm": 0.727603018283844,
"learning_rate": 1.749106504646176e-05,
"loss": 0.0312,
"step": 1324
},
{
"epoch": 0.21,
"grad_norm": 0.4598117470741272,
"learning_rate": 1.7476769120800573e-05,
"loss": 0.02,
"step": 1328
},
{
"epoch": 0.21,
"grad_norm": 0.6653364896774292,
"learning_rate": 1.7462473195139386e-05,
"loss": 0.0263,
"step": 1332
},
{
"epoch": 0.21,
"grad_norm": 0.527958869934082,
"learning_rate": 1.74481772694782e-05,
"loss": 0.0176,
"step": 1336
},
{
"epoch": 0.22,
"grad_norm": 0.9711959362030029,
"learning_rate": 1.7433881343817013e-05,
"loss": 0.0336,
"step": 1340
},
{
"epoch": 0.22,
"grad_norm": 0.8734799027442932,
"learning_rate": 1.7419585418155826e-05,
"loss": 0.0384,
"step": 1344
},
{
"epoch": 0.22,
"grad_norm": 0.599764883518219,
"learning_rate": 1.740528949249464e-05,
"loss": 0.0317,
"step": 1348
},
{
"epoch": 0.22,
"grad_norm": 0.6115812659263611,
"learning_rate": 1.7390993566833453e-05,
"loss": 0.0246,
"step": 1352
},
{
"epoch": 0.22,
"grad_norm": 0.660057008266449,
"learning_rate": 1.7376697641172266e-05,
"loss": 0.027,
"step": 1356
},
{
"epoch": 0.22,
"grad_norm": 1.2455826997756958,
"learning_rate": 1.736240171551108e-05,
"loss": 0.0434,
"step": 1360
},
{
"epoch": 0.22,
"grad_norm": 1.07332181930542,
"learning_rate": 1.7348105789849893e-05,
"loss": 0.038,
"step": 1364
},
{
"epoch": 0.22,
"grad_norm": 0.6912384629249573,
"learning_rate": 1.733380986418871e-05,
"loss": 0.0252,
"step": 1368
},
{
"epoch": 0.22,
"grad_norm": 0.7599236369132996,
"learning_rate": 1.731951393852752e-05,
"loss": 0.025,
"step": 1372
},
{
"epoch": 0.22,
"grad_norm": 0.7361788153648376,
"learning_rate": 1.7305218012866333e-05,
"loss": 0.0413,
"step": 1376
},
{
"epoch": 0.22,
"grad_norm": 0.6756102442741394,
"learning_rate": 1.729092208720515e-05,
"loss": 0.0374,
"step": 1380
},
{
"epoch": 0.22,
"grad_norm": 0.5338143706321716,
"learning_rate": 1.727662616154396e-05,
"loss": 0.0226,
"step": 1384
},
{
"epoch": 0.22,
"grad_norm": 0.5998290777206421,
"learning_rate": 1.7262330235882773e-05,
"loss": 0.0244,
"step": 1388
},
{
"epoch": 0.22,
"grad_norm": 0.5274918079376221,
"learning_rate": 1.724803431022159e-05,
"loss": 0.0267,
"step": 1392
},
{
"epoch": 0.22,
"grad_norm": 0.6132778525352478,
"learning_rate": 1.72337383845604e-05,
"loss": 0.0351,
"step": 1396
},
{
"epoch": 0.23,
"grad_norm": 0.7461394667625427,
"learning_rate": 1.7219442458899216e-05,
"loss": 0.0314,
"step": 1400
},
{
"epoch": 0.23,
"grad_norm": 0.7438216209411621,
"learning_rate": 1.720514653323803e-05,
"loss": 0.0261,
"step": 1404
},
{
"epoch": 0.23,
"grad_norm": 0.730912983417511,
"learning_rate": 1.719085060757684e-05,
"loss": 0.0229,
"step": 1408
},
{
"epoch": 0.23,
"grad_norm": 1.2940622568130493,
"learning_rate": 1.7176554681915656e-05,
"loss": 0.0273,
"step": 1412
},
{
"epoch": 0.23,
"grad_norm": 0.7200002670288086,
"learning_rate": 1.716225875625447e-05,
"loss": 0.0267,
"step": 1416
},
{
"epoch": 0.23,
"grad_norm": 1.3229494094848633,
"learning_rate": 1.7147962830593283e-05,
"loss": 0.0334,
"step": 1420
},
{
"epoch": 0.23,
"grad_norm": 0.7718178033828735,
"learning_rate": 1.7133666904932096e-05,
"loss": 0.0312,
"step": 1424
},
{
"epoch": 0.23,
"grad_norm": 0.6765419244766235,
"learning_rate": 1.711937097927091e-05,
"loss": 0.0236,
"step": 1428
},
{
"epoch": 0.23,
"grad_norm": 0.753416121006012,
"learning_rate": 1.7105075053609723e-05,
"loss": 0.0271,
"step": 1432
},
{
"epoch": 0.23,
"grad_norm": 0.6442211866378784,
"learning_rate": 1.7090779127948536e-05,
"loss": 0.0151,
"step": 1436
},
{
"epoch": 0.23,
"grad_norm": 0.5224249362945557,
"learning_rate": 1.707648320228735e-05,
"loss": 0.0355,
"step": 1440
},
{
"epoch": 0.23,
"grad_norm": 0.7751701474189758,
"learning_rate": 1.7062187276626163e-05,
"loss": 0.0251,
"step": 1444
},
{
"epoch": 0.23,
"grad_norm": 0.7004714012145996,
"learning_rate": 1.7047891350964976e-05,
"loss": 0.0171,
"step": 1448
},
{
"epoch": 0.23,
"grad_norm": 0.7819869518280029,
"learning_rate": 1.703359542530379e-05,
"loss": 0.0309,
"step": 1452
},
{
"epoch": 0.23,
"grad_norm": 0.5115138292312622,
"learning_rate": 1.7019299499642603e-05,
"loss": 0.0321,
"step": 1456
},
{
"epoch": 0.23,
"grad_norm": 0.3391636610031128,
"learning_rate": 1.7005003573981416e-05,
"loss": 0.0221,
"step": 1460
},
{
"epoch": 0.24,
"grad_norm": 1.2895567417144775,
"learning_rate": 1.699070764832023e-05,
"loss": 0.0317,
"step": 1464
},
{
"epoch": 0.24,
"grad_norm": 0.7947621941566467,
"learning_rate": 1.6976411722659043e-05,
"loss": 0.0266,
"step": 1468
},
{
"epoch": 0.24,
"grad_norm": 0.7865754961967468,
"learning_rate": 1.6962115796997856e-05,
"loss": 0.0421,
"step": 1472
},
{
"epoch": 0.24,
"grad_norm": 1.002314567565918,
"learning_rate": 1.694781987133667e-05,
"loss": 0.0308,
"step": 1476
},
{
"epoch": 0.24,
"grad_norm": 0.6675818562507629,
"learning_rate": 1.6933523945675483e-05,
"loss": 0.0202,
"step": 1480
},
{
"epoch": 0.24,
"grad_norm": 1.2045787572860718,
"learning_rate": 1.6919228020014296e-05,
"loss": 0.0387,
"step": 1484
},
{
"epoch": 0.24,
"grad_norm": 0.5665823817253113,
"learning_rate": 1.690493209435311e-05,
"loss": 0.0149,
"step": 1488
},
{
"epoch": 0.24,
"grad_norm": 0.5733370780944824,
"learning_rate": 1.6890636168691926e-05,
"loss": 0.024,
"step": 1492
},
{
"epoch": 0.24,
"grad_norm": 0.32628941535949707,
"learning_rate": 1.6876340243030736e-05,
"loss": 0.0181,
"step": 1496
},
{
"epoch": 0.24,
"grad_norm": 0.5624873638153076,
"learning_rate": 1.686204431736955e-05,
"loss": 0.0308,
"step": 1500
},
{
"epoch": 0.24,
"eval_loss": 0.02633051760494709,
"eval_pearson_cosine": 0.9510150119057015,
"eval_pearson_dot": 0.9497414425975934,
"eval_pearson_euclidean": 0.9338662221774623,
"eval_pearson_manhattan": 0.9334364652708103,
"eval_pearson_max": 0.9510150119057015,
"eval_runtime": 249.0704,
"eval_samples_per_second": 2.007,
"eval_spearman_cosine": 0.9551964302048428,
"eval_spearman_dot": 0.9547767163144516,
"eval_spearman_euclidean": 0.9485047200653796,
"eval_spearman_manhattan": 0.9472191302750829,
"eval_spearman_max": 0.9551964302048428,
"eval_steps_per_second": 2.007,
"step": 1500
},
{
"epoch": 0.24,
"grad_norm": 0.4453175663948059,
"learning_rate": 1.6847748391708366e-05,
"loss": 0.0251,
"step": 1504
},
{
"epoch": 0.24,
"grad_norm": 0.8831977844238281,
"learning_rate": 1.6833452466047176e-05,
"loss": 0.0253,
"step": 1508
},
{
"epoch": 0.24,
"grad_norm": 0.9551718831062317,
"learning_rate": 1.6819156540385993e-05,
"loss": 0.0451,
"step": 1512
},
{
"epoch": 0.24,
"grad_norm": 0.9147098064422607,
"learning_rate": 1.6804860614724806e-05,
"loss": 0.0233,
"step": 1516
},
{
"epoch": 0.24,
"grad_norm": 0.8430941104888916,
"learning_rate": 1.6790564689063616e-05,
"loss": 0.0224,
"step": 1520
},
{
"epoch": 0.25,
"grad_norm": 0.6627079844474792,
"learning_rate": 1.6776268763402433e-05,
"loss": 0.0221,
"step": 1524
},
{
"epoch": 0.25,
"grad_norm": 0.7855361104011536,
"learning_rate": 1.6761972837741246e-05,
"loss": 0.0307,
"step": 1528
},
{
"epoch": 0.25,
"grad_norm": 0.45924311876296997,
"learning_rate": 1.6747676912080056e-05,
"loss": 0.0257,
"step": 1532
},
{
"epoch": 0.25,
"grad_norm": 0.7786813378334045,
"learning_rate": 1.6733380986418873e-05,
"loss": 0.0345,
"step": 1536
},
{
"epoch": 0.25,
"grad_norm": 0.5647817254066467,
"learning_rate": 1.6719085060757686e-05,
"loss": 0.0181,
"step": 1540
},
{
"epoch": 0.25,
"grad_norm": 1.0387967824935913,
"learning_rate": 1.67047891350965e-05,
"loss": 0.0297,
"step": 1544
},
{
"epoch": 0.25,
"grad_norm": 0.5947245359420776,
"learning_rate": 1.6690493209435313e-05,
"loss": 0.0239,
"step": 1548
},
{
"epoch": 0.25,
"grad_norm": 0.5582525134086609,
"learning_rate": 1.6676197283774126e-05,
"loss": 0.0305,
"step": 1552
},
{
"epoch": 0.25,
"grad_norm": 0.664482057094574,
"learning_rate": 1.666190135811294e-05,
"loss": 0.0249,
"step": 1556
},
{
"epoch": 0.25,
"grad_norm": 0.9069839715957642,
"learning_rate": 1.6647605432451753e-05,
"loss": 0.0298,
"step": 1560
},
{
"epoch": 0.25,
"grad_norm": 0.8026562333106995,
"learning_rate": 1.6633309506790566e-05,
"loss": 0.0203,
"step": 1564
},
{
"epoch": 0.25,
"grad_norm": 0.6699361801147461,
"learning_rate": 1.661901358112938e-05,
"loss": 0.0312,
"step": 1568
},
{
"epoch": 0.25,
"grad_norm": 0.6312674283981323,
"learning_rate": 1.6604717655468193e-05,
"loss": 0.0204,
"step": 1572
},
{
"epoch": 0.25,
"grad_norm": 0.4308748245239258,
"learning_rate": 1.6590421729807006e-05,
"loss": 0.0209,
"step": 1576
},
{
"epoch": 0.25,
"grad_norm": 1.3553575277328491,
"learning_rate": 1.657612580414582e-05,
"loss": 0.0257,
"step": 1580
},
{
"epoch": 0.25,
"grad_norm": 1.4249012470245361,
"learning_rate": 1.6561829878484633e-05,
"loss": 0.0306,
"step": 1584
},
{
"epoch": 0.26,
"grad_norm": 0.4437320828437805,
"learning_rate": 1.6547533952823446e-05,
"loss": 0.017,
"step": 1588
},
{
"epoch": 0.26,
"grad_norm": 0.6957021951675415,
"learning_rate": 1.653323802716226e-05,
"loss": 0.0227,
"step": 1592
},
{
"epoch": 0.26,
"grad_norm": 0.5041042566299438,
"learning_rate": 1.6518942101501073e-05,
"loss": 0.0228,
"step": 1596
},
{
"epoch": 0.26,
"grad_norm": 0.6849528551101685,
"learning_rate": 1.6504646175839886e-05,
"loss": 0.0205,
"step": 1600
},
{
"epoch": 0.26,
"grad_norm": 0.8483012318611145,
"learning_rate": 1.64903502501787e-05,
"loss": 0.0336,
"step": 1604
},
{
"epoch": 0.26,
"grad_norm": 0.9397591352462769,
"learning_rate": 1.6476054324517513e-05,
"loss": 0.029,
"step": 1608
},
{
"epoch": 0.26,
"grad_norm": 0.9127416014671326,
"learning_rate": 1.6461758398856326e-05,
"loss": 0.0225,
"step": 1612
},
{
"epoch": 0.26,
"grad_norm": 0.8871793150901794,
"learning_rate": 1.6447462473195143e-05,
"loss": 0.0185,
"step": 1616
},
{
"epoch": 0.26,
"grad_norm": 0.7608364224433899,
"learning_rate": 1.6433166547533953e-05,
"loss": 0.0273,
"step": 1620
},
{
"epoch": 0.26,
"grad_norm": 0.778032660484314,
"learning_rate": 1.6418870621872766e-05,
"loss": 0.02,
"step": 1624
},
{
"epoch": 0.26,
"grad_norm": 0.6790982484817505,
"learning_rate": 1.6404574696211583e-05,
"loss": 0.025,
"step": 1628
},
{
"epoch": 0.26,
"grad_norm": 0.4867960810661316,
"learning_rate": 1.6390278770550393e-05,
"loss": 0.0175,
"step": 1632
},
{
"epoch": 0.26,
"grad_norm": 0.5005691647529602,
"learning_rate": 1.637598284488921e-05,
"loss": 0.0329,
"step": 1636
},
{
"epoch": 0.26,
"grad_norm": 0.7641982436180115,
"learning_rate": 1.6361686919228023e-05,
"loss": 0.0312,
"step": 1640
},
{
"epoch": 0.26,
"grad_norm": 1.112260103225708,
"learning_rate": 1.6347390993566833e-05,
"loss": 0.0289,
"step": 1644
},
{
"epoch": 0.27,
"grad_norm": 0.8543418049812317,
"learning_rate": 1.633309506790565e-05,
"loss": 0.0205,
"step": 1648
},
{
"epoch": 0.27,
"grad_norm": 0.5461686849594116,
"learning_rate": 1.6318799142244463e-05,
"loss": 0.0155,
"step": 1652
},
{
"epoch": 0.27,
"grad_norm": 0.5375934839248657,
"learning_rate": 1.6304503216583276e-05,
"loss": 0.0237,
"step": 1656
},
{
"epoch": 0.27,
"grad_norm": 0.6225507855415344,
"learning_rate": 1.629020729092209e-05,
"loss": 0.031,
"step": 1660
},
{
"epoch": 0.27,
"grad_norm": 1.2959158420562744,
"learning_rate": 1.62759113652609e-05,
"loss": 0.0315,
"step": 1664
},
{
"epoch": 0.27,
"grad_norm": 0.8647311329841614,
"learning_rate": 1.6261615439599716e-05,
"loss": 0.0204,
"step": 1668
},
{
"epoch": 0.27,
"grad_norm": 0.5671218633651733,
"learning_rate": 1.624731951393853e-05,
"loss": 0.0182,
"step": 1672
},
{
"epoch": 0.27,
"grad_norm": 0.5024406313896179,
"learning_rate": 1.623302358827734e-05,
"loss": 0.0257,
"step": 1676
},
{
"epoch": 0.27,
"grad_norm": 0.4015944004058838,
"learning_rate": 1.6218727662616156e-05,
"loss": 0.023,
"step": 1680
},
{
"epoch": 0.27,
"grad_norm": 0.5640401840209961,
"learning_rate": 1.620443173695497e-05,
"loss": 0.0179,
"step": 1684
},
{
"epoch": 0.27,
"grad_norm": 0.7147939205169678,
"learning_rate": 1.6190135811293783e-05,
"loss": 0.0176,
"step": 1688
},
{
"epoch": 0.27,
"grad_norm": 0.8704376816749573,
"learning_rate": 1.6175839885632596e-05,
"loss": 0.02,
"step": 1692
},
{
"epoch": 0.27,
"grad_norm": 0.7839465737342834,
"learning_rate": 1.616154395997141e-05,
"loss": 0.0209,
"step": 1696
},
{
"epoch": 0.27,
"grad_norm": 0.4096917510032654,
"learning_rate": 1.6147248034310223e-05,
"loss": 0.0204,
"step": 1700
},
{
"epoch": 0.27,
"grad_norm": 0.5869227051734924,
"learning_rate": 1.6132952108649036e-05,
"loss": 0.0198,
"step": 1704
},
{
"epoch": 0.27,
"grad_norm": 0.6609891057014465,
"learning_rate": 1.611865618298785e-05,
"loss": 0.0251,
"step": 1708
},
{
"epoch": 0.28,
"grad_norm": 0.5184406042098999,
"learning_rate": 1.6104360257326663e-05,
"loss": 0.0234,
"step": 1712
},
{
"epoch": 0.28,
"grad_norm": 0.4317310154438019,
"learning_rate": 1.6090064331665476e-05,
"loss": 0.0263,
"step": 1716
},
{
"epoch": 0.28,
"grad_norm": 0.5459049940109253,
"learning_rate": 1.607576840600429e-05,
"loss": 0.0243,
"step": 1720
},
{
"epoch": 0.28,
"grad_norm": 0.7071524858474731,
"learning_rate": 1.6061472480343103e-05,
"loss": 0.0235,
"step": 1724
},
{
"epoch": 0.28,
"grad_norm": 0.8446561694145203,
"learning_rate": 1.604717655468192e-05,
"loss": 0.0206,
"step": 1728
},
{
"epoch": 0.28,
"grad_norm": 0.6042218804359436,
"learning_rate": 1.603288062902073e-05,
"loss": 0.0332,
"step": 1732
},
{
"epoch": 0.28,
"grad_norm": 0.5404290556907654,
"learning_rate": 1.6018584703359543e-05,
"loss": 0.0198,
"step": 1736
},
{
"epoch": 0.28,
"grad_norm": 0.3870123326778412,
"learning_rate": 1.6004288777698356e-05,
"loss": 0.0216,
"step": 1740
},
{
"epoch": 0.28,
"grad_norm": 0.803578794002533,
"learning_rate": 1.598999285203717e-05,
"loss": 0.0248,
"step": 1744
},
{
"epoch": 0.28,
"grad_norm": 0.7153476476669312,
"learning_rate": 1.5975696926375983e-05,
"loss": 0.0237,
"step": 1748
},
{
"epoch": 0.28,
"grad_norm": 1.2977997064590454,
"learning_rate": 1.5961401000714796e-05,
"loss": 0.037,
"step": 1752
},
{
"epoch": 0.28,
"grad_norm": 0.7146950364112854,
"learning_rate": 1.594710507505361e-05,
"loss": 0.02,
"step": 1756
},
{
"epoch": 0.28,
"grad_norm": 1.2458608150482178,
"learning_rate": 1.5932809149392426e-05,
"loss": 0.0396,
"step": 1760
},
{
"epoch": 0.28,
"grad_norm": 1.0099600553512573,
"learning_rate": 1.5918513223731236e-05,
"loss": 0.0244,
"step": 1764
},
{
"epoch": 0.28,
"grad_norm": 0.8322357535362244,
"learning_rate": 1.590421729807005e-05,
"loss": 0.0224,
"step": 1768
},
{
"epoch": 0.28,
"grad_norm": 0.7080321907997131,
"learning_rate": 1.5889921372408866e-05,
"loss": 0.0294,
"step": 1772
},
{
"epoch": 0.29,
"grad_norm": 0.5599932670593262,
"learning_rate": 1.5875625446747676e-05,
"loss": 0.0194,
"step": 1776
},
{
"epoch": 0.29,
"grad_norm": 0.4317460060119629,
"learning_rate": 1.5861329521086493e-05,
"loss": 0.0144,
"step": 1780
},
{
"epoch": 0.29,
"grad_norm": 0.8583347797393799,
"learning_rate": 1.5847033595425306e-05,
"loss": 0.022,
"step": 1784
},
{
"epoch": 0.29,
"grad_norm": 0.6212904453277588,
"learning_rate": 1.5832737669764116e-05,
"loss": 0.0235,
"step": 1788
},
{
"epoch": 0.29,
"grad_norm": 0.6250016689300537,
"learning_rate": 1.5818441744102933e-05,
"loss": 0.024,
"step": 1792
},
{
"epoch": 0.29,
"grad_norm": 0.7036072611808777,
"learning_rate": 1.5804145818441746e-05,
"loss": 0.0155,
"step": 1796
},
{
"epoch": 0.29,
"grad_norm": 0.7857645750045776,
"learning_rate": 1.578984989278056e-05,
"loss": 0.0177,
"step": 1800
},
{
"epoch": 0.29,
"eval_loss": 0.022910235449671745,
"eval_pearson_cosine": 0.9571668454101061,
"eval_pearson_dot": 0.9545933082183199,
"eval_pearson_euclidean": 0.9493360141559007,
"eval_pearson_manhattan": 0.9494330807780875,
"eval_pearson_max": 0.9571668454101061,
"eval_runtime": 248.8356,
"eval_samples_per_second": 2.009,
"eval_spearman_cosine": 0.9652652915549957,
"eval_spearman_dot": 0.9626083355880656,
"eval_spearman_euclidean": 0.9635709799238159,
"eval_spearman_manhattan": 0.9633432188979801,
"eval_spearman_max": 0.9652652915549957,
"eval_steps_per_second": 2.009,
"step": 1800
}
],
"logging_steps": 4,
"max_steps": 6218,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}