|
{ |
|
"best_metric": 0.9627865603907902, |
|
"best_model_checkpoint": "checkpoints/BEE-spoke-data-bert-plus-L8-v1.0-allNLI_matryoshka-synthetic-text-similarity-Mar-21_10-51/checkpoint-1200", |
|
"epoch": 0.2733668341708543, |
|
"eval_steps": 300, |
|
"global_step": 1700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.4826388359069824, |
|
"learning_rate": 1.2861736334405146e-07, |
|
"loss": 0.0905, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.3191146850585938, |
|
"learning_rate": 2.572347266881029e-07, |
|
"loss": 0.1138, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.398422956466675, |
|
"learning_rate": 3.858520900321544e-07, |
|
"loss": 0.1055, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.034379720687866, |
|
"learning_rate": 5.144694533762058e-07, |
|
"loss": 0.1113, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 5.541696071624756, |
|
"learning_rate": 6.430868167202573e-07, |
|
"loss": 0.0875, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.2232377529144287, |
|
"learning_rate": 7.717041800643088e-07, |
|
"loss": 0.1163, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.692831039428711, |
|
"learning_rate": 9.003215434083602e-07, |
|
"loss": 0.0746, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.9550053477287292, |
|
"learning_rate": 1.0289389067524116e-06, |
|
"loss": 0.0697, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.5329389572143555, |
|
"learning_rate": 1.157556270096463e-06, |
|
"loss": 0.0716, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.0605404376983643, |
|
"learning_rate": 1.2861736334405146e-06, |
|
"loss": 0.0478, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.4411487579345703, |
|
"learning_rate": 1.4147909967845661e-06, |
|
"loss": 0.0971, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.9422132968902588, |
|
"learning_rate": 1.5434083601286177e-06, |
|
"loss": 0.0567, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.2038466930389404, |
|
"learning_rate": 1.6720257234726688e-06, |
|
"loss": 0.0477, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.7133512496948242, |
|
"learning_rate": 1.8006430868167204e-06, |
|
"loss": 0.0528, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.754583716392517, |
|
"learning_rate": 1.9292604501607717e-06, |
|
"loss": 0.048, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.8963572382926941, |
|
"learning_rate": 2.0578778135048233e-06, |
|
"loss": 0.0508, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.0214054584503174, |
|
"learning_rate": 2.186495176848875e-06, |
|
"loss": 0.0485, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.9729621410369873, |
|
"learning_rate": 2.315112540192926e-06, |
|
"loss": 0.051, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.5615208148956299, |
|
"learning_rate": 2.4437299035369775e-06, |
|
"loss": 0.0604, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.6368948221206665, |
|
"learning_rate": 2.572347266881029e-06, |
|
"loss": 0.0442, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.133140802383423, |
|
"learning_rate": 2.7009646302250807e-06, |
|
"loss": 0.0302, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.2055103778839111, |
|
"learning_rate": 2.8295819935691322e-06, |
|
"loss": 0.0432, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.5411338806152344, |
|
"learning_rate": 2.9581993569131834e-06, |
|
"loss": 0.0319, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.75223708152771, |
|
"learning_rate": 3.0868167202572353e-06, |
|
"loss": 0.0364, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.4433358907699585, |
|
"learning_rate": 3.2154340836012865e-06, |
|
"loss": 0.0661, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.5212054252624512, |
|
"learning_rate": 3.3440514469453376e-06, |
|
"loss": 0.0609, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.2315409183502197, |
|
"learning_rate": 3.4726688102893896e-06, |
|
"loss": 0.0546, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.073073148727417, |
|
"learning_rate": 3.6012861736334407e-06, |
|
"loss": 0.0686, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.4473097324371338, |
|
"learning_rate": 3.7299035369774923e-06, |
|
"loss": 0.0363, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.4474904537200928, |
|
"learning_rate": 3.8585209003215434e-06, |
|
"loss": 0.0425, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.410780429840088, |
|
"learning_rate": 3.987138263665595e-06, |
|
"loss": 0.057, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.380175232887268, |
|
"learning_rate": 4.1157556270096466e-06, |
|
"loss": 0.045, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.0906972885131836, |
|
"learning_rate": 4.244372990353698e-06, |
|
"loss": 0.0338, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.6216851472854614, |
|
"learning_rate": 4.37299035369775e-06, |
|
"loss": 0.036, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.806192934513092, |
|
"learning_rate": 4.501607717041801e-06, |
|
"loss": 0.037, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.959051251411438, |
|
"learning_rate": 4.630225080385852e-06, |
|
"loss": 0.0473, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.4807804226875305, |
|
"learning_rate": 4.758842443729904e-06, |
|
"loss": 0.022, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.982123613357544, |
|
"learning_rate": 4.887459807073955e-06, |
|
"loss": 0.04, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.4609640836715698, |
|
"learning_rate": 5.016077170418007e-06, |
|
"loss": 0.0266, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.4000838994979858, |
|
"learning_rate": 5.144694533762058e-06, |
|
"loss": 0.0211, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.828529953956604, |
|
"learning_rate": 5.273311897106109e-06, |
|
"loss": 0.0239, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.8723558187484741, |
|
"learning_rate": 5.401929260450161e-06, |
|
"loss": 0.0273, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.4310917854309082, |
|
"learning_rate": 5.530546623794213e-06, |
|
"loss": 0.0383, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.0429316759109497, |
|
"learning_rate": 5.6591639871382644e-06, |
|
"loss": 0.0348, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.7097664475440979, |
|
"learning_rate": 5.787781350482315e-06, |
|
"loss": 0.0199, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.269923448562622, |
|
"learning_rate": 5.916398713826367e-06, |
|
"loss": 0.0274, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.1587966680526733, |
|
"learning_rate": 6.045016077170418e-06, |
|
"loss": 0.0347, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.1199533939361572, |
|
"learning_rate": 6.173633440514471e-06, |
|
"loss": 0.0294, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.9978614449501038, |
|
"learning_rate": 6.302250803858521e-06, |
|
"loss": 0.0237, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.0913878679275513, |
|
"learning_rate": 6.430868167202573e-06, |
|
"loss": 0.039, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.7430211901664734, |
|
"learning_rate": 6.5594855305466245e-06, |
|
"loss": 0.0328, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.597890019416809, |
|
"learning_rate": 6.688102893890675e-06, |
|
"loss": 0.033, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.8224192261695862, |
|
"learning_rate": 6.816720257234727e-06, |
|
"loss": 0.0207, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.1937623023986816, |
|
"learning_rate": 6.945337620578779e-06, |
|
"loss": 0.0341, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.337793231010437, |
|
"learning_rate": 7.073954983922831e-06, |
|
"loss": 0.0359, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.3222864866256714, |
|
"learning_rate": 7.2025723472668815e-06, |
|
"loss": 0.0341, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.6585116386413574, |
|
"learning_rate": 7.331189710610933e-06, |
|
"loss": 0.0319, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.9420249462127686, |
|
"learning_rate": 7.459807073954985e-06, |
|
"loss": 0.0458, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.1558046340942383, |
|
"learning_rate": 7.588424437299035e-06, |
|
"loss": 0.0309, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.778165340423584, |
|
"learning_rate": 7.717041800643087e-06, |
|
"loss": 0.0532, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.3744961023330688, |
|
"learning_rate": 7.84565916398714e-06, |
|
"loss": 0.0306, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.4764065742492676, |
|
"learning_rate": 7.97427652733119e-06, |
|
"loss": 0.0398, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.0220906734466553, |
|
"learning_rate": 8.102893890675242e-06, |
|
"loss": 0.0552, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 3.4686410427093506, |
|
"learning_rate": 8.231511254019293e-06, |
|
"loss": 0.0599, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.0917580127716064, |
|
"learning_rate": 8.360128617363345e-06, |
|
"loss": 0.036, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.3227124214172363, |
|
"learning_rate": 8.488745980707396e-06, |
|
"loss": 0.0337, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.853574275970459, |
|
"learning_rate": 8.617363344051448e-06, |
|
"loss": 0.0356, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.250444769859314, |
|
"learning_rate": 8.7459807073955e-06, |
|
"loss": 0.0625, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.1477417945861816, |
|
"learning_rate": 8.874598070739551e-06, |
|
"loss": 0.0325, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.7810678482055664, |
|
"learning_rate": 9.003215434083602e-06, |
|
"loss": 0.028, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.5006675720214844, |
|
"learning_rate": 9.131832797427654e-06, |
|
"loss": 0.046, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.8945916295051575, |
|
"learning_rate": 9.260450160771704e-06, |
|
"loss": 0.0318, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.4942963123321533, |
|
"learning_rate": 9.389067524115757e-06, |
|
"loss": 0.0254, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.9653424620628357, |
|
"learning_rate": 9.517684887459809e-06, |
|
"loss": 0.0286, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.740561604499817, |
|
"learning_rate": 9.64630225080386e-06, |
|
"loss": 0.032, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 0.03372881934046745, |
|
"eval_pearson_cosine": 0.9332293835600463, |
|
"eval_pearson_dot": 0.9301330880973007, |
|
"eval_pearson_euclidean": 0.9260017674080668, |
|
"eval_pearson_manhattan": 0.9271288221546901, |
|
"eval_pearson_max": 0.9332293835600463, |
|
"eval_runtime": 249.5828, |
|
"eval_samples_per_second": 2.003, |
|
"eval_spearman_cosine": 0.947022185388043, |
|
"eval_spearman_dot": 0.9435553217732889, |
|
"eval_spearman_euclidean": 0.940413083620648, |
|
"eval_spearman_manhattan": 0.9413909920251474, |
|
"eval_spearman_max": 0.947022185388043, |
|
"eval_steps_per_second": 2.003, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.6091077923774719, |
|
"learning_rate": 9.77491961414791e-06, |
|
"loss": 0.0197, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.2511804103851318, |
|
"learning_rate": 9.903536977491962e-06, |
|
"loss": 0.0306, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.3721988201141357, |
|
"learning_rate": 1.0032154340836013e-05, |
|
"loss": 0.0187, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.7939477562904358, |
|
"learning_rate": 1.0160771704180067e-05, |
|
"loss": 0.0318, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.6942979097366333, |
|
"learning_rate": 1.0289389067524116e-05, |
|
"loss": 0.0264, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.3466440439224243, |
|
"learning_rate": 1.0418006430868168e-05, |
|
"loss": 0.0316, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.7388295531272888, |
|
"learning_rate": 1.0546623794212218e-05, |
|
"loss": 0.0322, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.094037413597107, |
|
"learning_rate": 1.0675241157556271e-05, |
|
"loss": 0.029, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.9884235262870789, |
|
"learning_rate": 1.0803858520900323e-05, |
|
"loss": 0.0424, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.2799253463745117, |
|
"learning_rate": 1.0932475884244374e-05, |
|
"loss": 0.0221, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.830769419670105, |
|
"learning_rate": 1.1061093247588426e-05, |
|
"loss": 0.0255, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.4065282344818115, |
|
"learning_rate": 1.1189710610932476e-05, |
|
"loss": 0.0257, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.4823110103607178, |
|
"learning_rate": 1.1318327974276529e-05, |
|
"loss": 0.0277, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.5865018367767334, |
|
"learning_rate": 1.144694533762058e-05, |
|
"loss": 0.0272, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.3011387586593628, |
|
"learning_rate": 1.157556270096463e-05, |
|
"loss": 0.0249, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.0222949981689453, |
|
"learning_rate": 1.1704180064308684e-05, |
|
"loss": 0.025, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.650606393814087, |
|
"learning_rate": 1.1832797427652733e-05, |
|
"loss": 0.0427, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 7.919157981872559, |
|
"learning_rate": 1.1961414790996787e-05, |
|
"loss": 0.0432, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.2726035118103027, |
|
"learning_rate": 1.2090032154340837e-05, |
|
"loss": 0.0244, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.1458909511566162, |
|
"learning_rate": 1.2218649517684888e-05, |
|
"loss": 0.026, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.4866142272949219, |
|
"learning_rate": 1.2347266881028941e-05, |
|
"loss": 0.0315, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.7649275064468384, |
|
"learning_rate": 1.2475884244372991e-05, |
|
"loss": 0.0253, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.5921647548675537, |
|
"learning_rate": 1.2604501607717043e-05, |
|
"loss": 0.0229, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.977344810962677, |
|
"learning_rate": 1.2733118971061094e-05, |
|
"loss": 0.0225, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.8201347589492798, |
|
"learning_rate": 1.2861736334405146e-05, |
|
"loss": 0.0219, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.8397660255432129, |
|
"learning_rate": 1.2990353697749196e-05, |
|
"loss": 0.0281, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.5883716940879822, |
|
"learning_rate": 1.3118971061093249e-05, |
|
"loss": 0.0256, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.352879524230957, |
|
"learning_rate": 1.32475884244373e-05, |
|
"loss": 0.0323, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.8051841259002686, |
|
"learning_rate": 1.337620578778135e-05, |
|
"loss": 0.0389, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.8029029965400696, |
|
"learning_rate": 1.3504823151125404e-05, |
|
"loss": 0.0198, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.1253607273101807, |
|
"learning_rate": 1.3633440514469454e-05, |
|
"loss": 0.0303, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.313376784324646, |
|
"learning_rate": 1.3762057877813507e-05, |
|
"loss": 0.0415, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.5886867046356201, |
|
"learning_rate": 1.3890675241157558e-05, |
|
"loss": 0.028, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.150387167930603, |
|
"learning_rate": 1.4019292604501608e-05, |
|
"loss": 0.0317, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.9141702651977539, |
|
"learning_rate": 1.4147909967845662e-05, |
|
"loss": 0.0375, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.5728639364242554, |
|
"learning_rate": 1.4276527331189711e-05, |
|
"loss": 0.035, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.9280940294265747, |
|
"learning_rate": 1.4405144694533763e-05, |
|
"loss": 0.0254, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.4746482372283936, |
|
"learning_rate": 1.4533762057877815e-05, |
|
"loss": 0.0449, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.9735682606697083, |
|
"learning_rate": 1.4662379421221866e-05, |
|
"loss": 0.0305, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.2252638339996338, |
|
"learning_rate": 1.479099678456592e-05, |
|
"loss": 0.0286, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.308204412460327, |
|
"learning_rate": 1.491961414790997e-05, |
|
"loss": 0.0376, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.1166713237762451, |
|
"learning_rate": 1.504823151125402e-05, |
|
"loss": 0.0309, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.1420267820358276, |
|
"learning_rate": 1.517684887459807e-05, |
|
"loss": 0.0243, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.7457785606384277, |
|
"learning_rate": 1.5305466237942124e-05, |
|
"loss": 0.0358, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.1351250410079956, |
|
"learning_rate": 1.5434083601286174e-05, |
|
"loss": 0.0381, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.669029951095581, |
|
"learning_rate": 1.5562700964630227e-05, |
|
"loss": 0.0233, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.1537421941757202, |
|
"learning_rate": 1.569131832797428e-05, |
|
"loss": 0.0319, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.1718066930770874, |
|
"learning_rate": 1.581993569131833e-05, |
|
"loss": 0.0366, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.6619621515274048, |
|
"learning_rate": 1.594855305466238e-05, |
|
"loss": 0.0433, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.9330528378486633, |
|
"learning_rate": 1.607717041800643e-05, |
|
"loss": 0.0336, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.4244128465652466, |
|
"learning_rate": 1.6205787781350483e-05, |
|
"loss": 0.0279, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.719264566898346, |
|
"learning_rate": 1.6334405144694536e-05, |
|
"loss": 0.026, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5247528553009033, |
|
"learning_rate": 1.6463022508038586e-05, |
|
"loss": 0.0204, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.9459376335144043, |
|
"learning_rate": 1.659163987138264e-05, |
|
"loss": 0.0457, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.6106892824172974, |
|
"learning_rate": 1.672025723472669e-05, |
|
"loss": 0.024, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.9702305793762207, |
|
"learning_rate": 1.6848874598070743e-05, |
|
"loss": 0.0424, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.8634403347969055, |
|
"learning_rate": 1.6977491961414792e-05, |
|
"loss": 0.0377, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.0177710056304932, |
|
"learning_rate": 1.7106109324758842e-05, |
|
"loss": 0.0299, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.7187432646751404, |
|
"learning_rate": 1.7234726688102896e-05, |
|
"loss": 0.03, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.3794456720352173, |
|
"learning_rate": 1.7363344051446945e-05, |
|
"loss": 0.0461, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.011230707168579, |
|
"learning_rate": 1.7491961414791e-05, |
|
"loss": 0.0423, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.9847524166107178, |
|
"learning_rate": 1.762057877813505e-05, |
|
"loss": 0.0379, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.4435635805130005, |
|
"learning_rate": 1.7749196141479102e-05, |
|
"loss": 0.0407, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.6206502318382263, |
|
"learning_rate": 1.7877813504823152e-05, |
|
"loss": 0.0293, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.6152936816215515, |
|
"learning_rate": 1.8006430868167205e-05, |
|
"loss": 0.0239, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.9495165944099426, |
|
"learning_rate": 1.8135048231511255e-05, |
|
"loss": 0.0279, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.1117522716522217, |
|
"learning_rate": 1.8263665594855308e-05, |
|
"loss": 0.0282, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.9599608182907104, |
|
"learning_rate": 1.8392282958199358e-05, |
|
"loss": 0.0241, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.665824294090271, |
|
"learning_rate": 1.8520900321543408e-05, |
|
"loss": 0.032, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.9927297830581665, |
|
"learning_rate": 1.864951768488746e-05, |
|
"loss": 0.0386, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.8864579200744629, |
|
"learning_rate": 1.8778135048231514e-05, |
|
"loss": 0.0424, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.8934502601623535, |
|
"learning_rate": 1.8906752411575564e-05, |
|
"loss": 0.0303, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.7153275609016418, |
|
"learning_rate": 1.9035369774919617e-05, |
|
"loss": 0.0351, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.8853140473365784, |
|
"learning_rate": 1.9163987138263667e-05, |
|
"loss": 0.0251, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.535198450088501, |
|
"learning_rate": 1.929260450160772e-05, |
|
"loss": 0.039, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 0.03215770050883293, |
|
"eval_pearson_cosine": 0.9396208600788933, |
|
"eval_pearson_dot": 0.9361396977412548, |
|
"eval_pearson_euclidean": 0.9366702713808518, |
|
"eval_pearson_manhattan": 0.935916312626658, |
|
"eval_pearson_max": 0.9396208600788933, |
|
"eval_runtime": 249.4161, |
|
"eval_samples_per_second": 2.005, |
|
"eval_spearman_cosine": 0.9531478289779426, |
|
"eval_spearman_dot": 0.9484099676386145, |
|
"eval_spearman_euclidean": 0.9506823938736226, |
|
"eval_spearman_manhattan": 0.9494599723678361, |
|
"eval_spearman_max": 0.9531478289779426, |
|
"eval_steps_per_second": 2.005, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.6895681023597717, |
|
"learning_rate": 1.942122186495177e-05, |
|
"loss": 0.0243, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.0323344469070435, |
|
"learning_rate": 1.954983922829582e-05, |
|
"loss": 0.0366, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.6633996367454529, |
|
"learning_rate": 1.9678456591639874e-05, |
|
"loss": 0.0472, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.3444079160690308, |
|
"learning_rate": 1.9807073954983923e-05, |
|
"loss": 0.0323, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.9151845574378967, |
|
"learning_rate": 1.9935691318327977e-05, |
|
"loss": 0.0263, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.1424955129623413, |
|
"learning_rate": 1.9992852037169407e-05, |
|
"loss": 0.0333, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.0085678100585938, |
|
"learning_rate": 1.9978556111508223e-05, |
|
"loss": 0.0375, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.071134328842163, |
|
"learning_rate": 1.9964260185847037e-05, |
|
"loss": 0.0299, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.2681633234024048, |
|
"learning_rate": 1.9949964260185847e-05, |
|
"loss": 0.0317, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.9661902189254761, |
|
"learning_rate": 1.9935668334524663e-05, |
|
"loss": 0.0358, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.77792489528656, |
|
"learning_rate": 1.9921372408863477e-05, |
|
"loss": 0.0318, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4316706955432892, |
|
"learning_rate": 1.990707648320229e-05, |
|
"loss": 0.0288, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.73268061876297, |
|
"learning_rate": 1.9892780557541103e-05, |
|
"loss": 0.0373, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.6720423698425293, |
|
"learning_rate": 1.9878484631879917e-05, |
|
"loss": 0.0478, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.5472896695137024, |
|
"learning_rate": 1.986418870621873e-05, |
|
"loss": 0.0169, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.016210675239563, |
|
"learning_rate": 1.9849892780557543e-05, |
|
"loss": 0.0279, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.6803948879241943, |
|
"learning_rate": 1.9835596854896357e-05, |
|
"loss": 0.0323, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.570452868938446, |
|
"learning_rate": 1.982130092923517e-05, |
|
"loss": 0.0189, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.1386935710906982, |
|
"learning_rate": 1.9807005003573983e-05, |
|
"loss": 0.0253, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.8023974895477295, |
|
"learning_rate": 1.9792709077912797e-05, |
|
"loss": 0.0251, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.1469191312789917, |
|
"learning_rate": 1.977841315225161e-05, |
|
"loss": 0.0247, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.133340835571289, |
|
"learning_rate": 1.9764117226590423e-05, |
|
"loss": 0.0363, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.3125261068344116, |
|
"learning_rate": 1.9749821300929237e-05, |
|
"loss": 0.022, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.689136266708374, |
|
"learning_rate": 1.973552537526805e-05, |
|
"loss": 0.0332, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.9812389016151428, |
|
"learning_rate": 1.9721229449606863e-05, |
|
"loss": 0.0309, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.1285403966903687, |
|
"learning_rate": 1.9706933523945677e-05, |
|
"loss": 0.0295, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.7089455723762512, |
|
"learning_rate": 1.969263759828449e-05, |
|
"loss": 0.0253, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.8068435788154602, |
|
"learning_rate": 1.9678341672623303e-05, |
|
"loss": 0.0239, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.2158197164535522, |
|
"learning_rate": 1.9664045746962117e-05, |
|
"loss": 0.0348, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.1878087520599365, |
|
"learning_rate": 1.9649749821300933e-05, |
|
"loss": 0.0352, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.019784927368164, |
|
"learning_rate": 1.9635453895639743e-05, |
|
"loss": 0.0468, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.391402244567871, |
|
"learning_rate": 1.9621157969978557e-05, |
|
"loss": 0.0243, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.5212937593460083, |
|
"learning_rate": 1.9606862044317373e-05, |
|
"loss": 0.0454, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.7201647758483887, |
|
"learning_rate": 1.9592566118656183e-05, |
|
"loss": 0.0436, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.0111092329025269, |
|
"learning_rate": 1.9578270192994997e-05, |
|
"loss": 0.0245, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.8695981502532959, |
|
"learning_rate": 1.9563974267333813e-05, |
|
"loss": 0.0369, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.9623269438743591, |
|
"learning_rate": 1.9549678341672623e-05, |
|
"loss": 0.0222, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.6350664496421814, |
|
"learning_rate": 1.953538241601144e-05, |
|
"loss": 0.0211, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.9464645981788635, |
|
"learning_rate": 1.9521086490350253e-05, |
|
"loss": 0.0416, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.5733113288879395, |
|
"learning_rate": 1.9506790564689063e-05, |
|
"loss": 0.0217, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.8460751175880432, |
|
"learning_rate": 1.949249463902788e-05, |
|
"loss": 0.0318, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.7062273025512695, |
|
"learning_rate": 1.9478198713366693e-05, |
|
"loss": 0.0428, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.8697042465209961, |
|
"learning_rate": 1.9463902787705507e-05, |
|
"loss": 0.0414, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.0291727781295776, |
|
"learning_rate": 1.944960686204432e-05, |
|
"loss": 0.0288, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.8506454229354858, |
|
"learning_rate": 1.9435310936383133e-05, |
|
"loss": 0.047, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.9701406955718994, |
|
"learning_rate": 1.9421015010721947e-05, |
|
"loss": 0.0276, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.1087899208068848, |
|
"learning_rate": 1.940671908506076e-05, |
|
"loss": 0.0448, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.8718247413635254, |
|
"learning_rate": 1.9392423159399573e-05, |
|
"loss": 0.0241, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.3904443979263306, |
|
"learning_rate": 1.9378127233738387e-05, |
|
"loss": 0.0451, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.7410137057304382, |
|
"learning_rate": 1.93638313080772e-05, |
|
"loss": 0.0244, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.9150621294975281, |
|
"learning_rate": 1.9349535382416013e-05, |
|
"loss": 0.0272, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.7707653045654297, |
|
"learning_rate": 1.9335239456754827e-05, |
|
"loss": 0.036, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.9467148184776306, |
|
"learning_rate": 1.932094353109364e-05, |
|
"loss": 0.0363, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.2180854082107544, |
|
"learning_rate": 1.9306647605432453e-05, |
|
"loss": 0.039, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.9589481949806213, |
|
"learning_rate": 1.9292351679771267e-05, |
|
"loss": 0.0346, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.086827039718628, |
|
"learning_rate": 1.927805575411008e-05, |
|
"loss": 0.0352, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.0733484029769897, |
|
"learning_rate": 1.9263759828448893e-05, |
|
"loss": 0.0308, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.7703049182891846, |
|
"learning_rate": 1.9249463902787707e-05, |
|
"loss": 0.0271, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.1060117483139038, |
|
"learning_rate": 1.923516797712652e-05, |
|
"loss": 0.0281, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.7313349843025208, |
|
"learning_rate": 1.9220872051465333e-05, |
|
"loss": 0.0245, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.7229084372520447, |
|
"learning_rate": 1.920657612580415e-05, |
|
"loss": 0.0249, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.4665247201919556, |
|
"learning_rate": 1.919228020014296e-05, |
|
"loss": 0.0412, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.8117924928665161, |
|
"learning_rate": 1.9177984274481773e-05, |
|
"loss": 0.0391, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.9442553520202637, |
|
"learning_rate": 1.916368834882059e-05, |
|
"loss": 0.0328, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.7885982394218445, |
|
"learning_rate": 1.91493924231594e-05, |
|
"loss": 0.0182, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.2472141981124878, |
|
"learning_rate": 1.9135096497498217e-05, |
|
"loss": 0.0342, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.8535823225975037, |
|
"learning_rate": 1.9120800571837027e-05, |
|
"loss": 0.0223, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.84065842628479, |
|
"learning_rate": 1.910650464617584e-05, |
|
"loss": 0.0359, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.0987244844436646, |
|
"learning_rate": 1.9092208720514657e-05, |
|
"loss": 0.0343, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.7632778286933899, |
|
"learning_rate": 1.9077912794853467e-05, |
|
"loss": 0.0323, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.1485552787780762, |
|
"learning_rate": 1.906361686919228e-05, |
|
"loss": 0.0337, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.6315150260925293, |
|
"learning_rate": 1.9049320943531097e-05, |
|
"loss": 0.0515, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.0435465574264526, |
|
"learning_rate": 1.9035025017869907e-05, |
|
"loss": 0.0458, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.001620888710022, |
|
"learning_rate": 1.9020729092208723e-05, |
|
"loss": 0.0357, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.8570252060890198, |
|
"learning_rate": 1.9006433166547537e-05, |
|
"loss": 0.037, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 0.04232440143823624, |
|
"eval_pearson_cosine": 0.9451831743059702, |
|
"eval_pearson_dot": 0.9441977144080326, |
|
"eval_pearson_euclidean": 0.9401266241811703, |
|
"eval_pearson_manhattan": 0.940026238812949, |
|
"eval_pearson_max": 0.9451831743059702, |
|
"eval_runtime": 249.3889, |
|
"eval_samples_per_second": 2.005, |
|
"eval_spearman_cosine": 0.9564561958788276, |
|
"eval_spearman_dot": 0.9527831233353083, |
|
"eval_spearman_euclidean": 0.9533796700221547, |
|
"eval_spearman_manhattan": 0.9531501329883199, |
|
"eval_spearman_max": 0.9564561958788276, |
|
"eval_steps_per_second": 2.005, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.118642807006836, |
|
"learning_rate": 1.8992137240886347e-05, |
|
"loss": 0.0424, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.344506025314331, |
|
"learning_rate": 1.8977841315225163e-05, |
|
"loss": 0.03, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.7447184920310974, |
|
"learning_rate": 1.8963545389563977e-05, |
|
"loss": 0.0308, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.6138767004013062, |
|
"learning_rate": 1.894924946390279e-05, |
|
"loss": 0.0314, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.7356119751930237, |
|
"learning_rate": 1.8934953538241603e-05, |
|
"loss": 0.0283, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.263514518737793, |
|
"learning_rate": 1.8920657612580417e-05, |
|
"loss": 0.0293, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.6265131235122681, |
|
"learning_rate": 1.890636168691923e-05, |
|
"loss": 0.0183, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.7762928009033203, |
|
"learning_rate": 1.8892065761258043e-05, |
|
"loss": 0.0449, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.0329370498657227, |
|
"learning_rate": 1.8877769835596857e-05, |
|
"loss": 0.0397, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.2647181749343872, |
|
"learning_rate": 1.886347390993567e-05, |
|
"loss": 0.0329, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.7577453851699829, |
|
"learning_rate": 1.8849177984274483e-05, |
|
"loss": 0.043, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.1184148788452148, |
|
"learning_rate": 1.8834882058613297e-05, |
|
"loss": 0.0419, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.7198161482810974, |
|
"learning_rate": 1.882058613295211e-05, |
|
"loss": 0.0292, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.0045865774154663, |
|
"learning_rate": 1.8806290207290923e-05, |
|
"loss": 0.0333, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.5983513593673706, |
|
"learning_rate": 1.8791994281629737e-05, |
|
"loss": 0.022, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.0982924699783325, |
|
"learning_rate": 1.877769835596855e-05, |
|
"loss": 0.0264, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.6120471358299255, |
|
"learning_rate": 1.8763402430307363e-05, |
|
"loss": 0.0308, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.5346020460128784, |
|
"learning_rate": 1.8749106504646177e-05, |
|
"loss": 0.0355, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.42815306782722473, |
|
"learning_rate": 1.873481057898499e-05, |
|
"loss": 0.0236, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.7016882300376892, |
|
"learning_rate": 1.8720514653323803e-05, |
|
"loss": 0.0278, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.6955506801605225, |
|
"learning_rate": 1.8706218727662617e-05, |
|
"loss": 0.0208, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.7579104900360107, |
|
"learning_rate": 1.8691922802001433e-05, |
|
"loss": 0.0303, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.8700461983680725, |
|
"learning_rate": 1.8677626876340243e-05, |
|
"loss": 0.0212, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.532637357711792, |
|
"learning_rate": 1.8663330950679057e-05, |
|
"loss": 0.0499, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.3528228998184204, |
|
"learning_rate": 1.8649035025017873e-05, |
|
"loss": 0.0349, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.7273157238960266, |
|
"learning_rate": 1.8634739099356683e-05, |
|
"loss": 0.0202, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.952356219291687, |
|
"learning_rate": 1.86204431736955e-05, |
|
"loss": 0.0315, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.6654635071754456, |
|
"learning_rate": 1.8606147248034313e-05, |
|
"loss": 0.0248, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.578262984752655, |
|
"learning_rate": 1.8591851322373123e-05, |
|
"loss": 0.037, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.6199663877487183, |
|
"learning_rate": 1.857755539671194e-05, |
|
"loss": 0.0245, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.0458414554595947, |
|
"learning_rate": 1.8563259471050753e-05, |
|
"loss": 0.0264, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.5218886733055115, |
|
"learning_rate": 1.8548963545389563e-05, |
|
"loss": 0.0254, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.4931807518005371, |
|
"learning_rate": 1.853466761972838e-05, |
|
"loss": 0.0163, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.7079238891601562, |
|
"learning_rate": 1.8520371694067193e-05, |
|
"loss": 0.027, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.6107800006866455, |
|
"learning_rate": 1.8506075768406007e-05, |
|
"loss": 0.0353, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.5169730186462402, |
|
"learning_rate": 1.849177984274482e-05, |
|
"loss": 0.0353, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.6394426822662354, |
|
"learning_rate": 1.8477483917083633e-05, |
|
"loss": 0.0175, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.5369437336921692, |
|
"learning_rate": 1.8463187991422447e-05, |
|
"loss": 0.0456, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.259932041168213, |
|
"learning_rate": 1.844889206576126e-05, |
|
"loss": 0.0573, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.5754424333572388, |
|
"learning_rate": 1.8434596140100073e-05, |
|
"loss": 0.0208, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.48612886667251587, |
|
"learning_rate": 1.8420300214438887e-05, |
|
"loss": 0.0181, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.8483503460884094, |
|
"learning_rate": 1.84060042887777e-05, |
|
"loss": 0.0212, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.9669589996337891, |
|
"learning_rate": 1.8391708363116513e-05, |
|
"loss": 0.0355, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.6289377212524414, |
|
"learning_rate": 1.8377412437455326e-05, |
|
"loss": 0.0288, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.055770993232727, |
|
"learning_rate": 1.836311651179414e-05, |
|
"loss": 0.0321, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.5941286683082581, |
|
"learning_rate": 1.8348820586132953e-05, |
|
"loss": 0.0215, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.7000011801719666, |
|
"learning_rate": 1.8334524660471766e-05, |
|
"loss": 0.0295, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.5845941305160522, |
|
"learning_rate": 1.832022873481058e-05, |
|
"loss": 0.0257, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.1016316413879395, |
|
"learning_rate": 1.8305932809149393e-05, |
|
"loss": 0.0276, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.8285301327705383, |
|
"learning_rate": 1.8291636883488206e-05, |
|
"loss": 0.0241, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.8674764037132263, |
|
"learning_rate": 1.827734095782702e-05, |
|
"loss": 0.0274, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.654329240322113, |
|
"learning_rate": 1.8263045032165833e-05, |
|
"loss": 0.0213, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.6802071928977966, |
|
"learning_rate": 1.824874910650465e-05, |
|
"loss": 0.0321, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.7723608016967773, |
|
"learning_rate": 1.823445318084346e-05, |
|
"loss": 0.0289, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.9586684703826904, |
|
"learning_rate": 1.8220157255182273e-05, |
|
"loss": 0.0264, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.758579432964325, |
|
"learning_rate": 1.820586132952109e-05, |
|
"loss": 0.0278, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.0153886079788208, |
|
"learning_rate": 1.81915654038599e-05, |
|
"loss": 0.0305, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.1771838665008545, |
|
"learning_rate": 1.8177269478198717e-05, |
|
"loss": 0.0512, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.2490832805633545, |
|
"learning_rate": 1.816297355253753e-05, |
|
"loss": 0.0295, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.5716216564178467, |
|
"learning_rate": 1.814867762687634e-05, |
|
"loss": 0.0246, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.7177873253822327, |
|
"learning_rate": 1.8134381701215157e-05, |
|
"loss": 0.0293, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.5648506879806519, |
|
"learning_rate": 1.812008577555397e-05, |
|
"loss": 0.0225, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.721409261226654, |
|
"learning_rate": 1.8105789849892783e-05, |
|
"loss": 0.0242, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.6926946640014648, |
|
"learning_rate": 1.8091493924231596e-05, |
|
"loss": 0.0276, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.6999531388282776, |
|
"learning_rate": 1.807719799857041e-05, |
|
"loss": 0.0187, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.7933658957481384, |
|
"learning_rate": 1.8062902072909223e-05, |
|
"loss": 0.0244, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.4972588121891022, |
|
"learning_rate": 1.8048606147248036e-05, |
|
"loss": 0.0418, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.0972784757614136, |
|
"learning_rate": 1.803431022158685e-05, |
|
"loss": 0.0319, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.2647324800491333, |
|
"learning_rate": 1.8020014295925663e-05, |
|
"loss": 0.0325, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.8409667015075684, |
|
"learning_rate": 1.8005718370264476e-05, |
|
"loss": 0.0236, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.6574044227600098, |
|
"learning_rate": 1.799142244460329e-05, |
|
"loss": 0.032, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.5404456257820129, |
|
"learning_rate": 1.7977126518942103e-05, |
|
"loss": 0.0313, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.7715393900871277, |
|
"learning_rate": 1.7962830593280916e-05, |
|
"loss": 0.0252, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.3372646570205688, |
|
"learning_rate": 1.794853466761973e-05, |
|
"loss": 0.0268, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.5368986129760742, |
|
"learning_rate": 1.7934238741958543e-05, |
|
"loss": 0.0234, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 0.026826824992895126, |
|
"eval_pearson_cosine": 0.9529420998918104, |
|
"eval_pearson_dot": 0.951139070288119, |
|
"eval_pearson_euclidean": 0.945750278447336, |
|
"eval_pearson_manhattan": 0.945116677167893, |
|
"eval_pearson_max": 0.9529420998918104, |
|
"eval_runtime": 249.7054, |
|
"eval_samples_per_second": 2.002, |
|
"eval_spearman_cosine": 0.9627865603907902, |
|
"eval_spearman_dot": 0.9601426124824484, |
|
"eval_spearman_euclidean": 0.9601416524781244, |
|
"eval_spearman_manhattan": 0.9589273910090907, |
|
"eval_spearman_max": 0.9627865603907902, |
|
"eval_steps_per_second": 2.002, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.749082624912262, |
|
"learning_rate": 1.7919942816297356e-05, |
|
"loss": 0.0222, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.8751634955406189, |
|
"learning_rate": 1.790564689063617e-05, |
|
"loss": 0.0267, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.622231125831604, |
|
"learning_rate": 1.7891350964974983e-05, |
|
"loss": 0.02, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.1356748342514038, |
|
"learning_rate": 1.7877055039313796e-05, |
|
"loss": 0.0286, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.6375044584274292, |
|
"learning_rate": 1.786275911365261e-05, |
|
"loss": 0.024, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.9166258573532104, |
|
"learning_rate": 1.7848463187991427e-05, |
|
"loss": 0.0285, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.7663798332214355, |
|
"learning_rate": 1.7834167262330236e-05, |
|
"loss": 0.0188, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.0997885465621948, |
|
"learning_rate": 1.781987133666905e-05, |
|
"loss": 0.0345, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.8155802488327026, |
|
"learning_rate": 1.7805575411007866e-05, |
|
"loss": 0.0291, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.5071162581443787, |
|
"learning_rate": 1.7791279485346676e-05, |
|
"loss": 0.0225, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.4950205981731415, |
|
"learning_rate": 1.777698355968549e-05, |
|
"loss": 0.0255, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.6503371000289917, |
|
"learning_rate": 1.7762687634024306e-05, |
|
"loss": 0.0353, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.4491406977176666, |
|
"learning_rate": 1.7748391708363116e-05, |
|
"loss": 0.0287, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.5856379270553589, |
|
"learning_rate": 1.7734095782701933e-05, |
|
"loss": 0.029, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.9508484601974487, |
|
"learning_rate": 1.7719799857040746e-05, |
|
"loss": 0.02, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.2820254564285278, |
|
"learning_rate": 1.7705503931379556e-05, |
|
"loss": 0.0323, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.885857105255127, |
|
"learning_rate": 1.7691208005718373e-05, |
|
"loss": 0.0241, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.5690828561782837, |
|
"learning_rate": 1.7676912080057183e-05, |
|
"loss": 0.0205, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.41895803809165955, |
|
"learning_rate": 1.7662616154396e-05, |
|
"loss": 0.0188, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.8826864361763, |
|
"learning_rate": 1.7648320228734813e-05, |
|
"loss": 0.026, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.9115862250328064, |
|
"learning_rate": 1.7634024303073623e-05, |
|
"loss": 0.0328, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.5499801635742188, |
|
"learning_rate": 1.761972837741244e-05, |
|
"loss": 0.0385, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.07961106300354, |
|
"learning_rate": 1.7605432451751253e-05, |
|
"loss": 0.0276, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.7965257167816162, |
|
"learning_rate": 1.7591136526090066e-05, |
|
"loss": 0.0305, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.47339317202568054, |
|
"learning_rate": 1.757684060042888e-05, |
|
"loss": 0.0181, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.6750252842903137, |
|
"learning_rate": 1.7562544674767693e-05, |
|
"loss": 0.0208, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.9396247267723083, |
|
"learning_rate": 1.7548248749106506e-05, |
|
"loss": 0.0361, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.0881524085998535, |
|
"learning_rate": 1.753395282344532e-05, |
|
"loss": 0.0331, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.7517051100730896, |
|
"learning_rate": 1.7519656897784133e-05, |
|
"loss": 0.0281, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.7083280682563782, |
|
"learning_rate": 1.7505360972122946e-05, |
|
"loss": 0.0209, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.727603018283844, |
|
"learning_rate": 1.749106504646176e-05, |
|
"loss": 0.0312, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.4598117470741272, |
|
"learning_rate": 1.7476769120800573e-05, |
|
"loss": 0.02, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.6653364896774292, |
|
"learning_rate": 1.7462473195139386e-05, |
|
"loss": 0.0263, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.527958869934082, |
|
"learning_rate": 1.74481772694782e-05, |
|
"loss": 0.0176, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.9711959362030029, |
|
"learning_rate": 1.7433881343817013e-05, |
|
"loss": 0.0336, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.8734799027442932, |
|
"learning_rate": 1.7419585418155826e-05, |
|
"loss": 0.0384, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.599764883518219, |
|
"learning_rate": 1.740528949249464e-05, |
|
"loss": 0.0317, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.6115812659263611, |
|
"learning_rate": 1.7390993566833453e-05, |
|
"loss": 0.0246, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.660057008266449, |
|
"learning_rate": 1.7376697641172266e-05, |
|
"loss": 0.027, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.2455826997756958, |
|
"learning_rate": 1.736240171551108e-05, |
|
"loss": 0.0434, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.07332181930542, |
|
"learning_rate": 1.7348105789849893e-05, |
|
"loss": 0.038, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.6912384629249573, |
|
"learning_rate": 1.733380986418871e-05, |
|
"loss": 0.0252, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.7599236369132996, |
|
"learning_rate": 1.731951393852752e-05, |
|
"loss": 0.025, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.7361788153648376, |
|
"learning_rate": 1.7305218012866333e-05, |
|
"loss": 0.0413, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.6756102442741394, |
|
"learning_rate": 1.729092208720515e-05, |
|
"loss": 0.0374, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.5338143706321716, |
|
"learning_rate": 1.727662616154396e-05, |
|
"loss": 0.0226, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.5998290777206421, |
|
"learning_rate": 1.7262330235882773e-05, |
|
"loss": 0.0244, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.5274918079376221, |
|
"learning_rate": 1.724803431022159e-05, |
|
"loss": 0.0267, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.6132778525352478, |
|
"learning_rate": 1.72337383845604e-05, |
|
"loss": 0.0351, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.7461394667625427, |
|
"learning_rate": 1.7219442458899216e-05, |
|
"loss": 0.0314, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.7438216209411621, |
|
"learning_rate": 1.720514653323803e-05, |
|
"loss": 0.0261, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.730912983417511, |
|
"learning_rate": 1.719085060757684e-05, |
|
"loss": 0.0229, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.2940622568130493, |
|
"learning_rate": 1.7176554681915656e-05, |
|
"loss": 0.0273, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.7200002670288086, |
|
"learning_rate": 1.716225875625447e-05, |
|
"loss": 0.0267, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.3229494094848633, |
|
"learning_rate": 1.7147962830593283e-05, |
|
"loss": 0.0334, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.7718178033828735, |
|
"learning_rate": 1.7133666904932096e-05, |
|
"loss": 0.0312, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.6765419244766235, |
|
"learning_rate": 1.711937097927091e-05, |
|
"loss": 0.0236, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.753416121006012, |
|
"learning_rate": 1.7105075053609723e-05, |
|
"loss": 0.0271, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.6442211866378784, |
|
"learning_rate": 1.7090779127948536e-05, |
|
"loss": 0.0151, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.5224249362945557, |
|
"learning_rate": 1.707648320228735e-05, |
|
"loss": 0.0355, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.7751701474189758, |
|
"learning_rate": 1.7062187276626163e-05, |
|
"loss": 0.0251, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.7004714012145996, |
|
"learning_rate": 1.7047891350964976e-05, |
|
"loss": 0.0171, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.7819869518280029, |
|
"learning_rate": 1.703359542530379e-05, |
|
"loss": 0.0309, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.5115138292312622, |
|
"learning_rate": 1.7019299499642603e-05, |
|
"loss": 0.0321, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3391636610031128, |
|
"learning_rate": 1.7005003573981416e-05, |
|
"loss": 0.0221, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.2895567417144775, |
|
"learning_rate": 1.699070764832023e-05, |
|
"loss": 0.0317, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.7947621941566467, |
|
"learning_rate": 1.6976411722659043e-05, |
|
"loss": 0.0266, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.7865754961967468, |
|
"learning_rate": 1.6962115796997856e-05, |
|
"loss": 0.0421, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.002314567565918, |
|
"learning_rate": 1.694781987133667e-05, |
|
"loss": 0.0308, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.6675818562507629, |
|
"learning_rate": 1.6933523945675483e-05, |
|
"loss": 0.0202, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.2045787572860718, |
|
"learning_rate": 1.6919228020014296e-05, |
|
"loss": 0.0387, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.5665823817253113, |
|
"learning_rate": 1.690493209435311e-05, |
|
"loss": 0.0149, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.5733370780944824, |
|
"learning_rate": 1.6890636168691926e-05, |
|
"loss": 0.024, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.32628941535949707, |
|
"learning_rate": 1.6876340243030736e-05, |
|
"loss": 0.0181, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.5624873638153076, |
|
"learning_rate": 1.686204431736955e-05, |
|
"loss": 0.0308, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 0.02633051760494709, |
|
"eval_pearson_cosine": 0.9510150119057015, |
|
"eval_pearson_dot": 0.9497414425975934, |
|
"eval_pearson_euclidean": 0.9338662221774623, |
|
"eval_pearson_manhattan": 0.9334364652708103, |
|
"eval_pearson_max": 0.9510150119057015, |
|
"eval_runtime": 249.0704, |
|
"eval_samples_per_second": 2.007, |
|
"eval_spearman_cosine": 0.9551964302048428, |
|
"eval_spearman_dot": 0.9547767163144516, |
|
"eval_spearman_euclidean": 0.9485047200653796, |
|
"eval_spearman_manhattan": 0.9472191302750829, |
|
"eval_spearman_max": 0.9551964302048428, |
|
"eval_steps_per_second": 2.007, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.4453175663948059, |
|
"learning_rate": 1.6847748391708366e-05, |
|
"loss": 0.0251, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.8831977844238281, |
|
"learning_rate": 1.6833452466047176e-05, |
|
"loss": 0.0253, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.9551718831062317, |
|
"learning_rate": 1.6819156540385993e-05, |
|
"loss": 0.0451, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.9147098064422607, |
|
"learning_rate": 1.6804860614724806e-05, |
|
"loss": 0.0233, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.8430941104888916, |
|
"learning_rate": 1.6790564689063616e-05, |
|
"loss": 0.0224, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.6627079844474792, |
|
"learning_rate": 1.6776268763402433e-05, |
|
"loss": 0.0221, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.7855361104011536, |
|
"learning_rate": 1.6761972837741246e-05, |
|
"loss": 0.0307, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.45924311876296997, |
|
"learning_rate": 1.6747676912080056e-05, |
|
"loss": 0.0257, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.7786813378334045, |
|
"learning_rate": 1.6733380986418873e-05, |
|
"loss": 0.0345, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.5647817254066467, |
|
"learning_rate": 1.6719085060757686e-05, |
|
"loss": 0.0181, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.0387967824935913, |
|
"learning_rate": 1.67047891350965e-05, |
|
"loss": 0.0297, |
|
"step": 1544 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.5947245359420776, |
|
"learning_rate": 1.6690493209435313e-05, |
|
"loss": 0.0239, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.5582525134086609, |
|
"learning_rate": 1.6676197283774126e-05, |
|
"loss": 0.0305, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.664482057094574, |
|
"learning_rate": 1.666190135811294e-05, |
|
"loss": 0.0249, |
|
"step": 1556 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.9069839715957642, |
|
"learning_rate": 1.6647605432451753e-05, |
|
"loss": 0.0298, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.8026562333106995, |
|
"learning_rate": 1.6633309506790566e-05, |
|
"loss": 0.0203, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.6699361801147461, |
|
"learning_rate": 1.661901358112938e-05, |
|
"loss": 0.0312, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.6312674283981323, |
|
"learning_rate": 1.6604717655468193e-05, |
|
"loss": 0.0204, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.4308748245239258, |
|
"learning_rate": 1.6590421729807006e-05, |
|
"loss": 0.0209, |
|
"step": 1576 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.3553575277328491, |
|
"learning_rate": 1.657612580414582e-05, |
|
"loss": 0.0257, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.4249012470245361, |
|
"learning_rate": 1.6561829878484633e-05, |
|
"loss": 0.0306, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.4437320828437805, |
|
"learning_rate": 1.6547533952823446e-05, |
|
"loss": 0.017, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.6957021951675415, |
|
"learning_rate": 1.653323802716226e-05, |
|
"loss": 0.0227, |
|
"step": 1592 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.5041042566299438, |
|
"learning_rate": 1.6518942101501073e-05, |
|
"loss": 0.0228, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.6849528551101685, |
|
"learning_rate": 1.6504646175839886e-05, |
|
"loss": 0.0205, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.8483012318611145, |
|
"learning_rate": 1.64903502501787e-05, |
|
"loss": 0.0336, |
|
"step": 1604 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.9397591352462769, |
|
"learning_rate": 1.6476054324517513e-05, |
|
"loss": 0.029, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.9127416014671326, |
|
"learning_rate": 1.6461758398856326e-05, |
|
"loss": 0.0225, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.8871793150901794, |
|
"learning_rate": 1.6447462473195143e-05, |
|
"loss": 0.0185, |
|
"step": 1616 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.7608364224433899, |
|
"learning_rate": 1.6433166547533953e-05, |
|
"loss": 0.0273, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.778032660484314, |
|
"learning_rate": 1.6418870621872766e-05, |
|
"loss": 0.02, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.6790982484817505, |
|
"learning_rate": 1.6404574696211583e-05, |
|
"loss": 0.025, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.4867960810661316, |
|
"learning_rate": 1.6390278770550393e-05, |
|
"loss": 0.0175, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.5005691647529602, |
|
"learning_rate": 1.637598284488921e-05, |
|
"loss": 0.0329, |
|
"step": 1636 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.7641982436180115, |
|
"learning_rate": 1.6361686919228023e-05, |
|
"loss": 0.0312, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.112260103225708, |
|
"learning_rate": 1.6347390993566833e-05, |
|
"loss": 0.0289, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.8543418049812317, |
|
"learning_rate": 1.633309506790565e-05, |
|
"loss": 0.0205, |
|
"step": 1648 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.5461686849594116, |
|
"learning_rate": 1.6318799142244463e-05, |
|
"loss": 0.0155, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.5375934839248657, |
|
"learning_rate": 1.6304503216583276e-05, |
|
"loss": 0.0237, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.6225507855415344, |
|
"learning_rate": 1.629020729092209e-05, |
|
"loss": 0.031, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.2959158420562744, |
|
"learning_rate": 1.62759113652609e-05, |
|
"loss": 0.0315, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.8647311329841614, |
|
"learning_rate": 1.6261615439599716e-05, |
|
"loss": 0.0204, |
|
"step": 1668 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.5671218633651733, |
|
"learning_rate": 1.624731951393853e-05, |
|
"loss": 0.0182, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.5024406313896179, |
|
"learning_rate": 1.623302358827734e-05, |
|
"loss": 0.0257, |
|
"step": 1676 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.4015944004058838, |
|
"learning_rate": 1.6218727662616156e-05, |
|
"loss": 0.023, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.5640401840209961, |
|
"learning_rate": 1.620443173695497e-05, |
|
"loss": 0.0179, |
|
"step": 1684 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.7147939205169678, |
|
"learning_rate": 1.6190135811293783e-05, |
|
"loss": 0.0176, |
|
"step": 1688 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.8704376816749573, |
|
"learning_rate": 1.6175839885632596e-05, |
|
"loss": 0.02, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.7839465737342834, |
|
"learning_rate": 1.616154395997141e-05, |
|
"loss": 0.0209, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.4096917510032654, |
|
"learning_rate": 1.6147248034310223e-05, |
|
"loss": 0.0204, |
|
"step": 1700 |
|
} |
|
], |
|
"logging_steps": 4, |
|
"max_steps": 6218, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|