diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,14481 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.15, + "eval_steps": 100, + "global_step": 3000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "grad_norm": 0.06993763389097322, + "learning_rate": 1.2553691692674561e-06, + "loss": 3.0703, + "step": 2 + }, + { + "epoch": 0.0, + "grad_norm": 0.06153174752924976, + "learning_rate": 2.5107383385349122e-06, + "loss": 2.9844, + "step": 4 + }, + { + "epoch": 0.0, + "grad_norm": 0.2222815841415942, + "learning_rate": 3.245082227117844e-06, + "loss": 2.1152, + "step": 6 + }, + { + "epoch": 0.0, + "grad_norm": 0.0537359374559032, + "learning_rate": 3.7661075078023677e-06, + "loss": 2.9844, + "step": 8 + }, + { + "epoch": 0.0, + "grad_norm": 0.06405006802699156, + "learning_rate": 4.170246112844972e-06, + "loss": 2.6562, + "step": 10 + }, + { + "epoch": 0.0, + "grad_norm": 0.11637142063571428, + "learning_rate": 4.5004513963853e-06, + "loss": 2.4043, + "step": 12 + }, + { + "epoch": 0.0, + "grad_norm": 0.1892893466719824, + "learning_rate": 4.779635985609814e-06, + "loss": 3.4062, + "step": 14 + }, + { + "epoch": 0.0, + "grad_norm": 0.048251497571099866, + "learning_rate": 5.0214766770698244e-06, + "loss": 2.1211, + "step": 16 + }, + { + "epoch": 0.0, + "grad_norm": 0.0568415678322659, + "learning_rate": 5.234795284968231e-06, + "loss": 2.1914, + "step": 18 + }, + { + "epoch": 0.0, + "grad_norm": 0.200681152700164, + "learning_rate": 5.425615282112428e-06, + "loss": 3.1094, + "step": 20 + }, + { + "epoch": 0.0, + "grad_norm": 0.18051216338180223, + "learning_rate": 5.598232966493732e-06, + "loss": 3.5078, + "step": 22 + }, + { + "epoch": 0.0, + "grad_norm": 0.06093213542225355, + "learning_rate": 5.755820565652757e-06, + "loss": 2.1797, + "step": 24 + }, + { + "epoch": 0.0, + "grad_norm": 0.05166025684541596, + "learning_rate": 5.900787104154539e-06, + "loss": 1.8945, + "step": 26 + }, + { + "epoch": 0.0, + "grad_norm": 0.07724068655290238, + "learning_rate": 6.03500515487727e-06, + "loss": 2.3125, + "step": 28 + }, + { + "epoch": 0.0, + "grad_norm": 0.10396538948031947, + "learning_rate": 6.159959170695358e-06, + "loss": 2.0352, + "step": 30 + }, + { + "epoch": 0.0, + "grad_norm": 0.054259556614586625, + "learning_rate": 6.27684584633728e-06, + "loss": 1.6758, + "step": 32 + }, + { + "epoch": 0.0, + "grad_norm": 0.07654097884702263, + "learning_rate": 6.386644000699491e-06, + "loss": 2.6172, + "step": 34 + }, + { + "epoch": 0.0, + "grad_norm": 0.13452991659941343, + "learning_rate": 6.490164454235688e-06, + "loss": 2.0039, + "step": 36 + }, + { + "epoch": 0.0, + "grad_norm": 0.0511568875605143, + "learning_rate": 6.5880864029275e-06, + "loss": 3.2227, + "step": 38 + }, + { + "epoch": 0.0, + "grad_norm": 0.05027966184401755, + "learning_rate": 6.680984451379884e-06, + "loss": 1.9648, + "step": 40 + }, + { + "epoch": 0.0, + "grad_norm": 0.05870045244699116, + "learning_rate": 6.769349043460203e-06, + "loss": 1.418, + "step": 42 + }, + { + "epoch": 0.0, + "grad_norm": 0.06159022656905628, + "learning_rate": 6.853602135761187e-06, + "loss": 4.0156, + "step": 44 + }, + { + "epoch": 0.0, + "grad_norm": 0.13273234613359977, + "learning_rate": 6.934109384172617e-06, + "loss": 3.2891, + "step": 46 + }, + { + "epoch": 0.0, + "grad_norm": 0.052052813270225366, + "learning_rate": 7.011189734920213e-06, + "loss": 2.6172, + "step": 48 + }, + { + "epoch": 0.0, + "grad_norm": 0.15961881341722708, + "learning_rate": 7.085123056422486e-06, + "loss": 4.6094, + "step": 50 + }, + { + "epoch": 0.0, + "grad_norm": 0.049703982396482015, + "learning_rate": 7.156156273421995e-06, + "loss": 2.9766, + "step": 52 + }, + { + "epoch": 0.0, + "grad_norm": 0.1779622827984218, + "learning_rate": 7.224508342818619e-06, + "loss": 2.7109, + "step": 54 + }, + { + "epoch": 0.0, + "grad_norm": 0.05820561642082821, + "learning_rate": 7.290374324144728e-06, + "loss": 4.0156, + "step": 56 + }, + { + "epoch": 0.0, + "grad_norm": 0.1367064129848971, + "learning_rate": 7.3539287354378455e-06, + "loss": 2.0254, + "step": 58 + }, + { + "epoch": 0.0, + "grad_norm": 0.07025650563122704, + "learning_rate": 7.415328339962814e-06, + "loss": 3.0938, + "step": 60 + }, + { + "epoch": 0.0, + "grad_norm": 0.07864152211515654, + "learning_rate": 7.474714475825724e-06, + "loss": 3.2656, + "step": 62 + }, + { + "epoch": 0.0, + "grad_norm": 0.10434333055982575, + "learning_rate": 7.532215015604735e-06, + "loss": 2.5859, + "step": 64 + }, + { + "epoch": 0.0, + "grad_norm": 0.08468608134478554, + "learning_rate": 7.587946024344118e-06, + "loss": 2.9844, + "step": 66 + }, + { + "epoch": 0.0, + "grad_norm": 0.06122282169051712, + "learning_rate": 7.642013169966947e-06, + "loss": 2.5859, + "step": 68 + }, + { + "epoch": 0.0, + "grad_norm": 0.0865492074104283, + "learning_rate": 7.69451292918733e-06, + "loss": 3.1172, + "step": 70 + }, + { + "epoch": 0.0, + "grad_norm": 0.10794311555188178, + "learning_rate": 7.745533623503144e-06, + "loss": 1.6191, + "step": 72 + }, + { + "epoch": 0.0, + "grad_norm": 0.1030752468383858, + "learning_rate": 7.795156313214624e-06, + "loss": 2.3516, + "step": 74 + }, + { + "epoch": 0.0, + "grad_norm": 0.059585575196562775, + "learning_rate": 7.843455572194956e-06, + "loss": 3.1016, + "step": 76 + }, + { + "epoch": 0.0, + "grad_norm": 0.06094201801741937, + "learning_rate": 7.890500162004926e-06, + "loss": 3.7188, + "step": 78 + }, + { + "epoch": 0.0, + "grad_norm": 0.06172587189145012, + "learning_rate": 7.93635362064734e-06, + "loss": 3.2188, + "step": 80 + }, + { + "epoch": 0.0, + "grad_norm": 0.05842479147840705, + "learning_rate": 7.981074778612054e-06, + "loss": 2.5859, + "step": 82 + }, + { + "epoch": 0.0, + "grad_norm": 0.08713550699538668, + "learning_rate": 8.024718212727658e-06, + "loss": 4.25, + "step": 84 + }, + { + "epoch": 0.0, + "grad_norm": 0.05385791652102987, + "learning_rate": 8.067334646603105e-06, + "loss": 2.1328, + "step": 86 + }, + { + "epoch": 0.0, + "grad_norm": 0.061292113145551747, + "learning_rate": 8.108971305028645e-06, + "loss": 2.7227, + "step": 88 + }, + { + "epoch": 0.0, + "grad_norm": 0.04357968658567487, + "learning_rate": 8.149672228545746e-06, + "loss": 2.7344, + "step": 90 + }, + { + "epoch": 0.0, + "grad_norm": 0.0604204079926121, + "learning_rate": 8.189478553440074e-06, + "loss": 3.0781, + "step": 92 + }, + { + "epoch": 0.0, + "grad_norm": 0.05658025854347226, + "learning_rate": 8.228428761620285e-06, + "loss": 2.6641, + "step": 94 + }, + { + "epoch": 0.0, + "grad_norm": 0.048768682059982936, + "learning_rate": 8.266558904187668e-06, + "loss": 1.9219, + "step": 96 + }, + { + "epoch": 0.0, + "grad_norm": 0.049735516778038245, + "learning_rate": 8.303902801952174e-06, + "loss": 3.8281, + "step": 98 + }, + { + "epoch": 0.01, + "grad_norm": 0.05805351434989718, + "learning_rate": 8.340492225689944e-06, + "loss": 3.6328, + "step": 100 + }, + { + "epoch": 0.01, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.3359375, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.2109375, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.66924285888672, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 13.6882, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 4.676, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.073, + "step": 100 + }, + { + "epoch": 0.01, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.59375, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 21.2890625, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.484375, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 20.540794372558594, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 3.573, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 17.912, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.28, + "step": 100 + }, + { + "epoch": 0.01, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 42.578125, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 88.0859375, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.87890625, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 66.20653533935547, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 12.4184, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.154, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.081, + "step": 100 + }, + { + "epoch": 0.01, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 38.4765625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 80.46875, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.99609375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 61.372718811035156, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 15.9617, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.01, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.063, + "step": 100 + }, + { + "epoch": 0.01, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.40625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 84.9609375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 64.66063690185547, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.8761, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.389, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.084, + "step": 100 + }, + { + "epoch": 0.01, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.0703125, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.53125, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.05279541015625, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 12.0374, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.317, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.083, + "step": 100 + }, + { + "epoch": 0.01, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 40.0390625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 83.59375, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 63.44341278076172, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 11.9454, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.358, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.084, + "step": 100 + }, + { + "epoch": 0.01, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 41.40625, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.28125, + "eval_fever_top15HN_validation.jsonl.gz_loss": 3.015625, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.08790588378906, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 17.8889, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.578, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.01, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 37.5, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 79.1015625, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.84375, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 60.242340087890625, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 6.6659, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 9.601, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.15, + "step": 100 + }, + { + "epoch": 0.01, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 45.5078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 92.578125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 69.9529800415039, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 8.2652, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 7.743, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.121, + "step": 100 + }, + { + "epoch": 0.01, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.9453125, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 92.578125, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.82560729980469, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 6.2525, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 10.236, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.16, + "step": 100 + }, + { + "epoch": 0.01, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 32.8125, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 73.828125, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.85546875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 55.7899284362793, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 15.3397, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.172, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.065, + "step": 100 + }, + { + "epoch": 0.01, + "grad_norm": 0.0555762154078443, + "learning_rate": 8.376357058549878e-06, + "loss": 3.8516, + "step": 102 + }, + { + "epoch": 0.01, + "grad_norm": 0.09096721847690384, + "learning_rate": 8.41152544268945e-06, + "loss": 3.0312, + "step": 104 + }, + { + "epoch": 0.01, + "grad_norm": 0.09138371442473803, + "learning_rate": 8.446023911942528e-06, + "loss": 3.2656, + "step": 106 + }, + { + "epoch": 0.01, + "grad_norm": 0.05511478622620939, + "learning_rate": 8.479877512086076e-06, + "loss": 3.6562, + "step": 108 + }, + { + "epoch": 0.01, + "grad_norm": 0.08097997634746824, + "learning_rate": 8.513109910071246e-06, + "loss": 3.375, + "step": 110 + }, + { + "epoch": 0.01, + "grad_norm": 0.053618034298021276, + "learning_rate": 8.545743493412182e-06, + "loss": 2.5977, + "step": 112 + }, + { + "epoch": 0.01, + "grad_norm": 0.048484076517767906, + "learning_rate": 8.577799460777888e-06, + "loss": 2.3438, + "step": 114 + }, + { + "epoch": 0.01, + "grad_norm": 0.04762160994558407, + "learning_rate": 8.609297904705302e-06, + "loss": 2.5625, + "step": 116 + }, + { + "epoch": 0.01, + "grad_norm": 0.056551775221808875, + "learning_rate": 8.640257887241806e-06, + "loss": 3.6562, + "step": 118 + }, + { + "epoch": 0.01, + "grad_norm": 0.08650912291299868, + "learning_rate": 8.67069750923027e-06, + "loss": 3.0781, + "step": 120 + }, + { + "epoch": 0.01, + "grad_norm": 0.05024595516846764, + "learning_rate": 8.700633973867262e-06, + "loss": 2.5312, + "step": 122 + }, + { + "epoch": 0.01, + "grad_norm": 0.08139131343094162, + "learning_rate": 8.73008364509318e-06, + "loss": 2.9844, + "step": 124 + }, + { + "epoch": 0.01, + "grad_norm": 0.05261944016574215, + "learning_rate": 8.75906210131059e-06, + "loss": 3.1719, + "step": 126 + }, + { + "epoch": 0.01, + "grad_norm": 0.04990762703132796, + "learning_rate": 8.787584184872193e-06, + "loss": 2.8438, + "step": 128 + }, + { + "epoch": 0.01, + "grad_norm": 0.11405115920346946, + "learning_rate": 8.815664047732054e-06, + "loss": 3.625, + "step": 130 + }, + { + "epoch": 0.01, + "grad_norm": 0.046456081831816425, + "learning_rate": 8.843315193611574e-06, + "loss": 3.4141, + "step": 132 + }, + { + "epoch": 0.01, + "grad_norm": 0.05027807580007265, + "learning_rate": 8.870550516994724e-06, + "loss": 2.5742, + "step": 134 + }, + { + "epoch": 0.01, + "grad_norm": 0.04783365099303492, + "learning_rate": 8.897382339234405e-06, + "loss": 2.207, + "step": 136 + }, + { + "epoch": 0.01, + "grad_norm": 0.053648309608052606, + "learning_rate": 8.923822442023006e-06, + "loss": 3.625, + "step": 138 + }, + { + "epoch": 0.01, + "grad_norm": 0.05821291421967534, + "learning_rate": 8.949882098454784e-06, + "loss": 2.3711, + "step": 140 + }, + { + "epoch": 0.01, + "grad_norm": 0.07947431557911265, + "learning_rate": 8.975572101884981e-06, + "loss": 3.0, + "step": 142 + }, + { + "epoch": 0.01, + "grad_norm": 0.042870498622682544, + "learning_rate": 9.0009027927706e-06, + "loss": 3.3047, + "step": 144 + }, + { + "epoch": 0.01, + "grad_norm": 0.04492287531029236, + "learning_rate": 9.025884083659961e-06, + "loss": 2.5508, + "step": 146 + }, + { + "epoch": 0.01, + "grad_norm": 0.055349818184018997, + "learning_rate": 9.05052548248208e-06, + "loss": 2.0547, + "step": 148 + }, + { + "epoch": 0.01, + "grad_norm": 0.06308661407367727, + "learning_rate": 9.074836114272873e-06, + "loss": 3.4922, + "step": 150 + }, + { + "epoch": 0.01, + "grad_norm": 0.055429411592394985, + "learning_rate": 9.098824741462414e-06, + "loss": 3.8359, + "step": 152 + }, + { + "epoch": 0.01, + "grad_norm": 0.04545788666314569, + "learning_rate": 9.12249978283609e-06, + "loss": 3.5547, + "step": 154 + }, + { + "epoch": 0.01, + "grad_norm": 0.05213374911481201, + "learning_rate": 9.145869331272382e-06, + "loss": 1.8984, + "step": 156 + }, + { + "epoch": 0.01, + "grad_norm": 0.0929787972975564, + "learning_rate": 9.16894117035073e-06, + "loss": 3.4375, + "step": 158 + }, + { + "epoch": 0.01, + "grad_norm": 0.06586975877837985, + "learning_rate": 9.191722789914796e-06, + "loss": 3.2578, + "step": 160 + }, + { + "epoch": 0.01, + "grad_norm": 0.07128694282933898, + "learning_rate": 9.214221400669006e-06, + "loss": 3.2266, + "step": 162 + }, + { + "epoch": 0.01, + "grad_norm": 0.05663837642152596, + "learning_rate": 9.23644394787951e-06, + "loss": 2.9766, + "step": 164 + }, + { + "epoch": 0.01, + "grad_norm": 0.04490447484833529, + "learning_rate": 9.258397124244722e-06, + "loss": 2.75, + "step": 166 + }, + { + "epoch": 0.01, + "grad_norm": 0.05914922510919387, + "learning_rate": 9.280087381995114e-06, + "loss": 3.8125, + "step": 168 + }, + { + "epoch": 0.01, + "grad_norm": 0.055595307384766375, + "learning_rate": 9.301520944277006e-06, + "loss": 2.3398, + "step": 170 + }, + { + "epoch": 0.01, + "grad_norm": 0.04965775451157443, + "learning_rate": 9.32270381587056e-06, + "loss": 2.4844, + "step": 172 + }, + { + "epoch": 0.01, + "grad_norm": 0.05284752010553776, + "learning_rate": 9.343641793288234e-06, + "loss": 2.6719, + "step": 174 + }, + { + "epoch": 0.01, + "grad_norm": 0.19756340929876257, + "learning_rate": 9.3643404742961e-06, + "loss": 1.041, + "step": 176 + }, + { + "epoch": 0.01, + "grad_norm": 0.045348920860207864, + "learning_rate": 9.384805266897236e-06, + "loss": 2.5625, + "step": 178 + }, + { + "epoch": 0.01, + "grad_norm": 0.06276055132520378, + "learning_rate": 9.405041397813202e-06, + "loss": 2.3047, + "step": 180 + }, + { + "epoch": 0.01, + "grad_norm": 0.058813238321584256, + "learning_rate": 9.425053920496896e-06, + "loss": 2.5117, + "step": 182 + }, + { + "epoch": 0.01, + "grad_norm": 0.0661215147360088, + "learning_rate": 9.44484772270753e-06, + "loss": 3.5703, + "step": 184 + }, + { + "epoch": 0.01, + "grad_norm": 0.05292887713455962, + "learning_rate": 9.464427533676113e-06, + "loss": 4.0938, + "step": 186 + }, + { + "epoch": 0.01, + "grad_norm": 0.05263024221290531, + "learning_rate": 9.483797930887741e-06, + "loss": 2.457, + "step": 188 + }, + { + "epoch": 0.01, + "grad_norm": 0.04471022826113325, + "learning_rate": 9.502963346505015e-06, + "loss": 2.7891, + "step": 190 + }, + { + "epoch": 0.01, + "grad_norm": 0.08212123204504364, + "learning_rate": 9.521928073455125e-06, + "loss": 2.1836, + "step": 192 + }, + { + "epoch": 0.01, + "grad_norm": 0.05038826441595468, + "learning_rate": 9.540696271201526e-06, + "loss": 1.582, + "step": 194 + }, + { + "epoch": 0.01, + "grad_norm": 0.05778434920226073, + "learning_rate": 9.559271971219628e-06, + "loss": 3.6328, + "step": 196 + }, + { + "epoch": 0.01, + "grad_norm": 0.04973184410313608, + "learning_rate": 9.577659082194506e-06, + "loss": 1.6289, + "step": 198 + }, + { + "epoch": 0.01, + "grad_norm": 0.09038426203739623, + "learning_rate": 9.595861394957398e-06, + "loss": 3.7031, + "step": 200 + }, + { + "epoch": 0.01, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.53125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.796875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.8046875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.5916748046875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.5049, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.563, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.087, + "step": 200 + }, + { + "epoch": 0.01, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.375, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 23.2421875, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.46875, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.6689510345459, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.7618, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.174, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.362, + "step": 200 + }, + { + "epoch": 0.01, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 44.921875, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 92.7734375, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.8671875, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 68.84564971923828, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.8596, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.893, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 200 + }, + { + "epoch": 0.01, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 40.8203125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 84.5703125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.984375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 63.992000579833984, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.9761, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.273, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.067, + "step": 200 + }, + { + "epoch": 0.01, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.796875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 85.3515625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.20314025878906, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.8663, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.89, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 200 + }, + { + "epoch": 0.01, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.0703125, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 95.1171875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.29474639892578, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 11.0218, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.807, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 200 + }, + { + "epoch": 0.01, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 39.84375, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 83.7890625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 62.934173583984375, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.8548, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.896, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 200 + }, + { + "epoch": 0.01, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.3828125, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 89.2578125, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.890625, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.17569732666016, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.3741, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.909, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.061, + "step": 200 + }, + { + "epoch": 0.01, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 35.546875, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 76.171875, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.83984375, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 58.256744384765625, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.4017, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 11.848, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.185, + "step": 200 + }, + { + "epoch": 0.01, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 46.2890625, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 94.140625, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0625, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.64799499511719, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 5.7737, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 11.085, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.173, + "step": 200 + }, + { + "epoch": 0.01, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 42.96875, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 89.84375, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.7109375, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 67.71356964111328, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.1763, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.364, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.193, + "step": 200 + }, + { + "epoch": 0.01, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 27.734375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 64.84375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.86328125, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 50.73663330078125, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.1216, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.532, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.071, + "step": 200 + }, + { + "epoch": 0.01, + "grad_norm": 0.05187297999516563, + "learning_rate": 9.613882587176614e-06, + "loss": 1.6211, + "step": 202 + }, + { + "epoch": 0.01, + "grad_norm": 0.06054422157379995, + "learning_rate": 9.631726227817333e-06, + "loss": 2.4805, + "step": 204 + }, + { + "epoch": 0.01, + "grad_norm": 0.05470661272122422, + "learning_rate": 9.64939578138386e-06, + "loss": 2.8594, + "step": 206 + }, + { + "epoch": 0.01, + "grad_norm": 0.04252591798544973, + "learning_rate": 9.666894611956906e-06, + "loss": 2.2148, + "step": 208 + }, + { + "epoch": 0.01, + "grad_norm": 0.055328731041222816, + "learning_rate": 9.684225987037717e-06, + "loss": 2.0742, + "step": 210 + }, + { + "epoch": 0.01, + "grad_norm": 0.06429040482274337, + "learning_rate": 9.701393081209986e-06, + "loss": 1.2773, + "step": 212 + }, + { + "epoch": 0.01, + "grad_norm": 0.04865171991818917, + "learning_rate": 9.718398979629844e-06, + "loss": 2.6875, + "step": 214 + }, + { + "epoch": 0.01, + "grad_norm": 0.062086833306091785, + "learning_rate": 9.735246681353532e-06, + "loss": 3.6016, + "step": 216 + }, + { + "epoch": 0.01, + "grad_norm": 0.055348962148643696, + "learning_rate": 9.751939102511684e-06, + "loss": 4.3203, + "step": 218 + }, + { + "epoch": 0.01, + "grad_norm": 0.0654632532081427, + "learning_rate": 9.768479079338704e-06, + "loss": 2.6484, + "step": 220 + }, + { + "epoch": 0.01, + "grad_norm": 0.056276097965491816, + "learning_rate": 9.78486937106501e-06, + "loss": 2.1543, + "step": 222 + }, + { + "epoch": 0.01, + "grad_norm": 0.04384275761621124, + "learning_rate": 9.801112662679638e-06, + "loss": 2.9062, + "step": 224 + }, + { + "epoch": 0.01, + "grad_norm": 0.10285480569491967, + "learning_rate": 9.817211567569991e-06, + "loss": 2.1309, + "step": 226 + }, + { + "epoch": 0.01, + "grad_norm": 0.057563429339058884, + "learning_rate": 9.833168630045344e-06, + "loss": 1.3789, + "step": 228 + }, + { + "epoch": 0.01, + "grad_norm": 0.04845892803303715, + "learning_rate": 9.848986327750132e-06, + "loss": 3.0078, + "step": 230 + }, + { + "epoch": 0.01, + "grad_norm": 0.052079127807501846, + "learning_rate": 9.864667073972758e-06, + "loss": 1.8516, + "step": 232 + }, + { + "epoch": 0.01, + "grad_norm": 0.0580523372207002, + "learning_rate": 9.880213219855314e-06, + "loss": 3.5938, + "step": 234 + }, + { + "epoch": 0.01, + "grad_norm": 0.05520583701375494, + "learning_rate": 9.895627056509262e-06, + "loss": 1.8652, + "step": 236 + }, + { + "epoch": 0.01, + "grad_norm": 0.04670165215553551, + "learning_rate": 9.91091081704185e-06, + "loss": 2.5469, + "step": 238 + }, + { + "epoch": 0.01, + "grad_norm": 0.05111165944588646, + "learning_rate": 9.926066678497726e-06, + "loss": 2.5312, + "step": 240 + }, + { + "epoch": 0.01, + "grad_norm": 0.045520169806954836, + "learning_rate": 9.941096763720006e-06, + "loss": 1.9609, + "step": 242 + }, + { + "epoch": 0.01, + "grad_norm": 0.04443447929589807, + "learning_rate": 9.956003143134718e-06, + "loss": 1.7402, + "step": 244 + }, + { + "epoch": 0.01, + "grad_norm": 0.0457533842247698, + "learning_rate": 9.97078783646244e-06, + "loss": 3.0859, + "step": 246 + }, + { + "epoch": 0.01, + "grad_norm": 0.07183809693572032, + "learning_rate": 9.985452814360637e-06, + "loss": 2.1992, + "step": 248 + }, + { + "epoch": 0.01, + "grad_norm": 0.04695659727754891, + "learning_rate": 1e-05, + "loss": 1.9609, + "step": 250 + }, + { + "epoch": 0.01, + "grad_norm": 0.045207151805051436, + "learning_rate": 9.999493670886077e-06, + "loss": 2.9766, + "step": 252 + }, + { + "epoch": 0.01, + "grad_norm": 0.05494287375989865, + "learning_rate": 9.998481012658229e-06, + "loss": 2.0117, + "step": 254 + }, + { + "epoch": 0.01, + "grad_norm": 0.05104609803909529, + "learning_rate": 9.99746835443038e-06, + "loss": 4.0469, + "step": 256 + }, + { + "epoch": 0.01, + "grad_norm": 0.10716340875144627, + "learning_rate": 9.996455696202532e-06, + "loss": 3.3438, + "step": 258 + }, + { + "epoch": 0.01, + "grad_norm": 0.049844088116010224, + "learning_rate": 9.995443037974684e-06, + "loss": 2.3359, + "step": 260 + }, + { + "epoch": 0.01, + "grad_norm": 0.06172126420851459, + "learning_rate": 9.994430379746836e-06, + "loss": 2.9883, + "step": 262 + }, + { + "epoch": 0.01, + "grad_norm": 0.05462014145721095, + "learning_rate": 9.993417721518988e-06, + "loss": 3.3047, + "step": 264 + }, + { + "epoch": 0.01, + "grad_norm": 0.1061552628130712, + "learning_rate": 9.99240506329114e-06, + "loss": 2.6836, + "step": 266 + }, + { + "epoch": 0.01, + "grad_norm": 0.07377903646023005, + "learning_rate": 9.991392405063292e-06, + "loss": 2.293, + "step": 268 + }, + { + "epoch": 0.01, + "grad_norm": 0.07509279074117713, + "learning_rate": 9.990379746835444e-06, + "loss": 2.9492, + "step": 270 + }, + { + "epoch": 0.01, + "grad_norm": 0.05146566974838422, + "learning_rate": 9.989367088607596e-06, + "loss": 3.0703, + "step": 272 + }, + { + "epoch": 0.01, + "grad_norm": 0.04868427544965678, + "learning_rate": 9.988354430379748e-06, + "loss": 3.1016, + "step": 274 + }, + { + "epoch": 0.01, + "grad_norm": 0.05261970344084726, + "learning_rate": 9.9873417721519e-06, + "loss": 2.0859, + "step": 276 + }, + { + "epoch": 0.01, + "grad_norm": 0.054466522746173085, + "learning_rate": 9.986329113924052e-06, + "loss": 2.5625, + "step": 278 + }, + { + "epoch": 0.01, + "grad_norm": 0.06804701762945423, + "learning_rate": 9.985316455696203e-06, + "loss": 2.8203, + "step": 280 + }, + { + "epoch": 0.01, + "grad_norm": 0.05772343231173062, + "learning_rate": 9.984303797468355e-06, + "loss": 2.3496, + "step": 282 + }, + { + "epoch": 0.01, + "grad_norm": 0.053545347629839375, + "learning_rate": 9.983291139240507e-06, + "loss": 3.1797, + "step": 284 + }, + { + "epoch": 0.01, + "grad_norm": 0.06325664571765138, + "learning_rate": 9.98227848101266e-06, + "loss": 3.1719, + "step": 286 + }, + { + "epoch": 0.01, + "grad_norm": 0.06480821776360049, + "learning_rate": 9.981265822784811e-06, + "loss": 2.0859, + "step": 288 + }, + { + "epoch": 0.01, + "grad_norm": 0.044109436690912, + "learning_rate": 9.980253164556963e-06, + "loss": 2.8516, + "step": 290 + }, + { + "epoch": 0.01, + "grad_norm": 0.05720379192483985, + "learning_rate": 9.979240506329115e-06, + "loss": 1.6055, + "step": 292 + }, + { + "epoch": 0.01, + "grad_norm": 0.05018627784925598, + "learning_rate": 9.978227848101267e-06, + "loss": 1.375, + "step": 294 + }, + { + "epoch": 0.01, + "grad_norm": 0.04823007306027534, + "learning_rate": 9.977215189873419e-06, + "loss": 2.3477, + "step": 296 + }, + { + "epoch": 0.01, + "grad_norm": 0.05029189238276064, + "learning_rate": 9.97620253164557e-06, + "loss": 1.9141, + "step": 298 + }, + { + "epoch": 0.01, + "grad_norm": 0.053517034957577904, + "learning_rate": 9.975189873417723e-06, + "loss": 2.166, + "step": 300 + }, + { + "epoch": 0.01, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 45.5078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 92.3828125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.8046875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.2406005859375, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.3388, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.644, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.088, + "step": 300 + }, + { + "epoch": 0.01, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.1796875, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 22.4609375, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.484375, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.357505798339844, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.6656, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 24.01, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.375, + "step": 300 + }, + { + "epoch": 0.01, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 47.4609375, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 96.2890625, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.86328125, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 71.95645904541016, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.8291, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.91, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 300 + }, + { + "epoch": 0.01, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 38.0859375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 79.8828125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.98828125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 61.024009704589844, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.4815, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.419, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 300 + }, + { + "epoch": 0.01, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.9921875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 86.71875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.74642944335938, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.995, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.821, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 300 + }, + { + "epoch": 0.01, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.2890625, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 93.75, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.20555114746094, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.7704, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.942, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 300 + }, + { + "epoch": 0.01, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 40.625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 84.1796875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.0312271118164, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.3256, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 6.198, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.097, + "step": 300 + }, + { + "epoch": 0.01, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 41.796875, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.28125, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.640625, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.14747619628906, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 15.7985, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 4.051, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.063, + "step": 300 + }, + { + "epoch": 0.01, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 41.9921875, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 86.328125, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.81640625, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 65.693603515625, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.4009, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 11.85, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.185, + "step": 300 + }, + { + "epoch": 0.01, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 45.8984375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 93.5546875, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.1675033569336, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 10.2559, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.24, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.098, + "step": 300 + }, + { + "epoch": 0.01, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.53125, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.9921875, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.6953125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.58917999267578, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.8726, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 13.135, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.205, + "step": 300 + }, + { + "epoch": 0.01, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 32.6171875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 71.6796875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.84765625, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 55.05112075805664, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.0688, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.549, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.071, + "step": 300 + }, + { + "epoch": 0.02, + "grad_norm": 0.05220067643814575, + "learning_rate": 9.974177215189874e-06, + "loss": 1.0488, + "step": 302 + }, + { + "epoch": 0.02, + "grad_norm": 0.04761786468609522, + "learning_rate": 9.973164556962026e-06, + "loss": 2.7422, + "step": 304 + }, + { + "epoch": 0.02, + "grad_norm": 0.06625101584092043, + "learning_rate": 9.972151898734178e-06, + "loss": 3.7188, + "step": 306 + }, + { + "epoch": 0.02, + "grad_norm": 0.030759557769197126, + "learning_rate": 9.97113924050633e-06, + "loss": 3.332, + "step": 308 + }, + { + "epoch": 0.02, + "grad_norm": 0.09398290889068939, + "learning_rate": 9.970126582278482e-06, + "loss": 1.9922, + "step": 310 + }, + { + "epoch": 0.02, + "grad_norm": 0.053608723396326066, + "learning_rate": 9.969113924050634e-06, + "loss": 2.9375, + "step": 312 + }, + { + "epoch": 0.02, + "grad_norm": 0.061954101693703315, + "learning_rate": 9.968101265822786e-06, + "loss": 3.1172, + "step": 314 + }, + { + "epoch": 0.02, + "grad_norm": 0.09075162197271411, + "learning_rate": 9.967088607594938e-06, + "loss": 3.0742, + "step": 316 + }, + { + "epoch": 0.02, + "grad_norm": 0.05736468842676422, + "learning_rate": 9.966075949367088e-06, + "loss": 3.2969, + "step": 318 + }, + { + "epoch": 0.02, + "grad_norm": 0.05043427744527494, + "learning_rate": 9.965063291139242e-06, + "loss": 2.1602, + "step": 320 + }, + { + "epoch": 0.02, + "grad_norm": 0.05239520008604763, + "learning_rate": 9.964050632911394e-06, + "loss": 1.9922, + "step": 322 + }, + { + "epoch": 0.02, + "grad_norm": 0.04720087325304379, + "learning_rate": 9.963037974683545e-06, + "loss": 2.668, + "step": 324 + }, + { + "epoch": 0.02, + "grad_norm": 0.04631212913230197, + "learning_rate": 9.962025316455697e-06, + "loss": 3.5312, + "step": 326 + }, + { + "epoch": 0.02, + "grad_norm": 0.0651010365528417, + "learning_rate": 9.96101265822785e-06, + "loss": 3.5859, + "step": 328 + }, + { + "epoch": 0.02, + "grad_norm": 0.05901109953694509, + "learning_rate": 9.960000000000001e-06, + "loss": 2.3125, + "step": 330 + }, + { + "epoch": 0.02, + "grad_norm": 0.05452453224180663, + "learning_rate": 9.958987341772153e-06, + "loss": 2.1113, + "step": 332 + }, + { + "epoch": 0.02, + "grad_norm": 0.040191372325169936, + "learning_rate": 9.957974683544305e-06, + "loss": 3.5703, + "step": 334 + }, + { + "epoch": 0.02, + "grad_norm": 0.0780827914897579, + "learning_rate": 9.956962025316457e-06, + "loss": 2.4844, + "step": 336 + }, + { + "epoch": 0.02, + "grad_norm": 0.04856945184709343, + "learning_rate": 9.955949367088609e-06, + "loss": 4.2422, + "step": 338 + }, + { + "epoch": 0.02, + "grad_norm": 0.057463653524251264, + "learning_rate": 9.95493670886076e-06, + "loss": 3.5547, + "step": 340 + }, + { + "epoch": 0.02, + "grad_norm": 0.05588622681586604, + "learning_rate": 9.953924050632913e-06, + "loss": 3.4297, + "step": 342 + }, + { + "epoch": 0.02, + "grad_norm": 0.05218677183317097, + "learning_rate": 9.952911392405065e-06, + "loss": 3.1797, + "step": 344 + }, + { + "epoch": 0.02, + "grad_norm": 0.06249090031601202, + "learning_rate": 9.951898734177215e-06, + "loss": 3.2344, + "step": 346 + }, + { + "epoch": 0.02, + "grad_norm": 0.05314294944842914, + "learning_rate": 9.950886075949367e-06, + "loss": 2.5938, + "step": 348 + }, + { + "epoch": 0.02, + "grad_norm": 0.096944124609397, + "learning_rate": 9.94987341772152e-06, + "loss": 2.5469, + "step": 350 + }, + { + "epoch": 0.02, + "grad_norm": 0.06123597867106346, + "learning_rate": 9.948860759493672e-06, + "loss": 2.375, + "step": 352 + }, + { + "epoch": 0.02, + "grad_norm": 0.058049734216535656, + "learning_rate": 9.947848101265824e-06, + "loss": 3.5234, + "step": 354 + }, + { + "epoch": 0.02, + "grad_norm": 0.04927350529384349, + "learning_rate": 9.946835443037976e-06, + "loss": 3.1016, + "step": 356 + }, + { + "epoch": 0.02, + "grad_norm": 0.08193739278667748, + "learning_rate": 9.945822784810128e-06, + "loss": 2.043, + "step": 358 + }, + { + "epoch": 0.02, + "grad_norm": 0.04891879246910809, + "learning_rate": 9.94481012658228e-06, + "loss": 2.6641, + "step": 360 + }, + { + "epoch": 0.02, + "grad_norm": 0.05658411010001912, + "learning_rate": 9.943797468354432e-06, + "loss": 2.0156, + "step": 362 + }, + { + "epoch": 0.02, + "grad_norm": 0.06126680664799663, + "learning_rate": 9.942784810126584e-06, + "loss": 3.3047, + "step": 364 + }, + { + "epoch": 0.02, + "grad_norm": 0.042300489786256924, + "learning_rate": 9.941772151898736e-06, + "loss": 3.0312, + "step": 366 + }, + { + "epoch": 0.02, + "grad_norm": 0.043547415767791385, + "learning_rate": 9.940759493670887e-06, + "loss": 3.25, + "step": 368 + }, + { + "epoch": 0.02, + "grad_norm": 0.046855612063298045, + "learning_rate": 9.93974683544304e-06, + "loss": 1.8672, + "step": 370 + }, + { + "epoch": 0.02, + "grad_norm": 0.04977376295679974, + "learning_rate": 9.93873417721519e-06, + "loss": 2.6406, + "step": 372 + }, + { + "epoch": 0.02, + "grad_norm": 0.04732396273321346, + "learning_rate": 9.937721518987341e-06, + "loss": 1.6602, + "step": 374 + }, + { + "epoch": 0.02, + "grad_norm": 0.048938423986438856, + "learning_rate": 9.936708860759493e-06, + "loss": 2.3281, + "step": 376 + }, + { + "epoch": 0.02, + "grad_norm": 0.04944323057700011, + "learning_rate": 9.935696202531645e-06, + "loss": 2.6289, + "step": 378 + }, + { + "epoch": 0.02, + "grad_norm": 0.048255186414602136, + "learning_rate": 9.934683544303799e-06, + "loss": 2.625, + "step": 380 + }, + { + "epoch": 0.02, + "grad_norm": 0.041219753976543594, + "learning_rate": 9.93367088607595e-06, + "loss": 4.0547, + "step": 382 + }, + { + "epoch": 0.02, + "grad_norm": 0.047695318412624185, + "learning_rate": 9.932658227848103e-06, + "loss": 1.873, + "step": 384 + }, + { + "epoch": 0.02, + "grad_norm": 0.04174272882598536, + "learning_rate": 9.931645569620255e-06, + "loss": 3.5, + "step": 386 + }, + { + "epoch": 0.02, + "grad_norm": 0.05577264201105512, + "learning_rate": 9.930632911392407e-06, + "loss": 3.5391, + "step": 388 + }, + { + "epoch": 0.02, + "grad_norm": 0.078976293555083, + "learning_rate": 9.929620253164558e-06, + "loss": 3.0703, + "step": 390 + }, + { + "epoch": 0.02, + "grad_norm": 0.05359446077111816, + "learning_rate": 9.92860759493671e-06, + "loss": 2.7891, + "step": 392 + }, + { + "epoch": 0.02, + "grad_norm": 0.058576859582871475, + "learning_rate": 9.927594936708862e-06, + "loss": 3.2422, + "step": 394 + }, + { + "epoch": 0.02, + "grad_norm": 0.05182847786102723, + "learning_rate": 9.926582278481014e-06, + "loss": 2.1953, + "step": 396 + }, + { + "epoch": 0.02, + "grad_norm": 0.05513545900717149, + "learning_rate": 9.925569620253164e-06, + "loss": 2.9609, + "step": 398 + }, + { + "epoch": 0.02, + "grad_norm": 0.04759815076884959, + "learning_rate": 9.924556962025316e-06, + "loss": 3.6016, + "step": 400 + }, + { + "epoch": 0.02, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 43.5546875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 90.8203125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.53609466552734, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.2086, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.71, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, + "step": 400 + }, + { + "epoch": 0.02, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.1796875, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 21.484375, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.46875, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.657047271728516, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.8575, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 22.398, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.35, + "step": 400 + }, + { + "epoch": 0.02, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 43.1640625, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 89.0625, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.87890625, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 66.59227752685547, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.1426, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.744, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.09, + "step": 400 + }, + { + "epoch": 0.02, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 41.2109375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 84.5703125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.98046875, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 64.26415252685547, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.633, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.374, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.068, + "step": 400 + }, + { + "epoch": 0.02, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.40625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 84.9609375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.21744537353516, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.1982, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.715, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.089, + "step": 400 + }, + { + "epoch": 0.02, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.3359375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.03494262695312, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.914, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.864, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 400 + }, + { + "epoch": 0.02, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.40625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 86.328125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.41943359375, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.6481, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 6.01, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.094, + "step": 400 + }, + { + "epoch": 0.02, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.578125, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 90.234375, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.765625, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.61365509033203, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.386, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.906, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.061, + "step": 400 + }, + { + "epoch": 0.02, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 37.890625, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 80.078125, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.828125, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 60.95426940917969, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.2239, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.251, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.191, + "step": 400 + }, + { + "epoch": 0.02, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 45.8984375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 93.359375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.48290252685547, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 9.3648, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.834, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.107, + "step": 400 + }, + { + "epoch": 0.02, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.1640625, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.2109375, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 69.15265655517578, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.2019, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.303, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.192, + "step": 400 + }, + { + "epoch": 0.02, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 33.7890625, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 75.0, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.85546875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 57.39921569824219, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.3277, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.467, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, + "step": 400 + }, + { + "epoch": 0.02, + "grad_norm": 0.07222205536882423, + "learning_rate": 9.923544303797468e-06, + "loss": 1.3027, + "step": 402 + }, + { + "epoch": 0.02, + "grad_norm": 0.15917535202581484, + "learning_rate": 9.92253164556962e-06, + "loss": 2.6562, + "step": 404 + }, + { + "epoch": 0.02, + "grad_norm": 0.07928809201876366, + "learning_rate": 9.921518987341772e-06, + "loss": 1.5039, + "step": 406 + }, + { + "epoch": 0.02, + "grad_norm": 0.0471266705915067, + "learning_rate": 9.920506329113924e-06, + "loss": 2.5078, + "step": 408 + }, + { + "epoch": 0.02, + "grad_norm": 0.04866293621489947, + "learning_rate": 9.919493670886078e-06, + "loss": 2.875, + "step": 410 + }, + { + "epoch": 0.02, + "grad_norm": 0.045016903500605984, + "learning_rate": 9.91848101265823e-06, + "loss": 1.7422, + "step": 412 + }, + { + "epoch": 0.02, + "grad_norm": 0.0550550579066975, + "learning_rate": 9.917468354430381e-06, + "loss": 2.0195, + "step": 414 + }, + { + "epoch": 0.02, + "grad_norm": 0.039185639839193286, + "learning_rate": 9.916455696202533e-06, + "loss": 2.2305, + "step": 416 + }, + { + "epoch": 0.02, + "grad_norm": 0.06433655210455046, + "learning_rate": 9.915443037974685e-06, + "loss": 2.1875, + "step": 418 + }, + { + "epoch": 0.02, + "grad_norm": 0.04953175680987281, + "learning_rate": 9.914430379746837e-06, + "loss": 2.082, + "step": 420 + }, + { + "epoch": 0.02, + "grad_norm": 0.050612062350781264, + "learning_rate": 9.913417721518989e-06, + "loss": 1.3594, + "step": 422 + }, + { + "epoch": 0.02, + "grad_norm": 0.04995271945096085, + "learning_rate": 9.912405063291141e-06, + "loss": 2.332, + "step": 424 + }, + { + "epoch": 0.02, + "grad_norm": 0.04739567789158815, + "learning_rate": 9.911392405063291e-06, + "loss": 2.2227, + "step": 426 + }, + { + "epoch": 0.02, + "grad_norm": 0.048251034334719445, + "learning_rate": 9.910379746835443e-06, + "loss": 1.8066, + "step": 428 + }, + { + "epoch": 0.02, + "grad_norm": 0.062472987670653804, + "learning_rate": 9.909367088607595e-06, + "loss": 3.6719, + "step": 430 + }, + { + "epoch": 0.02, + "grad_norm": 0.054186276016001844, + "learning_rate": 9.908354430379747e-06, + "loss": 2.2422, + "step": 432 + }, + { + "epoch": 0.02, + "grad_norm": 0.07225226588103273, + "learning_rate": 9.907341772151899e-06, + "loss": 2.1523, + "step": 434 + }, + { + "epoch": 0.02, + "grad_norm": 0.061083982011778226, + "learning_rate": 9.90632911392405e-06, + "loss": 2.7578, + "step": 436 + }, + { + "epoch": 0.02, + "grad_norm": 0.059831340507868036, + "learning_rate": 9.905316455696203e-06, + "loss": 3.8359, + "step": 438 + }, + { + "epoch": 0.02, + "grad_norm": 0.05045156333502398, + "learning_rate": 9.904303797468356e-06, + "loss": 1.6797, + "step": 440 + }, + { + "epoch": 0.02, + "grad_norm": 0.046842201003395235, + "learning_rate": 9.903291139240508e-06, + "loss": 3.4922, + "step": 442 + }, + { + "epoch": 0.02, + "grad_norm": 0.05029184145728825, + "learning_rate": 9.90227848101266e-06, + "loss": 3.0625, + "step": 444 + }, + { + "epoch": 0.02, + "grad_norm": 0.07671286356421637, + "learning_rate": 9.901265822784812e-06, + "loss": 0.8242, + "step": 446 + }, + { + "epoch": 0.02, + "grad_norm": 0.04915808295544781, + "learning_rate": 9.900253164556964e-06, + "loss": 3.0078, + "step": 448 + }, + { + "epoch": 0.02, + "grad_norm": 0.080096966573764, + "learning_rate": 9.899240506329116e-06, + "loss": 3.6484, + "step": 450 + }, + { + "epoch": 0.02, + "grad_norm": 0.058943602067565375, + "learning_rate": 9.898227848101266e-06, + "loss": 2.5898, + "step": 452 + }, + { + "epoch": 0.02, + "grad_norm": 0.05135092632013379, + "learning_rate": 9.897215189873418e-06, + "loss": 2.9062, + "step": 454 + }, + { + "epoch": 0.02, + "grad_norm": 0.06148651316849905, + "learning_rate": 9.89620253164557e-06, + "loss": 2.8867, + "step": 456 + }, + { + "epoch": 0.02, + "grad_norm": 0.04512943900335472, + "learning_rate": 9.895189873417722e-06, + "loss": 2.75, + "step": 458 + }, + { + "epoch": 0.02, + "grad_norm": 0.07181862208600082, + "learning_rate": 9.894177215189874e-06, + "loss": 3.0547, + "step": 460 + }, + { + "epoch": 0.02, + "grad_norm": 0.06373905049284342, + "learning_rate": 9.893164556962025e-06, + "loss": 2.418, + "step": 462 + }, + { + "epoch": 0.02, + "grad_norm": 0.08387698193619678, + "learning_rate": 9.892151898734177e-06, + "loss": 2.1758, + "step": 464 + }, + { + "epoch": 0.02, + "grad_norm": 0.05084833656375217, + "learning_rate": 9.89113924050633e-06, + "loss": 2.5781, + "step": 466 + }, + { + "epoch": 0.02, + "grad_norm": 0.05234011096545917, + "learning_rate": 9.890126582278481e-06, + "loss": 1.6719, + "step": 468 + }, + { + "epoch": 0.02, + "grad_norm": 0.07821649022272881, + "learning_rate": 9.889113924050635e-06, + "loss": 1.7852, + "step": 470 + }, + { + "epoch": 0.02, + "grad_norm": 0.059443530418835516, + "learning_rate": 9.888101265822787e-06, + "loss": 2.7422, + "step": 472 + }, + { + "epoch": 0.02, + "grad_norm": 0.061142752223246014, + "learning_rate": 9.887088607594939e-06, + "loss": 3.3164, + "step": 474 + }, + { + "epoch": 0.02, + "grad_norm": 0.07359821255430475, + "learning_rate": 9.88607594936709e-06, + "loss": 2.8906, + "step": 476 + }, + { + "epoch": 0.02, + "grad_norm": 0.054895938291319654, + "learning_rate": 9.88506329113924e-06, + "loss": 0.9883, + "step": 478 + }, + { + "epoch": 0.02, + "grad_norm": 0.045605934761492745, + "learning_rate": 9.884050632911393e-06, + "loss": 3.8203, + "step": 480 + }, + { + "epoch": 0.02, + "grad_norm": 0.06526291645439951, + "learning_rate": 9.883037974683545e-06, + "loss": 3.6172, + "step": 482 + }, + { + "epoch": 0.02, + "grad_norm": 0.06413356717781271, + "learning_rate": 9.882025316455696e-06, + "loss": 1.957, + "step": 484 + }, + { + "epoch": 0.02, + "grad_norm": 0.053544808439294274, + "learning_rate": 9.881012658227848e-06, + "loss": 1.5898, + "step": 486 + }, + { + "epoch": 0.02, + "grad_norm": 0.057347606537699075, + "learning_rate": 9.88e-06, + "loss": 2.5859, + "step": 488 + }, + { + "epoch": 0.02, + "grad_norm": 0.060150115517773294, + "learning_rate": 9.878987341772152e-06, + "loss": 1.9141, + "step": 490 + }, + { + "epoch": 0.02, + "grad_norm": 0.04980395120188664, + "learning_rate": 9.877974683544304e-06, + "loss": 3.1719, + "step": 492 + }, + { + "epoch": 0.02, + "grad_norm": 0.05041464419822217, + "learning_rate": 9.876962025316456e-06, + "loss": 2.5312, + "step": 494 + }, + { + "epoch": 0.02, + "grad_norm": 0.05121478836630337, + "learning_rate": 9.875949367088608e-06, + "loss": 3.25, + "step": 496 + }, + { + "epoch": 0.02, + "grad_norm": 0.07952860073200034, + "learning_rate": 9.87493670886076e-06, + "loss": 2.7812, + "step": 498 + }, + { + "epoch": 0.03, + "grad_norm": 0.05042393538929106, + "learning_rate": 9.873924050632913e-06, + "loss": 3.6094, + "step": 500 + }, + { + "epoch": 0.03, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 45.1171875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 93.5546875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.796875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 70.9421615600586, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.2816, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.673, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, + "step": 500 + }, + { + "epoch": 0.03, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.59375, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 21.2890625, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4765625, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 20.163715362548828, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.7785, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.034, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.36, + "step": 500 + }, + { + "epoch": 0.03, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 43.1640625, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 89.6484375, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.875, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 67.29576110839844, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.9868, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.825, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 500 + }, + { + "epoch": 0.03, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 37.5, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 79.1015625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 1.0078125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 60.74357986450195, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.7402, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.342, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.068, + "step": 500 + }, + { + "epoch": 0.03, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.96875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 87.3046875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 66.07913970947266, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.0179, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.809, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 500 + }, + { + "epoch": 0.03, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.484375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 93.9453125, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.80879211425781, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.8716, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.887, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 500 + }, + { + "epoch": 0.03, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.40625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 85.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.74609375, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.67704010009766, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.6862, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.989, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.094, + "step": 500 + }, + { + "epoch": 0.03, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 40.625, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 87.890625, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.65625, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.1350326538086, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 15.7505, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 4.063, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.063, + "step": 500 + }, + { + "epoch": 0.03, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 40.4296875, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 84.375, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8203125, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 64.05941009521484, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.1597, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.404, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.194, + "step": 500 + }, + { + "epoch": 0.03, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 46.484375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 94.140625, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0703125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 71.08375549316406, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 9.3563, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.84, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.107, + "step": 500 + }, + { + "epoch": 0.03, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.3359375, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 93.359375, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 70.08747100830078, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.9513, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.926, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.202, + "step": 500 + }, + { + "epoch": 0.03, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 32.03125, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 70.8984375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.859375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 54.63041687011719, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.2012, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.507, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, + "step": 500 + }, + { + "epoch": 0.03, + "grad_norm": 0.052086814624947436, + "learning_rate": 9.872911392405065e-06, + "loss": 2.5898, + "step": 502 + }, + { + "epoch": 0.03, + "grad_norm": 0.1399877800142533, + "learning_rate": 9.871898734177216e-06, + "loss": 3.0859, + "step": 504 + }, + { + "epoch": 0.03, + "grad_norm": 0.0532839732473988, + "learning_rate": 9.870886075949367e-06, + "loss": 2.2617, + "step": 506 + }, + { + "epoch": 0.03, + "grad_norm": 0.15155453931410987, + "learning_rate": 9.86987341772152e-06, + "loss": 1.0664, + "step": 508 + }, + { + "epoch": 0.03, + "grad_norm": 0.07651479570528692, + "learning_rate": 9.868860759493671e-06, + "loss": 2.2188, + "step": 510 + }, + { + "epoch": 0.03, + "grad_norm": 0.07446787145246864, + "learning_rate": 9.867848101265823e-06, + "loss": 1.6797, + "step": 512 + }, + { + "epoch": 0.03, + "grad_norm": 0.048532115622284516, + "learning_rate": 9.866835443037975e-06, + "loss": 2.5898, + "step": 514 + }, + { + "epoch": 0.03, + "grad_norm": 0.0636538158151717, + "learning_rate": 9.865822784810127e-06, + "loss": 3.6484, + "step": 516 + }, + { + "epoch": 0.03, + "grad_norm": 0.050666875139838745, + "learning_rate": 9.864810126582279e-06, + "loss": 1.0391, + "step": 518 + }, + { + "epoch": 0.03, + "grad_norm": 0.05095008833138064, + "learning_rate": 9.86379746835443e-06, + "loss": 1.6992, + "step": 520 + }, + { + "epoch": 0.03, + "grad_norm": 0.057319866044410354, + "learning_rate": 9.862784810126583e-06, + "loss": 2.1484, + "step": 522 + }, + { + "epoch": 0.03, + "grad_norm": 0.05464334493499168, + "learning_rate": 9.861772151898735e-06, + "loss": 2.6172, + "step": 524 + }, + { + "epoch": 0.03, + "grad_norm": 0.04411594193402404, + "learning_rate": 9.860759493670887e-06, + "loss": 2.8984, + "step": 526 + }, + { + "epoch": 0.03, + "grad_norm": 0.07936098978533161, + "learning_rate": 9.859746835443038e-06, + "loss": 1.7305, + "step": 528 + }, + { + "epoch": 0.03, + "grad_norm": 0.09236852220817833, + "learning_rate": 9.858734177215192e-06, + "loss": 2.1875, + "step": 530 + }, + { + "epoch": 0.03, + "grad_norm": 0.07824501713336816, + "learning_rate": 9.857721518987342e-06, + "loss": 3.4297, + "step": 532 + }, + { + "epoch": 0.03, + "grad_norm": 0.1470976833188794, + "learning_rate": 9.856708860759494e-06, + "loss": 2.5215, + "step": 534 + }, + { + "epoch": 0.03, + "grad_norm": 0.04917188752231493, + "learning_rate": 9.855696202531646e-06, + "loss": 2.6484, + "step": 536 + }, + { + "epoch": 0.03, + "grad_norm": 0.0827307002464502, + "learning_rate": 9.854683544303798e-06, + "loss": 2.1562, + "step": 538 + }, + { + "epoch": 0.03, + "grad_norm": 0.08666793122490411, + "learning_rate": 9.85367088607595e-06, + "loss": 1.2793, + "step": 540 + }, + { + "epoch": 0.03, + "grad_norm": 0.09175616234034045, + "learning_rate": 9.852658227848102e-06, + "loss": 1.8516, + "step": 542 + }, + { + "epoch": 0.03, + "grad_norm": 0.046371319373000215, + "learning_rate": 9.851645569620254e-06, + "loss": 2.5625, + "step": 544 + }, + { + "epoch": 0.03, + "grad_norm": 0.06035931438672101, + "learning_rate": 9.850632911392406e-06, + "loss": 2.2695, + "step": 546 + }, + { + "epoch": 0.03, + "grad_norm": 0.04850263350227715, + "learning_rate": 9.849620253164558e-06, + "loss": 2.5547, + "step": 548 + }, + { + "epoch": 0.03, + "grad_norm": 0.07522083134321193, + "learning_rate": 9.84860759493671e-06, + "loss": 3.4922, + "step": 550 + }, + { + "epoch": 0.03, + "grad_norm": 0.06045883646056416, + "learning_rate": 9.847594936708861e-06, + "loss": 2.7852, + "step": 552 + }, + { + "epoch": 0.03, + "grad_norm": 0.05489802073459884, + "learning_rate": 9.846582278481013e-06, + "loss": 2.6211, + "step": 554 + }, + { + "epoch": 0.03, + "grad_norm": 0.06095202697702689, + "learning_rate": 9.845569620253165e-06, + "loss": 2.8984, + "step": 556 + }, + { + "epoch": 0.03, + "grad_norm": 0.04973758122191133, + "learning_rate": 9.844556962025317e-06, + "loss": 1.791, + "step": 558 + }, + { + "epoch": 0.03, + "grad_norm": 0.13716884502493287, + "learning_rate": 9.843544303797469e-06, + "loss": 3.3125, + "step": 560 + }, + { + "epoch": 0.03, + "grad_norm": 0.04949379404632409, + "learning_rate": 9.842531645569621e-06, + "loss": 2.2852, + "step": 562 + }, + { + "epoch": 0.03, + "grad_norm": 0.051544756346996144, + "learning_rate": 9.841518987341773e-06, + "loss": 1.6953, + "step": 564 + }, + { + "epoch": 0.03, + "grad_norm": 0.056998301240872185, + "learning_rate": 9.840506329113925e-06, + "loss": 3.4766, + "step": 566 + }, + { + "epoch": 0.03, + "grad_norm": 0.05953859829543366, + "learning_rate": 9.839493670886077e-06, + "loss": 2.1992, + "step": 568 + }, + { + "epoch": 0.03, + "grad_norm": 0.05423558793405452, + "learning_rate": 9.838481012658229e-06, + "loss": 1.0488, + "step": 570 + }, + { + "epoch": 0.03, + "grad_norm": 0.05084089297876364, + "learning_rate": 9.83746835443038e-06, + "loss": 2.9531, + "step": 572 + }, + { + "epoch": 0.03, + "grad_norm": 0.0703427693327456, + "learning_rate": 9.836455696202532e-06, + "loss": 2.4375, + "step": 574 + }, + { + "epoch": 0.03, + "grad_norm": 0.06170662786551502, + "learning_rate": 9.835443037974684e-06, + "loss": 3.2891, + "step": 576 + }, + { + "epoch": 0.03, + "grad_norm": 0.08852110535169934, + "learning_rate": 9.834430379746836e-06, + "loss": 2.625, + "step": 578 + }, + { + "epoch": 0.03, + "grad_norm": 0.10241258632199284, + "learning_rate": 9.833417721518988e-06, + "loss": 1.6133, + "step": 580 + }, + { + "epoch": 0.03, + "grad_norm": 0.056497885218087136, + "learning_rate": 9.83240506329114e-06, + "loss": 2.3867, + "step": 582 + }, + { + "epoch": 0.03, + "grad_norm": 0.053611763514035925, + "learning_rate": 9.831392405063292e-06, + "loss": 2.1406, + "step": 584 + }, + { + "epoch": 0.03, + "grad_norm": 0.044289570354148024, + "learning_rate": 9.830379746835444e-06, + "loss": 2.8125, + "step": 586 + }, + { + "epoch": 0.03, + "grad_norm": 0.051724720218463194, + "learning_rate": 9.829367088607596e-06, + "loss": 2.3438, + "step": 588 + }, + { + "epoch": 0.03, + "grad_norm": 0.043367256971020826, + "learning_rate": 9.828354430379748e-06, + "loss": 2.2695, + "step": 590 + }, + { + "epoch": 0.03, + "grad_norm": 0.08974582061892053, + "learning_rate": 9.8273417721519e-06, + "loss": 2.5742, + "step": 592 + }, + { + "epoch": 0.03, + "grad_norm": 0.050289857638527925, + "learning_rate": 9.826329113924051e-06, + "loss": 1.0898, + "step": 594 + }, + { + "epoch": 0.03, + "grad_norm": 0.04624382395844746, + "learning_rate": 9.825316455696203e-06, + "loss": 2.1914, + "step": 596 + }, + { + "epoch": 0.03, + "grad_norm": 0.0413793982291362, + "learning_rate": 9.824303797468355e-06, + "loss": 2.9219, + "step": 598 + }, + { + "epoch": 0.03, + "grad_norm": 0.053437477503598924, + "learning_rate": 9.823291139240507e-06, + "loss": 2.3242, + "step": 600 + }, + { + "epoch": 0.03, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 46.09375, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 93.9453125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.8046875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.61688995361328, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.248, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.69, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, + "step": 600 + }, + { + "epoch": 0.03, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.9609375, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 24.21875, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4609375, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.876731872558594, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.6765, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.912, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.374, + "step": 600 + }, + { + "epoch": 0.03, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 45.5078125, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 93.9453125, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.86328125, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 69.94965362548828, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.0773, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.778, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.09, + "step": 600 + }, + { + "epoch": 0.03, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 39.6484375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 83.3984375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.984375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 62.75840377807617, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.5581, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.396, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 600 + }, + { + "epoch": 0.03, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.40625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 84.1796875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 63.93449020385742, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.8983, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.872, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 600 + }, + { + "epoch": 0.03, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.921875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.94852447509766, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 11.028, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.803, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 600 + }, + { + "epoch": 0.03, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 40.625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 85.15625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.46388244628906, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.6996, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.982, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 600 + }, + { + "epoch": 0.03, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 40.8203125, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.0859375, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.4375, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.77207946777344, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.2949, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.928, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.061, + "step": 600 + }, + { + "epoch": 0.03, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 36.5234375, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 78.7109375, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8359375, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 59.384422302246094, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.1332, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.468, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.195, + "step": 600 + }, + { + "epoch": 0.03, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 46.09375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 93.5546875, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.30752563476562, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 10.2807, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.225, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.097, + "step": 600 + }, + { + "epoch": 0.03, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.7265625, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 92.3828125, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.6953125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 69.560791015625, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.8394, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 13.225, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.207, + "step": 600 + }, + { + "epoch": 0.03, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 29.6875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 68.1640625, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.87109375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 52.34931564331055, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.2839, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.481, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, + "step": 600 + }, + { + "epoch": 0.03, + "grad_norm": 0.08623680100136266, + "learning_rate": 9.822278481012659e-06, + "loss": 2.6758, + "step": 602 + }, + { + "epoch": 0.03, + "grad_norm": 0.07319114177823224, + "learning_rate": 9.821265822784811e-06, + "loss": 3.8359, + "step": 604 + }, + { + "epoch": 0.03, + "grad_norm": 0.0482368697003861, + "learning_rate": 9.820253164556963e-06, + "loss": 2.4297, + "step": 606 + }, + { + "epoch": 0.03, + "grad_norm": 0.07861562097515083, + "learning_rate": 9.819240506329115e-06, + "loss": 2.4766, + "step": 608 + }, + { + "epoch": 0.03, + "grad_norm": 0.051800647140822956, + "learning_rate": 9.818227848101267e-06, + "loss": 3.5703, + "step": 610 + }, + { + "epoch": 0.03, + "grad_norm": 0.05715139468084694, + "learning_rate": 9.817215189873419e-06, + "loss": 2.6406, + "step": 612 + }, + { + "epoch": 0.03, + "grad_norm": 0.10738698652889787, + "learning_rate": 9.81620253164557e-06, + "loss": 1.7559, + "step": 614 + }, + { + "epoch": 0.03, + "grad_norm": 0.0589747795295819, + "learning_rate": 9.815189873417722e-06, + "loss": 3.2188, + "step": 616 + }, + { + "epoch": 0.03, + "grad_norm": 0.047291320031127355, + "learning_rate": 9.814177215189874e-06, + "loss": 2.7969, + "step": 618 + }, + { + "epoch": 0.03, + "grad_norm": 0.0513134336965957, + "learning_rate": 9.813164556962026e-06, + "loss": 3.2422, + "step": 620 + }, + { + "epoch": 0.03, + "grad_norm": 0.05058905562434262, + "learning_rate": 9.812151898734178e-06, + "loss": 2.2773, + "step": 622 + }, + { + "epoch": 0.03, + "grad_norm": 0.07249453092368723, + "learning_rate": 9.81113924050633e-06, + "loss": 3.4219, + "step": 624 + }, + { + "epoch": 0.03, + "grad_norm": 0.04474715275304292, + "learning_rate": 9.810126582278482e-06, + "loss": 2.625, + "step": 626 + }, + { + "epoch": 0.03, + "grad_norm": 0.10601883505288384, + "learning_rate": 9.809113924050634e-06, + "loss": 2.4453, + "step": 628 + }, + { + "epoch": 0.03, + "grad_norm": 0.04627960285472802, + "learning_rate": 9.808101265822786e-06, + "loss": 2.0195, + "step": 630 + }, + { + "epoch": 0.03, + "grad_norm": 0.0651145970413386, + "learning_rate": 9.807088607594938e-06, + "loss": 3.3125, + "step": 632 + }, + { + "epoch": 0.03, + "grad_norm": 0.048961687998195856, + "learning_rate": 9.80607594936709e-06, + "loss": 2.3555, + "step": 634 + }, + { + "epoch": 0.03, + "grad_norm": 0.08215525597931916, + "learning_rate": 9.805063291139241e-06, + "loss": 2.793, + "step": 636 + }, + { + "epoch": 0.03, + "grad_norm": 0.05335831196369767, + "learning_rate": 9.804050632911393e-06, + "loss": 2.8516, + "step": 638 + }, + { + "epoch": 0.03, + "grad_norm": 0.05642278685436397, + "learning_rate": 9.803037974683545e-06, + "loss": 2.3594, + "step": 640 + }, + { + "epoch": 0.03, + "grad_norm": 0.05549078037094988, + "learning_rate": 9.802025316455697e-06, + "loss": 1.5977, + "step": 642 + }, + { + "epoch": 0.03, + "grad_norm": 0.05058085579920028, + "learning_rate": 9.801012658227849e-06, + "loss": 2.4688, + "step": 644 + }, + { + "epoch": 0.03, + "grad_norm": 0.06410392898663864, + "learning_rate": 9.800000000000001e-06, + "loss": 4.0469, + "step": 646 + }, + { + "epoch": 0.03, + "grad_norm": 0.05075679015744242, + "learning_rate": 9.798987341772153e-06, + "loss": 1.3359, + "step": 648 + }, + { + "epoch": 0.03, + "grad_norm": 0.06223558769043007, + "learning_rate": 9.797974683544305e-06, + "loss": 2.6797, + "step": 650 + }, + { + "epoch": 0.03, + "grad_norm": 0.04602290584883312, + "learning_rate": 9.796962025316457e-06, + "loss": 1.1328, + "step": 652 + }, + { + "epoch": 0.03, + "grad_norm": 0.06882275137670477, + "learning_rate": 9.795949367088609e-06, + "loss": 3.5547, + "step": 654 + }, + { + "epoch": 0.03, + "grad_norm": 0.044864511663979555, + "learning_rate": 9.79493670886076e-06, + "loss": 1.9414, + "step": 656 + }, + { + "epoch": 0.03, + "grad_norm": 0.04974066836528442, + "learning_rate": 9.793924050632912e-06, + "loss": 2.1836, + "step": 658 + }, + { + "epoch": 0.03, + "grad_norm": 0.048577610153630706, + "learning_rate": 9.792911392405064e-06, + "loss": 3.2109, + "step": 660 + }, + { + "epoch": 0.03, + "grad_norm": 0.07990453093749185, + "learning_rate": 9.791898734177216e-06, + "loss": 3.8438, + "step": 662 + }, + { + "epoch": 0.03, + "grad_norm": 0.04685956979146973, + "learning_rate": 9.790886075949367e-06, + "loss": 3.2188, + "step": 664 + }, + { + "epoch": 0.03, + "grad_norm": 0.04572710820131021, + "learning_rate": 9.78987341772152e-06, + "loss": 3.2031, + "step": 666 + }, + { + "epoch": 0.03, + "grad_norm": 0.04694385984204392, + "learning_rate": 9.788860759493672e-06, + "loss": 3.3047, + "step": 668 + }, + { + "epoch": 0.03, + "grad_norm": 0.0572023022137153, + "learning_rate": 9.787848101265824e-06, + "loss": 3.0391, + "step": 670 + }, + { + "epoch": 0.03, + "grad_norm": 0.04710781129215592, + "learning_rate": 9.786835443037976e-06, + "loss": 2.2109, + "step": 672 + }, + { + "epoch": 0.03, + "grad_norm": 0.07245719172520465, + "learning_rate": 9.785822784810128e-06, + "loss": 1.6289, + "step": 674 + }, + { + "epoch": 0.03, + "grad_norm": 0.061101444487292524, + "learning_rate": 9.78481012658228e-06, + "loss": 3.0078, + "step": 676 + }, + { + "epoch": 0.03, + "grad_norm": 0.1327970089012375, + "learning_rate": 9.783797468354432e-06, + "loss": 3.3203, + "step": 678 + }, + { + "epoch": 0.03, + "grad_norm": 0.06351862027611924, + "learning_rate": 9.782784810126583e-06, + "loss": 3.9688, + "step": 680 + }, + { + "epoch": 0.03, + "grad_norm": 0.0551890108401501, + "learning_rate": 9.781772151898735e-06, + "loss": 1.5664, + "step": 682 + }, + { + "epoch": 0.03, + "grad_norm": 0.049497806599666815, + "learning_rate": 9.780759493670887e-06, + "loss": 3.3438, + "step": 684 + }, + { + "epoch": 0.03, + "grad_norm": 0.05577083436309615, + "learning_rate": 9.77974683544304e-06, + "loss": 3.5, + "step": 686 + }, + { + "epoch": 0.03, + "grad_norm": 0.04930049337356384, + "learning_rate": 9.778734177215191e-06, + "loss": 3.5234, + "step": 688 + }, + { + "epoch": 0.03, + "grad_norm": 0.052658271181243624, + "learning_rate": 9.777721518987343e-06, + "loss": 2.4375, + "step": 690 + }, + { + "epoch": 0.03, + "grad_norm": 0.05890374238080107, + "learning_rate": 9.776708860759493e-06, + "loss": 2.4102, + "step": 692 + }, + { + "epoch": 0.03, + "grad_norm": 0.05692843983127017, + "learning_rate": 9.775696202531645e-06, + "loss": 3.8516, + "step": 694 + }, + { + "epoch": 0.03, + "grad_norm": 0.08032851209382136, + "learning_rate": 9.774683544303799e-06, + "loss": 3.2617, + "step": 696 + }, + { + "epoch": 0.03, + "grad_norm": 0.06266368353357782, + "learning_rate": 9.77367088607595e-06, + "loss": 2.5938, + "step": 698 + }, + { + "epoch": 0.04, + "grad_norm": 0.055326096687423366, + "learning_rate": 9.772658227848103e-06, + "loss": 3.5781, + "step": 700 + }, + { + "epoch": 0.04, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.53125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 92.1875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.796875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.31864929199219, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.2991, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.664, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, + "step": 700 + }, + { + "epoch": 0.04, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 11.1328125, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 25.1953125, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4609375, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 23.333736419677734, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.8764, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 22.25, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.348, + "step": 700 + }, + { + "epoch": 0.04, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 47.4609375, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 97.0703125, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.85546875, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 72.60289764404297, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.9461, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.847, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 700 + }, + { + "epoch": 0.04, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 40.625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 83.3984375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.98046875, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 63.90742111206055, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.5324, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.404, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 700 + }, + { + "epoch": 0.04, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.40625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 84.5703125, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 64.74537658691406, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.0849, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.774, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.09, + "step": 700 + }, + { + "epoch": 0.04, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.8515625, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 96.09375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 72.10258483886719, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.8533, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.897, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 700 + }, + { + "epoch": 0.04, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.40625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 86.1328125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.01837158203125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.9244, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.858, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 700 + }, + { + "epoch": 0.04, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 40.625, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 87.890625, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.46875, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.89733123779297, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.0426, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.989, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, + "step": 700 + }, + { + "epoch": 0.04, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 41.6015625, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 85.7421875, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.81640625, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 65.13924407958984, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.2631, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.16, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.19, + "step": 700 + }, + { + "epoch": 0.04, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 46.2890625, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 94.140625, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.59991455078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 5.745, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 11.14, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.174, + "step": 700 + }, + { + "epoch": 0.04, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.9453125, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.015625, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.7109375, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 67.97984313964844, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.9407, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.954, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.202, + "step": 700 + }, + { + "epoch": 0.04, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 32.2265625, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 70.1171875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.84765625, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 54.56657791137695, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.286, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.48, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, + "step": 700 + }, + { + "epoch": 0.04, + "grad_norm": 0.06118596899639704, + "learning_rate": 9.771645569620254e-06, + "loss": 3.2109, + "step": 702 + }, + { + "epoch": 0.04, + "grad_norm": 0.047725355354412885, + "learning_rate": 9.770632911392406e-06, + "loss": 1.8242, + "step": 704 + }, + { + "epoch": 0.04, + "grad_norm": 0.052086155288461826, + "learning_rate": 9.769620253164558e-06, + "loss": 1.8867, + "step": 706 + }, + { + "epoch": 0.04, + "grad_norm": 0.059152476425263945, + "learning_rate": 9.76860759493671e-06, + "loss": 2.6211, + "step": 708 + }, + { + "epoch": 0.04, + "grad_norm": 0.04760669560978858, + "learning_rate": 9.767594936708862e-06, + "loss": 2.6523, + "step": 710 + }, + { + "epoch": 0.04, + "grad_norm": 0.04015573940759585, + "learning_rate": 9.766582278481014e-06, + "loss": 3.4375, + "step": 712 + }, + { + "epoch": 0.04, + "grad_norm": 0.04941952895660446, + "learning_rate": 9.765569620253166e-06, + "loss": 1.8672, + "step": 714 + }, + { + "epoch": 0.04, + "grad_norm": 0.06014704393875058, + "learning_rate": 9.764556962025318e-06, + "loss": 3.2344, + "step": 716 + }, + { + "epoch": 0.04, + "grad_norm": 0.05626127416491607, + "learning_rate": 9.763544303797468e-06, + "loss": 1.3164, + "step": 718 + }, + { + "epoch": 0.04, + "grad_norm": 0.04878155156099776, + "learning_rate": 9.76253164556962e-06, + "loss": 2.1953, + "step": 720 + }, + { + "epoch": 0.04, + "grad_norm": 0.05715478816178281, + "learning_rate": 9.761518987341772e-06, + "loss": 3.0781, + "step": 722 + }, + { + "epoch": 0.04, + "grad_norm": 0.09967059703652872, + "learning_rate": 9.760506329113924e-06, + "loss": 3.2266, + "step": 724 + }, + { + "epoch": 0.04, + "grad_norm": 0.05764160810613535, + "learning_rate": 9.759493670886077e-06, + "loss": 1.4375, + "step": 726 + }, + { + "epoch": 0.04, + "grad_norm": 0.06727665333940962, + "learning_rate": 9.75848101265823e-06, + "loss": 3.0078, + "step": 728 + }, + { + "epoch": 0.04, + "grad_norm": 0.05330506559159864, + "learning_rate": 9.757468354430381e-06, + "loss": 1.9844, + "step": 730 + }, + { + "epoch": 0.04, + "grad_norm": 0.0732283674553054, + "learning_rate": 9.756455696202533e-06, + "loss": 2.4297, + "step": 732 + }, + { + "epoch": 0.04, + "grad_norm": 0.06311516586623442, + "learning_rate": 9.755443037974685e-06, + "loss": 2.2441, + "step": 734 + }, + { + "epoch": 0.04, + "grad_norm": 0.0711140836729622, + "learning_rate": 9.754430379746837e-06, + "loss": 1.2246, + "step": 736 + }, + { + "epoch": 0.04, + "grad_norm": 0.06195722460862401, + "learning_rate": 9.753417721518989e-06, + "loss": 3.9219, + "step": 738 + }, + { + "epoch": 0.04, + "grad_norm": 0.03857369362196024, + "learning_rate": 9.75240506329114e-06, + "loss": 2.9766, + "step": 740 + }, + { + "epoch": 0.04, + "grad_norm": 0.08141326010590223, + "learning_rate": 9.751392405063293e-06, + "loss": 2.1875, + "step": 742 + }, + { + "epoch": 0.04, + "grad_norm": 0.053241870631494596, + "learning_rate": 9.750379746835443e-06, + "loss": 2.3203, + "step": 744 + }, + { + "epoch": 0.04, + "grad_norm": 0.052583485208944805, + "learning_rate": 9.749367088607595e-06, + "loss": 1.3594, + "step": 746 + }, + { + "epoch": 0.04, + "grad_norm": 0.06262550939445087, + "learning_rate": 9.748354430379747e-06, + "loss": 3.8125, + "step": 748 + }, + { + "epoch": 0.04, + "grad_norm": 0.05713627433538821, + "learning_rate": 9.747341772151899e-06, + "loss": 2.9922, + "step": 750 + }, + { + "epoch": 0.04, + "grad_norm": 0.08006391748138383, + "learning_rate": 9.74632911392405e-06, + "loss": 2.6172, + "step": 752 + }, + { + "epoch": 0.04, + "grad_norm": 0.04952527656245675, + "learning_rate": 9.745316455696202e-06, + "loss": 3.6562, + "step": 754 + }, + { + "epoch": 0.04, + "grad_norm": 0.09081931780198628, + "learning_rate": 9.744303797468356e-06, + "loss": 1.4219, + "step": 756 + }, + { + "epoch": 0.04, + "grad_norm": 0.0949847693923792, + "learning_rate": 9.743291139240508e-06, + "loss": 3.4688, + "step": 758 + }, + { + "epoch": 0.04, + "grad_norm": 0.050663076963228845, + "learning_rate": 9.74227848101266e-06, + "loss": 1.0195, + "step": 760 + }, + { + "epoch": 0.04, + "grad_norm": 0.0497400270790924, + "learning_rate": 9.741265822784812e-06, + "loss": 1.3047, + "step": 762 + }, + { + "epoch": 0.04, + "grad_norm": 0.1255160423475118, + "learning_rate": 9.740253164556964e-06, + "loss": 0.7227, + "step": 764 + }, + { + "epoch": 0.04, + "grad_norm": 0.05129981962807472, + "learning_rate": 9.739240506329116e-06, + "loss": 1.4102, + "step": 766 + }, + { + "epoch": 0.04, + "grad_norm": 0.07927376346910217, + "learning_rate": 9.738227848101267e-06, + "loss": 2.9648, + "step": 768 + }, + { + "epoch": 0.04, + "grad_norm": 0.04935552207195148, + "learning_rate": 9.73721518987342e-06, + "loss": 1.3008, + "step": 770 + }, + { + "epoch": 0.04, + "grad_norm": 0.049573584190090544, + "learning_rate": 9.73620253164557e-06, + "loss": 2.9609, + "step": 772 + }, + { + "epoch": 0.04, + "grad_norm": 0.056698471570980416, + "learning_rate": 9.735189873417721e-06, + "loss": 2.2383, + "step": 774 + }, + { + "epoch": 0.04, + "grad_norm": 0.04851942949305104, + "learning_rate": 9.734177215189873e-06, + "loss": 3.0234, + "step": 776 + }, + { + "epoch": 0.04, + "grad_norm": 0.05230940565676036, + "learning_rate": 9.733164556962025e-06, + "loss": 2.4453, + "step": 778 + }, + { + "epoch": 0.04, + "grad_norm": 0.05314584973307411, + "learning_rate": 9.732151898734177e-06, + "loss": 2.6914, + "step": 780 + }, + { + "epoch": 0.04, + "grad_norm": 0.0411558012354048, + "learning_rate": 9.731139240506329e-06, + "loss": 2.9922, + "step": 782 + }, + { + "epoch": 0.04, + "grad_norm": 0.08371388601948121, + "learning_rate": 9.730126582278481e-06, + "loss": 1.3887, + "step": 784 + }, + { + "epoch": 0.04, + "grad_norm": 0.051910963285288374, + "learning_rate": 9.729113924050635e-06, + "loss": 2.2695, + "step": 786 + }, + { + "epoch": 0.04, + "grad_norm": 0.0828403888156875, + "learning_rate": 9.728101265822787e-06, + "loss": 1.623, + "step": 788 + }, + { + "epoch": 0.04, + "grad_norm": 0.0981728100247187, + "learning_rate": 9.727088607594938e-06, + "loss": 1.5234, + "step": 790 + }, + { + "epoch": 0.04, + "grad_norm": 0.04084709431955858, + "learning_rate": 9.72607594936709e-06, + "loss": 1.8711, + "step": 792 + }, + { + "epoch": 0.04, + "grad_norm": 0.07636058795930367, + "learning_rate": 9.725063291139242e-06, + "loss": 4.625, + "step": 794 + }, + { + "epoch": 0.04, + "grad_norm": 0.04264657114245911, + "learning_rate": 9.724050632911394e-06, + "loss": 2.9941, + "step": 796 + }, + { + "epoch": 0.04, + "grad_norm": 0.05140111703740243, + "learning_rate": 9.723037974683544e-06, + "loss": 1.9727, + "step": 798 + }, + { + "epoch": 0.04, + "grad_norm": 0.04100141392608911, + "learning_rate": 9.722025316455696e-06, + "loss": 3.3281, + "step": 800 + }, + { + "epoch": 0.04, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.7265625, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.6015625, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.17064666748047, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.4337, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.597, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.087, + "step": 800 + }, + { + "epoch": 0.04, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.3984375, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 20.1171875, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.484375, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 19.849475860595703, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.5955, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 24.658, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.385, + "step": 800 + }, + { + "epoch": 0.04, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 47.0703125, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 95.8984375, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.86328125, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 71.50338745117188, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.9137, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.864, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 800 + }, + { + "epoch": 0.04, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 38.28125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 81.25, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.9921875, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 61.09928512573242, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.5899, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.387, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 800 + }, + { + "epoch": 0.04, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.578125, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 86.71875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.71888732910156, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.9099, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.866, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 800 + }, + { + "epoch": 0.04, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.921875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.62092590332031, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 11.0013, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.818, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 800 + }, + { + "epoch": 0.04, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.6015625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 86.1328125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 65.20923614501953, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.7597, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.948, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 800 + }, + { + "epoch": 0.04, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.1875, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.8671875, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.34375, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.94634246826172, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.3804, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.907, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.061, + "step": 800 + }, + { + "epoch": 0.04, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 27.9296875, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 63.28125, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.87890625, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 49.52406692504883, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.0823, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.593, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.197, + "step": 800 + }, + { + "epoch": 0.04, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 46.875, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 95.1171875, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0625, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 71.49986267089844, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 9.3374, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.854, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.107, + "step": 800 + }, + { + "epoch": 0.04, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.9453125, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.9921875, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.51725769042969, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.8596, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 13.17, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.206, + "step": 800 + }, + { + "epoch": 0.04, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 29.296875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 67.578125, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.859375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 52.38294219970703, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.2007, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.507, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, + "step": 800 + }, + { + "epoch": 0.04, + "grad_norm": 0.09124114355291447, + "learning_rate": 9.721012658227848e-06, + "loss": 1.7773, + "step": 802 + }, + { + "epoch": 0.04, + "grad_norm": 0.054038518364640103, + "learning_rate": 9.72e-06, + "loss": 1.9258, + "step": 804 + }, + { + "epoch": 0.04, + "grad_norm": 0.039128087533217365, + "learning_rate": 9.718987341772152e-06, + "loss": 3.5234, + "step": 806 + }, + { + "epoch": 0.04, + "grad_norm": 0.08953747686898128, + "learning_rate": 9.717974683544304e-06, + "loss": 1.7305, + "step": 808 + }, + { + "epoch": 0.04, + "grad_norm": 0.08799844856665831, + "learning_rate": 9.716962025316456e-06, + "loss": 3.25, + "step": 810 + }, + { + "epoch": 0.04, + "grad_norm": 0.08959604406929138, + "learning_rate": 9.715949367088608e-06, + "loss": 1.707, + "step": 812 + }, + { + "epoch": 0.04, + "grad_norm": 0.07830294965984795, + "learning_rate": 9.71493670886076e-06, + "loss": 2.4766, + "step": 814 + }, + { + "epoch": 0.04, + "grad_norm": 0.08465842673927493, + "learning_rate": 9.713924050632913e-06, + "loss": 2.5508, + "step": 816 + }, + { + "epoch": 0.04, + "grad_norm": 0.04974901837385099, + "learning_rate": 9.712911392405065e-06, + "loss": 1.6953, + "step": 818 + }, + { + "epoch": 0.04, + "grad_norm": 0.0779470742221457, + "learning_rate": 9.711898734177217e-06, + "loss": 1.666, + "step": 820 + }, + { + "epoch": 0.04, + "grad_norm": 0.0599924747024499, + "learning_rate": 9.710886075949369e-06, + "loss": 4.0312, + "step": 822 + }, + { + "epoch": 0.04, + "grad_norm": 0.04762067121368817, + "learning_rate": 9.70987341772152e-06, + "loss": 2.1953, + "step": 824 + }, + { + "epoch": 0.04, + "grad_norm": 0.058344615339498314, + "learning_rate": 9.708860759493671e-06, + "loss": 2.9648, + "step": 826 + }, + { + "epoch": 0.04, + "grad_norm": 0.08011354558563408, + "learning_rate": 9.707848101265823e-06, + "loss": 1.1094, + "step": 828 + }, + { + "epoch": 0.04, + "grad_norm": 0.04945642122097788, + "learning_rate": 9.706835443037975e-06, + "loss": 2.6172, + "step": 830 + }, + { + "epoch": 0.04, + "grad_norm": 0.050330923165945715, + "learning_rate": 9.705822784810127e-06, + "loss": 1.3594, + "step": 832 + }, + { + "epoch": 0.04, + "grad_norm": 0.055200305484761326, + "learning_rate": 9.704810126582279e-06, + "loss": 2.8359, + "step": 834 + }, + { + "epoch": 0.04, + "grad_norm": 0.07118322079438398, + "learning_rate": 9.70379746835443e-06, + "loss": 2.7109, + "step": 836 + }, + { + "epoch": 0.04, + "grad_norm": 0.08242670176144666, + "learning_rate": 9.702784810126583e-06, + "loss": 3.1641, + "step": 838 + }, + { + "epoch": 0.04, + "grad_norm": 0.13921593334632498, + "learning_rate": 9.701772151898734e-06, + "loss": 2.293, + "step": 840 + }, + { + "epoch": 0.04, + "grad_norm": 0.04956587870814121, + "learning_rate": 9.700759493670886e-06, + "loss": 4.1875, + "step": 842 + }, + { + "epoch": 0.04, + "grad_norm": 0.06392285373789786, + "learning_rate": 9.699746835443038e-06, + "loss": 2.4727, + "step": 844 + }, + { + "epoch": 0.04, + "grad_norm": 0.05315458905982316, + "learning_rate": 9.698734177215192e-06, + "loss": 2.6484, + "step": 846 + }, + { + "epoch": 0.04, + "grad_norm": 0.0666317861392757, + "learning_rate": 9.697721518987344e-06, + "loss": 2.334, + "step": 848 + }, + { + "epoch": 0.04, + "grad_norm": 0.05001469588005142, + "learning_rate": 9.696708860759494e-06, + "loss": 2.2031, + "step": 850 + }, + { + "epoch": 0.04, + "grad_norm": 0.05907508517646925, + "learning_rate": 9.695696202531646e-06, + "loss": 2.3516, + "step": 852 + }, + { + "epoch": 0.04, + "grad_norm": 0.0905842584665758, + "learning_rate": 9.694683544303798e-06, + "loss": 3.1172, + "step": 854 + }, + { + "epoch": 0.04, + "grad_norm": 0.09929452015435283, + "learning_rate": 9.69367088607595e-06, + "loss": 2.6172, + "step": 856 + }, + { + "epoch": 0.04, + "grad_norm": 0.06623645068675771, + "learning_rate": 9.692658227848102e-06, + "loss": 3.875, + "step": 858 + }, + { + "epoch": 0.04, + "grad_norm": 0.048775790301383154, + "learning_rate": 9.691645569620254e-06, + "loss": 2.2676, + "step": 860 + }, + { + "epoch": 0.04, + "grad_norm": 0.06093985556060049, + "learning_rate": 9.690632911392405e-06, + "loss": 3.625, + "step": 862 + }, + { + "epoch": 0.04, + "grad_norm": 0.04870386047009894, + "learning_rate": 9.689620253164557e-06, + "loss": 2.2148, + "step": 864 + }, + { + "epoch": 0.04, + "grad_norm": 0.0576400145878143, + "learning_rate": 9.68860759493671e-06, + "loss": 3.4375, + "step": 866 + }, + { + "epoch": 0.04, + "grad_norm": 0.07046074934432127, + "learning_rate": 9.687594936708861e-06, + "loss": 2.8281, + "step": 868 + }, + { + "epoch": 0.04, + "grad_norm": 0.028188252640599154, + "learning_rate": 9.686582278481013e-06, + "loss": 3.2188, + "step": 870 + }, + { + "epoch": 0.04, + "grad_norm": 0.041067254121758504, + "learning_rate": 9.685569620253165e-06, + "loss": 2.1641, + "step": 872 + }, + { + "epoch": 0.04, + "grad_norm": 0.0605214184651647, + "learning_rate": 9.684556962025317e-06, + "loss": 1.8828, + "step": 874 + }, + { + "epoch": 0.04, + "grad_norm": 0.11912955951896191, + "learning_rate": 9.68354430379747e-06, + "loss": 1.7461, + "step": 876 + }, + { + "epoch": 0.04, + "grad_norm": 0.061897867924752575, + "learning_rate": 9.68253164556962e-06, + "loss": 3.6094, + "step": 878 + }, + { + "epoch": 0.04, + "grad_norm": 0.047340252282043194, + "learning_rate": 9.681518987341773e-06, + "loss": 1.1426, + "step": 880 + }, + { + "epoch": 0.04, + "grad_norm": 0.053958879742531454, + "learning_rate": 9.680506329113925e-06, + "loss": 1.3281, + "step": 882 + }, + { + "epoch": 0.04, + "grad_norm": 0.05998403293897633, + "learning_rate": 9.679493670886076e-06, + "loss": 3.2031, + "step": 884 + }, + { + "epoch": 0.04, + "grad_norm": 0.0527429419199256, + "learning_rate": 9.678481012658228e-06, + "loss": 3.4297, + "step": 886 + }, + { + "epoch": 0.04, + "grad_norm": 0.05507073065314479, + "learning_rate": 9.67746835443038e-06, + "loss": 4.0234, + "step": 888 + }, + { + "epoch": 0.04, + "grad_norm": 0.04697289030607602, + "learning_rate": 9.676455696202532e-06, + "loss": 2.3047, + "step": 890 + }, + { + "epoch": 0.04, + "grad_norm": 0.06050305742169189, + "learning_rate": 9.675443037974684e-06, + "loss": 3.7109, + "step": 892 + }, + { + "epoch": 0.04, + "grad_norm": 0.08237426148645782, + "learning_rate": 9.674430379746836e-06, + "loss": 2.5312, + "step": 894 + }, + { + "epoch": 0.04, + "grad_norm": 0.05394054450123622, + "learning_rate": 9.673417721518988e-06, + "loss": 2.6484, + "step": 896 + }, + { + "epoch": 0.04, + "grad_norm": 0.047424890816645304, + "learning_rate": 9.67240506329114e-06, + "loss": 3.4141, + "step": 898 + }, + { + "epoch": 0.04, + "grad_norm": 0.045427998247335184, + "learning_rate": 9.671392405063292e-06, + "loss": 2.3906, + "step": 900 + }, + { + "epoch": 0.04, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 43.9453125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 90.8203125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.796875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.61907958984375, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.1916, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.719, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, + "step": 900 + }, + { + "epoch": 0.04, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.1796875, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 22.65625, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.46875, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.067502975463867, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.9599, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 21.622, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.338, + "step": 900 + }, + { + "epoch": 0.04, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 47.4609375, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 96.484375, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.859375, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 72.28506469726562, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.7191, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.971, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 900 + }, + { + "epoch": 0.04, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 37.890625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 79.8828125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.99609375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 60.593994140625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.5927, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.386, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 900 + }, + { + "epoch": 0.04, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.6015625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 85.9375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.39681243896484, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.0105, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.813, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 900 + }, + { + "epoch": 0.04, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.2890625, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.3359375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.64515686035156, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.6221, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 6.025, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.094, + "step": 900 + }, + { + "epoch": 0.04, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.796875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 86.1328125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.52625274658203, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 11.2361, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.696, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.089, + "step": 900 + }, + { + "epoch": 0.04, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 41.40625, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 87.890625, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.265625, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.78397369384766, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.0766, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.981, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, + "step": 900 + }, + { + "epoch": 0.04, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 37.6953125, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 79.8828125, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8359375, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 60.25852966308594, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.6182, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 11.392, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.178, + "step": 900 + }, + { + "epoch": 0.04, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 47.0703125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 95.5078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0703125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 71.48451232910156, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 7.4162, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 8.63, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.135, + "step": 900 + }, + { + "epoch": 0.04, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.359375, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 90.8203125, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 67.97981262207031, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.6943, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 13.634, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.213, + "step": 900 + }, + { + "epoch": 0.04, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 31.25, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 71.2890625, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.8671875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 54.27470016479492, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.2779, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.482, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, + "step": 900 + }, + { + "epoch": 0.05, + "grad_norm": 0.050036580052467146, + "learning_rate": 9.670379746835444e-06, + "loss": 1.7422, + "step": 902 + }, + { + "epoch": 0.05, + "grad_norm": 0.08750306884033199, + "learning_rate": 9.669367088607596e-06, + "loss": 3.0234, + "step": 904 + }, + { + "epoch": 0.05, + "grad_norm": 0.054223431084887544, + "learning_rate": 9.668354430379747e-06, + "loss": 1.2734, + "step": 906 + }, + { + "epoch": 0.05, + "grad_norm": 0.04860806227931339, + "learning_rate": 9.6673417721519e-06, + "loss": 2.5, + "step": 908 + }, + { + "epoch": 0.05, + "grad_norm": 0.046013887268149076, + "learning_rate": 9.666329113924051e-06, + "loss": 2.9844, + "step": 910 + }, + { + "epoch": 0.05, + "grad_norm": 0.050160615800086725, + "learning_rate": 9.665316455696203e-06, + "loss": 2.5547, + "step": 912 + }, + { + "epoch": 0.05, + "grad_norm": 0.04468121541121031, + "learning_rate": 9.664303797468355e-06, + "loss": 1.9375, + "step": 914 + }, + { + "epoch": 0.05, + "grad_norm": 0.04783497737037472, + "learning_rate": 9.663291139240507e-06, + "loss": 2.0352, + "step": 916 + }, + { + "epoch": 0.05, + "grad_norm": 0.09345820833778327, + "learning_rate": 9.662278481012659e-06, + "loss": 2.168, + "step": 918 + }, + { + "epoch": 0.05, + "grad_norm": 0.06951969940170355, + "learning_rate": 9.66126582278481e-06, + "loss": 2.3984, + "step": 920 + }, + { + "epoch": 0.05, + "grad_norm": 0.06585958155557771, + "learning_rate": 9.660253164556963e-06, + "loss": 2.8086, + "step": 922 + }, + { + "epoch": 0.05, + "grad_norm": 0.049140557647652856, + "learning_rate": 9.659240506329115e-06, + "loss": 3.2344, + "step": 924 + }, + { + "epoch": 0.05, + "grad_norm": 0.059633313539989644, + "learning_rate": 9.658227848101267e-06, + "loss": 2.4727, + "step": 926 + }, + { + "epoch": 0.05, + "grad_norm": 0.05415301028520223, + "learning_rate": 9.657215189873418e-06, + "loss": 1.9766, + "step": 928 + }, + { + "epoch": 0.05, + "grad_norm": 0.14115480122341062, + "learning_rate": 9.65620253164557e-06, + "loss": 1.0664, + "step": 930 + }, + { + "epoch": 0.05, + "grad_norm": 0.07533532273665657, + "learning_rate": 9.655189873417722e-06, + "loss": 1.7285, + "step": 932 + }, + { + "epoch": 0.05, + "grad_norm": 0.07803117363115943, + "learning_rate": 9.654177215189874e-06, + "loss": 2.8867, + "step": 934 + }, + { + "epoch": 0.05, + "grad_norm": 0.05760761157164807, + "learning_rate": 9.653164556962026e-06, + "loss": 3.5547, + "step": 936 + }, + { + "epoch": 0.05, + "grad_norm": 0.047013965888968307, + "learning_rate": 9.652151898734178e-06, + "loss": 2.2969, + "step": 938 + }, + { + "epoch": 0.05, + "grad_norm": 0.056829253521407774, + "learning_rate": 9.65113924050633e-06, + "loss": 2.7656, + "step": 940 + }, + { + "epoch": 0.05, + "grad_norm": 0.05157490792319996, + "learning_rate": 9.650126582278482e-06, + "loss": 3.3789, + "step": 942 + }, + { + "epoch": 0.05, + "grad_norm": 0.046949382218649915, + "learning_rate": 9.649113924050634e-06, + "loss": 3.2266, + "step": 944 + }, + { + "epoch": 0.05, + "grad_norm": 0.04753575432612994, + "learning_rate": 9.648101265822786e-06, + "loss": 3.7656, + "step": 946 + }, + { + "epoch": 0.05, + "grad_norm": 0.10061038747859499, + "learning_rate": 9.647088607594938e-06, + "loss": 2.6094, + "step": 948 + }, + { + "epoch": 0.05, + "grad_norm": 0.06412995845649233, + "learning_rate": 9.64607594936709e-06, + "loss": 3.4062, + "step": 950 + }, + { + "epoch": 0.05, + "grad_norm": 0.08623045600554047, + "learning_rate": 9.645063291139241e-06, + "loss": 2.4766, + "step": 952 + }, + { + "epoch": 0.05, + "grad_norm": 0.04602576410444357, + "learning_rate": 9.644050632911393e-06, + "loss": 1.9336, + "step": 954 + }, + { + "epoch": 0.05, + "grad_norm": 0.04350935406033656, + "learning_rate": 9.643037974683545e-06, + "loss": 3.2734, + "step": 956 + }, + { + "epoch": 0.05, + "grad_norm": 0.09300895711041486, + "learning_rate": 9.642025316455697e-06, + "loss": 1.8359, + "step": 958 + }, + { + "epoch": 0.05, + "grad_norm": 0.07751284619736806, + "learning_rate": 9.641012658227849e-06, + "loss": 2.8477, + "step": 960 + }, + { + "epoch": 0.05, + "grad_norm": 0.07144651596010991, + "learning_rate": 9.640000000000001e-06, + "loss": 2.1484, + "step": 962 + }, + { + "epoch": 0.05, + "grad_norm": 0.0453484485109134, + "learning_rate": 9.638987341772153e-06, + "loss": 2.4805, + "step": 964 + }, + { + "epoch": 0.05, + "grad_norm": 0.06045196577803476, + "learning_rate": 9.637974683544305e-06, + "loss": 3.1406, + "step": 966 + }, + { + "epoch": 0.05, + "grad_norm": 0.0781837361792116, + "learning_rate": 9.636962025316457e-06, + "loss": 2.0078, + "step": 968 + }, + { + "epoch": 0.05, + "grad_norm": 0.05631230003710945, + "learning_rate": 9.635949367088609e-06, + "loss": 4.2266, + "step": 970 + }, + { + "epoch": 0.05, + "grad_norm": 0.048879455765286225, + "learning_rate": 9.63493670886076e-06, + "loss": 2.4883, + "step": 972 + }, + { + "epoch": 0.05, + "grad_norm": 0.056321168124699894, + "learning_rate": 9.633924050632912e-06, + "loss": 2.7969, + "step": 974 + }, + { + "epoch": 0.05, + "grad_norm": 0.06945744800844852, + "learning_rate": 9.632911392405064e-06, + "loss": 1.6406, + "step": 976 + }, + { + "epoch": 0.05, + "grad_norm": 0.05138501149962276, + "learning_rate": 9.631898734177216e-06, + "loss": 2.0156, + "step": 978 + }, + { + "epoch": 0.05, + "grad_norm": 0.04910087937385888, + "learning_rate": 9.630886075949368e-06, + "loss": 1.9805, + "step": 980 + }, + { + "epoch": 0.05, + "grad_norm": 0.040761615221141585, + "learning_rate": 9.62987341772152e-06, + "loss": 2.2109, + "step": 982 + }, + { + "epoch": 0.05, + "grad_norm": 0.06756451544128708, + "learning_rate": 9.628860759493672e-06, + "loss": 3.4922, + "step": 984 + }, + { + "epoch": 0.05, + "grad_norm": 0.06468878276562036, + "learning_rate": 9.627848101265824e-06, + "loss": 1.6211, + "step": 986 + }, + { + "epoch": 0.05, + "grad_norm": 0.058701650293584054, + "learning_rate": 9.626835443037976e-06, + "loss": 3.3359, + "step": 988 + }, + { + "epoch": 0.05, + "grad_norm": 0.05275824015630596, + "learning_rate": 9.625822784810128e-06, + "loss": 2.8359, + "step": 990 + }, + { + "epoch": 0.05, + "grad_norm": 0.08322458852258427, + "learning_rate": 9.62481012658228e-06, + "loss": 2.8203, + "step": 992 + }, + { + "epoch": 0.05, + "grad_norm": 0.058744221438640755, + "learning_rate": 9.623797468354431e-06, + "loss": 3.2188, + "step": 994 + }, + { + "epoch": 0.05, + "grad_norm": 0.04628008079307602, + "learning_rate": 9.622784810126583e-06, + "loss": 3.4453, + "step": 996 + }, + { + "epoch": 0.05, + "grad_norm": 0.0431775552063716, + "learning_rate": 9.621772151898735e-06, + "loss": 2.9844, + "step": 998 + }, + { + "epoch": 0.05, + "grad_norm": 0.04889821743736598, + "learning_rate": 9.620759493670887e-06, + "loss": 1.9961, + "step": 1000 + }, + { + "epoch": 0.05, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 43.9453125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.015625, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.9426040649414, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.5594, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.537, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.087, + "step": 1000 + }, + { + "epoch": 0.05, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 10.9375, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 25.5859375, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.46875, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 22.294132232666016, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.6865, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.823, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.372, + "step": 1000 + }, + { + "epoch": 0.05, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 46.484375, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 95.5078125, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.859375, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 70.62860107421875, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.0182, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.809, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 1000 + }, + { + "epoch": 0.05, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 39.453125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 82.03125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 1.0, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 62.479881286621094, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.3734, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.453, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.07, + "step": 1000 + }, + { + "epoch": 0.05, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.6015625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 84.765625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 64.69393920898438, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.7185, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.971, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 1000 + }, + { + "epoch": 0.05, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.0703125, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 95.1171875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.77542114257812, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.7697, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.943, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 1000 + }, + { + "epoch": 0.05, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 42.1875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 86.71875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.74609375, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 66.02581787109375, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.9732, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.832, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 1000 + }, + { + "epoch": 0.05, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 40.8203125, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 87.109375, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.328125, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.22736358642578, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.149, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.963, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, + "step": 1000 + }, + { + "epoch": 0.05, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 39.453125, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 82.8125, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8203125, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 62.93122863769531, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.3156, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.04, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.188, + "step": 1000 + }, + { + "epoch": 0.05, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 45.3125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 92.1875, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 69.69808197021484, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 7.1307, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 8.975, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.14, + "step": 1000 + }, + { + "epoch": 0.05, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.140625, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.6015625, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.21117401123047, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.0989, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.552, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.196, + "step": 1000 + }, + { + "epoch": 0.05, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 31.0546875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 71.09375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.84375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 53.50191116333008, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.1417, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.526, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.071, + "step": 1000 + }, + { + "epoch": 0.05, + "grad_norm": 0.1051790754037906, + "learning_rate": 9.619746835443039e-06, + "loss": 3.2422, + "step": 1002 + }, + { + "epoch": 0.05, + "grad_norm": 0.048465183349567016, + "learning_rate": 9.618734177215191e-06, + "loss": 2.8438, + "step": 1004 + }, + { + "epoch": 0.05, + "grad_norm": 0.09111892155303337, + "learning_rate": 9.617721518987343e-06, + "loss": 2.2812, + "step": 1006 + }, + { + "epoch": 0.05, + "grad_norm": 0.04420698460934349, + "learning_rate": 9.616708860759495e-06, + "loss": 1.623, + "step": 1008 + }, + { + "epoch": 0.05, + "grad_norm": 0.06009480694948726, + "learning_rate": 9.615696202531645e-06, + "loss": 3.6016, + "step": 1010 + }, + { + "epoch": 0.05, + "grad_norm": 0.05852060321964003, + "learning_rate": 9.614683544303799e-06, + "loss": 1.9648, + "step": 1012 + }, + { + "epoch": 0.05, + "grad_norm": 0.05124931163805301, + "learning_rate": 9.61367088607595e-06, + "loss": 2.1523, + "step": 1014 + }, + { + "epoch": 0.05, + "grad_norm": 0.04208769859193327, + "learning_rate": 9.612658227848102e-06, + "loss": 3.1094, + "step": 1016 + }, + { + "epoch": 0.05, + "grad_norm": 0.04342283285576621, + "learning_rate": 9.611645569620254e-06, + "loss": 2.0156, + "step": 1018 + }, + { + "epoch": 0.05, + "grad_norm": 0.06310583574382549, + "learning_rate": 9.610632911392406e-06, + "loss": 2.4336, + "step": 1020 + }, + { + "epoch": 0.05, + "grad_norm": 0.0555905224403037, + "learning_rate": 9.609620253164558e-06, + "loss": 1.9141, + "step": 1022 + }, + { + "epoch": 0.05, + "grad_norm": 0.09598113085999012, + "learning_rate": 9.60860759493671e-06, + "loss": 2.4375, + "step": 1024 + }, + { + "epoch": 0.05, + "grad_norm": 0.04483096367031134, + "learning_rate": 9.607594936708862e-06, + "loss": 2.6016, + "step": 1026 + }, + { + "epoch": 0.05, + "grad_norm": 0.06407673346174275, + "learning_rate": 9.606582278481014e-06, + "loss": 3.0781, + "step": 1028 + }, + { + "epoch": 0.05, + "grad_norm": 0.04087651737640146, + "learning_rate": 9.605569620253166e-06, + "loss": 3.0391, + "step": 1030 + }, + { + "epoch": 0.05, + "grad_norm": 0.03836897528630472, + "learning_rate": 9.604556962025318e-06, + "loss": 2.1992, + "step": 1032 + }, + { + "epoch": 0.05, + "grad_norm": 0.05442708888502564, + "learning_rate": 9.60354430379747e-06, + "loss": 2.332, + "step": 1034 + }, + { + "epoch": 0.05, + "grad_norm": 0.06122074521504932, + "learning_rate": 9.602531645569621e-06, + "loss": 3.2031, + "step": 1036 + }, + { + "epoch": 0.05, + "grad_norm": 0.06290658191568707, + "learning_rate": 9.601518987341772e-06, + "loss": 2.4922, + "step": 1038 + }, + { + "epoch": 0.05, + "grad_norm": 0.05438893210599195, + "learning_rate": 9.600506329113924e-06, + "loss": 4.0859, + "step": 1040 + }, + { + "epoch": 0.05, + "grad_norm": 0.049195805473244494, + "learning_rate": 9.599493670886077e-06, + "loss": 2.9766, + "step": 1042 + }, + { + "epoch": 0.05, + "grad_norm": 0.037989359450545684, + "learning_rate": 9.598481012658229e-06, + "loss": 3.1172, + "step": 1044 + }, + { + "epoch": 0.05, + "grad_norm": 0.04042347846520677, + "learning_rate": 9.597468354430381e-06, + "loss": 3.3828, + "step": 1046 + }, + { + "epoch": 0.05, + "grad_norm": 0.056616150978125716, + "learning_rate": 9.596455696202533e-06, + "loss": 3.5156, + "step": 1048 + }, + { + "epoch": 0.05, + "grad_norm": 0.06247782872755643, + "learning_rate": 9.595443037974685e-06, + "loss": 2.5625, + "step": 1050 + }, + { + "epoch": 0.05, + "grad_norm": 0.04270352660849398, + "learning_rate": 9.594430379746837e-06, + "loss": 3.0547, + "step": 1052 + }, + { + "epoch": 0.05, + "grad_norm": 0.06908827189697059, + "learning_rate": 9.593417721518989e-06, + "loss": 2.0293, + "step": 1054 + }, + { + "epoch": 0.05, + "grad_norm": 0.043921742476219626, + "learning_rate": 9.59240506329114e-06, + "loss": 2.2656, + "step": 1056 + }, + { + "epoch": 0.05, + "grad_norm": 0.04369788983343829, + "learning_rate": 9.591392405063292e-06, + "loss": 2.6836, + "step": 1058 + }, + { + "epoch": 0.05, + "grad_norm": 0.08091988614439645, + "learning_rate": 9.590379746835444e-06, + "loss": 3.2656, + "step": 1060 + }, + { + "epoch": 0.05, + "grad_norm": 0.04862948304771229, + "learning_rate": 9.589367088607596e-06, + "loss": 2.8906, + "step": 1062 + }, + { + "epoch": 0.05, + "grad_norm": 0.05063830715511126, + "learning_rate": 9.588354430379747e-06, + "loss": 3.5781, + "step": 1064 + }, + { + "epoch": 0.05, + "grad_norm": 0.06719689705471395, + "learning_rate": 9.587341772151898e-06, + "loss": 2.625, + "step": 1066 + }, + { + "epoch": 0.05, + "grad_norm": 0.042284904505660866, + "learning_rate": 9.58632911392405e-06, + "loss": 3.0859, + "step": 1068 + }, + { + "epoch": 0.05, + "grad_norm": 0.07499442266051606, + "learning_rate": 9.585316455696202e-06, + "loss": 2.9922, + "step": 1070 + }, + { + "epoch": 0.05, + "grad_norm": 0.04871753327913073, + "learning_rate": 9.584303797468356e-06, + "loss": 2.8047, + "step": 1072 + }, + { + "epoch": 0.05, + "grad_norm": 0.05076539268863179, + "learning_rate": 9.583291139240508e-06, + "loss": 1.0117, + "step": 1074 + }, + { + "epoch": 0.05, + "grad_norm": 0.07091122924332464, + "learning_rate": 9.58227848101266e-06, + "loss": 4.0938, + "step": 1076 + }, + { + "epoch": 0.05, + "grad_norm": 0.0532042729713229, + "learning_rate": 9.581265822784812e-06, + "loss": 1.3438, + "step": 1078 + }, + { + "epoch": 0.05, + "grad_norm": 0.04624432240575401, + "learning_rate": 9.580253164556963e-06, + "loss": 2.2188, + "step": 1080 + }, + { + "epoch": 0.05, + "grad_norm": 0.07944419616558628, + "learning_rate": 9.579240506329115e-06, + "loss": 3.0234, + "step": 1082 + }, + { + "epoch": 0.05, + "grad_norm": 0.09200908602869431, + "learning_rate": 9.578227848101267e-06, + "loss": 3.0078, + "step": 1084 + }, + { + "epoch": 0.05, + "grad_norm": 0.07169391038822008, + "learning_rate": 9.57721518987342e-06, + "loss": 3.0977, + "step": 1086 + }, + { + "epoch": 0.05, + "grad_norm": 0.05875376864911037, + "learning_rate": 9.576202531645571e-06, + "loss": 2.8516, + "step": 1088 + }, + { + "epoch": 0.05, + "grad_norm": 0.05735444721093439, + "learning_rate": 9.575189873417721e-06, + "loss": 3.8359, + "step": 1090 + }, + { + "epoch": 0.05, + "grad_norm": 0.07550008738233234, + "learning_rate": 9.574177215189873e-06, + "loss": 3.168, + "step": 1092 + }, + { + "epoch": 0.05, + "grad_norm": 0.0585409741347276, + "learning_rate": 9.573164556962025e-06, + "loss": 3.25, + "step": 1094 + }, + { + "epoch": 0.05, + "grad_norm": 0.052561465436180746, + "learning_rate": 9.572151898734177e-06, + "loss": 1.668, + "step": 1096 + }, + { + "epoch": 0.05, + "grad_norm": 0.06026978886216898, + "learning_rate": 9.571139240506329e-06, + "loss": 3.4219, + "step": 1098 + }, + { + "epoch": 0.06, + "grad_norm": 0.04588854459918244, + "learning_rate": 9.570126582278481e-06, + "loss": 3.2266, + "step": 1100 + }, + { + "epoch": 0.06, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.140625, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.6015625, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.69398498535156, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.1096, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.761, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.09, + "step": 1100 + }, + { + "epoch": 0.06, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.375, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 21.875, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4765625, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.127887725830078, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 3.101, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 20.639, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.322, + "step": 1100 + }, + { + "epoch": 0.06, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 46.484375, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 94.921875, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.8671875, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 71.43641662597656, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.8465, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.901, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 1100 + }, + { + "epoch": 0.06, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 40.0390625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 83.203125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.9921875, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 62.347381591796875, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.4238, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.437, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 1100 + }, + { + "epoch": 0.06, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.1875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 86.9140625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.015625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.74319458007812, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.0489, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.792, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 1100 + }, + { + "epoch": 0.06, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.265625, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 95.1171875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.18213653564453, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.6349, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 6.018, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.094, + "step": 1100 + }, + { + "epoch": 0.06, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 39.6484375, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 83.3984375, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 63.26298141479492, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 11.1767, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.726, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.089, + "step": 1100 + }, + { + "epoch": 0.06, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 40.625, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 87.6953125, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.21875, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 64.98330688476562, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 15.9442, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 4.014, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.063, + "step": 1100 + }, + { + "epoch": 0.06, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 36.328125, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 77.9296875, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.83984375, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 59.63578414916992, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.6196, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 11.389, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.178, + "step": 1100 + }, + { + "epoch": 0.06, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 44.3359375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 91.015625, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0859375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 68.1395263671875, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 10.0366, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.377, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.1, + "step": 1100 + }, + { + "epoch": 0.06, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.3359375, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.40625, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.68175506591797, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.1925, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.326, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.193, + "step": 1100 + }, + { + "epoch": 0.06, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 28.515625, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 65.234375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 51.298561096191406, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.3039, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.474, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, + "step": 1100 + }, + { + "epoch": 0.06, + "grad_norm": 0.05279039568598626, + "learning_rate": 9.569113924050634e-06, + "loss": 1.3438, + "step": 1102 + }, + { + "epoch": 0.06, + "grad_norm": 0.07920092652592756, + "learning_rate": 9.568101265822786e-06, + "loss": 2.4219, + "step": 1104 + }, + { + "epoch": 0.06, + "grad_norm": 0.05941126846638345, + "learning_rate": 9.567088607594938e-06, + "loss": 2.6836, + "step": 1106 + }, + { + "epoch": 0.06, + "grad_norm": 0.04922287399559275, + "learning_rate": 9.56607594936709e-06, + "loss": 3.1406, + "step": 1108 + }, + { + "epoch": 0.06, + "grad_norm": 0.04368593973418131, + "learning_rate": 9.565063291139242e-06, + "loss": 1.9141, + "step": 1110 + }, + { + "epoch": 0.06, + "grad_norm": 0.05961420052796132, + "learning_rate": 9.564050632911394e-06, + "loss": 3.6953, + "step": 1112 + }, + { + "epoch": 0.06, + "grad_norm": 0.057590175212679104, + "learning_rate": 9.563037974683546e-06, + "loss": 3.9531, + "step": 1114 + }, + { + "epoch": 0.06, + "grad_norm": 0.045713870301452, + "learning_rate": 9.562025316455698e-06, + "loss": 2.2109, + "step": 1116 + }, + { + "epoch": 0.06, + "grad_norm": 0.04966124044261182, + "learning_rate": 9.561012658227848e-06, + "loss": 1.5859, + "step": 1118 + }, + { + "epoch": 0.06, + "grad_norm": 0.04931794060312549, + "learning_rate": 9.56e-06, + "loss": 2.6719, + "step": 1120 + }, + { + "epoch": 0.06, + "grad_norm": 0.056823119950011, + "learning_rate": 9.558987341772152e-06, + "loss": 2.375, + "step": 1122 + }, + { + "epoch": 0.06, + "grad_norm": 0.03956124379267595, + "learning_rate": 9.557974683544304e-06, + "loss": 2.7578, + "step": 1124 + }, + { + "epoch": 0.06, + "grad_norm": 0.05956590745616876, + "learning_rate": 9.556962025316456e-06, + "loss": 1.1328, + "step": 1126 + }, + { + "epoch": 0.06, + "grad_norm": 0.05229656063756952, + "learning_rate": 9.555949367088608e-06, + "loss": 2.6602, + "step": 1128 + }, + { + "epoch": 0.06, + "grad_norm": 0.05015559554787169, + "learning_rate": 9.55493670886076e-06, + "loss": 3.3516, + "step": 1130 + }, + { + "epoch": 0.06, + "grad_norm": 0.053045505149946784, + "learning_rate": 9.553924050632913e-06, + "loss": 1.3164, + "step": 1132 + }, + { + "epoch": 0.06, + "grad_norm": 0.049300460314765836, + "learning_rate": 9.552911392405065e-06, + "loss": 3.3359, + "step": 1134 + }, + { + "epoch": 0.06, + "grad_norm": 0.0509355475045662, + "learning_rate": 9.551898734177217e-06, + "loss": 3.2266, + "step": 1136 + }, + { + "epoch": 0.06, + "grad_norm": 0.05684153506318937, + "learning_rate": 9.550886075949369e-06, + "loss": 3.5078, + "step": 1138 + }, + { + "epoch": 0.06, + "grad_norm": 0.06443433054086191, + "learning_rate": 9.54987341772152e-06, + "loss": 1.709, + "step": 1140 + }, + { + "epoch": 0.06, + "grad_norm": 0.08719597593676645, + "learning_rate": 9.548860759493673e-06, + "loss": 1.9453, + "step": 1142 + }, + { + "epoch": 0.06, + "grad_norm": 0.04787175813071486, + "learning_rate": 9.547848101265823e-06, + "loss": 2.6562, + "step": 1144 + }, + { + "epoch": 0.06, + "grad_norm": 0.08201162069341617, + "learning_rate": 9.546835443037975e-06, + "loss": 2.9336, + "step": 1146 + }, + { + "epoch": 0.06, + "grad_norm": 0.1180019803959435, + "learning_rate": 9.545822784810127e-06, + "loss": 2.2656, + "step": 1148 + }, + { + "epoch": 0.06, + "grad_norm": 0.04238823903190658, + "learning_rate": 9.544810126582279e-06, + "loss": 3.2266, + "step": 1150 + }, + { + "epoch": 0.06, + "grad_norm": 0.06004974253490697, + "learning_rate": 9.54379746835443e-06, + "loss": 3.6797, + "step": 1152 + }, + { + "epoch": 0.06, + "grad_norm": 0.058263064089826816, + "learning_rate": 9.542784810126582e-06, + "loss": 3.4688, + "step": 1154 + }, + { + "epoch": 0.06, + "grad_norm": 0.05411841624054384, + "learning_rate": 9.541772151898734e-06, + "loss": 1.75, + "step": 1156 + }, + { + "epoch": 0.06, + "grad_norm": 0.041361774375433126, + "learning_rate": 9.540759493670886e-06, + "loss": 2.9844, + "step": 1158 + }, + { + "epoch": 0.06, + "grad_norm": 0.06539487268354072, + "learning_rate": 9.539746835443038e-06, + "loss": 2.8438, + "step": 1160 + }, + { + "epoch": 0.06, + "grad_norm": 0.0581568382694382, + "learning_rate": 9.538734177215192e-06, + "loss": 2.7734, + "step": 1162 + }, + { + "epoch": 0.06, + "grad_norm": 0.06767073223000843, + "learning_rate": 9.537721518987344e-06, + "loss": 2.0039, + "step": 1164 + }, + { + "epoch": 0.06, + "grad_norm": 0.052823466084387614, + "learning_rate": 9.536708860759496e-06, + "loss": 3.1719, + "step": 1166 + }, + { + "epoch": 0.06, + "grad_norm": 0.09770782062698304, + "learning_rate": 9.535696202531647e-06, + "loss": 2.9375, + "step": 1168 + }, + { + "epoch": 0.06, + "grad_norm": 0.054946196005232324, + "learning_rate": 9.534683544303798e-06, + "loss": 3.1562, + "step": 1170 + }, + { + "epoch": 0.06, + "grad_norm": 0.058723423439784234, + "learning_rate": 9.53367088607595e-06, + "loss": 2.4082, + "step": 1172 + }, + { + "epoch": 0.06, + "grad_norm": 0.05838846764255246, + "learning_rate": 9.532658227848101e-06, + "loss": 3.2734, + "step": 1174 + }, + { + "epoch": 0.06, + "grad_norm": 0.08719324695026287, + "learning_rate": 9.531645569620253e-06, + "loss": 2.9141, + "step": 1176 + }, + { + "epoch": 0.06, + "grad_norm": 0.047306934247421356, + "learning_rate": 9.530632911392405e-06, + "loss": 2.4297, + "step": 1178 + }, + { + "epoch": 0.06, + "grad_norm": 0.050959286466225565, + "learning_rate": 9.529620253164557e-06, + "loss": 1.9648, + "step": 1180 + }, + { + "epoch": 0.06, + "grad_norm": 0.0431167454308949, + "learning_rate": 9.528607594936709e-06, + "loss": 2.5859, + "step": 1182 + }, + { + "epoch": 0.06, + "grad_norm": 0.058744259091542635, + "learning_rate": 9.527594936708861e-06, + "loss": 4.5781, + "step": 1184 + }, + { + "epoch": 0.06, + "grad_norm": 0.04515997872087969, + "learning_rate": 9.526582278481013e-06, + "loss": 2.7422, + "step": 1186 + }, + { + "epoch": 0.06, + "grad_norm": 0.052408032035697646, + "learning_rate": 9.525569620253165e-06, + "loss": 2.4062, + "step": 1188 + }, + { + "epoch": 0.06, + "grad_norm": 0.04951277906648105, + "learning_rate": 9.524556962025317e-06, + "loss": 1.3359, + "step": 1190 + }, + { + "epoch": 0.06, + "grad_norm": 0.0534640838422442, + "learning_rate": 9.52354430379747e-06, + "loss": 1.8633, + "step": 1192 + }, + { + "epoch": 0.06, + "grad_norm": 0.1104300439314054, + "learning_rate": 9.522531645569622e-06, + "loss": 3.7266, + "step": 1194 + }, + { + "epoch": 0.06, + "grad_norm": 0.06492888179270434, + "learning_rate": 9.521518987341772e-06, + "loss": 4.1016, + "step": 1196 + }, + { + "epoch": 0.06, + "grad_norm": 0.05672739428401729, + "learning_rate": 9.520506329113924e-06, + "loss": 1.7734, + "step": 1198 + }, + { + "epoch": 0.06, + "grad_norm": 0.11597608027192513, + "learning_rate": 9.519493670886076e-06, + "loss": 3.2734, + "step": 1200 + }, + { + "epoch": 0.06, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 43.75, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 90.625, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.00003814697266, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.2038, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.712, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, + "step": 1200 + }, + { + "epoch": 0.06, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 6.8359375, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 16.9921875, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.5, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 17.340431213378906, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.7204, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.526, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.368, + "step": 1200 + }, + { + "epoch": 0.06, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 46.6796875, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 94.53125, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.859375, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 70.92469787597656, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.8603, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.893, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 1200 + }, + { + "epoch": 0.06, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 39.0625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 81.8359375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.99609375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 61.614593505859375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.3938, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.446, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 1200 + }, + { + "epoch": 0.06, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.96875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 86.9140625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.015625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 66.26858520507812, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.6973, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.983, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 1200 + }, + { + "epoch": 0.06, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.0703125, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 95.1171875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.19760131835938, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 11.1286, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.751, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.09, + "step": 1200 + }, + { + "epoch": 0.06, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.6015625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 85.546875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.35404205322266, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.7198, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.97, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 1200 + }, + { + "epoch": 0.06, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 41.2109375, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.4765625, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.25, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.49232482910156, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.4377, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.893, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.061, + "step": 1200 + }, + { + "epoch": 0.06, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 31.640625, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 70.1171875, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.859375, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 53.657962799072266, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 6.3677, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 10.051, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.157, + "step": 1200 + }, + { + "epoch": 0.06, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 46.484375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 94.140625, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.94774627685547, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 10.3442, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.187, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.097, + "step": 1200 + }, + { + "epoch": 0.06, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.53125, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 92.3828125, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.87773132324219, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.1139, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.515, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.196, + "step": 1200 + }, + { + "epoch": 0.06, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 32.03125, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 72.0703125, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.859375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 54.3194694519043, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.2679, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.486, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, + "step": 1200 + }, + { + "epoch": 0.06, + "grad_norm": 0.056524486518832275, + "learning_rate": 9.518481012658228e-06, + "loss": 2.8906, + "step": 1202 + }, + { + "epoch": 0.06, + "grad_norm": 0.06884576916644804, + "learning_rate": 9.51746835443038e-06, + "loss": 2.3203, + "step": 1204 + }, + { + "epoch": 0.06, + "grad_norm": 0.05492846585742184, + "learning_rate": 9.516455696202532e-06, + "loss": 2.4922, + "step": 1206 + }, + { + "epoch": 0.06, + "grad_norm": 0.05842822257833723, + "learning_rate": 9.515443037974684e-06, + "loss": 2.0664, + "step": 1208 + }, + { + "epoch": 0.06, + "grad_norm": 0.05828818675606454, + "learning_rate": 9.514430379746836e-06, + "loss": 3.4531, + "step": 1210 + }, + { + "epoch": 0.06, + "grad_norm": 0.04523097341723584, + "learning_rate": 9.513417721518988e-06, + "loss": 1.3281, + "step": 1212 + }, + { + "epoch": 0.06, + "grad_norm": 0.06246682120852486, + "learning_rate": 9.51240506329114e-06, + "loss": 2.8594, + "step": 1214 + }, + { + "epoch": 0.06, + "grad_norm": 0.059914705380358084, + "learning_rate": 9.511392405063292e-06, + "loss": 2.7891, + "step": 1216 + }, + { + "epoch": 0.06, + "grad_norm": 0.07123881358873205, + "learning_rate": 9.510379746835443e-06, + "loss": 2.9766, + "step": 1218 + }, + { + "epoch": 0.06, + "grad_norm": 0.05839221388528459, + "learning_rate": 9.509367088607595e-06, + "loss": 2.3359, + "step": 1220 + }, + { + "epoch": 0.06, + "grad_norm": 0.06452918622039662, + "learning_rate": 9.508354430379749e-06, + "loss": 2.6523, + "step": 1222 + }, + { + "epoch": 0.06, + "grad_norm": 0.1029877383143472, + "learning_rate": 9.5073417721519e-06, + "loss": 3.0391, + "step": 1224 + }, + { + "epoch": 0.06, + "grad_norm": 0.057089100933038525, + "learning_rate": 9.506329113924051e-06, + "loss": 1.8223, + "step": 1226 + }, + { + "epoch": 0.06, + "grad_norm": 0.05166973185716093, + "learning_rate": 9.505316455696203e-06, + "loss": 1.8984, + "step": 1228 + }, + { + "epoch": 0.06, + "grad_norm": 0.0513550181895363, + "learning_rate": 9.504303797468355e-06, + "loss": 2.2773, + "step": 1230 + }, + { + "epoch": 0.06, + "grad_norm": 0.04847340483605805, + "learning_rate": 9.503291139240507e-06, + "loss": 2.3633, + "step": 1232 + }, + { + "epoch": 0.06, + "grad_norm": 0.04858819186140292, + "learning_rate": 9.502278481012659e-06, + "loss": 2.3047, + "step": 1234 + }, + { + "epoch": 0.06, + "grad_norm": 0.07721735194112753, + "learning_rate": 9.50126582278481e-06, + "loss": 1.8027, + "step": 1236 + }, + { + "epoch": 0.06, + "grad_norm": 0.059295450113036254, + "learning_rate": 9.500253164556963e-06, + "loss": 3.7891, + "step": 1238 + }, + { + "epoch": 0.06, + "grad_norm": 0.05862288743975563, + "learning_rate": 9.499240506329114e-06, + "loss": 2.3359, + "step": 1240 + }, + { + "epoch": 0.06, + "grad_norm": 0.049113536200244313, + "learning_rate": 9.498227848101266e-06, + "loss": 4.2344, + "step": 1242 + }, + { + "epoch": 0.06, + "grad_norm": 0.05863364570109533, + "learning_rate": 9.497215189873418e-06, + "loss": 4.2812, + "step": 1244 + }, + { + "epoch": 0.06, + "grad_norm": 0.058465190758347246, + "learning_rate": 9.49620253164557e-06, + "loss": 3.8906, + "step": 1246 + }, + { + "epoch": 0.06, + "grad_norm": 0.05745728983604673, + "learning_rate": 9.495189873417722e-06, + "loss": 2.4609, + "step": 1248 + }, + { + "epoch": 0.06, + "grad_norm": 0.048940790847378936, + "learning_rate": 9.494177215189874e-06, + "loss": 3.0859, + "step": 1250 + }, + { + "epoch": 0.06, + "grad_norm": 0.04515179075015809, + "learning_rate": 9.493164556962026e-06, + "loss": 3.5078, + "step": 1252 + }, + { + "epoch": 0.06, + "grad_norm": 0.05148185633372578, + "learning_rate": 9.492151898734178e-06, + "loss": 3.8828, + "step": 1254 + }, + { + "epoch": 0.06, + "grad_norm": 0.044922590250043165, + "learning_rate": 9.49113924050633e-06, + "loss": 2.5742, + "step": 1256 + }, + { + "epoch": 0.06, + "grad_norm": 0.049305376386839274, + "learning_rate": 9.490126582278482e-06, + "loss": 4.1719, + "step": 1258 + }, + { + "epoch": 0.06, + "grad_norm": 0.0482360612014124, + "learning_rate": 9.489113924050634e-06, + "loss": 3.2812, + "step": 1260 + }, + { + "epoch": 0.06, + "grad_norm": 0.07422881936481786, + "learning_rate": 9.488101265822785e-06, + "loss": 2.4844, + "step": 1262 + }, + { + "epoch": 0.06, + "grad_norm": 0.04932619932404644, + "learning_rate": 9.487088607594937e-06, + "loss": 1.7988, + "step": 1264 + }, + { + "epoch": 0.06, + "grad_norm": 0.0466248073944634, + "learning_rate": 9.48607594936709e-06, + "loss": 3.2188, + "step": 1266 + }, + { + "epoch": 0.06, + "grad_norm": 0.05597919637602129, + "learning_rate": 9.485063291139241e-06, + "loss": 2.4297, + "step": 1268 + }, + { + "epoch": 0.06, + "grad_norm": 0.05073715551356432, + "learning_rate": 9.484050632911393e-06, + "loss": 1.3438, + "step": 1270 + }, + { + "epoch": 0.06, + "grad_norm": 0.08772414122295938, + "learning_rate": 9.483037974683545e-06, + "loss": 1.6367, + "step": 1272 + }, + { + "epoch": 0.06, + "grad_norm": 0.05087985039711778, + "learning_rate": 9.482025316455697e-06, + "loss": 2.2109, + "step": 1274 + }, + { + "epoch": 0.06, + "grad_norm": 0.08264556266724159, + "learning_rate": 9.481012658227849e-06, + "loss": 3.4297, + "step": 1276 + }, + { + "epoch": 0.06, + "grad_norm": 0.04929517062153266, + "learning_rate": 9.48e-06, + "loss": 1.6758, + "step": 1278 + }, + { + "epoch": 0.06, + "grad_norm": 0.06794824948761272, + "learning_rate": 9.478987341772153e-06, + "loss": 3.2344, + "step": 1280 + }, + { + "epoch": 0.06, + "grad_norm": 0.04781934324935959, + "learning_rate": 9.477974683544305e-06, + "loss": 1.875, + "step": 1282 + }, + { + "epoch": 0.06, + "grad_norm": 0.12420030807854848, + "learning_rate": 9.476962025316456e-06, + "loss": 1.8809, + "step": 1284 + }, + { + "epoch": 0.06, + "grad_norm": 0.10396640173006078, + "learning_rate": 9.475949367088608e-06, + "loss": 3.1992, + "step": 1286 + }, + { + "epoch": 0.06, + "grad_norm": 0.05578843452162679, + "learning_rate": 9.47493670886076e-06, + "loss": 2.5586, + "step": 1288 + }, + { + "epoch": 0.06, + "grad_norm": 0.053276918030993055, + "learning_rate": 9.473924050632912e-06, + "loss": 2.2227, + "step": 1290 + }, + { + "epoch": 0.06, + "grad_norm": 0.04548986934593862, + "learning_rate": 9.472911392405064e-06, + "loss": 2.3203, + "step": 1292 + }, + { + "epoch": 0.06, + "grad_norm": 0.05489945634450488, + "learning_rate": 9.471898734177216e-06, + "loss": 3.2969, + "step": 1294 + }, + { + "epoch": 0.06, + "grad_norm": 0.07382813673170693, + "learning_rate": 9.470886075949368e-06, + "loss": 3.2656, + "step": 1296 + }, + { + "epoch": 0.06, + "grad_norm": 0.059025889512360236, + "learning_rate": 9.46987341772152e-06, + "loss": 2.1094, + "step": 1298 + }, + { + "epoch": 0.07, + "grad_norm": 0.04774363910554431, + "learning_rate": 9.468860759493672e-06, + "loss": 3.3281, + "step": 1300 + }, + { + "epoch": 0.07, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.921875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 92.1875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.9459457397461, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.4723, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.579, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.087, + "step": 1300 + }, + { + "epoch": 0.07, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.3984375, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 21.09375, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4765625, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 20.150863647460938, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.5115, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 25.483, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.398, + "step": 1300 + }, + { + "epoch": 0.07, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 42.578125, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 88.8671875, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.875, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 67.16609954833984, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.8744, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.885, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 1300 + }, + { + "epoch": 0.07, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 39.84375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 82.2265625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.98828125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 62.61825942993164, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.3521, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.459, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.07, + "step": 1300 + }, + { + "epoch": 0.07, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.96875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 88.28125, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.015625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 66.84114837646484, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.9535, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.843, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 1300 + }, + { + "epoch": 0.07, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.3359375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.85847473144531, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.6944, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.984, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.094, + "step": 1300 + }, + { + "epoch": 0.07, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 42.578125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 87.890625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 66.45761108398438, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 9.7009, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 6.597, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.103, + "step": 1300 + }, + { + "epoch": 0.07, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.578125, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 89.2578125, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.203125, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.21710205078125, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.2036, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.95, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, + "step": 1300 + }, + { + "epoch": 0.07, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 36.328125, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 77.5390625, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8359375, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 59.69415283203125, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.7199, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 11.189, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.175, + "step": 1300 + }, + { + "epoch": 0.07, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 45.8984375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 92.96875, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0703125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.08352661132812, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 8.2862, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 7.724, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.121, + "step": 1300 + }, + { + "epoch": 0.07, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.140625, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 92.578125, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.6953125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 69.10311889648438, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.4982, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 11.64, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.182, + "step": 1300 + }, + { + "epoch": 0.07, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 28.90625, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 67.7734375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.859375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 52.01654052734375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.261, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.488, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, + "step": 1300 + }, + { + "epoch": 0.07, + "grad_norm": 0.04688377347936095, + "learning_rate": 9.467848101265824e-06, + "loss": 2.5625, + "step": 1302 + }, + { + "epoch": 0.07, + "grad_norm": 0.0535641115572392, + "learning_rate": 9.466835443037976e-06, + "loss": 2.2617, + "step": 1304 + }, + { + "epoch": 0.07, + "grad_norm": 0.04625202248233324, + "learning_rate": 9.465822784810127e-06, + "loss": 2.1016, + "step": 1306 + }, + { + "epoch": 0.07, + "grad_norm": 0.043697642071917904, + "learning_rate": 9.46481012658228e-06, + "loss": 1.168, + "step": 1308 + }, + { + "epoch": 0.07, + "grad_norm": 0.05362168829827214, + "learning_rate": 9.463797468354431e-06, + "loss": 2.5625, + "step": 1310 + }, + { + "epoch": 0.07, + "grad_norm": 0.06975665191217256, + "learning_rate": 9.462784810126583e-06, + "loss": 3.9844, + "step": 1312 + }, + { + "epoch": 0.07, + "grad_norm": 0.046593111547768955, + "learning_rate": 9.461772151898735e-06, + "loss": 2.8438, + "step": 1314 + }, + { + "epoch": 0.07, + "grad_norm": 0.05971039459331579, + "learning_rate": 9.460759493670887e-06, + "loss": 3.0469, + "step": 1316 + }, + { + "epoch": 0.07, + "grad_norm": 0.05051453173596236, + "learning_rate": 9.459746835443039e-06, + "loss": 2.0156, + "step": 1318 + }, + { + "epoch": 0.07, + "grad_norm": 0.07619243735540052, + "learning_rate": 9.45873417721519e-06, + "loss": 3.5625, + "step": 1320 + }, + { + "epoch": 0.07, + "grad_norm": 0.046237781745614805, + "learning_rate": 9.457721518987343e-06, + "loss": 2.3125, + "step": 1322 + }, + { + "epoch": 0.07, + "grad_norm": 0.04485813314355889, + "learning_rate": 9.456708860759495e-06, + "loss": 2.2305, + "step": 1324 + }, + { + "epoch": 0.07, + "grad_norm": 0.06336058879470456, + "learning_rate": 9.455696202531647e-06, + "loss": 1.9297, + "step": 1326 + }, + { + "epoch": 0.07, + "grad_norm": 0.049017667050821896, + "learning_rate": 9.454683544303798e-06, + "loss": 1.8242, + "step": 1328 + }, + { + "epoch": 0.07, + "grad_norm": 0.074566984117343, + "learning_rate": 9.45367088607595e-06, + "loss": 1.793, + "step": 1330 + }, + { + "epoch": 0.07, + "grad_norm": 0.08532472260482739, + "learning_rate": 9.452658227848102e-06, + "loss": 1.4082, + "step": 1332 + }, + { + "epoch": 0.07, + "grad_norm": 0.04695226837525632, + "learning_rate": 9.451645569620254e-06, + "loss": 2.3438, + "step": 1334 + }, + { + "epoch": 0.07, + "grad_norm": 0.04484711521428046, + "learning_rate": 9.450632911392406e-06, + "loss": 3.1562, + "step": 1336 + }, + { + "epoch": 0.07, + "grad_norm": 0.04490462262118974, + "learning_rate": 9.449620253164558e-06, + "loss": 3.8438, + "step": 1338 + }, + { + "epoch": 0.07, + "grad_norm": 0.054645353919445395, + "learning_rate": 9.44860759493671e-06, + "loss": 4.4766, + "step": 1340 + }, + { + "epoch": 0.07, + "grad_norm": 0.09472661165845717, + "learning_rate": 9.447594936708862e-06, + "loss": 2.1719, + "step": 1342 + }, + { + "epoch": 0.07, + "grad_norm": 0.05608091164616737, + "learning_rate": 9.446582278481014e-06, + "loss": 3.0391, + "step": 1344 + }, + { + "epoch": 0.07, + "grad_norm": 0.05956456868043248, + "learning_rate": 9.445569620253166e-06, + "loss": 3.5469, + "step": 1346 + }, + { + "epoch": 0.07, + "grad_norm": 0.0620189985372643, + "learning_rate": 9.444556962025318e-06, + "loss": 3.125, + "step": 1348 + }, + { + "epoch": 0.07, + "grad_norm": 0.04915043549864365, + "learning_rate": 9.44354430379747e-06, + "loss": 3.1875, + "step": 1350 + }, + { + "epoch": 0.07, + "grad_norm": 0.04903168447437554, + "learning_rate": 9.442531645569621e-06, + "loss": 3.5391, + "step": 1352 + }, + { + "epoch": 0.07, + "grad_norm": 0.07603047463550469, + "learning_rate": 9.441518987341773e-06, + "loss": 1.748, + "step": 1354 + }, + { + "epoch": 0.07, + "grad_norm": 0.08795396323964288, + "learning_rate": 9.440506329113923e-06, + "loss": 2.2148, + "step": 1356 + }, + { + "epoch": 0.07, + "grad_norm": 0.06394860010428646, + "learning_rate": 9.439493670886077e-06, + "loss": 2.1758, + "step": 1358 + }, + { + "epoch": 0.07, + "grad_norm": 0.048523865697111014, + "learning_rate": 9.438481012658229e-06, + "loss": 2.6094, + "step": 1360 + }, + { + "epoch": 0.07, + "grad_norm": 0.044744379334593756, + "learning_rate": 9.437468354430381e-06, + "loss": 1.2461, + "step": 1362 + }, + { + "epoch": 0.07, + "grad_norm": 0.06115671450767947, + "learning_rate": 9.436455696202533e-06, + "loss": 1.0859, + "step": 1364 + }, + { + "epoch": 0.07, + "grad_norm": 0.05822151080901583, + "learning_rate": 9.435443037974685e-06, + "loss": 3.4766, + "step": 1366 + }, + { + "epoch": 0.07, + "grad_norm": 0.07973667069949618, + "learning_rate": 9.434430379746837e-06, + "loss": 1.6836, + "step": 1368 + }, + { + "epoch": 0.07, + "grad_norm": 0.05697131019979492, + "learning_rate": 9.433417721518989e-06, + "loss": 3.3516, + "step": 1370 + }, + { + "epoch": 0.07, + "grad_norm": 0.047056398472087486, + "learning_rate": 9.43240506329114e-06, + "loss": 1.9688, + "step": 1372 + }, + { + "epoch": 0.07, + "grad_norm": 0.07977068722005347, + "learning_rate": 9.431392405063292e-06, + "loss": 2.6289, + "step": 1374 + }, + { + "epoch": 0.07, + "grad_norm": 0.05825913569638537, + "learning_rate": 9.430379746835444e-06, + "loss": 3.3594, + "step": 1376 + }, + { + "epoch": 0.07, + "grad_norm": 0.043133510537226256, + "learning_rate": 9.429367088607596e-06, + "loss": 2.8984, + "step": 1378 + }, + { + "epoch": 0.07, + "grad_norm": 0.07160798353070594, + "learning_rate": 9.428354430379748e-06, + "loss": 1.9961, + "step": 1380 + }, + { + "epoch": 0.07, + "grad_norm": 0.043526473443776595, + "learning_rate": 9.4273417721519e-06, + "loss": 2.6641, + "step": 1382 + }, + { + "epoch": 0.07, + "grad_norm": 0.09154773952846151, + "learning_rate": 9.42632911392405e-06, + "loss": 2.6328, + "step": 1384 + }, + { + "epoch": 0.07, + "grad_norm": 0.04607802371298212, + "learning_rate": 9.425316455696202e-06, + "loss": 2.043, + "step": 1386 + }, + { + "epoch": 0.07, + "grad_norm": 0.04583141069326187, + "learning_rate": 9.424303797468356e-06, + "loss": 2.5625, + "step": 1388 + }, + { + "epoch": 0.07, + "grad_norm": 0.047856476589815795, + "learning_rate": 9.423291139240508e-06, + "loss": 2.1719, + "step": 1390 + }, + { + "epoch": 0.07, + "grad_norm": 0.045235948292313685, + "learning_rate": 9.42227848101266e-06, + "loss": 1.7793, + "step": 1392 + }, + { + "epoch": 0.07, + "grad_norm": 0.0728387370082503, + "learning_rate": 9.421265822784811e-06, + "loss": 3.2422, + "step": 1394 + }, + { + "epoch": 0.07, + "grad_norm": 0.038953904358725366, + "learning_rate": 9.420253164556963e-06, + "loss": 2.8125, + "step": 1396 + }, + { + "epoch": 0.07, + "grad_norm": 0.04573349280022919, + "learning_rate": 9.419240506329115e-06, + "loss": 2.6016, + "step": 1398 + }, + { + "epoch": 0.07, + "grad_norm": 0.11480601349227437, + "learning_rate": 9.418227848101267e-06, + "loss": 2.9766, + "step": 1400 + }, + { + "epoch": 0.07, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.7265625, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.6015625, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.1888427734375, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.2467, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.691, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, + "step": 1400 + }, + { + "epoch": 0.07, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.984375, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 22.4609375, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.46875, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.395097732543945, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.5738, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 24.866, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.389, + "step": 1400 + }, + { + "epoch": 0.07, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 47.4609375, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 96.484375, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.859375, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 72.35719299316406, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.9538, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.843, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 1400 + }, + { + "epoch": 0.07, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 39.2578125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 81.25, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.9921875, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 61.98875045776367, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.7454, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.34, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.068, + "step": 1400 + }, + { + "epoch": 0.07, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 43.1640625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 86.71875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.83100128173828, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.9759, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.831, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 1400 + }, + { + "epoch": 0.07, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.09375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 93.359375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.10130310058594, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.758, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.949, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 1400 + }, + { + "epoch": 0.07, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.6015625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 85.9375, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.8533935546875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.6245, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 6.024, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.094, + "step": 1400 + }, + { + "epoch": 0.07, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 43.359375, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 90.625, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.265625, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 67.08191680908203, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.273, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.933, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.061, + "step": 1400 + }, + { + "epoch": 0.07, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 42.1875, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 87.5, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8203125, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 66.17656707763672, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.7472, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 11.136, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.174, + "step": 1400 + }, + { + "epoch": 0.07, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 46.6796875, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 94.3359375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0703125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.69469451904297, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 6.5911, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 9.71, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.152, + "step": 1400 + }, + { + "epoch": 0.07, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.75, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.40625, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.7109375, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 69.02898406982422, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.9021, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 13.056, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.204, + "step": 1400 + }, + { + "epoch": 0.07, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 29.296875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 66.6015625, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.86328125, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 51.64873504638672, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.5054, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.412, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.069, + "step": 1400 + }, + { + "epoch": 0.07, + "grad_norm": 0.055843672095774365, + "learning_rate": 9.417215189873419e-06, + "loss": 2.7344, + "step": 1402 + }, + { + "epoch": 0.07, + "grad_norm": 0.05515526710896152, + "learning_rate": 9.416202531645571e-06, + "loss": 2.4395, + "step": 1404 + }, + { + "epoch": 0.07, + "grad_norm": 0.08982747905012575, + "learning_rate": 9.415189873417723e-06, + "loss": 3.6719, + "step": 1406 + }, + { + "epoch": 0.07, + "grad_norm": 0.05196739669400878, + "learning_rate": 9.414177215189875e-06, + "loss": 1.8359, + "step": 1408 + }, + { + "epoch": 0.07, + "grad_norm": 0.0627278444078641, + "learning_rate": 9.413164556962025e-06, + "loss": 2.2461, + "step": 1410 + }, + { + "epoch": 0.07, + "grad_norm": 0.05884919908680651, + "learning_rate": 9.412151898734177e-06, + "loss": 2.5234, + "step": 1412 + }, + { + "epoch": 0.07, + "grad_norm": 0.05744260264645288, + "learning_rate": 9.411139240506329e-06, + "loss": 2.8867, + "step": 1414 + }, + { + "epoch": 0.07, + "grad_norm": 0.051897095461469187, + "learning_rate": 9.41012658227848e-06, + "loss": 2.0156, + "step": 1416 + }, + { + "epoch": 0.07, + "grad_norm": 0.05879349636390874, + "learning_rate": 9.409113924050634e-06, + "loss": 2.4766, + "step": 1418 + }, + { + "epoch": 0.07, + "grad_norm": 0.04729306777999818, + "learning_rate": 9.408101265822786e-06, + "loss": 1.8867, + "step": 1420 + }, + { + "epoch": 0.07, + "grad_norm": 0.0421728790409431, + "learning_rate": 9.407088607594938e-06, + "loss": 2.5, + "step": 1422 + }, + { + "epoch": 0.07, + "grad_norm": 0.12666988320747452, + "learning_rate": 9.40607594936709e-06, + "loss": 2.2227, + "step": 1424 + }, + { + "epoch": 0.07, + "grad_norm": 0.06732281487807565, + "learning_rate": 9.405063291139242e-06, + "loss": 2.8477, + "step": 1426 + }, + { + "epoch": 0.07, + "grad_norm": 0.03916516818938533, + "learning_rate": 9.404050632911394e-06, + "loss": 2.7266, + "step": 1428 + }, + { + "epoch": 0.07, + "grad_norm": 0.04706147969736972, + "learning_rate": 9.403037974683546e-06, + "loss": 2.6992, + "step": 1430 + }, + { + "epoch": 0.07, + "grad_norm": 0.04955077897200769, + "learning_rate": 9.402025316455698e-06, + "loss": 2.9688, + "step": 1432 + }, + { + "epoch": 0.07, + "grad_norm": 0.04341814358564929, + "learning_rate": 9.40101265822785e-06, + "loss": 2.4688, + "step": 1434 + }, + { + "epoch": 0.07, + "grad_norm": 0.05735382001404066, + "learning_rate": 9.4e-06, + "loss": 2.6055, + "step": 1436 + }, + { + "epoch": 0.07, + "grad_norm": 0.07465431435276457, + "learning_rate": 9.398987341772152e-06, + "loss": 4.1406, + "step": 1438 + }, + { + "epoch": 0.07, + "grad_norm": 0.0420444573512173, + "learning_rate": 9.397974683544304e-06, + "loss": 3.2422, + "step": 1440 + }, + { + "epoch": 0.07, + "grad_norm": 0.05085789187930648, + "learning_rate": 9.396962025316456e-06, + "loss": 1.6133, + "step": 1442 + }, + { + "epoch": 0.07, + "grad_norm": 0.08454571029108997, + "learning_rate": 9.395949367088607e-06, + "loss": 2.0781, + "step": 1444 + }, + { + "epoch": 0.07, + "grad_norm": 0.07139657679173401, + "learning_rate": 9.39493670886076e-06, + "loss": 2.1387, + "step": 1446 + }, + { + "epoch": 0.07, + "grad_norm": 0.04756116820932081, + "learning_rate": 9.393924050632913e-06, + "loss": 2.168, + "step": 1448 + }, + { + "epoch": 0.07, + "grad_norm": 0.04401770959594053, + "learning_rate": 9.392911392405065e-06, + "loss": 3.5078, + "step": 1450 + }, + { + "epoch": 0.07, + "grad_norm": 0.045790448883903524, + "learning_rate": 9.391898734177217e-06, + "loss": 1.9648, + "step": 1452 + }, + { + "epoch": 0.07, + "grad_norm": 0.05377943050007024, + "learning_rate": 9.390886075949369e-06, + "loss": 3.875, + "step": 1454 + }, + { + "epoch": 0.07, + "grad_norm": 0.04606808093150142, + "learning_rate": 9.38987341772152e-06, + "loss": 2.8438, + "step": 1456 + }, + { + "epoch": 0.07, + "grad_norm": 0.04546882059970107, + "learning_rate": 9.388860759493672e-06, + "loss": 1.6523, + "step": 1458 + }, + { + "epoch": 0.07, + "grad_norm": 0.07704364625121993, + "learning_rate": 9.387848101265824e-06, + "loss": 1.1895, + "step": 1460 + }, + { + "epoch": 0.07, + "grad_norm": 0.07143045147801531, + "learning_rate": 9.386835443037976e-06, + "loss": 3.6172, + "step": 1462 + }, + { + "epoch": 0.07, + "grad_norm": 0.06199252590022181, + "learning_rate": 9.385822784810127e-06, + "loss": 2.3066, + "step": 1464 + }, + { + "epoch": 0.07, + "grad_norm": 0.05668568463084169, + "learning_rate": 9.384810126582278e-06, + "loss": 1.084, + "step": 1466 + }, + { + "epoch": 0.07, + "grad_norm": 0.12850516952142793, + "learning_rate": 9.38379746835443e-06, + "loss": 1.5801, + "step": 1468 + }, + { + "epoch": 0.07, + "grad_norm": 0.04329007335741428, + "learning_rate": 9.382784810126582e-06, + "loss": 2.5781, + "step": 1470 + }, + { + "epoch": 0.07, + "grad_norm": 0.09555574181806, + "learning_rate": 9.381772151898734e-06, + "loss": 3.2812, + "step": 1472 + }, + { + "epoch": 0.07, + "grad_norm": 0.05880979405295948, + "learning_rate": 9.380759493670886e-06, + "loss": 2.8203, + "step": 1474 + }, + { + "epoch": 0.07, + "grad_norm": 0.049091451604602945, + "learning_rate": 9.379746835443038e-06, + "loss": 2.9141, + "step": 1476 + }, + { + "epoch": 0.07, + "grad_norm": 0.07680641181557653, + "learning_rate": 9.378734177215192e-06, + "loss": 1.9922, + "step": 1478 + }, + { + "epoch": 0.07, + "grad_norm": 0.057593120381473364, + "learning_rate": 9.377721518987343e-06, + "loss": 2.6523, + "step": 1480 + }, + { + "epoch": 0.07, + "grad_norm": 0.09099344445781292, + "learning_rate": 9.376708860759495e-06, + "loss": 3.0547, + "step": 1482 + }, + { + "epoch": 0.07, + "grad_norm": 0.07127214583778112, + "learning_rate": 9.375696202531647e-06, + "loss": 1.7109, + "step": 1484 + }, + { + "epoch": 0.07, + "grad_norm": 0.04901323516280243, + "learning_rate": 9.3746835443038e-06, + "loss": 1.9648, + "step": 1486 + }, + { + "epoch": 0.07, + "grad_norm": 0.06218888476222176, + "learning_rate": 9.373670886075951e-06, + "loss": 1.1504, + "step": 1488 + }, + { + "epoch": 0.07, + "grad_norm": 0.043001537655645475, + "learning_rate": 9.372658227848101e-06, + "loss": 2.1172, + "step": 1490 + }, + { + "epoch": 0.07, + "grad_norm": 0.058695091585682646, + "learning_rate": 9.371645569620253e-06, + "loss": 3.3984, + "step": 1492 + }, + { + "epoch": 0.07, + "grad_norm": 0.048734630242664174, + "learning_rate": 9.370632911392405e-06, + "loss": 2.9062, + "step": 1494 + }, + { + "epoch": 0.07, + "grad_norm": 0.05142460712334133, + "learning_rate": 9.369620253164557e-06, + "loss": 2.9219, + "step": 1496 + }, + { + "epoch": 0.07, + "grad_norm": 0.048336168592599454, + "learning_rate": 9.368607594936709e-06, + "loss": 1.3008, + "step": 1498 + }, + { + "epoch": 0.07, + "grad_norm": 0.07034287194094196, + "learning_rate": 9.367594936708861e-06, + "loss": 1.1387, + "step": 1500 + }, + { + "epoch": 0.07, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.3359375, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.6015625, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.29191589355469, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.3089, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.659, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.088, + "step": 1500 + }, + { + "epoch": 0.07, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.984375, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 21.484375, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.484375, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 20.72687530517578, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.65, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 24.151, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.377, + "step": 1500 + }, + { + "epoch": 0.07, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 45.5078125, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 94.140625, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.8671875, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 70.34400939941406, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.0356, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.799, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 1500 + }, + { + "epoch": 0.07, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 41.40625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 85.7421875, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.98046875, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 64.21460723876953, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.4385, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.433, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 1500 + }, + { + "epoch": 0.07, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.96875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 87.3046875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 66.57454681396484, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.7972, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.927, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 1500 + }, + { + "epoch": 0.07, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.0703125, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.7265625, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.02752685546875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.861, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.893, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 1500 + }, + { + "epoch": 0.07, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.015625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 85.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.59078979492188, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.5358, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 6.074, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.095, + "step": 1500 + }, + { + "epoch": 0.07, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 39.6484375, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 86.9140625, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.234375, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.64114379882812, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 15.9459, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 4.014, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.063, + "step": 1500 + }, + { + "epoch": 0.07, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 35.15625, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 75.78125, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.83984375, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 57.90193557739258, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.3191, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.032, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.188, + "step": 1500 + }, + { + "epoch": 0.07, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 44.53125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 91.2109375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0859375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 68.68386840820312, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 10.3583, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.179, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.097, + "step": 1500 + }, + { + "epoch": 0.07, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.53125, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 92.1875, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 69.2659912109375, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.0954, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.56, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.196, + "step": 1500 + }, + { + "epoch": 0.07, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 30.6640625, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 70.1171875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.86328125, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 52.90107727050781, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.4006, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.444, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.069, + "step": 1500 + }, + { + "epoch": 0.08, + "grad_norm": 0.058479193123082245, + "learning_rate": 9.366582278481013e-06, + "loss": 3.1406, + "step": 1502 + }, + { + "epoch": 0.08, + "grad_norm": 0.05144200831789539, + "learning_rate": 9.365569620253165e-06, + "loss": 1.9023, + "step": 1504 + }, + { + "epoch": 0.08, + "grad_norm": 0.0415613001994433, + "learning_rate": 9.364556962025317e-06, + "loss": 2.9141, + "step": 1506 + }, + { + "epoch": 0.08, + "grad_norm": 0.05266671122440496, + "learning_rate": 9.36354430379747e-06, + "loss": 2.2188, + "step": 1508 + }, + { + "epoch": 0.08, + "grad_norm": 0.06444994239463296, + "learning_rate": 9.362531645569622e-06, + "loss": 2.6758, + "step": 1510 + }, + { + "epoch": 0.08, + "grad_norm": 0.06834124597210364, + "learning_rate": 9.361518987341774e-06, + "loss": 1.9805, + "step": 1512 + }, + { + "epoch": 0.08, + "grad_norm": 0.06300774724452027, + "learning_rate": 9.360506329113926e-06, + "loss": 2.9531, + "step": 1514 + }, + { + "epoch": 0.08, + "grad_norm": 0.04840310128919646, + "learning_rate": 9.359493670886076e-06, + "loss": 3.5703, + "step": 1516 + }, + { + "epoch": 0.08, + "grad_norm": 0.054293535407710335, + "learning_rate": 9.358481012658228e-06, + "loss": 2.5938, + "step": 1518 + }, + { + "epoch": 0.08, + "grad_norm": 0.04978796968184069, + "learning_rate": 9.35746835443038e-06, + "loss": 3.375, + "step": 1520 + }, + { + "epoch": 0.08, + "grad_norm": 0.06343862426870277, + "learning_rate": 9.356455696202532e-06, + "loss": 2.6953, + "step": 1522 + }, + { + "epoch": 0.08, + "grad_norm": 0.04267087281847612, + "learning_rate": 9.355443037974684e-06, + "loss": 2.5312, + "step": 1524 + }, + { + "epoch": 0.08, + "grad_norm": 0.05536594787449037, + "learning_rate": 9.354430379746836e-06, + "loss": 3.0469, + "step": 1526 + }, + { + "epoch": 0.08, + "grad_norm": 0.048765163359707465, + "learning_rate": 9.353417721518988e-06, + "loss": 1.375, + "step": 1528 + }, + { + "epoch": 0.08, + "grad_norm": 0.03965024597525289, + "learning_rate": 9.35240506329114e-06, + "loss": 2.2383, + "step": 1530 + }, + { + "epoch": 0.08, + "grad_norm": 0.05573923071709513, + "learning_rate": 9.351392405063291e-06, + "loss": 3.3203, + "step": 1532 + }, + { + "epoch": 0.08, + "grad_norm": 0.0479853933542111, + "learning_rate": 9.350379746835443e-06, + "loss": 2.2305, + "step": 1534 + }, + { + "epoch": 0.08, + "grad_norm": 0.040700681106451066, + "learning_rate": 9.349367088607595e-06, + "loss": 3.1953, + "step": 1536 + }, + { + "epoch": 0.08, + "grad_norm": 0.04475551099431461, + "learning_rate": 9.348354430379749e-06, + "loss": 2.1875, + "step": 1538 + }, + { + "epoch": 0.08, + "grad_norm": 0.049553725050291646, + "learning_rate": 9.3473417721519e-06, + "loss": 3.2344, + "step": 1540 + }, + { + "epoch": 0.08, + "grad_norm": 0.04670156490937008, + "learning_rate": 9.346329113924051e-06, + "loss": 1.9141, + "step": 1542 + }, + { + "epoch": 0.08, + "grad_norm": 0.05663231468073976, + "learning_rate": 9.345316455696203e-06, + "loss": 2.2871, + "step": 1544 + }, + { + "epoch": 0.08, + "grad_norm": 0.06318704899297736, + "learning_rate": 9.344303797468355e-06, + "loss": 2.9453, + "step": 1546 + }, + { + "epoch": 0.08, + "grad_norm": 0.059460718933132796, + "learning_rate": 9.343291139240507e-06, + "loss": 3.1484, + "step": 1548 + }, + { + "epoch": 0.08, + "grad_norm": 0.0711919992524241, + "learning_rate": 9.342278481012659e-06, + "loss": 1.1777, + "step": 1550 + }, + { + "epoch": 0.08, + "grad_norm": 0.07330078641859443, + "learning_rate": 9.34126582278481e-06, + "loss": 2.1367, + "step": 1552 + }, + { + "epoch": 0.08, + "grad_norm": 0.08338509817001034, + "learning_rate": 9.340253164556962e-06, + "loss": 2.0977, + "step": 1554 + }, + { + "epoch": 0.08, + "grad_norm": 0.08714453539283397, + "learning_rate": 9.339240506329114e-06, + "loss": 1.6953, + "step": 1556 + }, + { + "epoch": 0.08, + "grad_norm": 0.09924684731192908, + "learning_rate": 9.338227848101266e-06, + "loss": 1.9922, + "step": 1558 + }, + { + "epoch": 0.08, + "grad_norm": 0.038239102309762, + "learning_rate": 9.337215189873418e-06, + "loss": 2.7891, + "step": 1560 + }, + { + "epoch": 0.08, + "grad_norm": 0.05314827892824287, + "learning_rate": 9.33620253164557e-06, + "loss": 2.7109, + "step": 1562 + }, + { + "epoch": 0.08, + "grad_norm": 0.05981892354958667, + "learning_rate": 9.335189873417722e-06, + "loss": 1.1152, + "step": 1564 + }, + { + "epoch": 0.08, + "grad_norm": 0.10543378939286623, + "learning_rate": 9.334177215189874e-06, + "loss": 2.334, + "step": 1566 + }, + { + "epoch": 0.08, + "grad_norm": 0.12095726689494823, + "learning_rate": 9.333164556962027e-06, + "loss": 2.5273, + "step": 1568 + }, + { + "epoch": 0.08, + "grad_norm": 0.04771441150677178, + "learning_rate": 9.332151898734178e-06, + "loss": 3.293, + "step": 1570 + }, + { + "epoch": 0.08, + "grad_norm": 0.05849347076840012, + "learning_rate": 9.33113924050633e-06, + "loss": 3.2422, + "step": 1572 + }, + { + "epoch": 0.08, + "grad_norm": 0.05734448026106365, + "learning_rate": 9.330126582278481e-06, + "loss": 3.1797, + "step": 1574 + }, + { + "epoch": 0.08, + "grad_norm": 0.051223518523306436, + "learning_rate": 9.329113924050633e-06, + "loss": 2.5, + "step": 1576 + }, + { + "epoch": 0.08, + "grad_norm": 0.09701413768488983, + "learning_rate": 9.328101265822785e-06, + "loss": 1.7031, + "step": 1578 + }, + { + "epoch": 0.08, + "grad_norm": 0.048150416298882136, + "learning_rate": 9.327088607594937e-06, + "loss": 2.2734, + "step": 1580 + }, + { + "epoch": 0.08, + "grad_norm": 0.09993600585851185, + "learning_rate": 9.326075949367089e-06, + "loss": 3.0391, + "step": 1582 + }, + { + "epoch": 0.08, + "grad_norm": 0.05125470855427429, + "learning_rate": 9.325063291139241e-06, + "loss": 3.7031, + "step": 1584 + }, + { + "epoch": 0.08, + "grad_norm": 0.09079844714057336, + "learning_rate": 9.324050632911393e-06, + "loss": 1.7188, + "step": 1586 + }, + { + "epoch": 0.08, + "grad_norm": 0.041271469047698434, + "learning_rate": 9.323037974683545e-06, + "loss": 2.4805, + "step": 1588 + }, + { + "epoch": 0.08, + "grad_norm": 0.046549116257717855, + "learning_rate": 9.322025316455697e-06, + "loss": 2.8203, + "step": 1590 + }, + { + "epoch": 0.08, + "grad_norm": 0.04959674743954773, + "learning_rate": 9.321012658227849e-06, + "loss": 2.7188, + "step": 1592 + }, + { + "epoch": 0.08, + "grad_norm": 0.061474583923480014, + "learning_rate": 9.32e-06, + "loss": 3.3984, + "step": 1594 + }, + { + "epoch": 0.08, + "grad_norm": 0.09163191359628134, + "learning_rate": 9.318987341772152e-06, + "loss": 1.6719, + "step": 1596 + }, + { + "epoch": 0.08, + "grad_norm": 0.045633306114650114, + "learning_rate": 9.317974683544304e-06, + "loss": 2.9531, + "step": 1598 + }, + { + "epoch": 0.08, + "grad_norm": 0.044980513613666485, + "learning_rate": 9.316962025316456e-06, + "loss": 2.6484, + "step": 1600 + }, + { + "epoch": 0.08, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.7265625, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 92.578125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.8046875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.30206298828125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.4303, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.599, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.087, + "step": 1600 + }, + { + "epoch": 0.08, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.203125, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 20.3125, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.484375, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 20.398147583007812, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.6925, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.77, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.371, + "step": 1600 + }, + { + "epoch": 0.08, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 44.140625, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 91.6015625, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.87109375, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 68.9178695678711, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.066, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.783, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.09, + "step": 1600 + }, + { + "epoch": 0.08, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 40.625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 84.1796875, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.98828125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 63.6385498046875, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.4703, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.423, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 1600 + }, + { + "epoch": 0.08, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.9921875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 85.3515625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.1825942993164, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.8864, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.879, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 1600 + }, + { + "epoch": 0.08, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.2890625, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 93.75, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.52351379394531, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.4642, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 6.116, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.096, + "step": 1600 + }, + { + "epoch": 0.08, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 39.84375, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 83.7890625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 63.52267837524414, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.7581, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.949, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 1600 + }, + { + "epoch": 0.08, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 39.453125, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 86.71875, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.1875, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.20020294189453, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 15.9427, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 4.014, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.063, + "step": 1600 + }, + { + "epoch": 0.08, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 35.3515625, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 76.5625, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.83984375, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 58.69310760498047, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.267, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.151, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.19, + "step": 1600 + }, + { + "epoch": 0.08, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 44.140625, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 90.4296875, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 68.63660430908203, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 8.4904, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 7.538, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.118, + "step": 1600 + }, + { + "epoch": 0.08, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 45.8984375, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 93.359375, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 69.04869079589844, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.0113, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.771, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.2, + "step": 1600 + }, + { + "epoch": 0.08, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 31.8359375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 72.65625, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.859375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 54.984291076660156, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.3615, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.456, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, + "step": 1600 + }, + { + "epoch": 0.08, + "grad_norm": 0.06245480333735374, + "learning_rate": 9.315949367088608e-06, + "loss": 3.1953, + "step": 1602 + }, + { + "epoch": 0.08, + "grad_norm": 0.055605246607420805, + "learning_rate": 9.31493670886076e-06, + "loss": 2.3262, + "step": 1604 + }, + { + "epoch": 0.08, + "grad_norm": 0.0501153666439784, + "learning_rate": 9.313924050632912e-06, + "loss": 2.6094, + "step": 1606 + }, + { + "epoch": 0.08, + "grad_norm": 0.07462734447902677, + "learning_rate": 9.312911392405064e-06, + "loss": 2.5078, + "step": 1608 + }, + { + "epoch": 0.08, + "grad_norm": 0.05460964561976558, + "learning_rate": 9.311898734177216e-06, + "loss": 2.1641, + "step": 1610 + }, + { + "epoch": 0.08, + "grad_norm": 0.04699422654304627, + "learning_rate": 9.310886075949368e-06, + "loss": 2.8047, + "step": 1612 + }, + { + "epoch": 0.08, + "grad_norm": 0.05171422202408438, + "learning_rate": 9.30987341772152e-06, + "loss": 3.1562, + "step": 1614 + }, + { + "epoch": 0.08, + "grad_norm": 0.053386026278170844, + "learning_rate": 9.308860759493672e-06, + "loss": 1.0215, + "step": 1616 + }, + { + "epoch": 0.08, + "grad_norm": 0.051189341730886406, + "learning_rate": 9.307848101265823e-06, + "loss": 1.9531, + "step": 1618 + }, + { + "epoch": 0.08, + "grad_norm": 0.05831005838918187, + "learning_rate": 9.306835443037975e-06, + "loss": 3.2656, + "step": 1620 + }, + { + "epoch": 0.08, + "grad_norm": 0.04834690668545663, + "learning_rate": 9.305822784810127e-06, + "loss": 2.4219, + "step": 1622 + }, + { + "epoch": 0.08, + "grad_norm": 0.04807847581015744, + "learning_rate": 9.30481012658228e-06, + "loss": 1.6738, + "step": 1624 + }, + { + "epoch": 0.08, + "grad_norm": 0.05549176218970007, + "learning_rate": 9.303797468354431e-06, + "loss": 3.2188, + "step": 1626 + }, + { + "epoch": 0.08, + "grad_norm": 0.07532291608421006, + "learning_rate": 9.302784810126583e-06, + "loss": 3.3516, + "step": 1628 + }, + { + "epoch": 0.08, + "grad_norm": 0.05981979151667419, + "learning_rate": 9.301772151898735e-06, + "loss": 3.7891, + "step": 1630 + }, + { + "epoch": 0.08, + "grad_norm": 0.049378118050634336, + "learning_rate": 9.300759493670887e-06, + "loss": 2.3281, + "step": 1632 + }, + { + "epoch": 0.08, + "grad_norm": 0.05510804145055033, + "learning_rate": 9.299746835443039e-06, + "loss": 2.9375, + "step": 1634 + }, + { + "epoch": 0.08, + "grad_norm": 0.05964761549202754, + "learning_rate": 9.29873417721519e-06, + "loss": 2.6172, + "step": 1636 + }, + { + "epoch": 0.08, + "grad_norm": 0.05041859270183951, + "learning_rate": 9.297721518987343e-06, + "loss": 3.1641, + "step": 1638 + }, + { + "epoch": 0.08, + "grad_norm": 0.04798209622883979, + "learning_rate": 9.296708860759494e-06, + "loss": 1.8809, + "step": 1640 + }, + { + "epoch": 0.08, + "grad_norm": 0.05573792952238686, + "learning_rate": 9.295696202531646e-06, + "loss": 3.1094, + "step": 1642 + }, + { + "epoch": 0.08, + "grad_norm": 0.04795052794632855, + "learning_rate": 9.294683544303798e-06, + "loss": 2.7969, + "step": 1644 + }, + { + "epoch": 0.08, + "grad_norm": 0.12025901515926583, + "learning_rate": 9.29367088607595e-06, + "loss": 2.6758, + "step": 1646 + }, + { + "epoch": 0.08, + "grad_norm": 0.06672087285753964, + "learning_rate": 9.292658227848102e-06, + "loss": 3.9844, + "step": 1648 + }, + { + "epoch": 0.08, + "grad_norm": 0.06282660709028401, + "learning_rate": 9.291645569620254e-06, + "loss": 3.1328, + "step": 1650 + }, + { + "epoch": 0.08, + "grad_norm": 0.058677668875888905, + "learning_rate": 9.290632911392406e-06, + "loss": 3.2188, + "step": 1652 + }, + { + "epoch": 0.08, + "grad_norm": 0.05333625411921265, + "learning_rate": 9.289620253164558e-06, + "loss": 3.5547, + "step": 1654 + }, + { + "epoch": 0.08, + "grad_norm": 0.04452332145815023, + "learning_rate": 9.28860759493671e-06, + "loss": 1.6543, + "step": 1656 + }, + { + "epoch": 0.08, + "grad_norm": 0.12435004639940082, + "learning_rate": 9.287594936708862e-06, + "loss": 2.0605, + "step": 1658 + }, + { + "epoch": 0.08, + "grad_norm": 0.059831108966171234, + "learning_rate": 9.286582278481014e-06, + "loss": 2.6836, + "step": 1660 + }, + { + "epoch": 0.08, + "grad_norm": 0.053703210014712156, + "learning_rate": 9.285569620253165e-06, + "loss": 2.0977, + "step": 1662 + }, + { + "epoch": 0.08, + "grad_norm": 0.0901318471300885, + "learning_rate": 9.284556962025317e-06, + "loss": 2.2734, + "step": 1664 + }, + { + "epoch": 0.08, + "grad_norm": 0.05244016033163033, + "learning_rate": 9.28354430379747e-06, + "loss": 2.9375, + "step": 1666 + }, + { + "epoch": 0.08, + "grad_norm": 0.08174274152119362, + "learning_rate": 9.282531645569621e-06, + "loss": 1.625, + "step": 1668 + }, + { + "epoch": 0.08, + "grad_norm": 0.058284333958605467, + "learning_rate": 9.281518987341773e-06, + "loss": 3.8438, + "step": 1670 + }, + { + "epoch": 0.08, + "grad_norm": 0.04967242799542885, + "learning_rate": 9.280506329113925e-06, + "loss": 1.9492, + "step": 1672 + }, + { + "epoch": 0.08, + "grad_norm": 0.04943855662398272, + "learning_rate": 9.279493670886077e-06, + "loss": 2.582, + "step": 1674 + }, + { + "epoch": 0.08, + "grad_norm": 0.05505936079786661, + "learning_rate": 9.278481012658229e-06, + "loss": 2.6211, + "step": 1676 + }, + { + "epoch": 0.08, + "grad_norm": 0.0461512612947597, + "learning_rate": 9.27746835443038e-06, + "loss": 2.6562, + "step": 1678 + }, + { + "epoch": 0.08, + "grad_norm": 0.04974146645078623, + "learning_rate": 9.276455696202533e-06, + "loss": 3.0703, + "step": 1680 + }, + { + "epoch": 0.08, + "grad_norm": 0.056930263889751004, + "learning_rate": 9.275443037974685e-06, + "loss": 3.5, + "step": 1682 + }, + { + "epoch": 0.08, + "grad_norm": 0.12321897233652232, + "learning_rate": 9.274430379746836e-06, + "loss": 0.9648, + "step": 1684 + }, + { + "epoch": 0.08, + "grad_norm": 0.06047479315003769, + "learning_rate": 9.273417721518988e-06, + "loss": 2.8594, + "step": 1686 + }, + { + "epoch": 0.08, + "grad_norm": 0.048279307732973814, + "learning_rate": 9.27240506329114e-06, + "loss": 2.0195, + "step": 1688 + }, + { + "epoch": 0.08, + "grad_norm": 0.05167384804499145, + "learning_rate": 9.271392405063292e-06, + "loss": 1.3398, + "step": 1690 + }, + { + "epoch": 0.08, + "grad_norm": 0.06352858612327222, + "learning_rate": 9.270379746835444e-06, + "loss": 3.7422, + "step": 1692 + }, + { + "epoch": 0.08, + "grad_norm": 0.06069008364933182, + "learning_rate": 9.269367088607596e-06, + "loss": 3.2969, + "step": 1694 + }, + { + "epoch": 0.08, + "grad_norm": 0.04565140006110923, + "learning_rate": 9.268354430379748e-06, + "loss": 2.3047, + "step": 1696 + }, + { + "epoch": 0.08, + "grad_norm": 0.06061226444027013, + "learning_rate": 9.2673417721519e-06, + "loss": 2.6641, + "step": 1698 + }, + { + "epoch": 0.09, + "grad_norm": 0.06055021661335569, + "learning_rate": 9.266329113924052e-06, + "loss": 2.6523, + "step": 1700 + }, + { + "epoch": 0.09, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.53125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.796875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.796875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.34986877441406, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.4029, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.613, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.088, + "step": 1700 + }, + { + "epoch": 0.09, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.203125, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 21.2890625, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4765625, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 19.810283660888672, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.7101, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.615, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.369, + "step": 1700 + }, + { + "epoch": 0.09, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 45.3125, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 92.96875, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.86328125, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 69.57865142822266, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.1636, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.733, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.09, + "step": 1700 + }, + { + "epoch": 0.09, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 40.8203125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 84.5703125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.98828125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 64.43778228759766, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.4692, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.423, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 1700 + }, + { + "epoch": 0.09, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.1875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 85.9375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.38245391845703, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.761, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.947, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 1700 + }, + { + "epoch": 0.09, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.265625, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 95.3125, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.38700866699219, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.8454, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.901, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 1700 + }, + { + "epoch": 0.09, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 40.625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 84.765625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 63.84552764892578, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.8499, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.899, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 1700 + }, + { + "epoch": 0.09, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.1875, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 89.6484375, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.03125, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 67.33321380615234, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.0186, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.995, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, + "step": 1700 + }, + { + "epoch": 0.09, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 41.9921875, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 87.109375, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.81640625, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 65.82962799072266, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.8881, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 10.869, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.17, + "step": 1700 + }, + { + "epoch": 0.09, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 46.09375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 93.1640625, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.27595520019531, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 8.3487, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 7.666, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.12, + "step": 1700 + }, + { + "epoch": 0.09, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.3359375, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 92.3828125, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.87727355957031, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.8468, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 13.205, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.206, + "step": 1700 + }, + { + "epoch": 0.09, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 31.8359375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 70.8984375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.85546875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 54.73493194580078, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.1071, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.537, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.071, + "step": 1700 + }, + { + "epoch": 0.09, + "grad_norm": 0.04881214834140313, + "learning_rate": 9.265316455696202e-06, + "loss": 1.5977, + "step": 1702 + }, + { + "epoch": 0.09, + "grad_norm": 0.05453646827872593, + "learning_rate": 9.264303797468356e-06, + "loss": 2.5781, + "step": 1704 + }, + { + "epoch": 0.09, + "grad_norm": 0.06013464957874684, + "learning_rate": 9.263291139240507e-06, + "loss": 0.8926, + "step": 1706 + }, + { + "epoch": 0.09, + "grad_norm": 0.06752355707010797, + "learning_rate": 9.26227848101266e-06, + "loss": 2.2012, + "step": 1708 + }, + { + "epoch": 0.09, + "grad_norm": 0.04740198019318174, + "learning_rate": 9.261265822784811e-06, + "loss": 2.1914, + "step": 1710 + }, + { + "epoch": 0.09, + "grad_norm": 0.06168270499607577, + "learning_rate": 9.260253164556963e-06, + "loss": 3.4688, + "step": 1712 + }, + { + "epoch": 0.09, + "grad_norm": 0.05425417322129795, + "learning_rate": 9.259240506329115e-06, + "loss": 3.1719, + "step": 1714 + }, + { + "epoch": 0.09, + "grad_norm": 0.06609753416942989, + "learning_rate": 9.258227848101267e-06, + "loss": 2.1836, + "step": 1716 + }, + { + "epoch": 0.09, + "grad_norm": 0.04879519305718599, + "learning_rate": 9.257215189873419e-06, + "loss": 1.3203, + "step": 1718 + }, + { + "epoch": 0.09, + "grad_norm": 0.06707616844340519, + "learning_rate": 9.25620253164557e-06, + "loss": 1.1934, + "step": 1720 + }, + { + "epoch": 0.09, + "grad_norm": 0.04621434055706063, + "learning_rate": 9.255189873417723e-06, + "loss": 2.0195, + "step": 1722 + }, + { + "epoch": 0.09, + "grad_norm": 0.07293254248042395, + "learning_rate": 9.254177215189875e-06, + "loss": 2.5, + "step": 1724 + }, + { + "epoch": 0.09, + "grad_norm": 0.048994009049322364, + "learning_rate": 9.253164556962027e-06, + "loss": 2.5898, + "step": 1726 + }, + { + "epoch": 0.09, + "grad_norm": 0.04525483393881031, + "learning_rate": 9.252151898734178e-06, + "loss": 1.2891, + "step": 1728 + }, + { + "epoch": 0.09, + "grad_norm": 0.07582602480871231, + "learning_rate": 9.251139240506329e-06, + "loss": 3.5938, + "step": 1730 + }, + { + "epoch": 0.09, + "grad_norm": 0.050116497906216896, + "learning_rate": 9.25012658227848e-06, + "loss": 1.2734, + "step": 1732 + }, + { + "epoch": 0.09, + "grad_norm": 0.06075701854481855, + "learning_rate": 9.249113924050634e-06, + "loss": 3.4375, + "step": 1734 + }, + { + "epoch": 0.09, + "grad_norm": 0.07519665939809185, + "learning_rate": 9.248101265822786e-06, + "loss": 2.8008, + "step": 1736 + }, + { + "epoch": 0.09, + "grad_norm": 0.056641325864073055, + "learning_rate": 9.247088607594938e-06, + "loss": 2.918, + "step": 1738 + }, + { + "epoch": 0.09, + "grad_norm": 0.042566341445080966, + "learning_rate": 9.24607594936709e-06, + "loss": 2.5625, + "step": 1740 + }, + { + "epoch": 0.09, + "grad_norm": 0.0574531442464776, + "learning_rate": 9.245063291139242e-06, + "loss": 3.5391, + "step": 1742 + }, + { + "epoch": 0.09, + "grad_norm": 0.07581751572601363, + "learning_rate": 9.244050632911394e-06, + "loss": 2.2578, + "step": 1744 + }, + { + "epoch": 0.09, + "grad_norm": 0.04726256151754022, + "learning_rate": 9.243037974683546e-06, + "loss": 4.1797, + "step": 1746 + }, + { + "epoch": 0.09, + "grad_norm": 0.059749398568774544, + "learning_rate": 9.242025316455698e-06, + "loss": 2.9922, + "step": 1748 + }, + { + "epoch": 0.09, + "grad_norm": 0.04809433552690452, + "learning_rate": 9.24101265822785e-06, + "loss": 2.2031, + "step": 1750 + }, + { + "epoch": 0.09, + "grad_norm": 0.04826640079361988, + "learning_rate": 9.240000000000001e-06, + "loss": 1.9219, + "step": 1752 + }, + { + "epoch": 0.09, + "grad_norm": 0.046456755919968916, + "learning_rate": 9.238987341772153e-06, + "loss": 2.7031, + "step": 1754 + }, + { + "epoch": 0.09, + "grad_norm": 0.044265683952215536, + "learning_rate": 9.237974683544303e-06, + "loss": 2.5625, + "step": 1756 + }, + { + "epoch": 0.09, + "grad_norm": 0.073608300381369, + "learning_rate": 9.236962025316455e-06, + "loss": 1.9961, + "step": 1758 + }, + { + "epoch": 0.09, + "grad_norm": 0.048301831144158945, + "learning_rate": 9.235949367088607e-06, + "loss": 3.0078, + "step": 1760 + }, + { + "epoch": 0.09, + "grad_norm": 0.0501519559569957, + "learning_rate": 9.23493670886076e-06, + "loss": 2.0586, + "step": 1762 + }, + { + "epoch": 0.09, + "grad_norm": 0.04577847285196145, + "learning_rate": 9.233924050632913e-06, + "loss": 3.6562, + "step": 1764 + }, + { + "epoch": 0.09, + "grad_norm": 0.04563139274633882, + "learning_rate": 9.232911392405065e-06, + "loss": 2.5703, + "step": 1766 + }, + { + "epoch": 0.09, + "grad_norm": 0.04125628654212963, + "learning_rate": 9.231898734177217e-06, + "loss": 2.8047, + "step": 1768 + }, + { + "epoch": 0.09, + "grad_norm": 0.04008124908648469, + "learning_rate": 9.230886075949368e-06, + "loss": 1.6816, + "step": 1770 + }, + { + "epoch": 0.09, + "grad_norm": 0.05475037868292045, + "learning_rate": 9.22987341772152e-06, + "loss": 4.0625, + "step": 1772 + }, + { + "epoch": 0.09, + "grad_norm": 0.09761848673236384, + "learning_rate": 9.228860759493672e-06, + "loss": 1.5488, + "step": 1774 + }, + { + "epoch": 0.09, + "grad_norm": 0.03963164610530929, + "learning_rate": 9.227848101265824e-06, + "loss": 2.7656, + "step": 1776 + }, + { + "epoch": 0.09, + "grad_norm": 0.05981526276955224, + "learning_rate": 9.226835443037976e-06, + "loss": 2.9609, + "step": 1778 + }, + { + "epoch": 0.09, + "grad_norm": 0.05052165933225442, + "learning_rate": 9.225822784810128e-06, + "loss": 1.3398, + "step": 1780 + }, + { + "epoch": 0.09, + "grad_norm": 0.029083882392062527, + "learning_rate": 9.224810126582278e-06, + "loss": 3.5781, + "step": 1782 + }, + { + "epoch": 0.09, + "grad_norm": 0.05857585196525194, + "learning_rate": 9.22379746835443e-06, + "loss": 3.6719, + "step": 1784 + }, + { + "epoch": 0.09, + "grad_norm": 0.04577420801792604, + "learning_rate": 9.222784810126582e-06, + "loss": 1.7109, + "step": 1786 + }, + { + "epoch": 0.09, + "grad_norm": 0.09168536433981106, + "learning_rate": 9.221772151898734e-06, + "loss": 1.6562, + "step": 1788 + }, + { + "epoch": 0.09, + "grad_norm": 0.0643609862212717, + "learning_rate": 9.220759493670886e-06, + "loss": 3.1484, + "step": 1790 + }, + { + "epoch": 0.09, + "grad_norm": 0.04811850844606396, + "learning_rate": 9.219746835443038e-06, + "loss": 1.1309, + "step": 1792 + }, + { + "epoch": 0.09, + "grad_norm": 0.09587399643250918, + "learning_rate": 9.218734177215191e-06, + "loss": 3.6406, + "step": 1794 + }, + { + "epoch": 0.09, + "grad_norm": 0.05002566010695197, + "learning_rate": 9.217721518987343e-06, + "loss": 2.8125, + "step": 1796 + }, + { + "epoch": 0.09, + "grad_norm": 0.06295928731342211, + "learning_rate": 9.216708860759495e-06, + "loss": 0.9199, + "step": 1798 + }, + { + "epoch": 0.09, + "grad_norm": 0.04770305027941822, + "learning_rate": 9.215696202531647e-06, + "loss": 2.3984, + "step": 1800 + }, + { + "epoch": 0.09, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 45.3125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 93.5546875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.60789489746094, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.3753, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.626, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.088, + "step": 1800 + }, + { + "epoch": 0.09, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.375, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 22.65625, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4609375, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.590526580810547, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.557, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 25.03, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.391, + "step": 1800 + }, + { + "epoch": 0.09, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 44.921875, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 92.578125, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.8671875, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 69.26657104492188, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.6898, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.987, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.094, + "step": 1800 + }, + { + "epoch": 0.09, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 39.453125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 82.8125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.984375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 63.29865264892578, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.5334, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.404, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 1800 + }, + { + "epoch": 0.09, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.796875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 85.9375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.24208068847656, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.2523, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.688, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.089, + "step": 1800 + }, + { + "epoch": 0.09, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 95.1171875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.32222747802734, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.9238, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.859, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 1800 + }, + { + "epoch": 0.09, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.2109375, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 85.3515625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.73181915283203, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.7931, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.93, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 1800 + }, + { + "epoch": 0.09, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.1875, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.28125, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.03125, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.28300476074219, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 15.9696, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 4.008, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.063, + "step": 1800 + }, + { + "epoch": 0.09, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 36.328125, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 76.7578125, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.83984375, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 59.691566467285156, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.1729, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.372, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.193, + "step": 1800 + }, + { + "epoch": 0.09, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 45.5078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 93.1640625, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0703125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.11920928955078, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 8.6727, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 7.38, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.115, + "step": 1800 + }, + { + "epoch": 0.09, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.75, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.9921875, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 69.05628204345703, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.7868, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 13.37, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.209, + "step": 1800 + }, + { + "epoch": 0.09, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 30.2734375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 68.5546875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.859375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 53.30010223388672, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 13.8704, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.614, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.072, + "step": 1800 + }, + { + "epoch": 0.09, + "grad_norm": 0.04950437980448833, + "learning_rate": 9.214683544303799e-06, + "loss": 2.0, + "step": 1802 + }, + { + "epoch": 0.09, + "grad_norm": 0.04629660184842734, + "learning_rate": 9.213670886075951e-06, + "loss": 1.6211, + "step": 1804 + }, + { + "epoch": 0.09, + "grad_norm": 0.04969426845613648, + "learning_rate": 9.212658227848103e-06, + "loss": 1.3438, + "step": 1806 + }, + { + "epoch": 0.09, + "grad_norm": 0.06037668180690101, + "learning_rate": 9.211645569620255e-06, + "loss": 2.8711, + "step": 1808 + }, + { + "epoch": 0.09, + "grad_norm": 0.04550840404027804, + "learning_rate": 9.210632911392405e-06, + "loss": 0.9297, + "step": 1810 + }, + { + "epoch": 0.09, + "grad_norm": 0.05465350201216224, + "learning_rate": 9.209620253164557e-06, + "loss": 2.1621, + "step": 1812 + }, + { + "epoch": 0.09, + "grad_norm": 0.0486391200337659, + "learning_rate": 9.208607594936709e-06, + "loss": 2.7812, + "step": 1814 + }, + { + "epoch": 0.09, + "grad_norm": 0.04918824198346791, + "learning_rate": 9.20759493670886e-06, + "loss": 3.1328, + "step": 1816 + }, + { + "epoch": 0.09, + "grad_norm": 0.05974561661788206, + "learning_rate": 9.206582278481013e-06, + "loss": 2.4414, + "step": 1818 + }, + { + "epoch": 0.09, + "grad_norm": 0.0600907115810377, + "learning_rate": 9.205569620253165e-06, + "loss": 2.7109, + "step": 1820 + }, + { + "epoch": 0.09, + "grad_norm": 0.10297627561908523, + "learning_rate": 9.204556962025316e-06, + "loss": 3.5859, + "step": 1822 + }, + { + "epoch": 0.09, + "grad_norm": 0.048003279768414474, + "learning_rate": 9.20354430379747e-06, + "loss": 1.9453, + "step": 1824 + }, + { + "epoch": 0.09, + "grad_norm": 0.05041769450216464, + "learning_rate": 9.202531645569622e-06, + "loss": 2.4531, + "step": 1826 + }, + { + "epoch": 0.09, + "grad_norm": 0.0550317635952239, + "learning_rate": 9.201518987341774e-06, + "loss": 3.0312, + "step": 1828 + }, + { + "epoch": 0.09, + "grad_norm": 0.053536765593840685, + "learning_rate": 9.200506329113926e-06, + "loss": 1.9258, + "step": 1830 + }, + { + "epoch": 0.09, + "grad_norm": 0.06965423447002039, + "learning_rate": 9.199493670886078e-06, + "loss": 3.9922, + "step": 1832 + }, + { + "epoch": 0.09, + "grad_norm": 0.049126054691502434, + "learning_rate": 9.19848101265823e-06, + "loss": 2.2617, + "step": 1834 + }, + { + "epoch": 0.09, + "grad_norm": 0.06392229281055582, + "learning_rate": 9.19746835443038e-06, + "loss": 3.7344, + "step": 1836 + }, + { + "epoch": 0.09, + "grad_norm": 0.056924425469819684, + "learning_rate": 9.196455696202532e-06, + "loss": 3.4531, + "step": 1838 + }, + { + "epoch": 0.09, + "grad_norm": 0.05449698462956753, + "learning_rate": 9.195443037974684e-06, + "loss": 3.8984, + "step": 1840 + }, + { + "epoch": 0.09, + "grad_norm": 0.04942440256487059, + "learning_rate": 9.194430379746836e-06, + "loss": 2.8359, + "step": 1842 + }, + { + "epoch": 0.09, + "grad_norm": 0.038117458370006276, + "learning_rate": 9.193417721518987e-06, + "loss": 3.0078, + "step": 1844 + }, + { + "epoch": 0.09, + "grad_norm": 0.08574255865012372, + "learning_rate": 9.19240506329114e-06, + "loss": 1.7461, + "step": 1846 + }, + { + "epoch": 0.09, + "grad_norm": 0.06234107636512481, + "learning_rate": 9.191392405063291e-06, + "loss": 2.5898, + "step": 1848 + }, + { + "epoch": 0.09, + "grad_norm": 0.037910434556300145, + "learning_rate": 9.190379746835443e-06, + "loss": 2.8594, + "step": 1850 + }, + { + "epoch": 0.09, + "grad_norm": 0.07145740607425113, + "learning_rate": 9.189367088607595e-06, + "loss": 3.1094, + "step": 1852 + }, + { + "epoch": 0.09, + "grad_norm": 0.04669389907423529, + "learning_rate": 9.188354430379749e-06, + "loss": 2.1016, + "step": 1854 + }, + { + "epoch": 0.09, + "grad_norm": 0.04517261096123241, + "learning_rate": 9.1873417721519e-06, + "loss": 1.2969, + "step": 1856 + }, + { + "epoch": 0.09, + "grad_norm": 0.04764797244335215, + "learning_rate": 9.186329113924052e-06, + "loss": 1.3242, + "step": 1858 + }, + { + "epoch": 0.09, + "grad_norm": 0.05860796398372702, + "learning_rate": 9.185316455696204e-06, + "loss": 2.5586, + "step": 1860 + }, + { + "epoch": 0.09, + "grad_norm": 0.06007110457642946, + "learning_rate": 9.184303797468355e-06, + "loss": 3.7266, + "step": 1862 + }, + { + "epoch": 0.09, + "grad_norm": 0.044862363101762844, + "learning_rate": 9.183291139240506e-06, + "loss": 3.3906, + "step": 1864 + }, + { + "epoch": 0.09, + "grad_norm": 0.06256240098904445, + "learning_rate": 9.182278481012658e-06, + "loss": 4.0781, + "step": 1866 + }, + { + "epoch": 0.09, + "grad_norm": 0.049413462774685786, + "learning_rate": 9.18126582278481e-06, + "loss": 2.5625, + "step": 1868 + }, + { + "epoch": 0.09, + "grad_norm": 0.07872266149646259, + "learning_rate": 9.180253164556962e-06, + "loss": 3.0625, + "step": 1870 + }, + { + "epoch": 0.09, + "grad_norm": 0.051236237205907774, + "learning_rate": 9.179240506329114e-06, + "loss": 2.0938, + "step": 1872 + }, + { + "epoch": 0.09, + "grad_norm": 0.06067358494293436, + "learning_rate": 9.178227848101266e-06, + "loss": 2.6484, + "step": 1874 + }, + { + "epoch": 0.09, + "grad_norm": 0.051347557367408814, + "learning_rate": 9.177215189873418e-06, + "loss": 2.25, + "step": 1876 + }, + { + "epoch": 0.09, + "grad_norm": 0.04541107915125041, + "learning_rate": 9.17620253164557e-06, + "loss": 1.5117, + "step": 1878 + }, + { + "epoch": 0.09, + "grad_norm": 0.061742002673103556, + "learning_rate": 9.175189873417722e-06, + "loss": 3.3477, + "step": 1880 + }, + { + "epoch": 0.09, + "grad_norm": 0.04388859983545845, + "learning_rate": 9.174177215189874e-06, + "loss": 2.7969, + "step": 1882 + }, + { + "epoch": 0.09, + "grad_norm": 0.05028624860068555, + "learning_rate": 9.173164556962027e-06, + "loss": 1.3867, + "step": 1884 + }, + { + "epoch": 0.09, + "grad_norm": 0.037567079793479514, + "learning_rate": 9.17215189873418e-06, + "loss": 2.1758, + "step": 1886 + }, + { + "epoch": 0.09, + "grad_norm": 0.07804356741746, + "learning_rate": 9.17113924050633e-06, + "loss": 2.3438, + "step": 1888 + }, + { + "epoch": 0.09, + "grad_norm": 0.060313828023189554, + "learning_rate": 9.170126582278481e-06, + "loss": 3.2109, + "step": 1890 + }, + { + "epoch": 0.09, + "grad_norm": 0.04613124112724252, + "learning_rate": 9.169113924050633e-06, + "loss": 2.7109, + "step": 1892 + }, + { + "epoch": 0.09, + "grad_norm": 0.06529932058523641, + "learning_rate": 9.168101265822785e-06, + "loss": 4.5625, + "step": 1894 + }, + { + "epoch": 0.09, + "grad_norm": 0.027167055894885384, + "learning_rate": 9.167088607594937e-06, + "loss": 3.2383, + "step": 1896 + }, + { + "epoch": 0.09, + "grad_norm": 0.08827428262780188, + "learning_rate": 9.166075949367089e-06, + "loss": 2.5859, + "step": 1898 + }, + { + "epoch": 0.1, + "grad_norm": 0.05137435098021163, + "learning_rate": 9.165063291139241e-06, + "loss": 2.7031, + "step": 1900 + }, + { + "epoch": 0.1, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.140625, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.2109375, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.92784118652344, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.1295, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.75, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.09, + "step": 1900 + }, + { + "epoch": 0.1, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.375, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 23.2421875, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4765625, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.729602813720703, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.7467, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.301, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.364, + "step": 1900 + }, + { + "epoch": 0.1, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 42.96875, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 88.4765625, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.875, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 66.89068603515625, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.8056, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.923, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 1900 + }, + { + "epoch": 0.1, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 36.1328125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 77.1484375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 1.0078125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 59.08802795410156, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.583, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.389, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 1900 + }, + { + "epoch": 0.1, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 40.4296875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 83.7890625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 63.918670654296875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.7193, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.971, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 1900 + }, + { + "epoch": 0.1, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.3359375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.03221130371094, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.6832, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.991, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.094, + "step": 1900 + }, + { + "epoch": 0.1, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.40625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 86.1328125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 65.09347534179688, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.8683, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.889, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 1900 + }, + { + "epoch": 0.1, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 40.8203125, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.0859375, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.046875, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.58374786376953, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.0313, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.992, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, + "step": 1900 + }, + { + "epoch": 0.1, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 43.359375, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 88.671875, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.81640625, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 67.02096557617188, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.1522, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.422, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.194, + "step": 1900 + }, + { + "epoch": 0.1, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 44.7265625, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 90.8203125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 68.63248443603516, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 8.4707, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 7.555, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.118, + "step": 1900 + }, + { + "epoch": 0.1, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.9453125, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.9921875, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 69.39515686035156, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.9344, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.97, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.203, + "step": 1900 + }, + { + "epoch": 0.1, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 30.6640625, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 70.5078125, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.84765625, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 53.77684020996094, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.5273, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.405, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.069, + "step": 1900 + }, + { + "epoch": 0.1, + "grad_norm": 0.053785170928229424, + "learning_rate": 9.164050632911393e-06, + "loss": 3.8828, + "step": 1902 + }, + { + "epoch": 0.1, + "grad_norm": 0.0461868598426467, + "learning_rate": 9.163037974683545e-06, + "loss": 1.375, + "step": 1904 + }, + { + "epoch": 0.1, + "grad_norm": 0.05002143622006523, + "learning_rate": 9.162025316455697e-06, + "loss": 1.875, + "step": 1906 + }, + { + "epoch": 0.1, + "grad_norm": 0.04870023429722329, + "learning_rate": 9.161012658227848e-06, + "loss": 3.2969, + "step": 1908 + }, + { + "epoch": 0.1, + "grad_norm": 0.05839344995308881, + "learning_rate": 9.16e-06, + "loss": 3.9062, + "step": 1910 + }, + { + "epoch": 0.1, + "grad_norm": 0.08302669871278688, + "learning_rate": 9.158987341772152e-06, + "loss": 1.6152, + "step": 1912 + }, + { + "epoch": 0.1, + "grad_norm": 0.046646267906599995, + "learning_rate": 9.157974683544306e-06, + "loss": 3.2656, + "step": 1914 + }, + { + "epoch": 0.1, + "grad_norm": 0.07758324358919483, + "learning_rate": 9.156962025316456e-06, + "loss": 2.6406, + "step": 1916 + }, + { + "epoch": 0.1, + "grad_norm": 0.07478548742257349, + "learning_rate": 9.155949367088608e-06, + "loss": 2.5898, + "step": 1918 + }, + { + "epoch": 0.1, + "grad_norm": 0.04206241097580769, + "learning_rate": 9.15493670886076e-06, + "loss": 2.6328, + "step": 1920 + }, + { + "epoch": 0.1, + "grad_norm": 0.04248502997322808, + "learning_rate": 9.153924050632912e-06, + "loss": 2.7734, + "step": 1922 + }, + { + "epoch": 0.1, + "grad_norm": 0.0966406030354209, + "learning_rate": 9.152911392405064e-06, + "loss": 3.0234, + "step": 1924 + }, + { + "epoch": 0.1, + "grad_norm": 0.04103611961996913, + "learning_rate": 9.151898734177216e-06, + "loss": 2.2148, + "step": 1926 + }, + { + "epoch": 0.1, + "grad_norm": 0.037762788617241944, + "learning_rate": 9.150886075949368e-06, + "loss": 4.0156, + "step": 1928 + }, + { + "epoch": 0.1, + "grad_norm": 0.05577557670502644, + "learning_rate": 9.14987341772152e-06, + "loss": 3.9766, + "step": 1930 + }, + { + "epoch": 0.1, + "grad_norm": 0.047125922095544774, + "learning_rate": 9.148860759493671e-06, + "loss": 1.9453, + "step": 1932 + }, + { + "epoch": 0.1, + "grad_norm": 0.06720927600831134, + "learning_rate": 9.147848101265823e-06, + "loss": 2.1562, + "step": 1934 + }, + { + "epoch": 0.1, + "grad_norm": 0.026518614274956994, + "learning_rate": 9.146835443037975e-06, + "loss": 3.2344, + "step": 1936 + }, + { + "epoch": 0.1, + "grad_norm": 0.04774969312496838, + "learning_rate": 9.145822784810127e-06, + "loss": 2.8594, + "step": 1938 + }, + { + "epoch": 0.1, + "grad_norm": 0.05511600074734071, + "learning_rate": 9.144810126582279e-06, + "loss": 4.2578, + "step": 1940 + }, + { + "epoch": 0.1, + "grad_norm": 0.048985406763597, + "learning_rate": 9.143797468354431e-06, + "loss": 2.6406, + "step": 1942 + }, + { + "epoch": 0.1, + "grad_norm": 0.05636077840457455, + "learning_rate": 9.142784810126583e-06, + "loss": 3.6094, + "step": 1944 + }, + { + "epoch": 0.1, + "grad_norm": 0.05418663050549037, + "learning_rate": 9.141772151898735e-06, + "loss": 1.3633, + "step": 1946 + }, + { + "epoch": 0.1, + "grad_norm": 0.04458754979305412, + "learning_rate": 9.140759493670887e-06, + "loss": 1.9141, + "step": 1948 + }, + { + "epoch": 0.1, + "grad_norm": 0.12327256395105521, + "learning_rate": 9.139746835443039e-06, + "loss": 3.2422, + "step": 1950 + }, + { + "epoch": 0.1, + "grad_norm": 0.050617511112204895, + "learning_rate": 9.13873417721519e-06, + "loss": 1.793, + "step": 1952 + }, + { + "epoch": 0.1, + "grad_norm": 0.043921037433058696, + "learning_rate": 9.137721518987342e-06, + "loss": 2.6172, + "step": 1954 + }, + { + "epoch": 0.1, + "grad_norm": 0.044706298993781236, + "learning_rate": 9.136708860759494e-06, + "loss": 1.5742, + "step": 1956 + }, + { + "epoch": 0.1, + "grad_norm": 0.09443373497653566, + "learning_rate": 9.135696202531646e-06, + "loss": 2.7266, + "step": 1958 + }, + { + "epoch": 0.1, + "grad_norm": 0.04091474069685536, + "learning_rate": 9.134683544303798e-06, + "loss": 1.6895, + "step": 1960 + }, + { + "epoch": 0.1, + "grad_norm": 0.07368260844955699, + "learning_rate": 9.13367088607595e-06, + "loss": 2.3438, + "step": 1962 + }, + { + "epoch": 0.1, + "grad_norm": 0.04875191940753315, + "learning_rate": 9.132658227848102e-06, + "loss": 2.0039, + "step": 1964 + }, + { + "epoch": 0.1, + "grad_norm": 0.04480736068493538, + "learning_rate": 9.131645569620254e-06, + "loss": 1.9961, + "step": 1966 + }, + { + "epoch": 0.1, + "grad_norm": 0.03828623414436692, + "learning_rate": 9.130632911392406e-06, + "loss": 1.9297, + "step": 1968 + }, + { + "epoch": 0.1, + "grad_norm": 0.05066921180757211, + "learning_rate": 9.129620253164558e-06, + "loss": 1.3828, + "step": 1970 + }, + { + "epoch": 0.1, + "grad_norm": 0.051897470073736116, + "learning_rate": 9.12860759493671e-06, + "loss": 2.8516, + "step": 1972 + }, + { + "epoch": 0.1, + "grad_norm": 0.08454973915446604, + "learning_rate": 9.127594936708861e-06, + "loss": 2.3281, + "step": 1974 + }, + { + "epoch": 0.1, + "grad_norm": 0.051129258223901825, + "learning_rate": 9.126582278481013e-06, + "loss": 3.4531, + "step": 1976 + }, + { + "epoch": 0.1, + "grad_norm": 0.055346604302165245, + "learning_rate": 9.125569620253165e-06, + "loss": 2.3535, + "step": 1978 + }, + { + "epoch": 0.1, + "grad_norm": 0.05440316191711523, + "learning_rate": 9.124556962025317e-06, + "loss": 3.2344, + "step": 1980 + }, + { + "epoch": 0.1, + "grad_norm": 0.04880116433640819, + "learning_rate": 9.123544303797469e-06, + "loss": 1.9531, + "step": 1982 + }, + { + "epoch": 0.1, + "grad_norm": 0.13712174422285836, + "learning_rate": 9.122531645569621e-06, + "loss": 2.0703, + "step": 1984 + }, + { + "epoch": 0.1, + "grad_norm": 0.05736294109541064, + "learning_rate": 9.121518987341773e-06, + "loss": 2.5859, + "step": 1986 + }, + { + "epoch": 0.1, + "grad_norm": 0.05249456121664606, + "learning_rate": 9.120506329113925e-06, + "loss": 2.582, + "step": 1988 + }, + { + "epoch": 0.1, + "grad_norm": 0.05620227956798809, + "learning_rate": 9.119493670886077e-06, + "loss": 3.5391, + "step": 1990 + }, + { + "epoch": 0.1, + "grad_norm": 0.02457150950267937, + "learning_rate": 9.118481012658229e-06, + "loss": 3.2578, + "step": 1992 + }, + { + "epoch": 0.1, + "grad_norm": 0.04719030609534645, + "learning_rate": 9.11746835443038e-06, + "loss": 1.9531, + "step": 1994 + }, + { + "epoch": 0.1, + "grad_norm": 0.1192003241257573, + "learning_rate": 9.116455696202532e-06, + "loss": 3.5938, + "step": 1996 + }, + { + "epoch": 0.1, + "grad_norm": 0.11820927850372065, + "learning_rate": 9.115443037974684e-06, + "loss": 3.5, + "step": 1998 + }, + { + "epoch": 0.1, + "grad_norm": 0.04342049766791241, + "learning_rate": 9.114430379746836e-06, + "loss": 1.9102, + "step": 2000 + }, + { + "epoch": 0.1, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.3359375, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.796875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.31722259521484, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.1798, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.725, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, + "step": 2000 + }, + { + "epoch": 0.1, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.0078125, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 19.140625, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.484375, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 20.005353927612305, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.56, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 25.0, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.391, + "step": 2000 + }, + { + "epoch": 0.1, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 47.8515625, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 96.484375, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.85546875, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 72.37664794921875, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.9675, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.835, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 2000 + }, + { + "epoch": 0.1, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 38.671875, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 80.859375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.99609375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 62.15817642211914, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.2599, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.488, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.07, + "step": 2000 + }, + { + "epoch": 0.1, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.6015625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 85.7421875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 64.71186828613281, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.928, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.856, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 2000 + }, + { + "epoch": 0.1, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.4609375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 95.5078125, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.7751693725586, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.902, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.87, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 2000 + }, + { + "epoch": 0.1, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 42.96875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 88.0859375, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 66.7734146118164, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.75, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.953, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 2000 + }, + { + "epoch": 0.1, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 40.4296875, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 86.9140625, + "eval_fever_top15HN_validation.jsonl.gz_loss": 1.9296875, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.1298828125, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 15.9037, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 4.024, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.063, + "step": 2000 + }, + { + "epoch": 0.1, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 37.890625, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 80.859375, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8359375, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 60.76201629638672, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.1301, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.475, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.195, + "step": 2000 + }, + { + "epoch": 0.1, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 44.921875, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 91.6015625, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0859375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 69.22639465332031, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 9.6422, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.638, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.104, + "step": 2000 + }, + { + "epoch": 0.1, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.359375, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.015625, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.24501037597656, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.1615, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.4, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.194, + "step": 2000 + }, + { + "epoch": 0.1, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 33.0078125, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 74.0234375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.8515625, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 56.28733444213867, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.3188, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.47, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, + "step": 2000 + }, + { + "epoch": 0.1, + "grad_norm": 0.05222602039717748, + "learning_rate": 9.113417721518988e-06, + "loss": 3.9219, + "step": 2002 + }, + { + "epoch": 0.1, + "grad_norm": 0.08077667713595828, + "learning_rate": 9.11240506329114e-06, + "loss": 3.5, + "step": 2004 + }, + { + "epoch": 0.1, + "grad_norm": 0.0742884923114933, + "learning_rate": 9.111392405063292e-06, + "loss": 3.1172, + "step": 2006 + }, + { + "epoch": 0.1, + "grad_norm": 0.11188361384486585, + "learning_rate": 9.110379746835444e-06, + "loss": 1.6562, + "step": 2008 + }, + { + "epoch": 0.1, + "grad_norm": 0.06437902091949863, + "learning_rate": 9.109367088607596e-06, + "loss": 1.2559, + "step": 2010 + }, + { + "epoch": 0.1, + "grad_norm": 0.06530103204629609, + "learning_rate": 9.108354430379748e-06, + "loss": 3.7578, + "step": 2012 + }, + { + "epoch": 0.1, + "grad_norm": 0.060746099738566636, + "learning_rate": 9.1073417721519e-06, + "loss": 2.1758, + "step": 2014 + }, + { + "epoch": 0.1, + "grad_norm": 0.08138424450084206, + "learning_rate": 9.106329113924052e-06, + "loss": 2.5703, + "step": 2016 + }, + { + "epoch": 0.1, + "grad_norm": 0.056468531591402575, + "learning_rate": 9.105316455696203e-06, + "loss": 2.5781, + "step": 2018 + }, + { + "epoch": 0.1, + "grad_norm": 0.042063520801115216, + "learning_rate": 9.104303797468355e-06, + "loss": 3.3672, + "step": 2020 + }, + { + "epoch": 0.1, + "grad_norm": 0.043034170481913506, + "learning_rate": 9.103291139240507e-06, + "loss": 3.8203, + "step": 2022 + }, + { + "epoch": 0.1, + "grad_norm": 0.05769275098821008, + "learning_rate": 9.10227848101266e-06, + "loss": 3.4609, + "step": 2024 + }, + { + "epoch": 0.1, + "grad_norm": 0.04417989427229762, + "learning_rate": 9.101265822784811e-06, + "loss": 2.8594, + "step": 2026 + }, + { + "epoch": 0.1, + "grad_norm": 0.07052014737004543, + "learning_rate": 9.100253164556963e-06, + "loss": 2.1562, + "step": 2028 + }, + { + "epoch": 0.1, + "grad_norm": 0.09688645225943193, + "learning_rate": 9.099240506329115e-06, + "loss": 4.0234, + "step": 2030 + }, + { + "epoch": 0.1, + "grad_norm": 0.04689199169468624, + "learning_rate": 9.098227848101267e-06, + "loss": 2.6406, + "step": 2032 + }, + { + "epoch": 0.1, + "grad_norm": 0.04508824741386778, + "learning_rate": 9.097215189873419e-06, + "loss": 3.1484, + "step": 2034 + }, + { + "epoch": 0.1, + "grad_norm": 0.03670749802610796, + "learning_rate": 9.09620253164557e-06, + "loss": 3.5547, + "step": 2036 + }, + { + "epoch": 0.1, + "grad_norm": 0.0516138977486577, + "learning_rate": 9.095189873417723e-06, + "loss": 1.543, + "step": 2038 + }, + { + "epoch": 0.1, + "grad_norm": 0.0538854278902059, + "learning_rate": 9.094177215189874e-06, + "loss": 2.1797, + "step": 2040 + }, + { + "epoch": 0.1, + "grad_norm": 0.08355958376302385, + "learning_rate": 9.093164556962026e-06, + "loss": 3.7031, + "step": 2042 + }, + { + "epoch": 0.1, + "grad_norm": 0.07888511195179149, + "learning_rate": 9.092151898734178e-06, + "loss": 3.1562, + "step": 2044 + }, + { + "epoch": 0.1, + "grad_norm": 0.044843880698050134, + "learning_rate": 9.09113924050633e-06, + "loss": 1.9375, + "step": 2046 + }, + { + "epoch": 0.1, + "grad_norm": 0.0844975914287397, + "learning_rate": 9.09012658227848e-06, + "loss": 3.4961, + "step": 2048 + }, + { + "epoch": 0.1, + "grad_norm": 0.0488064193164904, + "learning_rate": 9.089113924050634e-06, + "loss": 2.0195, + "step": 2050 + }, + { + "epoch": 0.1, + "grad_norm": 0.08197222584340348, + "learning_rate": 9.088101265822786e-06, + "loss": 2.3711, + "step": 2052 + }, + { + "epoch": 0.1, + "grad_norm": 0.04561496764349859, + "learning_rate": 9.087088607594938e-06, + "loss": 2.6445, + "step": 2054 + }, + { + "epoch": 0.1, + "grad_norm": 0.044886212083239126, + "learning_rate": 9.08607594936709e-06, + "loss": 2.9141, + "step": 2056 + }, + { + "epoch": 0.1, + "grad_norm": 0.050742476130656494, + "learning_rate": 9.085063291139242e-06, + "loss": 2.9531, + "step": 2058 + }, + { + "epoch": 0.1, + "grad_norm": 0.05563293598990602, + "learning_rate": 9.084050632911394e-06, + "loss": 3.5703, + "step": 2060 + }, + { + "epoch": 0.1, + "grad_norm": 0.051513608597992276, + "learning_rate": 9.083037974683545e-06, + "loss": 1.0918, + "step": 2062 + }, + { + "epoch": 0.1, + "grad_norm": 0.039394328240253074, + "learning_rate": 9.082025316455697e-06, + "loss": 2.9141, + "step": 2064 + }, + { + "epoch": 0.1, + "grad_norm": 0.04942604191134841, + "learning_rate": 9.08101265822785e-06, + "loss": 2.3398, + "step": 2066 + }, + { + "epoch": 0.1, + "grad_norm": 0.053334874655355094, + "learning_rate": 9.080000000000001e-06, + "loss": 3.9453, + "step": 2068 + }, + { + "epoch": 0.1, + "grad_norm": 0.067724388386229, + "learning_rate": 9.078987341772153e-06, + "loss": 1.1211, + "step": 2070 + }, + { + "epoch": 0.1, + "grad_norm": 0.046290634426354706, + "learning_rate": 9.077974683544305e-06, + "loss": 1.3906, + "step": 2072 + }, + { + "epoch": 0.1, + "grad_norm": 0.07936121862274997, + "learning_rate": 9.076962025316457e-06, + "loss": 1.373, + "step": 2074 + }, + { + "epoch": 0.1, + "grad_norm": 0.049220064217566195, + "learning_rate": 9.075949367088607e-06, + "loss": 4.2734, + "step": 2076 + }, + { + "epoch": 0.1, + "grad_norm": 0.04754414632707146, + "learning_rate": 9.074936708860759e-06, + "loss": 2.4922, + "step": 2078 + }, + { + "epoch": 0.1, + "grad_norm": 0.04787318801734571, + "learning_rate": 9.073924050632913e-06, + "loss": 1.3906, + "step": 2080 + }, + { + "epoch": 0.1, + "grad_norm": 0.1139301835727937, + "learning_rate": 9.072911392405065e-06, + "loss": 2.3945, + "step": 2082 + }, + { + "epoch": 0.1, + "grad_norm": 0.04584280660291769, + "learning_rate": 9.071898734177216e-06, + "loss": 1.8242, + "step": 2084 + }, + { + "epoch": 0.1, + "grad_norm": 0.10964906789491767, + "learning_rate": 9.070886075949368e-06, + "loss": 1.5605, + "step": 2086 + }, + { + "epoch": 0.1, + "grad_norm": 0.04726672407716711, + "learning_rate": 9.06987341772152e-06, + "loss": 4.1562, + "step": 2088 + }, + { + "epoch": 0.1, + "grad_norm": 0.042038330796539077, + "learning_rate": 9.068860759493672e-06, + "loss": 3.1953, + "step": 2090 + }, + { + "epoch": 0.1, + "grad_norm": 0.05953614239851778, + "learning_rate": 9.067848101265824e-06, + "loss": 3.1562, + "step": 2092 + }, + { + "epoch": 0.1, + "grad_norm": 0.04562048501461819, + "learning_rate": 9.066835443037976e-06, + "loss": 2.2109, + "step": 2094 + }, + { + "epoch": 0.1, + "grad_norm": 0.07193685505579554, + "learning_rate": 9.065822784810128e-06, + "loss": 4.0078, + "step": 2096 + }, + { + "epoch": 0.1, + "grad_norm": 0.07965518457460447, + "learning_rate": 9.06481012658228e-06, + "loss": 2.0742, + "step": 2098 + }, + { + "epoch": 0.1, + "grad_norm": 0.048523563405114496, + "learning_rate": 9.063797468354432e-06, + "loss": 3.9141, + "step": 2100 + }, + { + "epoch": 0.1, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.53125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.40625, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.8046875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.81912994384766, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.3194, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.654, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.088, + "step": 2100 + }, + { + "epoch": 0.1, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 11.328125, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 26.171875, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4453125, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 23.504255294799805, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.5172, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 25.425, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.397, + "step": 2100 + }, + { + "epoch": 0.1, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 44.921875, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 91.796875, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.86328125, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 68.86204528808594, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.0751, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.779, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.09, + "step": 2100 + }, + { + "epoch": 0.1, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 37.6953125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 78.90625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.99609375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 59.85957717895508, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.2226, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.5, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.07, + "step": 2100 + }, + { + "epoch": 0.1, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 40.4296875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 83.3984375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0390625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 63.35692596435547, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.8162, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.917, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 2100 + }, + { + "epoch": 0.1, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.0703125, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 95.1171875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.43053436279297, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.7984, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.927, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 2100 + }, + { + "epoch": 0.1, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 42.7734375, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 88.671875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 66.77860260009766, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 9.7879, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 6.539, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.102, + "step": 2100 + }, + { + "epoch": 0.1, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.1875, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 89.0625, + "eval_fever_top15HN_validation.jsonl.gz_loss": 1.9140625, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.59601593017578, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.3736, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.909, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.061, + "step": 2100 + }, + { + "epoch": 0.1, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 36.71875, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 78.125, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8359375, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 59.91558074951172, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.7232, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 11.183, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.175, + "step": 2100 + }, + { + "epoch": 0.1, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 44.53125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 90.8203125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0859375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 68.49028015136719, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 6.6111, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 9.681, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.151, + "step": 2100 + }, + { + "epoch": 0.1, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.359375, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.015625, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.39799499511719, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.0332, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.716, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.199, + "step": 2100 + }, + { + "epoch": 0.1, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 28.90625, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 67.578125, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.8515625, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 51.7280387878418, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.1246, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.531, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.071, + "step": 2100 + }, + { + "epoch": 0.11, + "grad_norm": 0.07387735249473802, + "learning_rate": 9.062784810126582e-06, + "loss": 2.5391, + "step": 2102 + }, + { + "epoch": 0.11, + "grad_norm": 0.0458105010880384, + "learning_rate": 9.061772151898734e-06, + "loss": 2.8047, + "step": 2104 + }, + { + "epoch": 0.11, + "grad_norm": 0.12684186314779236, + "learning_rate": 9.060759493670886e-06, + "loss": 1.7695, + "step": 2106 + }, + { + "epoch": 0.11, + "grad_norm": 0.08325164548755534, + "learning_rate": 9.059746835443038e-06, + "loss": 3.1602, + "step": 2108 + }, + { + "epoch": 0.11, + "grad_norm": 0.04912619687489667, + "learning_rate": 9.058734177215191e-06, + "loss": 3.5547, + "step": 2110 + }, + { + "epoch": 0.11, + "grad_norm": 0.039371876159854616, + "learning_rate": 9.057721518987343e-06, + "loss": 3.1875, + "step": 2112 + }, + { + "epoch": 0.11, + "grad_norm": 0.055597342693624215, + "learning_rate": 9.056708860759495e-06, + "loss": 1.7031, + "step": 2114 + }, + { + "epoch": 0.11, + "grad_norm": 0.07703184419677474, + "learning_rate": 9.055696202531647e-06, + "loss": 4.5859, + "step": 2116 + }, + { + "epoch": 0.11, + "grad_norm": 0.05945928380987463, + "learning_rate": 9.054683544303799e-06, + "loss": 3.2578, + "step": 2118 + }, + { + "epoch": 0.11, + "grad_norm": 0.06653723000877092, + "learning_rate": 9.05367088607595e-06, + "loss": 3.5391, + "step": 2120 + }, + { + "epoch": 0.11, + "grad_norm": 0.0570261593728403, + "learning_rate": 9.052658227848103e-06, + "loss": 2.6641, + "step": 2122 + }, + { + "epoch": 0.11, + "grad_norm": 0.05441555245366215, + "learning_rate": 9.051645569620255e-06, + "loss": 2.793, + "step": 2124 + }, + { + "epoch": 0.11, + "grad_norm": 0.05697224198374822, + "learning_rate": 9.050632911392407e-06, + "loss": 2.6133, + "step": 2126 + }, + { + "epoch": 0.11, + "grad_norm": 0.04949291905010763, + "learning_rate": 9.049620253164557e-06, + "loss": 1.9219, + "step": 2128 + }, + { + "epoch": 0.11, + "grad_norm": 0.05592274580805365, + "learning_rate": 9.048607594936709e-06, + "loss": 2.6289, + "step": 2130 + }, + { + "epoch": 0.11, + "grad_norm": 0.0455284168193864, + "learning_rate": 9.04759493670886e-06, + "loss": 3.0234, + "step": 2132 + }, + { + "epoch": 0.11, + "grad_norm": 0.05619766438175372, + "learning_rate": 9.046582278481012e-06, + "loss": 3.0703, + "step": 2134 + }, + { + "epoch": 0.11, + "grad_norm": 0.06472864649519333, + "learning_rate": 9.045569620253164e-06, + "loss": 1.7246, + "step": 2136 + }, + { + "epoch": 0.11, + "grad_norm": 0.04752637126015769, + "learning_rate": 9.044556962025316e-06, + "loss": 2.3828, + "step": 2138 + }, + { + "epoch": 0.11, + "grad_norm": 0.04792697472111959, + "learning_rate": 9.04354430379747e-06, + "loss": 1.7344, + "step": 2140 + }, + { + "epoch": 0.11, + "grad_norm": 0.04742442196081877, + "learning_rate": 9.042531645569622e-06, + "loss": 2.875, + "step": 2142 + }, + { + "epoch": 0.11, + "grad_norm": 0.05231304647118661, + "learning_rate": 9.041518987341774e-06, + "loss": 2.5781, + "step": 2144 + }, + { + "epoch": 0.11, + "grad_norm": 0.04065182251997917, + "learning_rate": 9.040506329113926e-06, + "loss": 3.125, + "step": 2146 + }, + { + "epoch": 0.11, + "grad_norm": 0.05189222525590392, + "learning_rate": 9.039493670886077e-06, + "loss": 2.4062, + "step": 2148 + }, + { + "epoch": 0.11, + "grad_norm": 0.04470197614035536, + "learning_rate": 9.03848101265823e-06, + "loss": 1.3398, + "step": 2150 + }, + { + "epoch": 0.11, + "grad_norm": 0.04976772591738967, + "learning_rate": 9.037468354430381e-06, + "loss": 2.5781, + "step": 2152 + }, + { + "epoch": 0.11, + "grad_norm": 0.07822486038531574, + "learning_rate": 9.036455696202533e-06, + "loss": 2.1875, + "step": 2154 + }, + { + "epoch": 0.11, + "grad_norm": 0.04786407540653388, + "learning_rate": 9.035443037974683e-06, + "loss": 3.125, + "step": 2156 + }, + { + "epoch": 0.11, + "grad_norm": 0.043433457581407584, + "learning_rate": 9.034430379746835e-06, + "loss": 1.75, + "step": 2158 + }, + { + "epoch": 0.11, + "grad_norm": 0.057060320757604435, + "learning_rate": 9.033417721518987e-06, + "loss": 2.5781, + "step": 2160 + }, + { + "epoch": 0.11, + "grad_norm": 0.043933465473367316, + "learning_rate": 9.03240506329114e-06, + "loss": 3.4766, + "step": 2162 + }, + { + "epoch": 0.11, + "grad_norm": 0.04949802297665267, + "learning_rate": 9.031392405063291e-06, + "loss": 2.7891, + "step": 2164 + }, + { + "epoch": 0.11, + "grad_norm": 0.04954968413272497, + "learning_rate": 9.030379746835443e-06, + "loss": 2.1211, + "step": 2166 + }, + { + "epoch": 0.11, + "grad_norm": 0.05060258024534153, + "learning_rate": 9.029367088607595e-06, + "loss": 3.1641, + "step": 2168 + }, + { + "epoch": 0.11, + "grad_norm": 0.04724895305426068, + "learning_rate": 9.028354430379748e-06, + "loss": 3.4766, + "step": 2170 + }, + { + "epoch": 0.11, + "grad_norm": 0.03995820056141506, + "learning_rate": 9.0273417721519e-06, + "loss": 2.6797, + "step": 2172 + }, + { + "epoch": 0.11, + "grad_norm": 0.05166613358528966, + "learning_rate": 9.026329113924052e-06, + "loss": 1.9453, + "step": 2174 + }, + { + "epoch": 0.11, + "grad_norm": 0.07654649362821431, + "learning_rate": 9.025316455696204e-06, + "loss": 0.8867, + "step": 2176 + }, + { + "epoch": 0.11, + "grad_norm": 0.048252976519986994, + "learning_rate": 9.024303797468356e-06, + "loss": 2.6445, + "step": 2178 + }, + { + "epoch": 0.11, + "grad_norm": 0.08881036283074599, + "learning_rate": 9.023291139240508e-06, + "loss": 3.1016, + "step": 2180 + }, + { + "epoch": 0.11, + "grad_norm": 0.04284117329093428, + "learning_rate": 9.022278481012658e-06, + "loss": 2.3125, + "step": 2182 + }, + { + "epoch": 0.11, + "grad_norm": 0.04474664022993633, + "learning_rate": 9.02126582278481e-06, + "loss": 2.8828, + "step": 2184 + }, + { + "epoch": 0.11, + "grad_norm": 0.03174705200783846, + "learning_rate": 9.020253164556962e-06, + "loss": 4.5469, + "step": 2186 + }, + { + "epoch": 0.11, + "grad_norm": 0.04487331242944554, + "learning_rate": 9.019240506329114e-06, + "loss": 3.6172, + "step": 2188 + }, + { + "epoch": 0.11, + "grad_norm": 0.06609003075152096, + "learning_rate": 9.018227848101266e-06, + "loss": 2.5938, + "step": 2190 + }, + { + "epoch": 0.11, + "grad_norm": 0.07497844137842792, + "learning_rate": 9.017215189873418e-06, + "loss": 3.9453, + "step": 2192 + }, + { + "epoch": 0.11, + "grad_norm": 0.06233797267984074, + "learning_rate": 9.01620253164557e-06, + "loss": 2.3945, + "step": 2194 + }, + { + "epoch": 0.11, + "grad_norm": 0.05036065260023731, + "learning_rate": 9.015189873417722e-06, + "loss": 2.9766, + "step": 2196 + }, + { + "epoch": 0.11, + "grad_norm": 0.0546727501187819, + "learning_rate": 9.014177215189874e-06, + "loss": 3.8047, + "step": 2198 + }, + { + "epoch": 0.11, + "grad_norm": 0.08060857074075317, + "learning_rate": 9.013164556962027e-06, + "loss": 1.6562, + "step": 2200 + }, + { + "epoch": 0.11, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 45.5078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 93.1640625, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.796875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.55633544921875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.3241, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.652, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.088, + "step": 2200 + }, + { + "epoch": 0.11, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.7890625, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 20.3125, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.46875, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 20.747766494750977, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.93, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 21.843, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.341, + "step": 2200 + }, + { + "epoch": 0.11, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 45.8984375, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 95.1171875, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.86328125, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 70.41649627685547, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.8324, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.908, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 2200 + }, + { + "epoch": 0.11, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 40.0390625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 83.203125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.9921875, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 63.23518371582031, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.249, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.492, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.07, + "step": 2200 + }, + { + "epoch": 0.11, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 40.625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 83.59375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0390625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 63.68541717529297, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.2193, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.704, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.089, + "step": 2200 + }, + { + "epoch": 0.11, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.6796875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.53125, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.95918273925781, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.7864, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.933, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 2200 + }, + { + "epoch": 0.11, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 42.1875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 86.71875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 65.048095703125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 9.8185, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 6.518, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.102, + "step": 2200 + }, + { + "epoch": 0.11, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.1875, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 89.2578125, + "eval_fever_top15HN_validation.jsonl.gz_loss": 1.9765625, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 67.1825942993164, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.0967, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.976, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, + "step": 2200 + }, + { + "epoch": 0.11, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 37.109375, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 78.7109375, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8359375, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 60.018646240234375, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.285, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.11, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.189, + "step": 2200 + }, + { + "epoch": 0.11, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 46.484375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 93.75, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0703125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.67472076416016, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 9.6155, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.656, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.104, + "step": 2200 + }, + { + "epoch": 0.11, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 41.796875, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 89.453125, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.64289855957031, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.3255, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.018, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.188, + "step": 2200 + }, + { + "epoch": 0.11, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 29.4921875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 67.1875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.8671875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 51.89695358276367, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.3591, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.457, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, + "step": 2200 + }, + { + "epoch": 0.11, + "grad_norm": 0.040314831021747495, + "learning_rate": 9.012151898734179e-06, + "loss": 2.1406, + "step": 2202 + }, + { + "epoch": 0.11, + "grad_norm": 0.047171810341216105, + "learning_rate": 9.011139240506331e-06, + "loss": 1.418, + "step": 2204 + }, + { + "epoch": 0.11, + "grad_norm": 0.048262845473527605, + "learning_rate": 9.010126582278483e-06, + "loss": 1.4023, + "step": 2206 + }, + { + "epoch": 0.11, + "grad_norm": 0.05154581332768385, + "learning_rate": 9.009113924050633e-06, + "loss": 1.3281, + "step": 2208 + }, + { + "epoch": 0.11, + "grad_norm": 0.04717605493865237, + "learning_rate": 9.008101265822785e-06, + "loss": 3.1172, + "step": 2210 + }, + { + "epoch": 0.11, + "grad_norm": 0.0539025659615322, + "learning_rate": 9.007088607594937e-06, + "loss": 3.0234, + "step": 2212 + }, + { + "epoch": 0.11, + "grad_norm": 0.04953877670880636, + "learning_rate": 9.006075949367089e-06, + "loss": 3.3828, + "step": 2214 + }, + { + "epoch": 0.11, + "grad_norm": 0.04434792675801702, + "learning_rate": 9.00506329113924e-06, + "loss": 2.4766, + "step": 2216 + }, + { + "epoch": 0.11, + "grad_norm": 0.04454442496488195, + "learning_rate": 9.004050632911393e-06, + "loss": 1.3398, + "step": 2218 + }, + { + "epoch": 0.11, + "grad_norm": 0.06255015953249068, + "learning_rate": 9.003037974683545e-06, + "loss": 2.0781, + "step": 2220 + }, + { + "epoch": 0.11, + "grad_norm": 0.05884765805099793, + "learning_rate": 9.002025316455696e-06, + "loss": 2.5977, + "step": 2222 + }, + { + "epoch": 0.11, + "grad_norm": 0.04479049825022292, + "learning_rate": 9.001012658227848e-06, + "loss": 2.8984, + "step": 2224 + }, + { + "epoch": 0.11, + "grad_norm": 0.07160542458432002, + "learning_rate": 9e-06, + "loss": 3.0781, + "step": 2226 + }, + { + "epoch": 0.11, + "grad_norm": 0.06552493223138955, + "learning_rate": 8.998987341772152e-06, + "loss": 2.9062, + "step": 2228 + }, + { + "epoch": 0.11, + "grad_norm": 0.05722711956677415, + "learning_rate": 8.997974683544306e-06, + "loss": 3.8516, + "step": 2230 + }, + { + "epoch": 0.11, + "grad_norm": 0.06535531913348491, + "learning_rate": 8.996962025316458e-06, + "loss": 2.4883, + "step": 2232 + }, + { + "epoch": 0.11, + "grad_norm": 0.07461287917151682, + "learning_rate": 8.995949367088608e-06, + "loss": 2.1797, + "step": 2234 + }, + { + "epoch": 0.11, + "grad_norm": 0.1164959969597713, + "learning_rate": 8.99493670886076e-06, + "loss": 2.2891, + "step": 2236 + }, + { + "epoch": 0.11, + "grad_norm": 0.045805685191550656, + "learning_rate": 8.993924050632912e-06, + "loss": 1.2891, + "step": 2238 + }, + { + "epoch": 0.11, + "grad_norm": 0.12476488442819236, + "learning_rate": 8.992911392405064e-06, + "loss": 2.5742, + "step": 2240 + }, + { + "epoch": 0.11, + "grad_norm": 0.055790341758965224, + "learning_rate": 8.991898734177215e-06, + "loss": 3.75, + "step": 2242 + }, + { + "epoch": 0.11, + "grad_norm": 0.10690788499980759, + "learning_rate": 8.990886075949367e-06, + "loss": 2.041, + "step": 2244 + }, + { + "epoch": 0.11, + "grad_norm": 0.0468540914912707, + "learning_rate": 8.98987341772152e-06, + "loss": 2.3203, + "step": 2246 + }, + { + "epoch": 0.11, + "grad_norm": 0.0431115651648458, + "learning_rate": 8.988860759493671e-06, + "loss": 3.4219, + "step": 2248 + }, + { + "epoch": 0.11, + "grad_norm": 0.045633132639230405, + "learning_rate": 8.987848101265823e-06, + "loss": 2.5391, + "step": 2250 + }, + { + "epoch": 0.11, + "grad_norm": 0.04884449922669781, + "learning_rate": 8.986835443037975e-06, + "loss": 1.8633, + "step": 2252 + }, + { + "epoch": 0.11, + "grad_norm": 0.05455381581326918, + "learning_rate": 8.985822784810127e-06, + "loss": 2.1719, + "step": 2254 + }, + { + "epoch": 0.11, + "grad_norm": 0.05550927886059573, + "learning_rate": 8.984810126582279e-06, + "loss": 3.0625, + "step": 2256 + }, + { + "epoch": 0.11, + "grad_norm": 0.07083931236183479, + "learning_rate": 8.98379746835443e-06, + "loss": 2.4297, + "step": 2258 + }, + { + "epoch": 0.11, + "grad_norm": 0.0446496468728146, + "learning_rate": 8.982784810126584e-06, + "loss": 3.6328, + "step": 2260 + }, + { + "epoch": 0.11, + "grad_norm": 0.06287031438083145, + "learning_rate": 8.981772151898735e-06, + "loss": 2.8906, + "step": 2262 + }, + { + "epoch": 0.11, + "grad_norm": 0.052572239536472096, + "learning_rate": 8.980759493670886e-06, + "loss": 4.25, + "step": 2264 + }, + { + "epoch": 0.11, + "grad_norm": 0.053437322827513935, + "learning_rate": 8.979746835443038e-06, + "loss": 2.832, + "step": 2266 + }, + { + "epoch": 0.11, + "grad_norm": 0.060520529783522306, + "learning_rate": 8.97873417721519e-06, + "loss": 3.6172, + "step": 2268 + }, + { + "epoch": 0.11, + "grad_norm": 0.04523560344019551, + "learning_rate": 8.977721518987342e-06, + "loss": 1.3125, + "step": 2270 + }, + { + "epoch": 0.11, + "grad_norm": 0.05590894648628289, + "learning_rate": 8.976708860759494e-06, + "loss": 2.6523, + "step": 2272 + }, + { + "epoch": 0.11, + "grad_norm": 0.08261328512133413, + "learning_rate": 8.975696202531646e-06, + "loss": 3.0156, + "step": 2274 + }, + { + "epoch": 0.11, + "grad_norm": 0.07630333499348366, + "learning_rate": 8.974683544303798e-06, + "loss": 2.4922, + "step": 2276 + }, + { + "epoch": 0.11, + "grad_norm": 0.07586013753293634, + "learning_rate": 8.97367088607595e-06, + "loss": 1.3711, + "step": 2278 + }, + { + "epoch": 0.11, + "grad_norm": 0.041416353553989003, + "learning_rate": 8.972658227848102e-06, + "loss": 2.6406, + "step": 2280 + }, + { + "epoch": 0.11, + "grad_norm": 0.05881140734182516, + "learning_rate": 8.971645569620254e-06, + "loss": 2.625, + "step": 2282 + }, + { + "epoch": 0.11, + "grad_norm": 0.05417846806548981, + "learning_rate": 8.970632911392406e-06, + "loss": 3.5859, + "step": 2284 + }, + { + "epoch": 0.11, + "grad_norm": 0.05058786589212933, + "learning_rate": 8.969620253164557e-06, + "loss": 2.3828, + "step": 2286 + }, + { + "epoch": 0.11, + "grad_norm": 0.04866282377765518, + "learning_rate": 8.96860759493671e-06, + "loss": 2.6211, + "step": 2288 + }, + { + "epoch": 0.11, + "grad_norm": 0.044517872080821724, + "learning_rate": 8.967594936708861e-06, + "loss": 2.8359, + "step": 2290 + }, + { + "epoch": 0.11, + "grad_norm": 0.04184764002943263, + "learning_rate": 8.966582278481013e-06, + "loss": 2.4922, + "step": 2292 + }, + { + "epoch": 0.11, + "grad_norm": 0.04532587269240339, + "learning_rate": 8.965569620253165e-06, + "loss": 2.2891, + "step": 2294 + }, + { + "epoch": 0.11, + "grad_norm": 0.04478519315755515, + "learning_rate": 8.964556962025317e-06, + "loss": 1.207, + "step": 2296 + }, + { + "epoch": 0.11, + "grad_norm": 0.05864167836092175, + "learning_rate": 8.963544303797469e-06, + "loss": 3.8438, + "step": 2298 + }, + { + "epoch": 0.12, + "grad_norm": 0.04066455404468236, + "learning_rate": 8.96253164556962e-06, + "loss": 3.1484, + "step": 2300 + }, + { + "epoch": 0.12, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.921875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 92.7734375, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.796875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.36105346679688, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.4734, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.578, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.087, + "step": 2300 + }, + { + "epoch": 0.12, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.1796875, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 22.65625, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4765625, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.005577087402344, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.6634, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 24.029, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.375, + "step": 2300 + }, + { + "epoch": 0.12, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 45.5078125, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 92.578125, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.87109375, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 69.31172943115234, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.0054, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.815, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 2300 + }, + { + "epoch": 0.12, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 41.2109375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 84.5703125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.9765625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 64.35186767578125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.3948, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.446, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 2300 + }, + { + "epoch": 0.12, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.9921875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 85.9375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.2877197265625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.626, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 6.023, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.094, + "step": 2300 + }, + { + "epoch": 0.12, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.6796875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.53125, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.0522689819336, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.8329, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.908, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 2300 + }, + { + "epoch": 0.12, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.9921875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 86.5234375, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 65.30658721923828, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.9321, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.854, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 2300 + }, + { + "epoch": 0.12, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 40.8203125, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.0859375, + "eval_fever_top15HN_validation.jsonl.gz_loss": 1.984375, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.79490661621094, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.2273, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.944, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, + "step": 2300 + }, + { + "epoch": 0.12, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 37.3046875, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 79.4921875, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.83984375, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 59.55287170410156, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.0029, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.793, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.2, + "step": 2300 + }, + { + "epoch": 0.12, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 45.5078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 92.578125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 69.79608154296875, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 9.4517, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.771, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.106, + "step": 2300 + }, + { + "epoch": 0.12, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.1640625, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 90.234375, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.7109375, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.19181060791016, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.4139, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 11.821, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.185, + "step": 2300 + }, + { + "epoch": 0.12, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 31.8359375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 71.09375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.85546875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 54.697837829589844, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.3425, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.462, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, + "step": 2300 + }, + { + "epoch": 0.12, + "grad_norm": 0.043993231026225224, + "learning_rate": 8.961518987341773e-06, + "loss": 2.5391, + "step": 2302 + }, + { + "epoch": 0.12, + "grad_norm": 0.07591932256289878, + "learning_rate": 8.960506329113925e-06, + "loss": 2.6289, + "step": 2304 + }, + { + "epoch": 0.12, + "grad_norm": 0.04323148596719756, + "learning_rate": 8.959493670886077e-06, + "loss": 2.6133, + "step": 2306 + }, + { + "epoch": 0.12, + "grad_norm": 0.05536397134631646, + "learning_rate": 8.958481012658228e-06, + "loss": 2.9922, + "step": 2308 + }, + { + "epoch": 0.12, + "grad_norm": 0.062026491433053485, + "learning_rate": 8.95746835443038e-06, + "loss": 4.6875, + "step": 2310 + }, + { + "epoch": 0.12, + "grad_norm": 0.040402043492967124, + "learning_rate": 8.956455696202532e-06, + "loss": 3.0938, + "step": 2312 + }, + { + "epoch": 0.12, + "grad_norm": 0.06112096476692111, + "learning_rate": 8.955443037974684e-06, + "loss": 2.9453, + "step": 2314 + }, + { + "epoch": 0.12, + "grad_norm": 0.0541853801140486, + "learning_rate": 8.954430379746836e-06, + "loss": 3.5781, + "step": 2316 + }, + { + "epoch": 0.12, + "grad_norm": 0.03930248511010282, + "learning_rate": 8.953417721518988e-06, + "loss": 3.4375, + "step": 2318 + }, + { + "epoch": 0.12, + "grad_norm": 0.0992215450899402, + "learning_rate": 8.95240506329114e-06, + "loss": 3.2734, + "step": 2320 + }, + { + "epoch": 0.12, + "grad_norm": 0.04215743712338384, + "learning_rate": 8.951392405063292e-06, + "loss": 2.1719, + "step": 2322 + }, + { + "epoch": 0.12, + "grad_norm": 0.07320172463749858, + "learning_rate": 8.950379746835444e-06, + "loss": 2.3633, + "step": 2324 + }, + { + "epoch": 0.12, + "grad_norm": 0.04884678722418488, + "learning_rate": 8.949367088607596e-06, + "loss": 2.2852, + "step": 2326 + }, + { + "epoch": 0.12, + "grad_norm": 0.05442499270066741, + "learning_rate": 8.948354430379748e-06, + "loss": 1.0449, + "step": 2328 + }, + { + "epoch": 0.12, + "grad_norm": 0.04890507356916708, + "learning_rate": 8.9473417721519e-06, + "loss": 1.9414, + "step": 2330 + }, + { + "epoch": 0.12, + "grad_norm": 0.05921324433195504, + "learning_rate": 8.946329113924051e-06, + "loss": 3.4922, + "step": 2332 + }, + { + "epoch": 0.12, + "grad_norm": 0.05700700134972175, + "learning_rate": 8.945316455696203e-06, + "loss": 4.2266, + "step": 2334 + }, + { + "epoch": 0.12, + "grad_norm": 0.04541638034291379, + "learning_rate": 8.944303797468355e-06, + "loss": 1.3086, + "step": 2336 + }, + { + "epoch": 0.12, + "grad_norm": 0.07271819550368797, + "learning_rate": 8.943291139240507e-06, + "loss": 1.9688, + "step": 2338 + }, + { + "epoch": 0.12, + "grad_norm": 0.043434576598254726, + "learning_rate": 8.942278481012659e-06, + "loss": 2.5234, + "step": 2340 + }, + { + "epoch": 0.12, + "grad_norm": 0.08763628291954113, + "learning_rate": 8.941265822784811e-06, + "loss": 3.1094, + "step": 2342 + }, + { + "epoch": 0.12, + "grad_norm": 0.04318677253013843, + "learning_rate": 8.940253164556963e-06, + "loss": 1.8672, + "step": 2344 + }, + { + "epoch": 0.12, + "grad_norm": 0.04196947059950058, + "learning_rate": 8.939240506329115e-06, + "loss": 2.5781, + "step": 2346 + }, + { + "epoch": 0.12, + "grad_norm": 0.09224427543881804, + "learning_rate": 8.938227848101267e-06, + "loss": 2.3379, + "step": 2348 + }, + { + "epoch": 0.12, + "grad_norm": 0.04235448599344302, + "learning_rate": 8.937215189873419e-06, + "loss": 3.4609, + "step": 2350 + }, + { + "epoch": 0.12, + "grad_norm": 0.046992432997166135, + "learning_rate": 8.93620253164557e-06, + "loss": 1.8672, + "step": 2352 + }, + { + "epoch": 0.12, + "grad_norm": 0.08273873473919732, + "learning_rate": 8.935189873417722e-06, + "loss": 2.4688, + "step": 2354 + }, + { + "epoch": 0.12, + "grad_norm": 0.025447840598404273, + "learning_rate": 8.934177215189874e-06, + "loss": 4.2188, + "step": 2356 + }, + { + "epoch": 0.12, + "grad_norm": 0.08535506101635452, + "learning_rate": 8.933164556962026e-06, + "loss": 1.6816, + "step": 2358 + }, + { + "epoch": 0.12, + "grad_norm": 0.08728930553305825, + "learning_rate": 8.932151898734178e-06, + "loss": 1.1211, + "step": 2360 + }, + { + "epoch": 0.12, + "grad_norm": 0.04555766185257788, + "learning_rate": 8.93113924050633e-06, + "loss": 3.1328, + "step": 2362 + }, + { + "epoch": 0.12, + "grad_norm": 0.04353537890607082, + "learning_rate": 8.930126582278482e-06, + "loss": 2.0352, + "step": 2364 + }, + { + "epoch": 0.12, + "grad_norm": 0.0506482491708173, + "learning_rate": 8.929113924050634e-06, + "loss": 0.7012, + "step": 2366 + }, + { + "epoch": 0.12, + "grad_norm": 0.0651253804771874, + "learning_rate": 8.928101265822786e-06, + "loss": 1.4453, + "step": 2368 + }, + { + "epoch": 0.12, + "grad_norm": 0.03876162527654146, + "learning_rate": 8.927088607594938e-06, + "loss": 2.9375, + "step": 2370 + }, + { + "epoch": 0.12, + "grad_norm": 0.05776458612139321, + "learning_rate": 8.92607594936709e-06, + "loss": 3.5938, + "step": 2372 + }, + { + "epoch": 0.12, + "grad_norm": 0.04748463323682337, + "learning_rate": 8.925063291139241e-06, + "loss": 1.9531, + "step": 2374 + }, + { + "epoch": 0.12, + "grad_norm": 0.07599693468952615, + "learning_rate": 8.924050632911393e-06, + "loss": 2.9648, + "step": 2376 + }, + { + "epoch": 0.12, + "grad_norm": 0.043300969936869965, + "learning_rate": 8.923037974683545e-06, + "loss": 3.2656, + "step": 2378 + }, + { + "epoch": 0.12, + "grad_norm": 0.044206076071945946, + "learning_rate": 8.922025316455697e-06, + "loss": 2.8438, + "step": 2380 + }, + { + "epoch": 0.12, + "grad_norm": 0.1016382796671544, + "learning_rate": 8.921012658227849e-06, + "loss": 2.8828, + "step": 2382 + }, + { + "epoch": 0.12, + "grad_norm": 0.055427975006479106, + "learning_rate": 8.920000000000001e-06, + "loss": 2.5938, + "step": 2384 + }, + { + "epoch": 0.12, + "grad_norm": 0.05624485894723525, + "learning_rate": 8.918987341772153e-06, + "loss": 3.1797, + "step": 2386 + }, + { + "epoch": 0.12, + "grad_norm": 0.045076970655017604, + "learning_rate": 8.917974683544305e-06, + "loss": 1.8633, + "step": 2388 + }, + { + "epoch": 0.12, + "grad_norm": 0.046547060461273486, + "learning_rate": 8.916962025316457e-06, + "loss": 2.2148, + "step": 2390 + }, + { + "epoch": 0.12, + "grad_norm": 0.05549173701507732, + "learning_rate": 8.915949367088609e-06, + "loss": 4.1797, + "step": 2392 + }, + { + "epoch": 0.12, + "grad_norm": 0.05839806902651177, + "learning_rate": 8.914936708860759e-06, + "loss": 3.2109, + "step": 2394 + }, + { + "epoch": 0.12, + "grad_norm": 0.05540163940146562, + "learning_rate": 8.913924050632912e-06, + "loss": 3.8516, + "step": 2396 + }, + { + "epoch": 0.12, + "grad_norm": 0.054197388695415565, + "learning_rate": 8.912911392405064e-06, + "loss": 3.3438, + "step": 2398 + }, + { + "epoch": 0.12, + "grad_norm": 0.1269306456397681, + "learning_rate": 8.911898734177216e-06, + "loss": 1.6523, + "step": 2400 + }, + { + "epoch": 0.12, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 43.75, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 90.625, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.796875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.58004760742188, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 12.2259, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.235, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.082, + "step": 2400 + }, + { + "epoch": 0.12, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 6.8359375, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 15.8203125, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.5078125, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 17.98919105529785, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.6358, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 24.281, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.379, + "step": 2400 + }, + { + "epoch": 0.12, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 46.484375, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 95.703125, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.859375, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 70.98693084716797, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.2417, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.693, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.089, + "step": 2400 + }, + { + "epoch": 0.12, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 41.015625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 84.9609375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.97265625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 64.15122985839844, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.4484, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.43, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 2400 + }, + { + "epoch": 0.12, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.96875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 87.5, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 66.5779800415039, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.1732, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.728, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.089, + "step": 2400 + }, + { + "epoch": 0.12, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.484375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.921875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.76953125, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.19829559326172, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 11.8532, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.399, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.084, + "step": 2400 + }, + { + "epoch": 0.12, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.796875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 87.109375, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.88831329345703, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 11.5908, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.522, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.086, + "step": 2400 + }, + { + "epoch": 0.12, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.1875, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.8671875, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.0, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.49065399169922, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.2019, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.95, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, + "step": 2400 + }, + { + "epoch": 0.12, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 39.84375, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 82.8125, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.828125, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 62.91026306152344, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.7262, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 11.177, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.175, + "step": 2400 + }, + { + "epoch": 0.12, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 45.1171875, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 91.796875, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 69.19859313964844, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 7.9681, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 8.032, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.125, + "step": 2400 + }, + { + "epoch": 0.12, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 42.96875, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.015625, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.14521789550781, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.5466, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 11.539, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.18, + "step": 2400 + }, + { + "epoch": 0.12, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 33.0078125, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 74.0234375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.859375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 56.22562789916992, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.3377, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.464, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, + "step": 2400 + }, + { + "epoch": 0.12, + "grad_norm": 0.054360585518445484, + "learning_rate": 8.910886075949368e-06, + "loss": 3.8828, + "step": 2402 + }, + { + "epoch": 0.12, + "grad_norm": 0.06887794058527433, + "learning_rate": 8.90987341772152e-06, + "loss": 3.0547, + "step": 2404 + }, + { + "epoch": 0.12, + "grad_norm": 0.060583557276451795, + "learning_rate": 8.908860759493672e-06, + "loss": 1.6855, + "step": 2406 + }, + { + "epoch": 0.12, + "grad_norm": 0.12613856817860564, + "learning_rate": 8.907848101265824e-06, + "loss": 2.3145, + "step": 2408 + }, + { + "epoch": 0.12, + "grad_norm": 0.05571721701243465, + "learning_rate": 8.906835443037976e-06, + "loss": 3.8281, + "step": 2410 + }, + { + "epoch": 0.12, + "grad_norm": 0.07231961952418126, + "learning_rate": 8.905822784810128e-06, + "loss": 2.9375, + "step": 2412 + }, + { + "epoch": 0.12, + "grad_norm": 0.050725319211885146, + "learning_rate": 8.90481012658228e-06, + "loss": 2.6875, + "step": 2414 + }, + { + "epoch": 0.12, + "grad_norm": 0.11104139412810117, + "learning_rate": 8.903797468354432e-06, + "loss": 1.5645, + "step": 2416 + }, + { + "epoch": 0.12, + "grad_norm": 0.046148625234935944, + "learning_rate": 8.902784810126583e-06, + "loss": 2.0156, + "step": 2418 + }, + { + "epoch": 0.12, + "grad_norm": 0.044795181597166084, + "learning_rate": 8.901772151898735e-06, + "loss": 1.3516, + "step": 2420 + }, + { + "epoch": 0.12, + "grad_norm": 0.04766981242618178, + "learning_rate": 8.900759493670886e-06, + "loss": 2.6406, + "step": 2422 + }, + { + "epoch": 0.12, + "grad_norm": 0.08883505544240511, + "learning_rate": 8.899746835443037e-06, + "loss": 2.5078, + "step": 2424 + }, + { + "epoch": 0.12, + "grad_norm": 0.04613727963014351, + "learning_rate": 8.898734177215191e-06, + "loss": 1.7188, + "step": 2426 + }, + { + "epoch": 0.12, + "grad_norm": 0.04830866347513251, + "learning_rate": 8.897721518987343e-06, + "loss": 2.25, + "step": 2428 + }, + { + "epoch": 0.12, + "grad_norm": 0.058420708565597675, + "learning_rate": 8.896708860759495e-06, + "loss": 2.4062, + "step": 2430 + }, + { + "epoch": 0.12, + "grad_norm": 0.06958523112172264, + "learning_rate": 8.895696202531647e-06, + "loss": 2.9648, + "step": 2432 + }, + { + "epoch": 0.12, + "grad_norm": 0.045121764078908125, + "learning_rate": 8.894683544303799e-06, + "loss": 1.6602, + "step": 2434 + }, + { + "epoch": 0.12, + "grad_norm": 0.04712074158473754, + "learning_rate": 8.89367088607595e-06, + "loss": 1.1914, + "step": 2436 + }, + { + "epoch": 0.12, + "grad_norm": 0.05144407669486078, + "learning_rate": 8.892658227848103e-06, + "loss": 3.2656, + "step": 2438 + }, + { + "epoch": 0.12, + "grad_norm": 0.050265401858983816, + "learning_rate": 8.891645569620254e-06, + "loss": 2.293, + "step": 2440 + }, + { + "epoch": 0.12, + "grad_norm": 0.054604631461465866, + "learning_rate": 8.890632911392406e-06, + "loss": 2.5312, + "step": 2442 + }, + { + "epoch": 0.12, + "grad_norm": 0.04667980062399209, + "learning_rate": 8.889620253164558e-06, + "loss": 1.6797, + "step": 2444 + }, + { + "epoch": 0.12, + "grad_norm": 0.05304347727193509, + "learning_rate": 8.88860759493671e-06, + "loss": 2.6992, + "step": 2446 + }, + { + "epoch": 0.12, + "grad_norm": 0.0840372069101572, + "learning_rate": 8.88759493670886e-06, + "loss": 0.8516, + "step": 2448 + }, + { + "epoch": 0.12, + "grad_norm": 0.04842070838375918, + "learning_rate": 8.886582278481012e-06, + "loss": 1.7148, + "step": 2450 + }, + { + "epoch": 0.12, + "grad_norm": 0.07344970867961509, + "learning_rate": 8.885569620253164e-06, + "loss": 2.8164, + "step": 2452 + }, + { + "epoch": 0.12, + "grad_norm": 0.08197126579605098, + "learning_rate": 8.884556962025316e-06, + "loss": 2.0508, + "step": 2454 + }, + { + "epoch": 0.12, + "grad_norm": 0.08906346433519001, + "learning_rate": 8.88354430379747e-06, + "loss": 1.9297, + "step": 2456 + }, + { + "epoch": 0.12, + "grad_norm": 0.07026800098562865, + "learning_rate": 8.882531645569622e-06, + "loss": 2.9141, + "step": 2458 + }, + { + "epoch": 0.12, + "grad_norm": 0.057852559290353545, + "learning_rate": 8.881518987341774e-06, + "loss": 1.2891, + "step": 2460 + }, + { + "epoch": 0.12, + "grad_norm": 0.07488084656189853, + "learning_rate": 8.880506329113925e-06, + "loss": 3.7734, + "step": 2462 + }, + { + "epoch": 0.12, + "grad_norm": 0.059345150098033324, + "learning_rate": 8.879493670886077e-06, + "loss": 2.3672, + "step": 2464 + }, + { + "epoch": 0.12, + "grad_norm": 0.046010342595922626, + "learning_rate": 8.87848101265823e-06, + "loss": 2.8281, + "step": 2466 + }, + { + "epoch": 0.12, + "grad_norm": 0.03510200766462188, + "learning_rate": 8.877468354430381e-06, + "loss": 4.4453, + "step": 2468 + }, + { + "epoch": 0.12, + "grad_norm": 0.0464438162298748, + "learning_rate": 8.876455696202533e-06, + "loss": 1.3281, + "step": 2470 + }, + { + "epoch": 0.12, + "grad_norm": 0.11354799763355634, + "learning_rate": 8.875443037974685e-06, + "loss": 2.2383, + "step": 2472 + }, + { + "epoch": 0.12, + "grad_norm": 0.05458230519319839, + "learning_rate": 8.874430379746835e-06, + "loss": 2.9102, + "step": 2474 + }, + { + "epoch": 0.12, + "grad_norm": 0.03803021377034928, + "learning_rate": 8.873417721518987e-06, + "loss": 2.75, + "step": 2476 + }, + { + "epoch": 0.12, + "grad_norm": 0.07906754079965607, + "learning_rate": 8.872405063291139e-06, + "loss": 3.4531, + "step": 2478 + }, + { + "epoch": 0.12, + "grad_norm": 0.07767897781324883, + "learning_rate": 8.871392405063291e-06, + "loss": 2.5703, + "step": 2480 + }, + { + "epoch": 0.12, + "grad_norm": 0.0978122466589843, + "learning_rate": 8.870379746835443e-06, + "loss": 4.2266, + "step": 2482 + }, + { + "epoch": 0.12, + "grad_norm": 0.04908720662041111, + "learning_rate": 8.869367088607595e-06, + "loss": 1.9023, + "step": 2484 + }, + { + "epoch": 0.12, + "grad_norm": 0.07665655710797971, + "learning_rate": 8.868354430379748e-06, + "loss": 2.3887, + "step": 2486 + }, + { + "epoch": 0.12, + "grad_norm": 0.08932946561581721, + "learning_rate": 8.8673417721519e-06, + "loss": 2.7031, + "step": 2488 + }, + { + "epoch": 0.12, + "grad_norm": 0.043585521695245454, + "learning_rate": 8.866329113924052e-06, + "loss": 1.9336, + "step": 2490 + }, + { + "epoch": 0.12, + "grad_norm": 0.0616416080110006, + "learning_rate": 8.865316455696204e-06, + "loss": 2.5312, + "step": 2492 + }, + { + "epoch": 0.12, + "grad_norm": 0.06618915511822913, + "learning_rate": 8.864303797468356e-06, + "loss": 3.6094, + "step": 2494 + }, + { + "epoch": 0.12, + "grad_norm": 0.04493140036724536, + "learning_rate": 8.863291139240508e-06, + "loss": 2.4531, + "step": 2496 + }, + { + "epoch": 0.12, + "grad_norm": 0.0589899002891497, + "learning_rate": 8.86227848101266e-06, + "loss": 3.6953, + "step": 2498 + }, + { + "epoch": 0.12, + "grad_norm": 0.07937925656330862, + "learning_rate": 8.861265822784812e-06, + "loss": 1.3164, + "step": 2500 + }, + { + "epoch": 0.12, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 45.1171875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 93.1640625, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.6258316040039, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.5075, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.562, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.087, + "step": 2500 + }, + { + "epoch": 0.12, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.9609375, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 23.6328125, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4609375, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.807819366455078, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.7181, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.546, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.368, + "step": 2500 + }, + { + "epoch": 0.12, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 47.0703125, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 94.7265625, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.85546875, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 71.43701171875, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.7722, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.941, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 2500 + }, + { + "epoch": 0.12, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 38.4765625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 80.2734375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 1.0, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 61.60811996459961, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.5135, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.41, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 2500 + }, + { + "epoch": 0.12, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 40.625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 84.1796875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0390625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 63.69384002685547, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.7142, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.973, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 2500 + }, + { + "epoch": 0.12, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.7265625, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.17011260986328, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 11.0668, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.783, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.09, + "step": 2500 + }, + { + "epoch": 0.12, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 40.0390625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 83.984375, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 63.44486999511719, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.5564, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 6.063, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.095, + "step": 2500 + }, + { + "epoch": 0.12, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 41.796875, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.28125, + "eval_fever_top15HN_validation.jsonl.gz_loss": 1.921875, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.27226257324219, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.3154, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.923, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.061, + "step": 2500 + }, + { + "epoch": 0.12, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 27.5390625, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 63.28125, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.875, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 48.667640686035156, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 4.9663, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.887, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.201, + "step": 2500 + }, + { + "epoch": 0.12, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 43.75, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 89.84375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.09375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 67.84911346435547, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 7.5591, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 8.467, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.132, + "step": 2500 + }, + { + "epoch": 0.12, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.7265625, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 92.578125, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.54449462890625, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.0478, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.679, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.198, + "step": 2500 + }, + { + "epoch": 0.12, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 29.8828125, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 70.3125, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.859375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 53.18346405029297, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.2854, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.48, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, + "step": 2500 + }, + { + "epoch": 0.13, + "grad_norm": 0.04633153091940758, + "learning_rate": 8.860253164556962e-06, + "loss": 2.1875, + "step": 2502 + }, + { + "epoch": 0.13, + "grad_norm": 0.05905298038997286, + "learning_rate": 8.859240506329114e-06, + "loss": 3.4844, + "step": 2504 + }, + { + "epoch": 0.13, + "grad_norm": 0.048094727656352754, + "learning_rate": 8.858227848101266e-06, + "loss": 2.1211, + "step": 2506 + }, + { + "epoch": 0.13, + "grad_norm": 0.035994165452193784, + "learning_rate": 8.857215189873418e-06, + "loss": 3.0547, + "step": 2508 + }, + { + "epoch": 0.13, + "grad_norm": 0.04289409263851401, + "learning_rate": 8.85620253164557e-06, + "loss": 3.1797, + "step": 2510 + }, + { + "epoch": 0.13, + "grad_norm": 0.04667679037496861, + "learning_rate": 8.855189873417721e-06, + "loss": 3.8359, + "step": 2512 + }, + { + "epoch": 0.13, + "grad_norm": 0.12455027862059778, + "learning_rate": 8.854177215189873e-06, + "loss": 3.7812, + "step": 2514 + }, + { + "epoch": 0.13, + "grad_norm": 0.04730664140523245, + "learning_rate": 8.853164556962027e-06, + "loss": 2.4297, + "step": 2516 + }, + { + "epoch": 0.13, + "grad_norm": 0.042202110155894375, + "learning_rate": 8.852151898734179e-06, + "loss": 1.2891, + "step": 2518 + }, + { + "epoch": 0.13, + "grad_norm": 0.04754586764286116, + "learning_rate": 8.85113924050633e-06, + "loss": 3.2109, + "step": 2520 + }, + { + "epoch": 0.13, + "grad_norm": 0.07343919643005098, + "learning_rate": 8.850126582278483e-06, + "loss": 2.25, + "step": 2522 + }, + { + "epoch": 0.13, + "grad_norm": 0.08709059692488305, + "learning_rate": 8.849113924050635e-06, + "loss": 2.3438, + "step": 2524 + }, + { + "epoch": 0.13, + "grad_norm": 0.04340495246498866, + "learning_rate": 8.848101265822786e-06, + "loss": 2.3633, + "step": 2526 + }, + { + "epoch": 0.13, + "grad_norm": 0.04469305828346309, + "learning_rate": 8.847088607594937e-06, + "loss": 1.6836, + "step": 2528 + }, + { + "epoch": 0.13, + "grad_norm": 0.088414459032459, + "learning_rate": 8.846075949367089e-06, + "loss": 2.4844, + "step": 2530 + }, + { + "epoch": 0.13, + "grad_norm": 0.043113690269789035, + "learning_rate": 8.84506329113924e-06, + "loss": 1.0469, + "step": 2532 + }, + { + "epoch": 0.13, + "grad_norm": 0.06503235750891347, + "learning_rate": 8.844050632911392e-06, + "loss": 2.8906, + "step": 2534 + }, + { + "epoch": 0.13, + "grad_norm": 0.05571664033619658, + "learning_rate": 8.843037974683544e-06, + "loss": 4.1797, + "step": 2536 + }, + { + "epoch": 0.13, + "grad_norm": 0.04358647922501268, + "learning_rate": 8.842025316455696e-06, + "loss": 1.0645, + "step": 2538 + }, + { + "epoch": 0.13, + "grad_norm": 0.09110864380118001, + "learning_rate": 8.841012658227848e-06, + "loss": 2.9844, + "step": 2540 + }, + { + "epoch": 0.13, + "grad_norm": 0.04784759923520609, + "learning_rate": 8.84e-06, + "loss": 1.8594, + "step": 2542 + }, + { + "epoch": 0.13, + "grad_norm": 0.04563810959116694, + "learning_rate": 8.838987341772152e-06, + "loss": 3.2188, + "step": 2544 + }, + { + "epoch": 0.13, + "grad_norm": 0.0461173796094796, + "learning_rate": 8.837974683544306e-06, + "loss": 2.5391, + "step": 2546 + }, + { + "epoch": 0.13, + "grad_norm": 0.07851545988606415, + "learning_rate": 8.836962025316457e-06, + "loss": 2.0742, + "step": 2548 + }, + { + "epoch": 0.13, + "grad_norm": 0.047102253116170514, + "learning_rate": 8.83594936708861e-06, + "loss": 2.3281, + "step": 2550 + }, + { + "epoch": 0.13, + "grad_norm": 0.04421632697742336, + "learning_rate": 8.834936708860761e-06, + "loss": 1.0996, + "step": 2552 + }, + { + "epoch": 0.13, + "grad_norm": 0.04662218312739471, + "learning_rate": 8.833924050632912e-06, + "loss": 2.8984, + "step": 2554 + }, + { + "epoch": 0.13, + "grad_norm": 0.041431262450878824, + "learning_rate": 8.832911392405063e-06, + "loss": 1.6582, + "step": 2556 + }, + { + "epoch": 0.13, + "grad_norm": 0.05535268065333397, + "learning_rate": 8.831898734177215e-06, + "loss": 3.7656, + "step": 2558 + }, + { + "epoch": 0.13, + "grad_norm": 0.04371699654044821, + "learning_rate": 8.830886075949367e-06, + "loss": 1.6758, + "step": 2560 + }, + { + "epoch": 0.13, + "grad_norm": 0.13086511115412275, + "learning_rate": 8.829873417721519e-06, + "loss": 2.0664, + "step": 2562 + }, + { + "epoch": 0.13, + "grad_norm": 0.06839763728577254, + "learning_rate": 8.828860759493671e-06, + "loss": 1.0723, + "step": 2564 + }, + { + "epoch": 0.13, + "grad_norm": 0.04184277562079915, + "learning_rate": 8.827848101265823e-06, + "loss": 1.6562, + "step": 2566 + }, + { + "epoch": 0.13, + "grad_norm": 0.058952200716529374, + "learning_rate": 8.826835443037975e-06, + "loss": 3.7734, + "step": 2568 + }, + { + "epoch": 0.13, + "grad_norm": 0.044434838225155884, + "learning_rate": 8.825822784810127e-06, + "loss": 3.1094, + "step": 2570 + }, + { + "epoch": 0.13, + "grad_norm": 0.05595388170602543, + "learning_rate": 8.824810126582279e-06, + "loss": 3.8359, + "step": 2572 + }, + { + "epoch": 0.13, + "grad_norm": 0.08113111202164194, + "learning_rate": 8.82379746835443e-06, + "loss": 2.1719, + "step": 2574 + }, + { + "epoch": 0.13, + "grad_norm": 0.05373612376884004, + "learning_rate": 8.822784810126584e-06, + "loss": 3.3281, + "step": 2576 + }, + { + "epoch": 0.13, + "grad_norm": 0.04954179399609412, + "learning_rate": 8.821772151898736e-06, + "loss": 2.5938, + "step": 2578 + }, + { + "epoch": 0.13, + "grad_norm": 0.04943208064397753, + "learning_rate": 8.820759493670888e-06, + "loss": 2.9141, + "step": 2580 + }, + { + "epoch": 0.13, + "grad_norm": 0.044735737967205216, + "learning_rate": 8.819746835443038e-06, + "loss": 3.1484, + "step": 2582 + }, + { + "epoch": 0.13, + "grad_norm": 0.10650528959169556, + "learning_rate": 8.81873417721519e-06, + "loss": 2.2461, + "step": 2584 + }, + { + "epoch": 0.13, + "grad_norm": 0.05377515075505955, + "learning_rate": 8.817721518987342e-06, + "loss": 3.6484, + "step": 2586 + }, + { + "epoch": 0.13, + "grad_norm": 0.044445382115991326, + "learning_rate": 8.816708860759494e-06, + "loss": 3.2188, + "step": 2588 + }, + { + "epoch": 0.13, + "grad_norm": 0.07149806803480696, + "learning_rate": 8.815696202531646e-06, + "loss": 3.3203, + "step": 2590 + }, + { + "epoch": 0.13, + "grad_norm": 0.07765096857543947, + "learning_rate": 8.814683544303798e-06, + "loss": 1.1152, + "step": 2592 + }, + { + "epoch": 0.13, + "grad_norm": 0.32396538727778484, + "learning_rate": 8.81367088607595e-06, + "loss": 2.1191, + "step": 2594 + }, + { + "epoch": 0.13, + "grad_norm": 0.0543477861736957, + "learning_rate": 8.812658227848102e-06, + "loss": 3.7969, + "step": 2596 + }, + { + "epoch": 0.13, + "grad_norm": 0.06229143061454939, + "learning_rate": 8.811645569620254e-06, + "loss": 3.0312, + "step": 2598 + }, + { + "epoch": 0.13, + "grad_norm": 0.0537317517438358, + "learning_rate": 8.810632911392405e-06, + "loss": 3.4531, + "step": 2600 + }, + { + "epoch": 0.13, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.3359375, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.2109375, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.84588623046875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.3589, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.634, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.088, + "step": 2600 + }, + { + "epoch": 0.13, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 10.7421875, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 25.1953125, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4453125, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 22.87902069091797, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.514, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 25.458, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.398, + "step": 2600 + }, + { + "epoch": 0.13, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 47.4609375, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 96.484375, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.85546875, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 72.10218048095703, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.8518, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.898, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 2600 + }, + { + "epoch": 0.13, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 39.453125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 82.03125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.9921875, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 62.346885681152344, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 15.2287, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.203, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.066, + "step": 2600 + }, + { + "epoch": 0.13, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.1875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 86.71875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.28152465820312, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.8528, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.4, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.084, + "step": 2600 + }, + { + "epoch": 0.13, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.6796875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.53125, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.67933654785156, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 11.6791, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.48, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.086, + "step": 2600 + }, + { + "epoch": 0.13, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 43.1640625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 88.671875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 66.86489868164062, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 11.7831, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.431, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.085, + "step": 2600 + }, + { + "epoch": 0.13, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 41.9921875, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.28125, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.015625, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.0167007446289, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 17.3656, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.685, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.058, + "step": 2600 + }, + { + "epoch": 0.13, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 36.1328125, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 78.515625, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8359375, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 59.20676040649414, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 6.2888, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 10.177, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.159, + "step": 2600 + }, + { + "epoch": 0.13, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 43.1640625, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 89.2578125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.09375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 67.05455017089844, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 7.1267, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 8.98, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.14, + "step": 2600 + }, + { + "epoch": 0.13, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.75, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.40625, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.7109375, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.508056640625, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.2486, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.194, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.191, + "step": 2600 + }, + { + "epoch": 0.13, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 28.7109375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 67.578125, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.85546875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 51.83809280395508, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.3416, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.463, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, + "step": 2600 + }, + { + "epoch": 0.13, + "grad_norm": 0.04951655968018335, + "learning_rate": 8.809620253164557e-06, + "loss": 1.7109, + "step": 2602 + }, + { + "epoch": 0.13, + "grad_norm": 0.045592901695699066, + "learning_rate": 8.80860759493671e-06, + "loss": 2.0273, + "step": 2604 + }, + { + "epoch": 0.13, + "grad_norm": 0.05395490767162895, + "learning_rate": 8.807594936708863e-06, + "loss": 4.0, + "step": 2606 + }, + { + "epoch": 0.13, + "grad_norm": 0.05350486279955657, + "learning_rate": 8.806582278481013e-06, + "loss": 3.9141, + "step": 2608 + }, + { + "epoch": 0.13, + "grad_norm": 0.04664756565512289, + "learning_rate": 8.805569620253165e-06, + "loss": 1.3555, + "step": 2610 + }, + { + "epoch": 0.13, + "grad_norm": 0.05610965889346614, + "learning_rate": 8.804556962025317e-06, + "loss": 3.7188, + "step": 2612 + }, + { + "epoch": 0.13, + "grad_norm": 0.099457180400491, + "learning_rate": 8.803544303797469e-06, + "loss": 2.6484, + "step": 2614 + }, + { + "epoch": 0.13, + "grad_norm": 0.05464773989198765, + "learning_rate": 8.80253164556962e-06, + "loss": 3.0469, + "step": 2616 + }, + { + "epoch": 0.13, + "grad_norm": 0.04492590979410205, + "learning_rate": 8.801518987341773e-06, + "loss": 2.5938, + "step": 2618 + }, + { + "epoch": 0.13, + "grad_norm": 0.06176717102157868, + "learning_rate": 8.800506329113924e-06, + "loss": 2.9453, + "step": 2620 + }, + { + "epoch": 0.13, + "grad_norm": 0.06297430356503986, + "learning_rate": 8.799493670886076e-06, + "loss": 1.5312, + "step": 2622 + }, + { + "epoch": 0.13, + "grad_norm": 0.07557707951512468, + "learning_rate": 8.798481012658228e-06, + "loss": 1.3008, + "step": 2624 + }, + { + "epoch": 0.13, + "grad_norm": 0.0828691249589272, + "learning_rate": 8.79746835443038e-06, + "loss": 3.4062, + "step": 2626 + }, + { + "epoch": 0.13, + "grad_norm": 0.08076606632694368, + "learning_rate": 8.796455696202532e-06, + "loss": 1.9844, + "step": 2628 + }, + { + "epoch": 0.13, + "grad_norm": 0.08741726827798928, + "learning_rate": 8.795443037974684e-06, + "loss": 1.6875, + "step": 2630 + }, + { + "epoch": 0.13, + "grad_norm": 0.0532597578727803, + "learning_rate": 8.794430379746836e-06, + "loss": 2.7031, + "step": 2632 + }, + { + "epoch": 0.13, + "grad_norm": 0.09681222962433436, + "learning_rate": 8.793417721518988e-06, + "loss": 2.0508, + "step": 2634 + }, + { + "epoch": 0.13, + "grad_norm": 0.05998016873778101, + "learning_rate": 8.79240506329114e-06, + "loss": 3.2266, + "step": 2636 + }, + { + "epoch": 0.13, + "grad_norm": 0.06809758398603295, + "learning_rate": 8.791392405063292e-06, + "loss": 1.3945, + "step": 2638 + }, + { + "epoch": 0.13, + "grad_norm": 0.07995097602847276, + "learning_rate": 8.790379746835444e-06, + "loss": 2.1094, + "step": 2640 + }, + { + "epoch": 0.13, + "grad_norm": 0.03780879612143424, + "learning_rate": 8.789367088607595e-06, + "loss": 3.1875, + "step": 2642 + }, + { + "epoch": 0.13, + "grad_norm": 0.05723412312437367, + "learning_rate": 8.788354430379747e-06, + "loss": 3.9297, + "step": 2644 + }, + { + "epoch": 0.13, + "grad_norm": 0.056302760632846574, + "learning_rate": 8.7873417721519e-06, + "loss": 2.4062, + "step": 2646 + }, + { + "epoch": 0.13, + "grad_norm": 0.047170378937362194, + "learning_rate": 8.786329113924051e-06, + "loss": 1.7383, + "step": 2648 + }, + { + "epoch": 0.13, + "grad_norm": 0.07832131159233159, + "learning_rate": 8.785316455696203e-06, + "loss": 1.0918, + "step": 2650 + }, + { + "epoch": 0.13, + "grad_norm": 0.10096020187857348, + "learning_rate": 8.784303797468355e-06, + "loss": 1.9297, + "step": 2652 + }, + { + "epoch": 0.13, + "grad_norm": 0.04348167647482904, + "learning_rate": 8.783291139240507e-06, + "loss": 2.168, + "step": 2654 + }, + { + "epoch": 0.13, + "grad_norm": 0.08173612172454091, + "learning_rate": 8.782278481012659e-06, + "loss": 2.9141, + "step": 2656 + }, + { + "epoch": 0.13, + "grad_norm": 0.04625064567488957, + "learning_rate": 8.78126582278481e-06, + "loss": 2.6484, + "step": 2658 + }, + { + "epoch": 0.13, + "grad_norm": 0.07516927701880775, + "learning_rate": 8.780253164556963e-06, + "loss": 4.1562, + "step": 2660 + }, + { + "epoch": 0.13, + "grad_norm": 0.046221992737433985, + "learning_rate": 8.779240506329115e-06, + "loss": 2.5312, + "step": 2662 + }, + { + "epoch": 0.13, + "grad_norm": 0.060891742062961385, + "learning_rate": 8.778227848101266e-06, + "loss": 2.832, + "step": 2664 + }, + { + "epoch": 0.13, + "grad_norm": 0.04855708235268953, + "learning_rate": 8.777215189873418e-06, + "loss": 3.1953, + "step": 2666 + }, + { + "epoch": 0.13, + "grad_norm": 0.05375008240335823, + "learning_rate": 8.77620253164557e-06, + "loss": 3.5312, + "step": 2668 + }, + { + "epoch": 0.13, + "grad_norm": 0.07130262494084699, + "learning_rate": 8.775189873417722e-06, + "loss": 2.5234, + "step": 2670 + }, + { + "epoch": 0.13, + "grad_norm": 0.05924101789563237, + "learning_rate": 8.774177215189874e-06, + "loss": 2.7031, + "step": 2672 + }, + { + "epoch": 0.13, + "grad_norm": 0.05020368159321286, + "learning_rate": 8.773164556962026e-06, + "loss": 2.4766, + "step": 2674 + }, + { + "epoch": 0.13, + "grad_norm": 0.0456321096310645, + "learning_rate": 8.772151898734178e-06, + "loss": 2.7578, + "step": 2676 + }, + { + "epoch": 0.13, + "grad_norm": 0.13466335645582303, + "learning_rate": 8.77113924050633e-06, + "loss": 1.0703, + "step": 2678 + }, + { + "epoch": 0.13, + "grad_norm": 0.04404695993934905, + "learning_rate": 8.770126582278482e-06, + "loss": 1.9277, + "step": 2680 + }, + { + "epoch": 0.13, + "grad_norm": 0.08263470157459507, + "learning_rate": 8.769113924050634e-06, + "loss": 2.3633, + "step": 2682 + }, + { + "epoch": 0.13, + "grad_norm": 0.05717499821710438, + "learning_rate": 8.768101265822786e-06, + "loss": 3.9766, + "step": 2684 + }, + { + "epoch": 0.13, + "grad_norm": 0.06888364635761927, + "learning_rate": 8.767088607594937e-06, + "loss": 1.6094, + "step": 2686 + }, + { + "epoch": 0.13, + "grad_norm": 0.09166897324708952, + "learning_rate": 8.76607594936709e-06, + "loss": 2.373, + "step": 2688 + }, + { + "epoch": 0.13, + "grad_norm": 0.04524041569108601, + "learning_rate": 8.765063291139241e-06, + "loss": 1.9453, + "step": 2690 + }, + { + "epoch": 0.13, + "grad_norm": 0.08062394717179651, + "learning_rate": 8.764050632911393e-06, + "loss": 2.2773, + "step": 2692 + }, + { + "epoch": 0.13, + "grad_norm": 0.12650775549602336, + "learning_rate": 8.763037974683545e-06, + "loss": 3.3828, + "step": 2694 + }, + { + "epoch": 0.13, + "grad_norm": 0.042981691741514276, + "learning_rate": 8.762025316455697e-06, + "loss": 3.0938, + "step": 2696 + }, + { + "epoch": 0.13, + "grad_norm": 0.05665027601956785, + "learning_rate": 8.761012658227849e-06, + "loss": 2.6094, + "step": 2698 + }, + { + "epoch": 0.14, + "grad_norm": 0.04791952921300498, + "learning_rate": 8.76e-06, + "loss": 2.1797, + "step": 2700 + }, + { + "epoch": 0.14, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 45.5078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 93.359375, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.796875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.97989654541016, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.7028, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.469, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.085, + "step": 2700 + }, + { + "epoch": 0.14, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 10.15625, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 24.21875, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.46875, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 22.30203628540039, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.6086, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 24.534, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.383, + "step": 2700 + }, + { + "epoch": 0.14, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 48.046875, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 96.484375, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.85546875, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 72.34609985351562, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.7236, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.968, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 2700 + }, + { + "epoch": 0.14, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 40.8203125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 84.765625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.984375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 63.08637237548828, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.5234, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.407, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 2700 + }, + { + "epoch": 0.14, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 43.1640625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 88.4765625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.015625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 66.65151977539062, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.0194, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.808, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 2700 + }, + { + "epoch": 0.14, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 95.1171875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.90335083007812, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.9697, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.834, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 2700 + }, + { + "epoch": 0.14, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 40.8203125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 84.5703125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.20475006103516, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.9022, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.87, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 2700 + }, + { + "epoch": 0.14, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 41.6015625, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 87.6953125, + "eval_fever_top15HN_validation.jsonl.gz_loss": 1.96875, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.48861694335938, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 15.8639, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 4.034, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.063, + "step": 2700 + }, + { + "epoch": 0.14, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 37.6953125, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 79.6875, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8359375, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 60.636932373046875, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 4.8688, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 13.145, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.205, + "step": 2700 + }, + { + "epoch": 0.14, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 43.9453125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 90.234375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.09375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 68.0864028930664, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 7.6733, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 8.341, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.13, + "step": 2700 + }, + { + "epoch": 0.14, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.5546875, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.015625, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.7109375, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.655029296875, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.9869, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.834, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.201, + "step": 2700 + }, + { + "epoch": 0.14, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 29.296875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 66.9921875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.859375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 51.773258209228516, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.4547, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.428, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.069, + "step": 2700 + }, + { + "epoch": 0.14, + "grad_norm": 0.07249195188316931, + "learning_rate": 8.758987341772153e-06, + "loss": 1.6055, + "step": 2702 + }, + { + "epoch": 0.14, + "grad_norm": 0.057633979419438035, + "learning_rate": 8.757974683544305e-06, + "loss": 3.25, + "step": 2704 + }, + { + "epoch": 0.14, + "grad_norm": 0.051040752051553895, + "learning_rate": 8.756962025316457e-06, + "loss": 1.9375, + "step": 2706 + }, + { + "epoch": 0.14, + "grad_norm": 0.07146589021940866, + "learning_rate": 8.755949367088608e-06, + "loss": 2.332, + "step": 2708 + }, + { + "epoch": 0.14, + "grad_norm": 0.04312966165838956, + "learning_rate": 8.75493670886076e-06, + "loss": 3.1016, + "step": 2710 + }, + { + "epoch": 0.14, + "grad_norm": 0.040291770833117316, + "learning_rate": 8.753924050632912e-06, + "loss": 1.8672, + "step": 2712 + }, + { + "epoch": 0.14, + "grad_norm": 0.04306660828359189, + "learning_rate": 8.752911392405064e-06, + "loss": 1.3164, + "step": 2714 + }, + { + "epoch": 0.14, + "grad_norm": 0.03797749676395882, + "learning_rate": 8.751898734177216e-06, + "loss": 2.5312, + "step": 2716 + }, + { + "epoch": 0.14, + "grad_norm": 0.08444159548591303, + "learning_rate": 8.750886075949368e-06, + "loss": 2.2188, + "step": 2718 + }, + { + "epoch": 0.14, + "grad_norm": 0.0380185965488091, + "learning_rate": 8.74987341772152e-06, + "loss": 3.2578, + "step": 2720 + }, + { + "epoch": 0.14, + "grad_norm": 0.06045313084504471, + "learning_rate": 8.748860759493672e-06, + "loss": 4.0703, + "step": 2722 + }, + { + "epoch": 0.14, + "grad_norm": 0.03966127813695506, + "learning_rate": 8.747848101265824e-06, + "loss": 2.2891, + "step": 2724 + }, + { + "epoch": 0.14, + "grad_norm": 0.055140091316243436, + "learning_rate": 8.746835443037976e-06, + "loss": 2.625, + "step": 2726 + }, + { + "epoch": 0.14, + "grad_norm": 0.05381506666307845, + "learning_rate": 8.745822784810128e-06, + "loss": 3.1719, + "step": 2728 + }, + { + "epoch": 0.14, + "grad_norm": 0.05021926655278173, + "learning_rate": 8.74481012658228e-06, + "loss": 2.6758, + "step": 2730 + }, + { + "epoch": 0.14, + "grad_norm": 0.04702400079967468, + "learning_rate": 8.743797468354431e-06, + "loss": 2.0078, + "step": 2732 + }, + { + "epoch": 0.14, + "grad_norm": 0.049133886013928964, + "learning_rate": 8.742784810126583e-06, + "loss": 1.3242, + "step": 2734 + }, + { + "epoch": 0.14, + "grad_norm": 0.04799777192883991, + "learning_rate": 8.741772151898735e-06, + "loss": 2.4375, + "step": 2736 + }, + { + "epoch": 0.14, + "grad_norm": 0.047888279471591924, + "learning_rate": 8.740759493670887e-06, + "loss": 2.4766, + "step": 2738 + }, + { + "epoch": 0.14, + "grad_norm": 0.05984335804292988, + "learning_rate": 8.739746835443037e-06, + "loss": 3.2969, + "step": 2740 + }, + { + "epoch": 0.14, + "grad_norm": 0.044926342534701, + "learning_rate": 8.738734177215191e-06, + "loss": 1.3516, + "step": 2742 + }, + { + "epoch": 0.14, + "grad_norm": 0.043959224671034046, + "learning_rate": 8.737721518987343e-06, + "loss": 1.3281, + "step": 2744 + }, + { + "epoch": 0.14, + "grad_norm": 0.0584937255169823, + "learning_rate": 8.736708860759495e-06, + "loss": 3.5547, + "step": 2746 + }, + { + "epoch": 0.14, + "grad_norm": 0.04613424912217633, + "learning_rate": 8.735696202531647e-06, + "loss": 2.4609, + "step": 2748 + }, + { + "epoch": 0.14, + "grad_norm": 0.0855576998250252, + "learning_rate": 8.734683544303799e-06, + "loss": 1.7539, + "step": 2750 + }, + { + "epoch": 0.14, + "grad_norm": 0.048632087568065925, + "learning_rate": 8.73367088607595e-06, + "loss": 1.5352, + "step": 2752 + }, + { + "epoch": 0.14, + "grad_norm": 0.04691725802992881, + "learning_rate": 8.732658227848102e-06, + "loss": 1.9258, + "step": 2754 + }, + { + "epoch": 0.14, + "grad_norm": 0.06921217510974044, + "learning_rate": 8.731645569620254e-06, + "loss": 0.8477, + "step": 2756 + }, + { + "epoch": 0.14, + "grad_norm": 0.047910364669956074, + "learning_rate": 8.730632911392406e-06, + "loss": 3.0391, + "step": 2758 + }, + { + "epoch": 0.14, + "grad_norm": 0.047306808743848086, + "learning_rate": 8.729620253164558e-06, + "loss": 3.2266, + "step": 2760 + }, + { + "epoch": 0.14, + "grad_norm": 0.05737040696259312, + "learning_rate": 8.72860759493671e-06, + "loss": 3.4531, + "step": 2762 + }, + { + "epoch": 0.14, + "grad_norm": 0.04767645208347041, + "learning_rate": 8.727594936708862e-06, + "loss": 1.3906, + "step": 2764 + }, + { + "epoch": 0.14, + "grad_norm": 0.05579559780102283, + "learning_rate": 8.726582278481014e-06, + "loss": 1.8477, + "step": 2766 + }, + { + "epoch": 0.14, + "grad_norm": 0.05636799699713594, + "learning_rate": 8.725569620253164e-06, + "loss": 3.5234, + "step": 2768 + }, + { + "epoch": 0.14, + "grad_norm": 0.046882581594394686, + "learning_rate": 8.724556962025316e-06, + "loss": 1.0723, + "step": 2770 + }, + { + "epoch": 0.14, + "grad_norm": 0.0750346728236882, + "learning_rate": 8.72354430379747e-06, + "loss": 1.7559, + "step": 2772 + }, + { + "epoch": 0.14, + "grad_norm": 0.06177951297877184, + "learning_rate": 8.722531645569621e-06, + "loss": 2.9297, + "step": 2774 + }, + { + "epoch": 0.14, + "grad_norm": 0.054191608152368814, + "learning_rate": 8.721518987341773e-06, + "loss": 3.3828, + "step": 2776 + }, + { + "epoch": 0.14, + "grad_norm": 0.04471650291574515, + "learning_rate": 8.720506329113925e-06, + "loss": 2.9141, + "step": 2778 + }, + { + "epoch": 0.14, + "grad_norm": 0.04413088584531061, + "learning_rate": 8.719493670886077e-06, + "loss": 3.2578, + "step": 2780 + }, + { + "epoch": 0.14, + "grad_norm": 0.05601101380153402, + "learning_rate": 8.718481012658229e-06, + "loss": 1.9375, + "step": 2782 + }, + { + "epoch": 0.14, + "grad_norm": 0.05927547007243715, + "learning_rate": 8.717468354430381e-06, + "loss": 3.6328, + "step": 2784 + }, + { + "epoch": 0.14, + "grad_norm": 0.05938647976436854, + "learning_rate": 8.716455696202533e-06, + "loss": 2.6367, + "step": 2786 + }, + { + "epoch": 0.14, + "grad_norm": 0.05064732975993634, + "learning_rate": 8.715443037974685e-06, + "loss": 2.5547, + "step": 2788 + }, + { + "epoch": 0.14, + "grad_norm": 0.0514953544556108, + "learning_rate": 8.714430379746837e-06, + "loss": 1.3711, + "step": 2790 + }, + { + "epoch": 0.14, + "grad_norm": 0.04363196503218346, + "learning_rate": 8.713417721518989e-06, + "loss": 2.668, + "step": 2792 + }, + { + "epoch": 0.14, + "grad_norm": 0.05115391091986226, + "learning_rate": 8.712405063291139e-06, + "loss": 2.5703, + "step": 2794 + }, + { + "epoch": 0.14, + "grad_norm": 0.06085458556991963, + "learning_rate": 8.71139240506329e-06, + "loss": 2.3867, + "step": 2796 + }, + { + "epoch": 0.14, + "grad_norm": 0.05528478699682591, + "learning_rate": 8.710379746835443e-06, + "loss": 3.1406, + "step": 2798 + }, + { + "epoch": 0.14, + "grad_norm": 0.05841235254993294, + "learning_rate": 8.709367088607595e-06, + "loss": 3.3203, + "step": 2800 + }, + { + "epoch": 0.14, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.53125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 92.3828125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.36053466796875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.2348, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.697, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, + "step": 2800 + }, + { + "epoch": 0.14, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.375, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 23.2421875, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4765625, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.60693359375, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.5021, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 25.579, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.4, + "step": 2800 + }, + { + "epoch": 0.14, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 41.9921875, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 88.0859375, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.875, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 66.19718170166016, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.0144, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.811, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 2800 + }, + { + "epoch": 0.14, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 40.0390625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 83.984375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.984375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 63.32621765136719, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.5473, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.399, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 2800 + }, + { + "epoch": 0.14, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.796875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 85.546875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.07398223876953, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.8712, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.887, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 2800 + }, + { + "epoch": 0.14, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.484375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.140625, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.9749755859375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.7689, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.943, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 2800 + }, + { + "epoch": 0.14, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 40.234375, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 84.5703125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 63.956912994384766, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.7182, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.971, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 2800 + }, + { + "epoch": 0.14, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.1875, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 89.2578125, + "eval_fever_top15HN_validation.jsonl.gz_loss": 2.015625, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.83201599121094, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.0975, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.976, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, + "step": 2800 + }, + { + "epoch": 0.14, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 38.8671875, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 82.03125, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.83203125, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 62.253936767578125, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.1807, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.353, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.193, + "step": 2800 + }, + { + "epoch": 0.14, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 43.359375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 89.453125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0859375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 67.39067077636719, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 7.625, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 8.393, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.131, + "step": 2800 + }, + { + "epoch": 0.14, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.7265625, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 92.3828125, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.87017059326172, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.0609, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.646, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.198, + "step": 2800 + }, + { + "epoch": 0.14, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 31.0546875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 68.75, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.85546875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 53.45774459838867, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.6129, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.38, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.068, + "step": 2800 + }, + { + "epoch": 0.14, + "grad_norm": 0.04261711767454922, + "learning_rate": 8.708354430379748e-06, + "loss": 2.4062, + "step": 2802 + }, + { + "epoch": 0.14, + "grad_norm": 0.04645713681303877, + "learning_rate": 8.7073417721519e-06, + "loss": 1.9062, + "step": 2804 + }, + { + "epoch": 0.14, + "grad_norm": 0.05664159305337188, + "learning_rate": 8.706329113924052e-06, + "loss": 3.8672, + "step": 2806 + }, + { + "epoch": 0.14, + "grad_norm": 0.043983600414410934, + "learning_rate": 8.705316455696204e-06, + "loss": 1.3203, + "step": 2808 + }, + { + "epoch": 0.14, + "grad_norm": 0.04588362018358865, + "learning_rate": 8.704303797468356e-06, + "loss": 3.2852, + "step": 2810 + }, + { + "epoch": 0.14, + "grad_norm": 0.07635983178014015, + "learning_rate": 8.703291139240508e-06, + "loss": 2.041, + "step": 2812 + }, + { + "epoch": 0.14, + "grad_norm": 0.0916584015456185, + "learning_rate": 8.70227848101266e-06, + "loss": 2.4844, + "step": 2814 + }, + { + "epoch": 0.14, + "grad_norm": 0.05892169492522577, + "learning_rate": 8.701265822784812e-06, + "loss": 2.0312, + "step": 2816 + }, + { + "epoch": 0.14, + "grad_norm": 0.051820325886520344, + "learning_rate": 8.700253164556963e-06, + "loss": 1.3828, + "step": 2818 + }, + { + "epoch": 0.14, + "grad_norm": 0.055710079179014056, + "learning_rate": 8.699240506329114e-06, + "loss": 2.9375, + "step": 2820 + }, + { + "epoch": 0.14, + "grad_norm": 0.056917768326981706, + "learning_rate": 8.698227848101266e-06, + "loss": 3.5938, + "step": 2822 + }, + { + "epoch": 0.14, + "grad_norm": 0.08770435781494222, + "learning_rate": 8.697215189873417e-06, + "loss": 2.3047, + "step": 2824 + }, + { + "epoch": 0.14, + "grad_norm": 0.04423307931390974, + "learning_rate": 8.69620253164557e-06, + "loss": 1.9961, + "step": 2826 + }, + { + "epoch": 0.14, + "grad_norm": 0.0748767172333099, + "learning_rate": 8.695189873417721e-06, + "loss": 2.9141, + "step": 2828 + }, + { + "epoch": 0.14, + "grad_norm": 0.056072194492391036, + "learning_rate": 8.694177215189873e-06, + "loss": 2.7031, + "step": 2830 + }, + { + "epoch": 0.14, + "grad_norm": 0.04389768509713057, + "learning_rate": 8.693164556962027e-06, + "loss": 2.75, + "step": 2832 + }, + { + "epoch": 0.14, + "grad_norm": 0.0476806419907698, + "learning_rate": 8.692151898734179e-06, + "loss": 2.6094, + "step": 2834 + }, + { + "epoch": 0.14, + "grad_norm": 0.04650770271213978, + "learning_rate": 8.69113924050633e-06, + "loss": 1.918, + "step": 2836 + }, + { + "epoch": 0.14, + "grad_norm": 0.05239431577433559, + "learning_rate": 8.690126582278483e-06, + "loss": 1.0547, + "step": 2838 + }, + { + "epoch": 0.14, + "grad_norm": 0.11572726759595672, + "learning_rate": 8.689113924050634e-06, + "loss": 2.2656, + "step": 2840 + }, + { + "epoch": 0.14, + "grad_norm": 0.04643675966012806, + "learning_rate": 8.688101265822786e-06, + "loss": 1.9629, + "step": 2842 + }, + { + "epoch": 0.14, + "grad_norm": 0.04629993854181596, + "learning_rate": 8.687088607594938e-06, + "loss": 3.2656, + "step": 2844 + }, + { + "epoch": 0.14, + "grad_norm": 0.12961856233480304, + "learning_rate": 8.68607594936709e-06, + "loss": 2.3281, + "step": 2846 + }, + { + "epoch": 0.14, + "grad_norm": 0.047928021614982726, + "learning_rate": 8.68506329113924e-06, + "loss": 3.6875, + "step": 2848 + }, + { + "epoch": 0.14, + "grad_norm": 0.04179233438347491, + "learning_rate": 8.684050632911392e-06, + "loss": 2.4961, + "step": 2850 + }, + { + "epoch": 0.14, + "grad_norm": 0.039675581911473946, + "learning_rate": 8.683037974683544e-06, + "loss": 3.1875, + "step": 2852 + }, + { + "epoch": 0.14, + "grad_norm": 0.05333055705211874, + "learning_rate": 8.682025316455696e-06, + "loss": 1.9219, + "step": 2854 + }, + { + "epoch": 0.14, + "grad_norm": 0.05709328519032197, + "learning_rate": 8.681012658227848e-06, + "loss": 3.9922, + "step": 2856 + }, + { + "epoch": 0.14, + "grad_norm": 0.061995079781907214, + "learning_rate": 8.68e-06, + "loss": 3.625, + "step": 2858 + }, + { + "epoch": 0.14, + "grad_norm": 0.04162847533794511, + "learning_rate": 8.678987341772152e-06, + "loss": 2.4844, + "step": 2860 + }, + { + "epoch": 0.14, + "grad_norm": 0.0519238633375782, + "learning_rate": 8.677974683544305e-06, + "loss": 3.3828, + "step": 2862 + }, + { + "epoch": 0.14, + "grad_norm": 0.05243794475973222, + "learning_rate": 8.676962025316457e-06, + "loss": 1.3984, + "step": 2864 + }, + { + "epoch": 0.14, + "grad_norm": 0.032479401549115876, + "learning_rate": 8.67594936708861e-06, + "loss": 4.0625, + "step": 2866 + }, + { + "epoch": 0.14, + "grad_norm": 0.05057630078935886, + "learning_rate": 8.674936708860761e-06, + "loss": 1.9375, + "step": 2868 + }, + { + "epoch": 0.14, + "grad_norm": 0.0478188685230481, + "learning_rate": 8.673924050632913e-06, + "loss": 3.1719, + "step": 2870 + }, + { + "epoch": 0.14, + "grad_norm": 0.05455466085242383, + "learning_rate": 8.672911392405065e-06, + "loss": 2.1309, + "step": 2872 + }, + { + "epoch": 0.14, + "grad_norm": 0.04743098061180723, + "learning_rate": 8.671898734177215e-06, + "loss": 3.2578, + "step": 2874 + }, + { + "epoch": 0.14, + "grad_norm": 0.06311851906083067, + "learning_rate": 8.670886075949367e-06, + "loss": 2.168, + "step": 2876 + }, + { + "epoch": 0.14, + "grad_norm": 0.04812334931394406, + "learning_rate": 8.669873417721519e-06, + "loss": 2.5859, + "step": 2878 + }, + { + "epoch": 0.14, + "grad_norm": 0.04399156255926814, + "learning_rate": 8.668860759493671e-06, + "loss": 1.9688, + "step": 2880 + }, + { + "epoch": 0.14, + "grad_norm": 0.047780502359174505, + "learning_rate": 8.667848101265823e-06, + "loss": 2.6445, + "step": 2882 + }, + { + "epoch": 0.14, + "grad_norm": 0.05398302519583386, + "learning_rate": 8.666835443037975e-06, + "loss": 2.373, + "step": 2884 + }, + { + "epoch": 0.14, + "grad_norm": 0.04777209339294052, + "learning_rate": 8.665822784810127e-06, + "loss": 1.8711, + "step": 2886 + }, + { + "epoch": 0.14, + "grad_norm": 0.05130103596515375, + "learning_rate": 8.664810126582279e-06, + "loss": 3.0156, + "step": 2888 + }, + { + "epoch": 0.14, + "grad_norm": 0.08383940227125532, + "learning_rate": 8.66379746835443e-06, + "loss": 2.1016, + "step": 2890 + }, + { + "epoch": 0.14, + "grad_norm": 0.07182262899450922, + "learning_rate": 8.662784810126584e-06, + "loss": 2.0508, + "step": 2892 + }, + { + "epoch": 0.14, + "grad_norm": 0.04364754140496318, + "learning_rate": 8.661772151898736e-06, + "loss": 2.3672, + "step": 2894 + }, + { + "epoch": 0.14, + "grad_norm": 0.04535774826607822, + "learning_rate": 8.660759493670888e-06, + "loss": 1.6797, + "step": 2896 + }, + { + "epoch": 0.14, + "grad_norm": 0.08776352222005823, + "learning_rate": 8.65974683544304e-06, + "loss": 3.0078, + "step": 2898 + }, + { + "epoch": 0.14, + "grad_norm": 0.08231854933995264, + "learning_rate": 8.65873417721519e-06, + "loss": 2.375, + "step": 2900 + }, + { + "epoch": 0.14, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 46.09375, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 93.359375, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.796875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 70.46411895751953, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.2188, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.705, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, + "step": 2900 + }, + { + "epoch": 0.14, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 6.640625, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 16.6015625, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.515625, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 17.565414428710938, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.5627, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 24.973, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.39, + "step": 2900 + }, + { + "epoch": 0.14, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 46.875, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 95.3125, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.85546875, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 71.68214416503906, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.8629, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.892, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 2900 + }, + { + "epoch": 0.14, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 38.671875, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 81.640625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.9921875, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 61.0409049987793, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.4689, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.423, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 2900 + }, + { + "epoch": 0.14, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.9921875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 86.5234375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.95207214355469, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.8349, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.907, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 2900 + }, + { + "epoch": 0.14, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.6796875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.7265625, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.13858032226562, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.8375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.905, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 2900 + }, + { + "epoch": 0.14, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 40.4296875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 84.765625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 63.133331298828125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.6538, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 6.007, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.094, + "step": 2900 + }, + { + "epoch": 0.14, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 39.6484375, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 86.5234375, + "eval_fever_top15HN_validation.jsonl.gz_loss": 1.84375, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.24149322509766, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.2411, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.941, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, + "step": 2900 + }, + { + "epoch": 0.14, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 35.15625, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 75.1953125, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.84765625, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 58.0799446105957, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.2428, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.207, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.191, + "step": 2900 + }, + { + "epoch": 0.14, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 42.96875, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 88.0859375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.09375, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 66.8700180053711, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 7.0597, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 9.066, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.142, + "step": 2900 + }, + { + "epoch": 0.14, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 42.3828125, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 89.453125, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 66.93702697753906, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.0233, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.741, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.199, + "step": 2900 + }, + { + "epoch": 0.14, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 31.0546875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 68.9453125, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.85546875, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 53.83131790161133, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.2282, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.498, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, + "step": 2900 + }, + { + "epoch": 0.15, + "grad_norm": 0.07477114924089268, + "learning_rate": 8.657721518987342e-06, + "loss": 3.375, + "step": 2902 + }, + { + "epoch": 0.15, + "grad_norm": 0.0432321120471421, + "learning_rate": 8.656708860759494e-06, + "loss": 2.7656, + "step": 2904 + }, + { + "epoch": 0.15, + "grad_norm": 0.044349297010555074, + "learning_rate": 8.655696202531646e-06, + "loss": 2.4688, + "step": 2906 + }, + { + "epoch": 0.15, + "grad_norm": 0.03877685385237317, + "learning_rate": 8.654683544303798e-06, + "loss": 3.6953, + "step": 2908 + }, + { + "epoch": 0.15, + "grad_norm": 0.058803880921406156, + "learning_rate": 8.65367088607595e-06, + "loss": 2.5781, + "step": 2910 + }, + { + "epoch": 0.15, + "grad_norm": 0.0411508309987447, + "learning_rate": 8.652658227848101e-06, + "loss": 2.1797, + "step": 2912 + }, + { + "epoch": 0.15, + "grad_norm": 0.04504128580073143, + "learning_rate": 8.651645569620253e-06, + "loss": 2.5, + "step": 2914 + }, + { + "epoch": 0.15, + "grad_norm": 0.060956641241486406, + "learning_rate": 8.650632911392405e-06, + "loss": 3.6406, + "step": 2916 + }, + { + "epoch": 0.15, + "grad_norm": 0.04308382117943785, + "learning_rate": 8.649620253164557e-06, + "loss": 2.9375, + "step": 2918 + }, + { + "epoch": 0.15, + "grad_norm": 0.07348782990907644, + "learning_rate": 8.648607594936709e-06, + "loss": 1.7793, + "step": 2920 + }, + { + "epoch": 0.15, + "grad_norm": 0.061247028730661686, + "learning_rate": 8.647594936708863e-06, + "loss": 1.832, + "step": 2922 + }, + { + "epoch": 0.15, + "grad_norm": 0.04503804713557941, + "learning_rate": 8.646582278481015e-06, + "loss": 3.0234, + "step": 2924 + }, + { + "epoch": 0.15, + "grad_norm": 0.1000457954971076, + "learning_rate": 8.645569620253166e-06, + "loss": 1.8047, + "step": 2926 + }, + { + "epoch": 0.15, + "grad_norm": 0.09482330121461713, + "learning_rate": 8.644556962025317e-06, + "loss": 2.293, + "step": 2928 + }, + { + "epoch": 0.15, + "grad_norm": 0.06873864478381334, + "learning_rate": 8.643544303797469e-06, + "loss": 2.1562, + "step": 2930 + }, + { + "epoch": 0.15, + "grad_norm": 0.029219417698263857, + "learning_rate": 8.64253164556962e-06, + "loss": 4.1484, + "step": 2932 + }, + { + "epoch": 0.15, + "grad_norm": 0.11640324428697353, + "learning_rate": 8.641518987341772e-06, + "loss": 0.9961, + "step": 2934 + }, + { + "epoch": 0.15, + "grad_norm": 0.04955319176202268, + "learning_rate": 8.640506329113924e-06, + "loss": 3.4688, + "step": 2936 + }, + { + "epoch": 0.15, + "grad_norm": 0.057930255428599024, + "learning_rate": 8.639493670886076e-06, + "loss": 4.4922, + "step": 2938 + }, + { + "epoch": 0.15, + "grad_norm": 0.04593005258256435, + "learning_rate": 8.638481012658228e-06, + "loss": 2.9219, + "step": 2940 + }, + { + "epoch": 0.15, + "grad_norm": 0.060304294210792975, + "learning_rate": 8.63746835443038e-06, + "loss": 3.0, + "step": 2942 + }, + { + "epoch": 0.15, + "grad_norm": 0.0218452468401511, + "learning_rate": 8.636455696202532e-06, + "loss": 3.5078, + "step": 2944 + }, + { + "epoch": 0.15, + "grad_norm": 0.0799725289881099, + "learning_rate": 8.635443037974684e-06, + "loss": 1.6738, + "step": 2946 + }, + { + "epoch": 0.15, + "grad_norm": 0.06799658145226696, + "learning_rate": 8.634430379746836e-06, + "loss": 2.9453, + "step": 2948 + }, + { + "epoch": 0.15, + "grad_norm": 0.04533609893838451, + "learning_rate": 8.633417721518988e-06, + "loss": 1.8516, + "step": 2950 + }, + { + "epoch": 0.15, + "grad_norm": 0.13776389909632097, + "learning_rate": 8.632405063291141e-06, + "loss": 2.3867, + "step": 2952 + }, + { + "epoch": 0.15, + "grad_norm": 0.09322318933903165, + "learning_rate": 8.631392405063292e-06, + "loss": 1.7012, + "step": 2954 + }, + { + "epoch": 0.15, + "grad_norm": 0.051294343484733905, + "learning_rate": 8.630379746835443e-06, + "loss": 3.4375, + "step": 2956 + }, + { + "epoch": 0.15, + "grad_norm": 0.05283734855406228, + "learning_rate": 8.629367088607595e-06, + "loss": 1.9297, + "step": 2958 + }, + { + "epoch": 0.15, + "grad_norm": 0.0446215051653574, + "learning_rate": 8.628354430379747e-06, + "loss": 1.8984, + "step": 2960 + }, + { + "epoch": 0.15, + "grad_norm": 0.028306240252279194, + "learning_rate": 8.627341772151899e-06, + "loss": 3.5508, + "step": 2962 + }, + { + "epoch": 0.15, + "grad_norm": 0.10700466459291955, + "learning_rate": 8.626329113924051e-06, + "loss": 3.2422, + "step": 2964 + }, + { + "epoch": 0.15, + "grad_norm": 0.04298498107222189, + "learning_rate": 8.625316455696203e-06, + "loss": 2.5859, + "step": 2966 + }, + { + "epoch": 0.15, + "grad_norm": 0.10153439939997064, + "learning_rate": 8.624303797468355e-06, + "loss": 2.2461, + "step": 2968 + }, + { + "epoch": 0.15, + "grad_norm": 0.08713454505120027, + "learning_rate": 8.623291139240507e-06, + "loss": 2.9453, + "step": 2970 + }, + { + "epoch": 0.15, + "grad_norm": 0.07463167478931523, + "learning_rate": 8.622278481012659e-06, + "loss": 0.9746, + "step": 2972 + }, + { + "epoch": 0.15, + "grad_norm": 0.045513612046108515, + "learning_rate": 8.62126582278481e-06, + "loss": 2.7578, + "step": 2974 + }, + { + "epoch": 0.15, + "grad_norm": 0.04592394120233233, + "learning_rate": 8.620253164556963e-06, + "loss": 3.0391, + "step": 2976 + }, + { + "epoch": 0.15, + "grad_norm": 0.041987217841938596, + "learning_rate": 8.619240506329114e-06, + "loss": 3.25, + "step": 2978 + }, + { + "epoch": 0.15, + "grad_norm": 0.04678244692335755, + "learning_rate": 8.618227848101266e-06, + "loss": 2.0391, + "step": 2980 + }, + { + "epoch": 0.15, + "grad_norm": 0.11298075232977826, + "learning_rate": 8.617215189873418e-06, + "loss": 3.1875, + "step": 2982 + }, + { + "epoch": 0.15, + "grad_norm": 0.05431368485278824, + "learning_rate": 8.61620253164557e-06, + "loss": 3.2578, + "step": 2984 + }, + { + "epoch": 0.15, + "grad_norm": 0.053505713526510804, + "learning_rate": 8.615189873417722e-06, + "loss": 2.8438, + "step": 2986 + }, + { + "epoch": 0.15, + "grad_norm": 0.04393738834145115, + "learning_rate": 8.614177215189874e-06, + "loss": 3.2891, + "step": 2988 + }, + { + "epoch": 0.15, + "grad_norm": 0.05246996599152096, + "learning_rate": 8.613164556962026e-06, + "loss": 2.5352, + "step": 2990 + }, + { + "epoch": 0.15, + "grad_norm": 0.10998424228866684, + "learning_rate": 8.612151898734178e-06, + "loss": 1.0117, + "step": 2992 + }, + { + "epoch": 0.15, + "grad_norm": 0.05296540867273297, + "learning_rate": 8.61113924050633e-06, + "loss": 4.0, + "step": 2994 + }, + { + "epoch": 0.15, + "grad_norm": 0.05825320064235262, + "learning_rate": 8.610126582278482e-06, + "loss": 1.9844, + "step": 2996 + }, + { + "epoch": 0.15, + "grad_norm": 0.04920198130300496, + "learning_rate": 8.609113924050633e-06, + "loss": 2.5703, + "step": 2998 + }, + { + "epoch": 0.15, + "grad_norm": 0.04230717274145532, + "learning_rate": 8.608101265822785e-06, + "loss": 1.9707, + "step": 3000 + }, + { + "epoch": 0.15, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 43.359375, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 90.0390625, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.26922607421875, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.3394, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.644, + "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.088, + "step": 3000 + }, + { + "epoch": 0.15, + "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.9609375, + "eval_specter_top15HN_validation.jsonl.gz_acc3": 23.828125, + "eval_specter_top15HN_validation.jsonl.gz_loss": 1.453125, + "eval_specter_top15HN_validation.jsonl.gz_mrr": 22.220108032226562, + "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.6716, + "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.956, + "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.374, + "step": 3000 + }, + { + "epoch": 0.15, + "eval_nq_top15HN_validation.jsonl.gz_acc1": 45.1171875, + "eval_nq_top15HN_validation.jsonl.gz_acc3": 93.1640625, + "eval_nq_top15HN_validation.jsonl.gz_loss": 0.86328125, + "eval_nq_top15HN_validation.jsonl.gz_mrr": 69.32588195800781, + "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.9789, + "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.829, + "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.091, + "step": 3000 + }, + { + "epoch": 0.15, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 37.890625, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 80.078125, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.99609375, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 60.68324279785156, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.535, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.403, + "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, + "step": 3000 + }, + { + "epoch": 0.15, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.96875, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 86.9140625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.015625, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.91932678222656, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.8405, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.904, + "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 3000 + }, + { + "epoch": 0.15, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.875, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.7265625, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.57400512695312, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.7451, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.956, + "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.093, + "step": 3000 + }, + { + "epoch": 0.15, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 40.8203125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 85.3515625, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.20136260986328, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.8702, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.888, + "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.092, + "step": 3000 + }, + { + "epoch": 0.15, + "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.578125, + "eval_fever_top15HN_validation.jsonl.gz_acc3": 89.0625, + "eval_fever_top15HN_validation.jsonl.gz_loss": 1.8359375, + "eval_fever_top15HN_validation.jsonl.gz_mrr": 67.19670867919922, + "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.3653, + "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.911, + "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.061, + "step": 3000 + }, + { + "epoch": 0.15, + "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 39.453125, + "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 83.3984375, + "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.82421875, + "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 62.63498306274414, + "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.5675, + "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 11.495, + "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.18, + "step": 3000 + }, + { + "epoch": 0.15, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 45.5078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 92.3828125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, + "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 69.95784759521484, + "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 8.1936, + "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 7.811, + "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.122, + "step": 3000 + }, + { + "epoch": 0.15, + "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.359375, + "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.40625, + "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, + "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.28853607177734, + "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.1568, + "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.411, + "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.194, + "step": 3000 + }, + { + "epoch": 0.15, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 29.1015625, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 68.359375, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.8515625, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 52.09654998779297, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.1945, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.509, + "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, + "step": 3000 + } + ], + "logging_steps": 2, + "max_steps": 20000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +}