|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6281078251766553, |
|
"eval_steps": 400, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 424.8911502777972, |
|
"learning_rate": 3.125e-08, |
|
"loss": 713.6646, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -3.0610547065734863, |
|
"rewards/margins": -0.43895024061203003, |
|
"rewards/rejected": -2.6221041679382324, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 403.8683081084132, |
|
"learning_rate": 6.25e-08, |
|
"loss": 717.3508, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.993378162384033, |
|
"rewards/margins": -0.3109555244445801, |
|
"rewards/rejected": -2.682422637939453, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 487.9238814591701, |
|
"learning_rate": 9.375e-08, |
|
"loss": 713.6135, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.5931520462036133, |
|
"rewards/margins": -0.21937386691570282, |
|
"rewards/rejected": -2.3737778663635254, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 540.6870493028796, |
|
"learning_rate": 1.25e-07, |
|
"loss": 712.8184, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -3.164547920227051, |
|
"rewards/margins": -0.7127091288566589, |
|
"rewards/rejected": -2.451838970184326, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 427.3180170525652, |
|
"learning_rate": 1.5625e-07, |
|
"loss": 707.0853, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.7615244388580322, |
|
"rewards/margins": -0.16162791848182678, |
|
"rewards/rejected": -2.599896192550659, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 383763.7480098094, |
|
"learning_rate": 1.875e-07, |
|
"loss": 715.4415, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -2.58443546295166, |
|
"rewards/margins": -0.2884238660335541, |
|
"rewards/rejected": -2.2960116863250732, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 439.43685355063843, |
|
"learning_rate": 2.1874999999999997e-07, |
|
"loss": 717.8594, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -2.9699530601501465, |
|
"rewards/margins": -0.2793353796005249, |
|
"rewards/rejected": -2.690617799758911, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 503.22488533065024, |
|
"learning_rate": 2.5e-07, |
|
"loss": 710.3533, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -2.8341801166534424, |
|
"rewards/margins": -0.11880241334438324, |
|
"rewards/rejected": -2.7153773307800293, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 1337.4413216082382, |
|
"learning_rate": 2.8125e-07, |
|
"loss": 711.881, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -2.8177173137664795, |
|
"rewards/margins": -0.19809791445732117, |
|
"rewards/rejected": -2.619619607925415, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 385.7756641011158, |
|
"learning_rate": 3.125e-07, |
|
"loss": 705.6052, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.5368785858154297, |
|
"rewards/margins": 0.30438369512557983, |
|
"rewards/rejected": -2.8412623405456543, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 422.3765258964566, |
|
"learning_rate": 3.4374999999999994e-07, |
|
"loss": 706.4469, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -3.026949882507324, |
|
"rewards/margins": -0.3166283369064331, |
|
"rewards/rejected": -2.7103216648101807, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 394.9174924028097, |
|
"learning_rate": 3.75e-07, |
|
"loss": 710.1363, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -2.814709424972534, |
|
"rewards/margins": -0.3439286947250366, |
|
"rewards/rejected": -2.470780849456787, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 590.9528736566529, |
|
"learning_rate": 4.0625000000000003e-07, |
|
"loss": 704.8263, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -3.221498489379883, |
|
"rewards/margins": -0.13808628916740417, |
|
"rewards/rejected": -3.083411931991577, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 666.685124573273, |
|
"learning_rate": 4.3749999999999994e-07, |
|
"loss": 709.7217, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -2.896270751953125, |
|
"rewards/margins": -0.23611800372600555, |
|
"rewards/rejected": -2.6601529121398926, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 422.81236685781573, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"loss": 701.5896, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -3.1475062370300293, |
|
"rewards/margins": -0.23899349570274353, |
|
"rewards/rejected": -2.908513069152832, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 1161.6272059916828, |
|
"learning_rate": 5e-07, |
|
"loss": 712.8695, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.549727439880371, |
|
"rewards/margins": 0.05642218515276909, |
|
"rewards/rejected": -2.606149673461914, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 691.6756141822095, |
|
"learning_rate": 5.3125e-07, |
|
"loss": 715.2848, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -2.962017774581909, |
|
"rewards/margins": -0.18228396773338318, |
|
"rewards/rejected": -2.779733896255493, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 557.7156194405, |
|
"learning_rate": 5.625e-07, |
|
"loss": 710.1722, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -3.1114089488983154, |
|
"rewards/margins": -0.5688842535018921, |
|
"rewards/rejected": -2.542525053024292, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 643.081848366494, |
|
"learning_rate": 5.9375e-07, |
|
"loss": 708.6848, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.728463888168335, |
|
"rewards/margins": 0.1268891543149948, |
|
"rewards/rejected": -2.8553528785705566, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 1749.0480774010928, |
|
"learning_rate": 5.999678242522831e-07, |
|
"loss": 712.9789, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -3.0832152366638184, |
|
"rewards/margins": -0.22872868180274963, |
|
"rewards/rejected": -2.8544864654541016, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 10514.346037549345, |
|
"learning_rate": 5.998371221059621e-07, |
|
"loss": 697.8367, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -3.205540895462036, |
|
"rewards/margins": -0.16175726056098938, |
|
"rewards/rejected": -3.043783187866211, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 2830.1739417475483, |
|
"learning_rate": 5.996059263493219e-07, |
|
"loss": 714.0083, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -3.1492106914520264, |
|
"rewards/margins": -0.41294175386428833, |
|
"rewards/rejected": -2.736268997192383, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 3697.295888208749, |
|
"learning_rate": 5.992743144700869e-07, |
|
"loss": 703.9895, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -3.2083535194396973, |
|
"rewards/margins": -0.05745415762066841, |
|
"rewards/rejected": -3.1508989334106445, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 1170.631987747208, |
|
"learning_rate": 5.988423976115163e-07, |
|
"loss": 921.5164, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -3.09690523147583, |
|
"rewards/margins": -0.07702343910932541, |
|
"rewards/rejected": -3.0198817253112793, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 1391.9151015605798, |
|
"learning_rate": 5.983103205351532e-07, |
|
"loss": 704.9495, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -3.0641629695892334, |
|
"rewards/margins": 0.2125791758298874, |
|
"rewards/rejected": -3.2767422199249268, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 39002.795574769065, |
|
"learning_rate": 5.976782615723061e-07, |
|
"loss": 728.894, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -3.5640883445739746, |
|
"rewards/margins": -0.004037248902022839, |
|
"rewards/rejected": -3.5600509643554688, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 1164.3315552399881, |
|
"learning_rate": 5.969464325642798e-07, |
|
"loss": 700.7844, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -3.116656541824341, |
|
"rewards/margins": 0.2637160122394562, |
|
"rewards/rejected": -3.3803725242614746, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 2549527.6361433878, |
|
"learning_rate": 5.961150787913738e-07, |
|
"loss": 1252.7453, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -3.451526641845703, |
|
"rewards/margins": -0.17801007628440857, |
|
"rewards/rejected": -3.2735161781311035, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 2301.1157939792593, |
|
"learning_rate": 5.951844788906746e-07, |
|
"loss": 749.2581, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -3.379659652709961, |
|
"rewards/margins": -0.495597779750824, |
|
"rewards/rejected": -2.884061813354492, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 1288.9212020876917, |
|
"learning_rate": 5.941549447626671e-07, |
|
"loss": 22400296550.4, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -3.371587038040161, |
|
"rewards/margins": 0.21983376145362854, |
|
"rewards/rejected": -3.5914206504821777, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 4169.091186018576, |
|
"learning_rate": 5.930268214666979e-07, |
|
"loss": 689.9577, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -3.393592119216919, |
|
"rewards/margins": 0.27183833718299866, |
|
"rewards/rejected": -3.6654305458068848, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 40987.876210824266, |
|
"learning_rate": 5.918004871053251e-07, |
|
"loss": 699.906, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -3.6145877838134766, |
|
"rewards/margins": 0.1472555547952652, |
|
"rewards/rejected": -3.76184344291687, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 3446.7363874580406, |
|
"learning_rate": 5.904763526975934e-07, |
|
"loss": 700.4801, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -3.6712746620178223, |
|
"rewards/margins": 0.03212170675396919, |
|
"rewards/rejected": -3.7033963203430176, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 4383.523843958487, |
|
"learning_rate": 5.890548620412763e-07, |
|
"loss": 696.9372, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -4.037501335144043, |
|
"rewards/margins": -0.23374083638191223, |
|
"rewards/rejected": -3.803760528564453, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 1968.8221017002966, |
|
"learning_rate": 5.875364915641322e-07, |
|
"loss": 693.6001, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -4.061675071716309, |
|
"rewards/margins": 0.16252286732196808, |
|
"rewards/rejected": -4.224198818206787, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 4165.273001929711, |
|
"learning_rate": 5.859217501642258e-07, |
|
"loss": 689.4774, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -3.6556944847106934, |
|
"rewards/margins": 0.33984482288360596, |
|
"rewards/rejected": -3.995539903640747, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 7680.7561691485025, |
|
"learning_rate": 5.842111790393642e-07, |
|
"loss": 690.4501, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -4.226962566375732, |
|
"rewards/margins": 0.14984741806983948, |
|
"rewards/rejected": -4.376810073852539, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 3514.664070908699, |
|
"learning_rate": 5.824053515057091e-07, |
|
"loss": 693.3683, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -3.8957512378692627, |
|
"rewards/margins": 0.18127045035362244, |
|
"rewards/rejected": -4.077021598815918, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 6684.674851679545, |
|
"learning_rate": 5.805048728056245e-07, |
|
"loss": 685.6387, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -4.011441707611084, |
|
"rewards/margins": 0.16933482885360718, |
|
"rewards/rejected": -4.180776596069336, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 2817.010612327531, |
|
"learning_rate": 5.785103799048218e-07, |
|
"loss": 691.3805, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -4.5704665184021, |
|
"rewards/margins": 0.05775844305753708, |
|
"rewards/rejected": -4.628224849700928, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 12460.132704854665, |
|
"learning_rate": 5.764225412788754e-07, |
|
"loss": 690.0626, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -4.749141693115234, |
|
"rewards/margins": 0.10000785440206528, |
|
"rewards/rejected": -4.849149703979492, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 3156.835029013167, |
|
"learning_rate": 5.742420566891749e-07, |
|
"loss": 679.2428, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -3.9751620292663574, |
|
"rewards/margins": 0.5410782098770142, |
|
"rewards/rejected": -4.516240119934082, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 4165.789445089526, |
|
"learning_rate": 5.719696569483936e-07, |
|
"loss": 679.8576, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -4.488650321960449, |
|
"rewards/margins": 0.339005708694458, |
|
"rewards/rejected": -4.827655792236328, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 7010.571587146665, |
|
"learning_rate": 5.696061036755478e-07, |
|
"loss": 685.5709, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -4.9668354988098145, |
|
"rewards/margins": 0.14022143185138702, |
|
"rewards/rejected": -5.107056617736816, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 4950.519059974548, |
|
"learning_rate": 5.671521890407327e-07, |
|
"loss": 680.7437, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -4.726534843444824, |
|
"rewards/margins": 0.5676447749137878, |
|
"rewards/rejected": -5.294179916381836, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 7887.8682965510425, |
|
"learning_rate": 5.64608735499618e-07, |
|
"loss": 674.3191, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -4.650925159454346, |
|
"rewards/margins": 0.5207107067108154, |
|
"rewards/rejected": -5.17163610458374, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 5967.489228784308, |
|
"learning_rate": 5.619765955177932e-07, |
|
"loss": 680.9146, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -4.723302841186523, |
|
"rewards/margins": 0.5448298454284668, |
|
"rewards/rejected": -5.268132209777832, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 4067.749182919556, |
|
"learning_rate": 5.592566512850545e-07, |
|
"loss": 677.9534, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -5.117281436920166, |
|
"rewards/margins": 0.6115970611572266, |
|
"rewards/rejected": -5.728878974914551, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 3495.5145356721982, |
|
"learning_rate": 5.564498144197293e-07, |
|
"loss": 681.9477, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -5.144923210144043, |
|
"rewards/margins": 0.386813759803772, |
|
"rewards/rejected": -5.531736850738525, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 4370.4657370961, |
|
"learning_rate": 5.535570256631384e-07, |
|
"loss": 679.4021, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -5.269853591918945, |
|
"rewards/margins": 0.39420580863952637, |
|
"rewards/rejected": -5.664059162139893, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 5546.329529459924, |
|
"learning_rate": 5.505792545642954e-07, |
|
"loss": 680.8774, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -5.6696882247924805, |
|
"rewards/margins": 0.1662217080593109, |
|
"rewards/rejected": -5.835909843444824, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 3396.1113411173433, |
|
"learning_rate": 5.475174991549528e-07, |
|
"loss": 680.5286, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -5.477304935455322, |
|
"rewards/margins": 0.6626185178756714, |
|
"rewards/rejected": -6.139924049377441, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 7509.706842299371, |
|
"learning_rate": 5.443727856151007e-07, |
|
"loss": 667.1712, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -5.695134162902832, |
|
"rewards/margins": 0.6776683330535889, |
|
"rewards/rejected": -6.372802734375, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 5167.959854781231, |
|
"learning_rate": 5.411461679290317e-07, |
|
"loss": 678.3353, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -5.676094055175781, |
|
"rewards/margins": 0.755618691444397, |
|
"rewards/rejected": -6.431711673736572, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 3674.961462097515, |
|
"learning_rate": 5.378387275320869e-07, |
|
"loss": 666.944, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -5.402568817138672, |
|
"rewards/margins": 0.7821658253669739, |
|
"rewards/rejected": -6.18473482131958, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 5634.831880478573, |
|
"learning_rate": 5.34451572948201e-07, |
|
"loss": 670.9914, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -6.517806053161621, |
|
"rewards/margins": 0.6073935627937317, |
|
"rewards/rejected": -7.125199794769287, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 10174.679987145297, |
|
"learning_rate": 5.309858394183691e-07, |
|
"loss": 674.4187, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -6.75530481338501, |
|
"rewards/margins": 0.6639969944953918, |
|
"rewards/rejected": -7.419301509857178, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 6705.843344302837, |
|
"learning_rate": 5.274426885201582e-07, |
|
"loss": 680.643, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -6.8337082862854, |
|
"rewards/margins": 0.32110291719436646, |
|
"rewards/rejected": -7.154810905456543, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 29305.105895087316, |
|
"learning_rate": 5.238233077783925e-07, |
|
"loss": 663.5017, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -5.573851585388184, |
|
"rewards/margins": 0.6479231715202332, |
|
"rewards/rejected": -6.221774578094482, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 4360.840909716472, |
|
"learning_rate": 5.201289102671411e-07, |
|
"loss": 673.6718, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -6.299983024597168, |
|
"rewards/margins": 0.7464480400085449, |
|
"rewards/rejected": -7.046431064605713, |
|
"step": 300 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 954, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|