|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 2907, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7182130584192438e-09, |
|
"logits/chosen": -1.8825098276138306, |
|
"logits/rejected": -1.6692813634872437, |
|
"logps/chosen": -107.98798370361328, |
|
"logps/rejected": -99.48463439941406, |
|
"loss": 1.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.718213058419244e-08, |
|
"logits/chosen": -1.912903904914856, |
|
"logits/rejected": -1.679023265838623, |
|
"logps/chosen": -232.9512481689453, |
|
"logps/rejected": -205.12588500976562, |
|
"loss": 1.0008, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": 0.004386746324598789, |
|
"rewards/margins": 0.0018502763705328107, |
|
"rewards/rejected": 0.0025364691391587257, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.436426116838488e-08, |
|
"logits/chosen": -2.007096529006958, |
|
"logits/rejected": -1.9176105260849, |
|
"logps/chosen": -270.76263427734375, |
|
"logps/rejected": -241.11474609375, |
|
"loss": 0.9998, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.013296608813107014, |
|
"rewards/margins": 0.011036807671189308, |
|
"rewards/rejected": 0.0022598025389015675, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.154639175257731e-08, |
|
"logits/chosen": -1.996591329574585, |
|
"logits/rejected": -1.9179508686065674, |
|
"logps/chosen": -264.7063903808594, |
|
"logps/rejected": -223.7097930908203, |
|
"loss": 1.0021, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0007220959523692727, |
|
"rewards/margins": 0.004284311085939407, |
|
"rewards/rejected": -0.0050064073875546455, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.872852233676976e-08, |
|
"logits/chosen": -2.0471560955047607, |
|
"logits/rejected": -1.9881904125213623, |
|
"logps/chosen": -309.6553955078125, |
|
"logps/rejected": -272.2703552246094, |
|
"loss": 0.9983, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.0036458049435168505, |
|
"rewards/margins": 0.0004331391828600317, |
|
"rewards/rejected": 0.003212666604667902, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.59106529209622e-08, |
|
"logits/chosen": -1.822840929031372, |
|
"logits/rejected": -1.8331642150878906, |
|
"logps/chosen": -333.86126708984375, |
|
"logps/rejected": -206.30355834960938, |
|
"loss": 1.0007, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.000272188161034137, |
|
"rewards/margins": 0.007083783857524395, |
|
"rewards/rejected": -0.006811595521867275, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0309278350515462e-07, |
|
"logits/chosen": -1.7849409580230713, |
|
"logits/rejected": -1.8800642490386963, |
|
"logps/chosen": -249.81332397460938, |
|
"logps/rejected": -225.6211395263672, |
|
"loss": 1.0016, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.005839993245899677, |
|
"rewards/margins": -0.003313907189294696, |
|
"rewards/rejected": -0.0025260853581130505, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.202749140893471e-07, |
|
"logits/chosen": -1.9829210042953491, |
|
"logits/rejected": -1.818284034729004, |
|
"logps/chosen": -328.28521728515625, |
|
"logps/rejected": -236.7754364013672, |
|
"loss": 0.9958, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.002883409382775426, |
|
"rewards/margins": 0.003594732377678156, |
|
"rewards/rejected": -0.0007113233441486955, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3745704467353952e-07, |
|
"logits/chosen": -1.973283052444458, |
|
"logits/rejected": -1.9369332790374756, |
|
"logps/chosen": -258.3551940917969, |
|
"logps/rejected": -212.8301544189453, |
|
"loss": 0.9948, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0014923412818461657, |
|
"rewards/margins": 0.004948171321302652, |
|
"rewards/rejected": -0.0034558300394564867, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5463917525773197e-07, |
|
"logits/chosen": -1.9635775089263916, |
|
"logits/rejected": -1.8522634506225586, |
|
"logps/chosen": -256.3603820800781, |
|
"logps/rejected": -208.6153106689453, |
|
"loss": 0.9963, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.003207797883078456, |
|
"rewards/margins": 0.00904668215662241, |
|
"rewards/rejected": -0.01225447840988636, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.718213058419244e-07, |
|
"logits/chosen": -1.7430416345596313, |
|
"logits/rejected": -1.8678181171417236, |
|
"logps/chosen": -271.9082946777344, |
|
"logps/rejected": -195.18521118164062, |
|
"loss": 0.9982, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.00269494135864079, |
|
"rewards/margins": 0.0009464768809266388, |
|
"rewards/rejected": 0.0017484650015830994, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8900343642611682e-07, |
|
"logits/chosen": -1.938391089439392, |
|
"logits/rejected": -1.8031425476074219, |
|
"logps/chosen": -250.1339874267578, |
|
"logps/rejected": -235.4950714111328, |
|
"loss": 0.9934, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.004927259869873524, |
|
"rewards/margins": 0.009144905023276806, |
|
"rewards/rejected": -0.004217647016048431, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0618556701030925e-07, |
|
"logits/chosen": -1.836554765701294, |
|
"logits/rejected": -1.8783756494522095, |
|
"logps/chosen": -319.69873046875, |
|
"logps/rejected": -234.7427215576172, |
|
"loss": 0.9934, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.002145239384844899, |
|
"rewards/margins": 0.012560705654323101, |
|
"rewards/rejected": -0.010415466502308846, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.2336769759450173e-07, |
|
"logits/chosen": -1.9726636409759521, |
|
"logits/rejected": -1.9809471368789673, |
|
"logps/chosen": -283.53985595703125, |
|
"logps/rejected": -239.5770721435547, |
|
"loss": 0.9926, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.012158048339188099, |
|
"rewards/margins": 0.019386615604162216, |
|
"rewards/rejected": -0.007228570524603128, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.405498281786942e-07, |
|
"logits/chosen": -2.024657726287842, |
|
"logits/rejected": -1.880409836769104, |
|
"logps/chosen": -269.13629150390625, |
|
"logps/rejected": -213.2104949951172, |
|
"loss": 0.9939, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.0044730305671691895, |
|
"rewards/margins": 0.00196995516307652, |
|
"rewards/rejected": 0.0025030761025846004, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5773195876288655e-07, |
|
"logits/chosen": -1.8670217990875244, |
|
"logits/rejected": -2.0206334590911865, |
|
"logps/chosen": -268.1510925292969, |
|
"logps/rejected": -225.56527709960938, |
|
"loss": 0.9891, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.011090939864516258, |
|
"rewards/margins": 0.014944592490792274, |
|
"rewards/rejected": -0.0038536519277840853, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7491408934707903e-07, |
|
"logits/chosen": -2.1929473876953125, |
|
"logits/rejected": -2.0768256187438965, |
|
"logps/chosen": -311.89215087890625, |
|
"logps/rejected": -241.4412078857422, |
|
"loss": 0.9796, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.014715956524014473, |
|
"rewards/margins": 0.023210834711790085, |
|
"rewards/rejected": -0.008494878187775612, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9209621993127146e-07, |
|
"logits/chosen": -2.0199875831604004, |
|
"logits/rejected": -2.028744697570801, |
|
"logps/chosen": -270.58197021484375, |
|
"logps/rejected": -221.40823364257812, |
|
"loss": 0.9859, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.008927601389586926, |
|
"rewards/margins": 0.004688750021159649, |
|
"rewards/rejected": 0.004238851368427277, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.0927835051546394e-07, |
|
"logits/chosen": -1.9151681661605835, |
|
"logits/rejected": -1.9035532474517822, |
|
"logps/chosen": -215.2420196533203, |
|
"logps/rejected": -195.22682189941406, |
|
"loss": 0.9779, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.010352599434554577, |
|
"rewards/margins": 0.02298773266375065, |
|
"rewards/rejected": -0.012635131366550922, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.2646048109965636e-07, |
|
"logits/chosen": -1.9542953968048096, |
|
"logits/rejected": -1.8753105401992798, |
|
"logps/chosen": -266.09234619140625, |
|
"logps/rejected": -196.09292602539062, |
|
"loss": 0.9728, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.01909886673092842, |
|
"rewards/margins": 0.028360243886709213, |
|
"rewards/rejected": -0.009261379018425941, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.436426116838488e-07, |
|
"logits/chosen": -1.8358113765716553, |
|
"logits/rejected": -1.9175758361816406, |
|
"logps/chosen": -221.7442169189453, |
|
"logps/rejected": -154.5623016357422, |
|
"loss": 0.9761, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.018382752314209938, |
|
"rewards/margins": 0.026139695197343826, |
|
"rewards/rejected": -0.007756945677101612, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.608247422680412e-07, |
|
"logits/chosen": -1.9191606044769287, |
|
"logits/rejected": -1.90776789188385, |
|
"logps/chosen": -299.2118225097656, |
|
"logps/rejected": -205.9092559814453, |
|
"loss": 0.9656, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.016829822212457657, |
|
"rewards/margins": 0.03627743944525719, |
|
"rewards/rejected": -0.01944761723279953, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.7800687285223364e-07, |
|
"logits/chosen": -1.827575445175171, |
|
"logits/rejected": -1.7433605194091797, |
|
"logps/chosen": -220.52767944335938, |
|
"logps/rejected": -210.1706085205078, |
|
"loss": 0.9578, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.00790109671652317, |
|
"rewards/margins": 0.03705426678061485, |
|
"rewards/rejected": -0.029153168201446533, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9518900343642607e-07, |
|
"logits/chosen": -1.8479468822479248, |
|
"logits/rejected": -1.7774213552474976, |
|
"logps/chosen": -235.8931427001953, |
|
"logps/rejected": -207.35610961914062, |
|
"loss": 0.9595, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.01835930533707142, |
|
"rewards/margins": 0.04197651147842407, |
|
"rewards/rejected": -0.023617204278707504, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.123711340206185e-07, |
|
"logits/chosen": -1.9261528253555298, |
|
"logits/rejected": -1.9167568683624268, |
|
"logps/chosen": -313.67254638671875, |
|
"logps/rejected": -203.87185668945312, |
|
"loss": 0.9514, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.016246426850557327, |
|
"rewards/margins": 0.03881000727415085, |
|
"rewards/rejected": -0.022563578560948372, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.2955326460481097e-07, |
|
"logits/chosen": -1.9955661296844482, |
|
"logits/rejected": -1.906306266784668, |
|
"logps/chosen": -255.4471435546875, |
|
"logps/rejected": -228.58023071289062, |
|
"loss": 0.945, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.02427186816930771, |
|
"rewards/margins": 0.06289727985858917, |
|
"rewards/rejected": -0.03862541541457176, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4673539518900345e-07, |
|
"logits/chosen": -1.961930274963379, |
|
"logits/rejected": -1.925752878189087, |
|
"logps/chosen": -267.53271484375, |
|
"logps/rejected": -198.67776489257812, |
|
"loss": 0.935, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.03963503614068031, |
|
"rewards/margins": 0.08463665097951889, |
|
"rewards/rejected": -0.045001622289419174, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.639175257731959e-07, |
|
"logits/chosen": -2.0039916038513184, |
|
"logits/rejected": -2.0741240978240967, |
|
"logps/chosen": -292.31549072265625, |
|
"logps/rejected": -226.5825958251953, |
|
"loss": 0.9226, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.00682613393291831, |
|
"rewards/margins": 0.052050817757844925, |
|
"rewards/rejected": -0.04522468149662018, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.810996563573884e-07, |
|
"logits/chosen": -1.9262123107910156, |
|
"logits/rejected": -1.8254365921020508, |
|
"logps/chosen": -271.57513427734375, |
|
"logps/rejected": -218.11032104492188, |
|
"loss": 0.9163, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.05085228011012077, |
|
"rewards/margins": 0.0943886935710907, |
|
"rewards/rejected": -0.04353641718626022, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.982817869415807e-07, |
|
"logits/chosen": -2.0598292350769043, |
|
"logits/rejected": -1.8968032598495483, |
|
"logps/chosen": -271.613037109375, |
|
"logps/rejected": -210.1744384765625, |
|
"loss": 0.9097, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.0635281652212143, |
|
"rewards/margins": 0.12082493305206299, |
|
"rewards/rejected": -0.0572967603802681, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.982798165137615e-07, |
|
"logits/chosen": -1.8396161794662476, |
|
"logits/rejected": -1.779496431350708, |
|
"logps/chosen": -227.2111358642578, |
|
"logps/rejected": -224.27841186523438, |
|
"loss": 0.8899, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.007174974773079157, |
|
"rewards/margins": 0.07386655360460281, |
|
"rewards/rejected": -0.06669158488512039, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.963685015290519e-07, |
|
"logits/chosen": -1.9974682331085205, |
|
"logits/rejected": -2.1091110706329346, |
|
"logps/chosen": -316.4252014160156, |
|
"logps/rejected": -260.0753173828125, |
|
"loss": 0.8829, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.04557584971189499, |
|
"rewards/margins": 0.1289386749267578, |
|
"rewards/rejected": -0.08336281031370163, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.944571865443424e-07, |
|
"logits/chosen": -1.846778154373169, |
|
"logits/rejected": -1.7403427362442017, |
|
"logps/chosen": -248.9180450439453, |
|
"logps/rejected": -179.14321899414062, |
|
"loss": 0.8495, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.02153083309531212, |
|
"rewards/margins": 0.15574803948402405, |
|
"rewards/rejected": -0.13421721756458282, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.92545871559633e-07, |
|
"logits/chosen": -1.9045463800430298, |
|
"logits/rejected": -1.9483264684677124, |
|
"logps/chosen": -325.5777282714844, |
|
"logps/rejected": -232.71499633789062, |
|
"loss": 0.8641, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.03281577304005623, |
|
"rewards/margins": 0.12244097143411636, |
|
"rewards/rejected": -0.08962519466876984, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.906345565749235e-07, |
|
"logits/chosen": -1.721255898475647, |
|
"logits/rejected": -1.6868213415145874, |
|
"logps/chosen": -236.89242553710938, |
|
"logps/rejected": -216.6375732421875, |
|
"loss": 0.8565, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.004082153085619211, |
|
"rewards/margins": 0.11858376115560532, |
|
"rewards/rejected": -0.11450158059597015, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.88723241590214e-07, |
|
"logits/chosen": -1.8765642642974854, |
|
"logits/rejected": -1.8209621906280518, |
|
"logps/chosen": -297.5986328125, |
|
"logps/rejected": -230.46334838867188, |
|
"loss": 0.8171, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.040931276977062225, |
|
"rewards/margins": 0.19416043162345886, |
|
"rewards/rejected": -0.15322914719581604, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.868119266055046e-07, |
|
"logits/chosen": -1.9383245706558228, |
|
"logits/rejected": -1.9298241138458252, |
|
"logps/chosen": -283.68145751953125, |
|
"logps/rejected": -255.94949340820312, |
|
"loss": 0.7955, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.003488479647785425, |
|
"rewards/margins": 0.1996344029903412, |
|
"rewards/rejected": -0.20312288403511047, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.849006116207951e-07, |
|
"logits/chosen": -2.060605764389038, |
|
"logits/rejected": -1.9366719722747803, |
|
"logps/chosen": -284.1970520019531, |
|
"logps/rejected": -244.7628936767578, |
|
"loss": 0.7797, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.06834305822849274, |
|
"rewards/margins": 0.258323609828949, |
|
"rewards/rejected": -0.18998056650161743, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.829892966360856e-07, |
|
"logits/chosen": -1.9930108785629272, |
|
"logits/rejected": -1.8641077280044556, |
|
"logps/chosen": -305.69573974609375, |
|
"logps/rejected": -261.52850341796875, |
|
"loss": 0.8123, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0743982270359993, |
|
"rewards/margins": 0.23559841513633728, |
|
"rewards/rejected": -0.1612001657485962, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.810779816513762e-07, |
|
"logits/chosen": -1.9459863901138306, |
|
"logits/rejected": -1.8753139972686768, |
|
"logps/chosen": -236.7461700439453, |
|
"logps/rejected": -215.2508087158203, |
|
"loss": 0.781, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.02323376014828682, |
|
"rewards/margins": 0.22391769289970398, |
|
"rewards/rejected": -0.2471514195203781, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.791666666666667e-07, |
|
"logits/chosen": -1.850494623184204, |
|
"logits/rejected": -1.8692734241485596, |
|
"logps/chosen": -270.0162658691406, |
|
"logps/rejected": -190.91690063476562, |
|
"loss": 0.7595, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.03287973254919052, |
|
"rewards/margins": 0.2878955900669098, |
|
"rewards/rejected": -0.2550157904624939, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.772553516819572e-07, |
|
"logits/chosen": -1.9037336111068726, |
|
"logits/rejected": -1.889154076576233, |
|
"logps/chosen": -283.4707336425781, |
|
"logps/rejected": -263.4888000488281, |
|
"loss": 0.7516, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.023483851924538612, |
|
"rewards/margins": 0.3119986057281494, |
|
"rewards/rejected": -0.2885147035121918, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.753440366972477e-07, |
|
"logits/chosen": -2.035512924194336, |
|
"logits/rejected": -1.9027979373931885, |
|
"logps/chosen": -223.506103515625, |
|
"logps/rejected": -222.5137176513672, |
|
"loss": 0.7745, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.08041305840015411, |
|
"rewards/margins": 0.13345971703529358, |
|
"rewards/rejected": -0.2138727903366089, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7343272171253825e-07, |
|
"logits/chosen": -1.9109729528427124, |
|
"logits/rejected": -1.9474281072616577, |
|
"logps/chosen": -241.5768280029297, |
|
"logps/rejected": -223.0868682861328, |
|
"loss": 0.7334, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.05294427275657654, |
|
"rewards/margins": 0.23036575317382812, |
|
"rewards/rejected": -0.28331005573272705, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.715214067278288e-07, |
|
"logits/chosen": -1.9270904064178467, |
|
"logits/rejected": -1.9717504978179932, |
|
"logps/chosen": -278.0482177734375, |
|
"logps/rejected": -216.4290008544922, |
|
"loss": 0.6853, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.031850665807724, |
|
"rewards/margins": 0.4009469151496887, |
|
"rewards/rejected": -0.43279749155044556, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.696100917431192e-07, |
|
"logits/chosen": -1.8353502750396729, |
|
"logits/rejected": -1.9710232019424438, |
|
"logps/chosen": -314.343505859375, |
|
"logps/rejected": -263.90374755859375, |
|
"loss": 0.7716, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0073336451314389706, |
|
"rewards/margins": 0.2296525537967682, |
|
"rewards/rejected": -0.22231896221637726, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.6769877675840974e-07, |
|
"logits/chosen": -1.8592478036880493, |
|
"logits/rejected": -1.7888505458831787, |
|
"logps/chosen": -253.55599975585938, |
|
"logps/rejected": -232.0161895751953, |
|
"loss": 0.6498, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.07120613008737564, |
|
"rewards/margins": 0.2692238688468933, |
|
"rewards/rejected": -0.34042999148368835, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6578746177370027e-07, |
|
"logits/chosen": -1.830445647239685, |
|
"logits/rejected": -1.8560142517089844, |
|
"logps/chosen": -196.4136962890625, |
|
"logps/rejected": -182.5649871826172, |
|
"loss": 0.6393, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.06338037550449371, |
|
"rewards/margins": 0.29189833998680115, |
|
"rewards/rejected": -0.35527873039245605, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.638761467889908e-07, |
|
"logits/chosen": -1.887717604637146, |
|
"logits/rejected": -1.8548129796981812, |
|
"logps/chosen": -274.1175537109375, |
|
"logps/rejected": -214.11209106445312, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.057955551892519, |
|
"rewards/margins": 0.43419212102890015, |
|
"rewards/rejected": -0.4921477437019348, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6196483180428133e-07, |
|
"logits/chosen": -1.9349689483642578, |
|
"logits/rejected": -1.9116013050079346, |
|
"logps/chosen": -269.4372863769531, |
|
"logps/rejected": -212.83779907226562, |
|
"loss": 0.6166, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.041172344237565994, |
|
"rewards/margins": 0.3499607443809509, |
|
"rewards/rejected": -0.39113301038742065, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.600535168195718e-07, |
|
"logits/chosen": -1.8402849435806274, |
|
"logits/rejected": -1.827770471572876, |
|
"logps/chosen": -213.6937255859375, |
|
"logps/rejected": -208.28866577148438, |
|
"loss": 0.6061, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.18305625021457672, |
|
"rewards/margins": 0.20005738735198975, |
|
"rewards/rejected": -0.3831135928630829, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.5814220183486234e-07, |
|
"logits/chosen": -1.8466438055038452, |
|
"logits/rejected": -1.8012769222259521, |
|
"logps/chosen": -292.2630310058594, |
|
"logps/rejected": -263.00091552734375, |
|
"loss": 0.5948, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.13879308104515076, |
|
"rewards/margins": 0.31510522961616516, |
|
"rewards/rejected": -0.45389825105667114, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.562308868501529e-07, |
|
"logits/chosen": -1.909223198890686, |
|
"logits/rejected": -1.7789217233657837, |
|
"logps/chosen": -255.5054931640625, |
|
"logps/rejected": -254.28439331054688, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.03209429234266281, |
|
"rewards/margins": 0.5229288935661316, |
|
"rewards/rejected": -0.555023193359375, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.543195718654434e-07, |
|
"logits/chosen": -1.7467231750488281, |
|
"logits/rejected": -1.8646119832992554, |
|
"logps/chosen": -221.7431640625, |
|
"logps/rejected": -200.1488494873047, |
|
"loss": 0.6115, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.2338542938232422, |
|
"rewards/margins": 0.2427963763475418, |
|
"rewards/rejected": -0.4766507148742676, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.5240825688073394e-07, |
|
"logits/chosen": -1.8729002475738525, |
|
"logits/rejected": -1.6796636581420898, |
|
"logps/chosen": -280.7690734863281, |
|
"logps/rejected": -226.85562133789062, |
|
"loss": 0.5931, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.08351187407970428, |
|
"rewards/margins": 0.5537512898445129, |
|
"rewards/rejected": -0.6372631788253784, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.504969418960244e-07, |
|
"logits/chosen": -2.0048041343688965, |
|
"logits/rejected": -1.637963056564331, |
|
"logps/chosen": -254.12939453125, |
|
"logps/rejected": -259.4322814941406, |
|
"loss": 0.5974, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.12823496758937836, |
|
"rewards/margins": 0.29864269495010376, |
|
"rewards/rejected": -0.4268776476383209, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.4858562691131495e-07, |
|
"logits/chosen": -2.012173652648926, |
|
"logits/rejected": -1.9192218780517578, |
|
"logps/chosen": -291.0408935546875, |
|
"logps/rejected": -280.9718933105469, |
|
"loss": 0.524, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.1138150691986084, |
|
"rewards/margins": 0.5268052816390991, |
|
"rewards/rejected": -0.6406203508377075, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.466743119266055e-07, |
|
"logits/chosen": -1.9362258911132812, |
|
"logits/rejected": -1.841904878616333, |
|
"logps/chosen": -275.77545166015625, |
|
"logps/rejected": -239.5581817626953, |
|
"loss": 0.5702, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2778164744377136, |
|
"rewards/margins": 0.3516896367073059, |
|
"rewards/rejected": -0.6295061111450195, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.44762996941896e-07, |
|
"logits/chosen": -1.8746612071990967, |
|
"logits/rejected": -1.979645013809204, |
|
"logps/chosen": -292.9163513183594, |
|
"logps/rejected": -251.6305694580078, |
|
"loss": 0.493, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.10857485234737396, |
|
"rewards/margins": 0.647341251373291, |
|
"rewards/rejected": -0.7559161186218262, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4285168195718655e-07, |
|
"logits/chosen": -1.7814861536026, |
|
"logits/rejected": -1.7613455057144165, |
|
"logps/chosen": -233.90591430664062, |
|
"logps/rejected": -235.93832397460938, |
|
"loss": 0.4851, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2671836018562317, |
|
"rewards/margins": 0.46907442808151245, |
|
"rewards/rejected": -0.7362579107284546, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.40940366972477e-07, |
|
"logits/chosen": -1.9857203960418701, |
|
"logits/rejected": -1.810185194015503, |
|
"logps/chosen": -234.3582763671875, |
|
"logps/rejected": -216.35922241210938, |
|
"loss": 0.514, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.29425790905952454, |
|
"rewards/margins": 0.4220006465911865, |
|
"rewards/rejected": -0.7162585258483887, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3902905198776756e-07, |
|
"logits/chosen": -1.8552411794662476, |
|
"logits/rejected": -1.839869499206543, |
|
"logps/chosen": -275.1326904296875, |
|
"logps/rejected": -226.4814910888672, |
|
"loss": 0.4751, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.1326485425233841, |
|
"rewards/margins": 0.5507789254188538, |
|
"rewards/rejected": -0.6834274530410767, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.371177370030581e-07, |
|
"logits/chosen": -1.9499883651733398, |
|
"logits/rejected": -1.8449184894561768, |
|
"logps/chosen": -274.458984375, |
|
"logps/rejected": -231.59921264648438, |
|
"loss": 0.3819, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2783154845237732, |
|
"rewards/margins": 0.6603595614433289, |
|
"rewards/rejected": -0.938675045967102, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.352064220183486e-07, |
|
"logits/chosen": -1.7157669067382812, |
|
"logits/rejected": -1.7650973796844482, |
|
"logps/chosen": -215.53311157226562, |
|
"logps/rejected": -200.57073974609375, |
|
"loss": 0.434, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.27303647994995117, |
|
"rewards/margins": 0.4611503481864929, |
|
"rewards/rejected": -0.7341868281364441, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.3329510703363915e-07, |
|
"logits/chosen": -1.8946468830108643, |
|
"logits/rejected": -1.8263728618621826, |
|
"logps/chosen": -255.1796417236328, |
|
"logps/rejected": -215.44821166992188, |
|
"loss": 0.3838, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2462444007396698, |
|
"rewards/margins": 0.8292306661605835, |
|
"rewards/rejected": -1.0754752159118652, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.313837920489297e-07, |
|
"logits/chosen": -1.8516194820404053, |
|
"logits/rejected": -1.8784716129302979, |
|
"logps/chosen": -231.55343627929688, |
|
"logps/rejected": -226.981201171875, |
|
"loss": 0.3954, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.19073012471199036, |
|
"rewards/margins": 0.6038515567779541, |
|
"rewards/rejected": -0.7945817112922668, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2947247706422016e-07, |
|
"logits/chosen": -1.8391790390014648, |
|
"logits/rejected": -1.9652955532073975, |
|
"logps/chosen": -252.66983032226562, |
|
"logps/rejected": -235.1903533935547, |
|
"loss": 0.3506, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.33843857049942017, |
|
"rewards/margins": 0.6845678091049194, |
|
"rewards/rejected": -1.0230063199996948, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.275611620795107e-07, |
|
"logits/chosen": -1.9520423412322998, |
|
"logits/rejected": -1.8214895725250244, |
|
"logps/chosen": -308.54522705078125, |
|
"logps/rejected": -243.960693359375, |
|
"loss": 0.3558, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.36050236225128174, |
|
"rewards/margins": 0.6172996759414673, |
|
"rewards/rejected": -0.9778021574020386, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.2564984709480123e-07, |
|
"logits/chosen": -1.9856923818588257, |
|
"logits/rejected": -1.8237674236297607, |
|
"logps/chosen": -291.28717041015625, |
|
"logps/rejected": -226.1394500732422, |
|
"loss": 0.4798, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.4318040907382965, |
|
"rewards/margins": 0.5425896644592285, |
|
"rewards/rejected": -0.9743936657905579, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.2373853211009176e-07, |
|
"logits/chosen": -2.0162062644958496, |
|
"logits/rejected": -1.994715690612793, |
|
"logps/chosen": -265.1205139160156, |
|
"logps/rejected": -243.013916015625, |
|
"loss": 0.4168, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4797751307487488, |
|
"rewards/margins": 0.790332555770874, |
|
"rewards/rejected": -1.2701075077056885, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2182721712538224e-07, |
|
"logits/chosen": -1.9402358531951904, |
|
"logits/rejected": -1.7293392419815063, |
|
"logps/chosen": -283.6700744628906, |
|
"logps/rejected": -226.171142578125, |
|
"loss": 0.3693, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.17032106220722198, |
|
"rewards/margins": 1.0015548467636108, |
|
"rewards/rejected": -1.171876072883606, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.199159021406727e-07, |
|
"logits/chosen": -1.9769999980926514, |
|
"logits/rejected": -1.6814262866973877, |
|
"logps/chosen": -243.1233367919922, |
|
"logps/rejected": -221.1171875, |
|
"loss": 0.3979, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5523873567581177, |
|
"rewards/margins": 0.596055269241333, |
|
"rewards/rejected": -1.1484426259994507, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1800458715596325e-07, |
|
"logits/chosen": -1.927851915359497, |
|
"logits/rejected": -1.8016504049301147, |
|
"logps/chosen": -311.8240966796875, |
|
"logps/rejected": -259.55096435546875, |
|
"loss": 0.3839, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5330373644828796, |
|
"rewards/margins": 0.41053658723831177, |
|
"rewards/rejected": -0.9435739517211914, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.160932721712538e-07, |
|
"logits/chosen": -1.934565544128418, |
|
"logits/rejected": -1.8886038064956665, |
|
"logps/chosen": -286.6680908203125, |
|
"logps/rejected": -258.78961181640625, |
|
"loss": 0.3174, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.48792019486427307, |
|
"rewards/margins": 0.4587516784667969, |
|
"rewards/rejected": -0.9466718435287476, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.141819571865443e-07, |
|
"logits/chosen": -1.7957875728607178, |
|
"logits/rejected": -1.8168989419937134, |
|
"logps/chosen": -255.51919555664062, |
|
"logps/rejected": -200.88294982910156, |
|
"loss": 0.2208, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5373031497001648, |
|
"rewards/margins": 0.7255297899246216, |
|
"rewards/rejected": -1.2628331184387207, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.1227064220183485e-07, |
|
"logits/chosen": -1.784045934677124, |
|
"logits/rejected": -1.66024649143219, |
|
"logps/chosen": -261.5906677246094, |
|
"logps/rejected": -236.45486450195312, |
|
"loss": 0.2914, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6871198415756226, |
|
"rewards/margins": 0.8158855438232422, |
|
"rewards/rejected": -1.5030055046081543, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.103593272171253e-07, |
|
"logits/chosen": -1.8295629024505615, |
|
"logits/rejected": -1.8517471551895142, |
|
"logps/chosen": -273.9855651855469, |
|
"logps/rejected": -268.47784423828125, |
|
"loss": 0.2635, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6461771130561829, |
|
"rewards/margins": 0.6871333122253418, |
|
"rewards/rejected": -1.3333103656768799, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.0844801223241586e-07, |
|
"logits/chosen": -1.7927148342132568, |
|
"logits/rejected": -1.8215078115463257, |
|
"logps/chosen": -280.751220703125, |
|
"logps/rejected": -242.97573852539062, |
|
"loss": 0.1871, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.48614630103111267, |
|
"rewards/margins": 0.6972783207893372, |
|
"rewards/rejected": -1.183424472808838, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.065366972477064e-07, |
|
"logits/chosen": -1.9858176708221436, |
|
"logits/rejected": -1.8561077117919922, |
|
"logps/chosen": -287.98516845703125, |
|
"logps/rejected": -252.1136932373047, |
|
"loss": 0.2167, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.689589262008667, |
|
"rewards/margins": 0.799708366394043, |
|
"rewards/rejected": -1.4892975091934204, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.046253822629969e-07, |
|
"logits/chosen": -1.8583405017852783, |
|
"logits/rejected": -1.8265085220336914, |
|
"logps/chosen": -265.4439697265625, |
|
"logps/rejected": -234.42135620117188, |
|
"loss": 0.0371, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.582925021648407, |
|
"rewards/margins": 0.8660266995429993, |
|
"rewards/rejected": -1.4489517211914062, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0271406727828745e-07, |
|
"logits/chosen": -1.8910176753997803, |
|
"logits/rejected": -1.8912360668182373, |
|
"logps/chosen": -259.268310546875, |
|
"logps/rejected": -229.13623046875, |
|
"loss": 0.2407, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8472847938537598, |
|
"rewards/margins": 0.5785714387893677, |
|
"rewards/rejected": -1.425856351852417, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.00802752293578e-07, |
|
"logits/chosen": -1.8274396657943726, |
|
"logits/rejected": -1.7036545276641846, |
|
"logps/chosen": -273.0886535644531, |
|
"logps/rejected": -241.332763671875, |
|
"loss": 0.0946, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7010241746902466, |
|
"rewards/margins": 0.7486527562141418, |
|
"rewards/rejected": -1.449676752090454, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.9889143730886847e-07, |
|
"logits/chosen": -2.0030179023742676, |
|
"logits/rejected": -1.938431978225708, |
|
"logps/chosen": -308.9624328613281, |
|
"logps/rejected": -252.2946319580078, |
|
"loss": 0.1224, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5951758623123169, |
|
"rewards/margins": 1.0006935596466064, |
|
"rewards/rejected": -1.5958693027496338, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.96980122324159e-07, |
|
"logits/chosen": -1.889276146888733, |
|
"logits/rejected": -1.7101726531982422, |
|
"logps/chosen": -264.00048828125, |
|
"logps/rejected": -214.24649047851562, |
|
"loss": -0.045, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6674525737762451, |
|
"rewards/margins": 1.1390091180801392, |
|
"rewards/rejected": -1.8064616918563843, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9506880733944953e-07, |
|
"logits/chosen": -1.877962350845337, |
|
"logits/rejected": -1.7040239572525024, |
|
"logps/chosen": -257.03582763671875, |
|
"logps/rejected": -227.9223175048828, |
|
"loss": 0.3336, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.072399377822876, |
|
"rewards/margins": 0.5846762657165527, |
|
"rewards/rejected": -1.6570755243301392, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9315749235474006e-07, |
|
"logits/chosen": -1.840431809425354, |
|
"logits/rejected": -1.892249345779419, |
|
"logps/chosen": -278.1147766113281, |
|
"logps/rejected": -272.54327392578125, |
|
"loss": 0.1259, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8901047706604004, |
|
"rewards/margins": 0.7304579019546509, |
|
"rewards/rejected": -1.6205627918243408, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.912461773700306e-07, |
|
"logits/chosen": -2.031358003616333, |
|
"logits/rejected": -1.8924942016601562, |
|
"logps/chosen": -300.32110595703125, |
|
"logps/rejected": -295.2864074707031, |
|
"loss": 0.0646, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8579466938972473, |
|
"rewards/margins": 1.062766432762146, |
|
"rewards/rejected": -1.920713186264038, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8933486238532107e-07, |
|
"logits/chosen": -1.6753685474395752, |
|
"logits/rejected": -1.6504751443862915, |
|
"logps/chosen": -313.80853271484375, |
|
"logps/rejected": -248.9676513671875, |
|
"loss": 0.0988, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7647205591201782, |
|
"rewards/margins": 1.3286702632904053, |
|
"rewards/rejected": -2.093390941619873, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.874235474006116e-07, |
|
"logits/chosen": -1.8231757879257202, |
|
"logits/rejected": -1.7661590576171875, |
|
"logps/chosen": -287.50677490234375, |
|
"logps/rejected": -241.2566375732422, |
|
"loss": -0.0319, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6531956195831299, |
|
"rewards/margins": 1.3242131471633911, |
|
"rewards/rejected": -1.9774086475372314, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8551223241590214e-07, |
|
"logits/chosen": -1.8550631999969482, |
|
"logits/rejected": -1.7476036548614502, |
|
"logps/chosen": -261.09033203125, |
|
"logps/rejected": -238.5321502685547, |
|
"loss": 0.1266, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9579564332962036, |
|
"rewards/margins": 1.210017442703247, |
|
"rewards/rejected": -2.1679739952087402, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.8360091743119267e-07, |
|
"logits/chosen": -1.9090309143066406, |
|
"logits/rejected": -1.6929905414581299, |
|
"logps/chosen": -267.00323486328125, |
|
"logps/rejected": -239.107177734375, |
|
"loss": -0.0438, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9265840649604797, |
|
"rewards/margins": 1.2149267196655273, |
|
"rewards/rejected": -2.1415107250213623, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.816896024464832e-07, |
|
"logits/chosen": -1.7493388652801514, |
|
"logits/rejected": -1.9231748580932617, |
|
"logps/chosen": -240.63134765625, |
|
"logps/rejected": -220.7272186279297, |
|
"loss": -0.0152, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0793166160583496, |
|
"rewards/margins": 0.8653289675712585, |
|
"rewards/rejected": -1.9446455240249634, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.797782874617737e-07, |
|
"logits/chosen": -1.9112157821655273, |
|
"logits/rejected": -2.013073682785034, |
|
"logps/chosen": -284.13824462890625, |
|
"logps/rejected": -243.91806030273438, |
|
"loss": -0.0423, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1301993131637573, |
|
"rewards/margins": 0.8508475422859192, |
|
"rewards/rejected": -1.9810469150543213, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.778669724770642e-07, |
|
"logits/chosen": -1.779415488243103, |
|
"logits/rejected": -1.688969612121582, |
|
"logps/chosen": -264.59710693359375, |
|
"logps/rejected": -212.7026824951172, |
|
"loss": -0.0335, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0757131576538086, |
|
"rewards/margins": 1.205359697341919, |
|
"rewards/rejected": -2.2810730934143066, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7595565749235474e-07, |
|
"logits/chosen": -1.9989725351333618, |
|
"logits/rejected": -1.9091949462890625, |
|
"logps/chosen": -287.1456604003906, |
|
"logps/rejected": -226.24771118164062, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8323071599006653, |
|
"rewards/margins": 1.2861956357955933, |
|
"rewards/rejected": -2.1185028553009033, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.740443425076452e-07, |
|
"logits/chosen": -1.9547460079193115, |
|
"logits/rejected": -1.8703094720840454, |
|
"logps/chosen": -287.35198974609375, |
|
"logps/rejected": -261.2727966308594, |
|
"loss": -0.0247, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3492201566696167, |
|
"rewards/margins": 0.8363786935806274, |
|
"rewards/rejected": -2.185598850250244, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7213302752293575e-07, |
|
"logits/chosen": -1.86129891872406, |
|
"logits/rejected": -1.7522859573364258, |
|
"logps/chosen": -273.3508605957031, |
|
"logps/rejected": -244.61569213867188, |
|
"loss": -0.2007, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0919667482376099, |
|
"rewards/margins": 0.9706255793571472, |
|
"rewards/rejected": -2.0625922679901123, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.097571611404419, |
|
"eval_logits/rejected": -1.9875798225402832, |
|
"eval_logps/chosen": -301.2187805175781, |
|
"eval_logps/rejected": -259.9261779785156, |
|
"eval_loss": -0.09876806288957596, |
|
"eval_rewards/accuracies": 0.6746031641960144, |
|
"eval_rewards/chosen": -1.1416146755218506, |
|
"eval_rewards/margins": 1.3577306270599365, |
|
"eval_rewards/rejected": -2.499345302581787, |
|
"eval_runtime": 238.6969, |
|
"eval_samples_per_second": 8.379, |
|
"eval_steps_per_second": 0.264, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.702217125382263e-07, |
|
"logits/chosen": -1.9615962505340576, |
|
"logits/rejected": -1.7775417566299438, |
|
"logps/chosen": -293.53924560546875, |
|
"logps/rejected": -264.0589599609375, |
|
"loss": -0.1153, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.265012264251709, |
|
"rewards/margins": 1.2176616191864014, |
|
"rewards/rejected": -2.4826738834381104, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.6831039755351677e-07, |
|
"logits/chosen": -1.7482898235321045, |
|
"logits/rejected": -1.872532844543457, |
|
"logps/chosen": -272.051025390625, |
|
"logps/rejected": -255.67703247070312, |
|
"loss": -0.1987, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.1273905038833618, |
|
"rewards/margins": 1.4816131591796875, |
|
"rewards/rejected": -2.609003782272339, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.663990825688073e-07, |
|
"logits/chosen": -1.8804614543914795, |
|
"logits/rejected": -1.6789354085922241, |
|
"logps/chosen": -262.64892578125, |
|
"logps/rejected": -241.96353149414062, |
|
"loss": 0.1311, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.6520445346832275, |
|
"rewards/margins": 0.846520721912384, |
|
"rewards/rejected": -2.4985649585723877, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6448776758409783e-07, |
|
"logits/chosen": -1.989675760269165, |
|
"logits/rejected": -1.7391383647918701, |
|
"logps/chosen": -288.917724609375, |
|
"logps/rejected": -266.159912109375, |
|
"loss": -0.0103, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2550522089004517, |
|
"rewards/margins": 1.0477917194366455, |
|
"rewards/rejected": -2.3028438091278076, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6257645259938836e-07, |
|
"logits/chosen": -1.848597526550293, |
|
"logits/rejected": -1.7234575748443604, |
|
"logps/chosen": -254.86672973632812, |
|
"logps/rejected": -258.8224792480469, |
|
"loss": -0.1996, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1828529834747314, |
|
"rewards/margins": 1.4569838047027588, |
|
"rewards/rejected": -2.6398367881774902, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.606651376146789e-07, |
|
"logits/chosen": -1.6995182037353516, |
|
"logits/rejected": -1.8116445541381836, |
|
"logps/chosen": -258.5903015136719, |
|
"logps/rejected": -209.5008544921875, |
|
"loss": -0.3461, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.5025261640548706, |
|
"rewards/margins": 1.1272718906402588, |
|
"rewards/rejected": -2.629798412322998, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.5875382262996937e-07, |
|
"logits/chosen": -1.7140220403671265, |
|
"logits/rejected": -1.7449703216552734, |
|
"logps/chosen": -283.09796142578125, |
|
"logps/rejected": -295.8421325683594, |
|
"loss": -0.232, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.183707594871521, |
|
"rewards/margins": 1.0434316396713257, |
|
"rewards/rejected": -2.2271392345428467, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.568425076452599e-07, |
|
"logits/chosen": -1.8618385791778564, |
|
"logits/rejected": -1.7049167156219482, |
|
"logps/chosen": -295.1578063964844, |
|
"logps/rejected": -260.01678466796875, |
|
"loss": -0.2474, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.6388660669326782, |
|
"rewards/margins": 0.968669056892395, |
|
"rewards/rejected": -2.6075356006622314, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.5493119266055044e-07, |
|
"logits/chosen": -1.7115428447723389, |
|
"logits/rejected": -1.6858937740325928, |
|
"logps/chosen": -273.44049072265625, |
|
"logps/rejected": -214.3227996826172, |
|
"loss": -0.3821, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.6126127243041992, |
|
"rewards/margins": 1.3140199184417725, |
|
"rewards/rejected": -2.9266326427459717, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5301987767584097e-07, |
|
"logits/chosen": -1.7064205408096313, |
|
"logits/rejected": -1.6017926931381226, |
|
"logps/chosen": -268.70416259765625, |
|
"logps/rejected": -272.87066650390625, |
|
"loss": -0.3771, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3269054889678955, |
|
"rewards/margins": 1.9102023839950562, |
|
"rewards/rejected": -3.237107515335083, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.511085626911315e-07, |
|
"logits/chosen": -1.8583990335464478, |
|
"logits/rejected": -1.5322113037109375, |
|
"logps/chosen": -333.03924560546875, |
|
"logps/rejected": -262.2927551269531, |
|
"loss": -0.4133, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.4196674823760986, |
|
"rewards/margins": 1.6081445217132568, |
|
"rewards/rejected": -3.0278122425079346, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.49197247706422e-07, |
|
"logits/chosen": -1.6976341009140015, |
|
"logits/rejected": -1.6782453060150146, |
|
"logps/chosen": -233.9385223388672, |
|
"logps/rejected": -220.5770263671875, |
|
"loss": -0.4013, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9680936336517334, |
|
"rewards/margins": 0.8472940325737, |
|
"rewards/rejected": -2.8153879642486572, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.472859327217125e-07, |
|
"logits/chosen": -1.7105554342269897, |
|
"logits/rejected": -1.4681594371795654, |
|
"logps/chosen": -300.76226806640625, |
|
"logps/rejected": -273.32696533203125, |
|
"loss": -0.3314, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.4938586950302124, |
|
"rewards/margins": 1.572718858718872, |
|
"rewards/rejected": -3.066577196121216, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4537461773700304e-07, |
|
"logits/chosen": -1.5856693983078003, |
|
"logits/rejected": -1.7974258661270142, |
|
"logps/chosen": -225.534423828125, |
|
"logps/rejected": -204.51913452148438, |
|
"loss": -0.311, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.020268201828003, |
|
"rewards/margins": 1.0005836486816406, |
|
"rewards/rejected": -3.0208516120910645, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.434633027522936e-07, |
|
"logits/chosen": -1.7942464351654053, |
|
"logits/rejected": -1.6902000904083252, |
|
"logps/chosen": -293.67706298828125, |
|
"logps/rejected": -255.2344512939453, |
|
"loss": -0.3927, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.663823127746582, |
|
"rewards/margins": 1.3553807735443115, |
|
"rewards/rejected": -3.0192039012908936, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.415519877675841e-07, |
|
"logits/chosen": -1.7715240716934204, |
|
"logits/rejected": -1.643781065940857, |
|
"logps/chosen": -252.25729370117188, |
|
"logps/rejected": -269.1502685546875, |
|
"loss": -0.4037, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5638118982315063, |
|
"rewards/margins": 1.3495436906814575, |
|
"rewards/rejected": -2.9133553504943848, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.3964067278287464e-07, |
|
"logits/chosen": -1.7634483575820923, |
|
"logits/rejected": -1.7055590152740479, |
|
"logps/chosen": -297.56658935546875, |
|
"logps/rejected": -240.73849487304688, |
|
"loss": -0.4873, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6788049936294556, |
|
"rewards/margins": 1.7267656326293945, |
|
"rewards/rejected": -3.4055705070495605, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.377293577981651e-07, |
|
"logits/chosen": -1.6601498126983643, |
|
"logits/rejected": -1.5715376138687134, |
|
"logps/chosen": -248.473388671875, |
|
"logps/rejected": -251.2434844970703, |
|
"loss": -0.6724, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.1505351066589355, |
|
"rewards/margins": 1.4957424402236938, |
|
"rewards/rejected": -3.646277666091919, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.3581804281345565e-07, |
|
"logits/chosen": -1.7773525714874268, |
|
"logits/rejected": -1.5578742027282715, |
|
"logps/chosen": -305.3437805175781, |
|
"logps/rejected": -243.5063018798828, |
|
"loss": -0.2898, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.0654540061950684, |
|
"rewards/margins": 1.2946364879608154, |
|
"rewards/rejected": -3.3600902557373047, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.339067278287462e-07, |
|
"logits/chosen": -1.5956556797027588, |
|
"logits/rejected": -1.5084911584854126, |
|
"logps/chosen": -269.35736083984375, |
|
"logps/rejected": -251.64675903320312, |
|
"loss": -0.5003, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.1167237758636475, |
|
"rewards/margins": 1.4275890588760376, |
|
"rewards/rejected": -3.5443129539489746, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.319954128440367e-07, |
|
"logits/chosen": -1.8844757080078125, |
|
"logits/rejected": -1.7271381616592407, |
|
"logps/chosen": -299.93609619140625, |
|
"logps/rejected": -243.735595703125, |
|
"loss": -0.5037, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.080448627471924, |
|
"rewards/margins": 1.4392142295837402, |
|
"rewards/rejected": -3.519662857055664, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.3008409785932725e-07, |
|
"logits/chosen": -1.6695266962051392, |
|
"logits/rejected": -1.7183201313018799, |
|
"logps/chosen": -266.7185974121094, |
|
"logps/rejected": -248.3719482421875, |
|
"loss": -0.5103, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2367124557495117, |
|
"rewards/margins": 1.7176882028579712, |
|
"rewards/rejected": -3.9544003009796143, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.2817278287461773e-07, |
|
"logits/chosen": -1.7001529932022095, |
|
"logits/rejected": -1.6251550912857056, |
|
"logps/chosen": -256.841064453125, |
|
"logps/rejected": -273.620849609375, |
|
"loss": -0.6167, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.5783565044403076, |
|
"rewards/margins": 1.5206773281097412, |
|
"rewards/rejected": -4.099034309387207, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.262614678899082e-07, |
|
"logits/chosen": -1.7264814376831055, |
|
"logits/rejected": -1.5964632034301758, |
|
"logps/chosen": -270.0255126953125, |
|
"logps/rejected": -285.0582580566406, |
|
"loss": -0.8394, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.6388707160949707, |
|
"rewards/margins": 1.2730156183242798, |
|
"rewards/rejected": -3.911886692047119, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2435015290519874e-07, |
|
"logits/chosen": -1.704097032546997, |
|
"logits/rejected": -1.581463098526001, |
|
"logps/chosen": -246.20022583007812, |
|
"logps/rejected": -232.1162872314453, |
|
"loss": -0.4922, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.0876688957214355, |
|
"rewards/margins": 1.698892593383789, |
|
"rewards/rejected": -3.7865612506866455, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2243883792048927e-07, |
|
"logits/chosen": -1.7485994100570679, |
|
"logits/rejected": -1.7491531372070312, |
|
"logps/chosen": -307.69842529296875, |
|
"logps/rejected": -313.2864685058594, |
|
"loss": -0.4846, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.471188545227051, |
|
"rewards/margins": 1.2810360193252563, |
|
"rewards/rejected": -3.7522246837615967, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.205275229357798e-07, |
|
"logits/chosen": -1.7512283325195312, |
|
"logits/rejected": -1.559390902519226, |
|
"logps/chosen": -316.75592041015625, |
|
"logps/rejected": -306.21160888671875, |
|
"loss": -0.5032, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.4955337047576904, |
|
"rewards/margins": 1.243558645248413, |
|
"rewards/rejected": -3.7390923500061035, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.186162079510703e-07, |
|
"logits/chosen": -1.6653553247451782, |
|
"logits/rejected": -1.6318597793579102, |
|
"logps/chosen": -300.54473876953125, |
|
"logps/rejected": -305.09423828125, |
|
"loss": -0.7512, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.3271665573120117, |
|
"rewards/margins": 1.914544701576233, |
|
"rewards/rejected": -4.241711616516113, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.167048929663608e-07, |
|
"logits/chosen": -1.6567821502685547, |
|
"logits/rejected": -1.5714284181594849, |
|
"logps/chosen": -250.9105682373047, |
|
"logps/rejected": -234.5415802001953, |
|
"loss": -0.7222, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.356020927429199, |
|
"rewards/margins": 1.9912364482879639, |
|
"rewards/rejected": -4.347257614135742, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.1479357798165134e-07, |
|
"logits/chosen": -1.7020305395126343, |
|
"logits/rejected": -1.78921377658844, |
|
"logps/chosen": -308.86163330078125, |
|
"logps/rejected": -269.3481140136719, |
|
"loss": -0.6017, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.404141902923584, |
|
"rewards/margins": 1.7603607177734375, |
|
"rewards/rejected": -4.1645026206970215, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.128822629969419e-07, |
|
"logits/chosen": -1.7761846780776978, |
|
"logits/rejected": -1.8085511922836304, |
|
"logps/chosen": -312.18280029296875, |
|
"logps/rejected": -287.8406677246094, |
|
"loss": -0.6923, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.880483388900757, |
|
"rewards/margins": 2.123534917831421, |
|
"rewards/rejected": -5.004018306732178, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.109709480122324e-07, |
|
"logits/chosen": -1.5170055627822876, |
|
"logits/rejected": -1.7044398784637451, |
|
"logps/chosen": -265.9775085449219, |
|
"logps/rejected": -256.2364501953125, |
|
"loss": -0.5623, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.711970806121826, |
|
"rewards/margins": 1.4848562479019165, |
|
"rewards/rejected": -4.196827411651611, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.0905963302752294e-07, |
|
"logits/chosen": -1.6849790811538696, |
|
"logits/rejected": -1.4959250688552856, |
|
"logps/chosen": -266.03338623046875, |
|
"logps/rejected": -265.19293212890625, |
|
"loss": -0.5952, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.8066792488098145, |
|
"rewards/margins": 1.4092586040496826, |
|
"rewards/rejected": -4.215937614440918, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.071483180428134e-07, |
|
"logits/chosen": -1.6794811487197876, |
|
"logits/rejected": -1.7896066904067993, |
|
"logps/chosen": -349.8259582519531, |
|
"logps/rejected": -259.8904113769531, |
|
"loss": -0.8813, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.7685770988464355, |
|
"rewards/margins": 2.0453274250030518, |
|
"rewards/rejected": -4.813904285430908, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0523700305810395e-07, |
|
"logits/chosen": -1.826123833656311, |
|
"logits/rejected": -1.6994121074676514, |
|
"logps/chosen": -292.24188232421875, |
|
"logps/rejected": -294.88665771484375, |
|
"loss": -0.82, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.468705892562866, |
|
"rewards/margins": 2.3952927589416504, |
|
"rewards/rejected": -4.863998889923096, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.033256880733945e-07, |
|
"logits/chosen": -1.6138427257537842, |
|
"logits/rejected": -1.5653212070465088, |
|
"logps/chosen": -272.35723876953125, |
|
"logps/rejected": -244.7309112548828, |
|
"loss": -1.0125, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -3.5213189125061035, |
|
"rewards/margins": 1.1809017658233643, |
|
"rewards/rejected": -4.702220439910889, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.01414373088685e-07, |
|
"logits/chosen": -1.678091287612915, |
|
"logits/rejected": -1.6383358240127563, |
|
"logps/chosen": -305.22039794921875, |
|
"logps/rejected": -270.16644287109375, |
|
"loss": -0.7008, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.750168800354004, |
|
"rewards/margins": 1.9968726634979248, |
|
"rewards/rejected": -4.747041702270508, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.9950305810397555e-07, |
|
"logits/chosen": -1.8723211288452148, |
|
"logits/rejected": -1.6131559610366821, |
|
"logps/chosen": -302.5225524902344, |
|
"logps/rejected": -265.1141662597656, |
|
"loss": -0.8252, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -3.2168407440185547, |
|
"rewards/margins": 1.872542381286621, |
|
"rewards/rejected": -5.089383125305176, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.9759174311926603e-07, |
|
"logits/chosen": -1.6552165746688843, |
|
"logits/rejected": -1.5474542379379272, |
|
"logps/chosen": -300.1455383300781, |
|
"logps/rejected": -269.47235107421875, |
|
"loss": -0.8361, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -3.7245125770568848, |
|
"rewards/margins": 1.281724214553833, |
|
"rewards/rejected": -5.006236553192139, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9568042813455656e-07, |
|
"logits/chosen": -1.6618553400039673, |
|
"logits/rejected": -1.667223334312439, |
|
"logps/chosen": -310.4003601074219, |
|
"logps/rejected": -243.12240600585938, |
|
"loss": -0.731, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -3.615830659866333, |
|
"rewards/margins": 1.214110016822815, |
|
"rewards/rejected": -4.829940319061279, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.937691131498471e-07, |
|
"logits/chosen": -1.7880207300186157, |
|
"logits/rejected": -1.6480613946914673, |
|
"logps/chosen": -316.4659729003906, |
|
"logps/rejected": -285.7911682128906, |
|
"loss": -0.7485, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -3.4602973461151123, |
|
"rewards/margins": 1.8094520568847656, |
|
"rewards/rejected": -5.269749641418457, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.918577981651376e-07, |
|
"logits/chosen": -1.6994645595550537, |
|
"logits/rejected": -1.5988848209381104, |
|
"logps/chosen": -261.1907958984375, |
|
"logps/rejected": -270.0497741699219, |
|
"loss": -0.6992, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.9491472244262695, |
|
"rewards/margins": 1.7523548603057861, |
|
"rewards/rejected": -4.701501846313477, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.8994648318042816e-07, |
|
"logits/chosen": -1.7593275308609009, |
|
"logits/rejected": -1.6046631336212158, |
|
"logps/chosen": -299.09637451171875, |
|
"logps/rejected": -268.380859375, |
|
"loss": -0.9355, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.154327630996704, |
|
"rewards/margins": 2.1164073944091797, |
|
"rewards/rejected": -5.2707343101501465, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8803516819571863e-07, |
|
"logits/chosen": -1.7734966278076172, |
|
"logits/rejected": -1.5951334238052368, |
|
"logps/chosen": -347.52239990234375, |
|
"logps/rejected": -293.4967346191406, |
|
"loss": -0.7492, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -3.558985948562622, |
|
"rewards/margins": 1.6943881511688232, |
|
"rewards/rejected": -5.253373622894287, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.8612385321100917e-07, |
|
"logits/chosen": -1.573909044265747, |
|
"logits/rejected": -1.3949878215789795, |
|
"logps/chosen": -270.31292724609375, |
|
"logps/rejected": -276.7618408203125, |
|
"loss": -1.0184, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -3.927518129348755, |
|
"rewards/margins": 1.0909746885299683, |
|
"rewards/rejected": -5.018492221832275, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.842125382262997e-07, |
|
"logits/chosen": -1.5700122117996216, |
|
"logits/rejected": -1.4677519798278809, |
|
"logps/chosen": -285.81103515625, |
|
"logps/rejected": -269.4806823730469, |
|
"loss": -1.0111, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -3.3703293800354004, |
|
"rewards/margins": 1.9672693014144897, |
|
"rewards/rejected": -5.3375983238220215, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8230122324159023e-07, |
|
"logits/chosen": -1.5128788948059082, |
|
"logits/rejected": -1.475921869277954, |
|
"logps/chosen": -321.65020751953125, |
|
"logps/rejected": -334.950439453125, |
|
"loss": -1.0548, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -3.5967164039611816, |
|
"rewards/margins": 3.013796806335449, |
|
"rewards/rejected": -6.610513210296631, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.8038990825688076e-07, |
|
"logits/chosen": -1.5578891038894653, |
|
"logits/rejected": -1.5609424114227295, |
|
"logps/chosen": -261.49920654296875, |
|
"logps/rejected": -235.4161376953125, |
|
"loss": -0.9126, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -4.372951984405518, |
|
"rewards/margins": 0.7127580642700195, |
|
"rewards/rejected": -5.085709571838379, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.784785932721712e-07, |
|
"logits/chosen": -1.636566400527954, |
|
"logits/rejected": -1.4580261707305908, |
|
"logps/chosen": -304.53240966796875, |
|
"logps/rejected": -282.1325378417969, |
|
"loss": -0.9912, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -3.88989520072937, |
|
"rewards/margins": 2.1614978313446045, |
|
"rewards/rejected": -6.051393032073975, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.765672782874617e-07, |
|
"logits/chosen": -1.5655293464660645, |
|
"logits/rejected": -1.485058069229126, |
|
"logps/chosen": -318.0874938964844, |
|
"logps/rejected": -259.27130126953125, |
|
"loss": -0.908, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -4.269219398498535, |
|
"rewards/margins": 1.588846206665039, |
|
"rewards/rejected": -5.858065605163574, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.7465596330275225e-07, |
|
"logits/chosen": -1.5867637395858765, |
|
"logits/rejected": -1.6024789810180664, |
|
"logps/chosen": -312.49591064453125, |
|
"logps/rejected": -236.5067596435547, |
|
"loss": -0.9474, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -3.954392910003662, |
|
"rewards/margins": 2.293760061264038, |
|
"rewards/rejected": -6.248152732849121, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.727446483180428e-07, |
|
"logits/chosen": -1.7006601095199585, |
|
"logits/rejected": -1.5267009735107422, |
|
"logps/chosen": -298.12469482421875, |
|
"logps/rejected": -292.24395751953125, |
|
"loss": -1.2885, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -4.79081392288208, |
|
"rewards/margins": 1.9046157598495483, |
|
"rewards/rejected": -6.695429801940918, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.708333333333333e-07, |
|
"logits/chosen": -1.4423596858978271, |
|
"logits/rejected": -1.6754871606826782, |
|
"logps/chosen": -288.9786376953125, |
|
"logps/rejected": -286.6379699707031, |
|
"loss": -1.1448, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -4.346983909606934, |
|
"rewards/margins": 1.7372324466705322, |
|
"rewards/rejected": -6.084217071533203, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.6892201834862385e-07, |
|
"logits/chosen": -1.5492124557495117, |
|
"logits/rejected": -1.4746440649032593, |
|
"logps/chosen": -301.6089782714844, |
|
"logps/rejected": -268.57745361328125, |
|
"loss": -1.1715, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -4.268374443054199, |
|
"rewards/margins": 2.3135557174682617, |
|
"rewards/rejected": -6.581930637359619, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6701070336391433e-07, |
|
"logits/chosen": -1.6524032354354858, |
|
"logits/rejected": -1.7148220539093018, |
|
"logps/chosen": -358.600830078125, |
|
"logps/rejected": -300.6563720703125, |
|
"loss": -1.2609, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -4.343426704406738, |
|
"rewards/margins": 2.409090280532837, |
|
"rewards/rejected": -6.752516746520996, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6509938837920486e-07, |
|
"logits/chosen": -1.6305720806121826, |
|
"logits/rejected": -1.4456074237823486, |
|
"logps/chosen": -276.3116455078125, |
|
"logps/rejected": -281.0287780761719, |
|
"loss": -1.2401, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -4.514157295227051, |
|
"rewards/margins": 2.1482510566711426, |
|
"rewards/rejected": -6.662408351898193, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.631880733944954e-07, |
|
"logits/chosen": -1.529931664466858, |
|
"logits/rejected": -1.4439135789871216, |
|
"logps/chosen": -299.05157470703125, |
|
"logps/rejected": -275.3011474609375, |
|
"loss": -1.5077, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -4.548555850982666, |
|
"rewards/margins": 1.7041714191436768, |
|
"rewards/rejected": -6.252727508544922, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.612767584097859e-07, |
|
"logits/chosen": -1.629601240158081, |
|
"logits/rejected": -1.5165350437164307, |
|
"logps/chosen": -306.07647705078125, |
|
"logps/rejected": -262.46044921875, |
|
"loss": -1.1474, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -4.669868469238281, |
|
"rewards/margins": 1.6571537256240845, |
|
"rewards/rejected": -6.327021598815918, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.5936544342507646e-07, |
|
"logits/chosen": -1.682499647140503, |
|
"logits/rejected": -1.3376775979995728, |
|
"logps/chosen": -307.9109802246094, |
|
"logps/rejected": -296.7003479003906, |
|
"loss": -1.174, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -4.40157413482666, |
|
"rewards/margins": 2.6601130962371826, |
|
"rewards/rejected": -7.061687469482422, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5745412844036693e-07, |
|
"logits/chosen": -1.6591355800628662, |
|
"logits/rejected": -1.5533663034439087, |
|
"logps/chosen": -335.03057861328125, |
|
"logps/rejected": -287.7520751953125, |
|
"loss": -1.3877, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -5.175102710723877, |
|
"rewards/margins": 2.255643844604492, |
|
"rewards/rejected": -7.430747032165527, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5554281345565747e-07, |
|
"logits/chosen": -1.6894855499267578, |
|
"logits/rejected": -1.586287260055542, |
|
"logps/chosen": -304.94989013671875, |
|
"logps/rejected": -305.7507019042969, |
|
"loss": -1.3946, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -4.844755172729492, |
|
"rewards/margins": 2.5443403720855713, |
|
"rewards/rejected": -7.389095306396484, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.53631498470948e-07, |
|
"logits/chosen": -1.593643069267273, |
|
"logits/rejected": -1.5452083349227905, |
|
"logps/chosen": -339.3976135253906, |
|
"logps/rejected": -294.7111511230469, |
|
"loss": -1.556, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -5.678981781005859, |
|
"rewards/margins": 1.9970118999481201, |
|
"rewards/rejected": -7.6759934425354, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5172018348623853e-07, |
|
"logits/chosen": -1.5461888313293457, |
|
"logits/rejected": -1.408332109451294, |
|
"logps/chosen": -336.61553955078125, |
|
"logps/rejected": -301.59759521484375, |
|
"loss": -1.0556, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -5.15664005279541, |
|
"rewards/margins": 2.157421827316284, |
|
"rewards/rejected": -7.314061164855957, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.4980886850152906e-07, |
|
"logits/chosen": -1.5243330001831055, |
|
"logits/rejected": -1.5529712438583374, |
|
"logps/chosen": -333.6750793457031, |
|
"logps/rejected": -288.2501525878906, |
|
"loss": -1.1851, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -5.502475738525391, |
|
"rewards/margins": 1.9487812519073486, |
|
"rewards/rejected": -7.451257228851318, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.478975535168196e-07, |
|
"logits/chosen": -1.5254369974136353, |
|
"logits/rejected": -1.4977315664291382, |
|
"logps/chosen": -316.21868896484375, |
|
"logps/rejected": -277.3542175292969, |
|
"loss": -1.6845, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -5.898019790649414, |
|
"rewards/margins": 1.867455244064331, |
|
"rewards/rejected": -7.765474796295166, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.459862385321101e-07, |
|
"logits/chosen": -1.747807502746582, |
|
"logits/rejected": -1.5742764472961426, |
|
"logps/chosen": -369.40728759765625, |
|
"logps/rejected": -318.35821533203125, |
|
"loss": -1.0463, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -6.141237735748291, |
|
"rewards/margins": 1.5390453338623047, |
|
"rewards/rejected": -7.680283546447754, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.440749235474006e-07, |
|
"logits/chosen": -1.5898773670196533, |
|
"logits/rejected": -1.5242546796798706, |
|
"logps/chosen": -336.76824951171875, |
|
"logps/rejected": -315.19384765625, |
|
"loss": -1.7444, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -4.8129730224609375, |
|
"rewards/margins": 3.9923481941223145, |
|
"rewards/rejected": -8.80532169342041, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.421636085626911e-07, |
|
"logits/chosen": -1.4560226202011108, |
|
"logits/rejected": -1.421790361404419, |
|
"logps/chosen": -328.801025390625, |
|
"logps/rejected": -280.44549560546875, |
|
"loss": -1.3433, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -6.336007118225098, |
|
"rewards/margins": 1.1562750339508057, |
|
"rewards/rejected": -7.492282867431641, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.402522935779816e-07, |
|
"logits/chosen": -1.7103254795074463, |
|
"logits/rejected": -1.5623626708984375, |
|
"logps/chosen": -353.27081298828125, |
|
"logps/rejected": -334.1893005371094, |
|
"loss": -1.3197, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -6.708989143371582, |
|
"rewards/margins": 1.9991058111190796, |
|
"rewards/rejected": -8.708094596862793, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3834097859327215e-07, |
|
"logits/chosen": -1.508737325668335, |
|
"logits/rejected": -1.3252770900726318, |
|
"logps/chosen": -288.32635498046875, |
|
"logps/rejected": -296.4991455078125, |
|
"loss": -1.3704, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -5.095849990844727, |
|
"rewards/margins": 3.1136269569396973, |
|
"rewards/rejected": -8.209476470947266, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3642966360856268e-07, |
|
"logits/chosen": -1.6645679473876953, |
|
"logits/rejected": -1.5162229537963867, |
|
"logps/chosen": -322.9044189453125, |
|
"logps/rejected": -300.4761047363281, |
|
"loss": -1.9619, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -5.100916862487793, |
|
"rewards/margins": 3.4188740253448486, |
|
"rewards/rejected": -8.519791603088379, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.345183486238532e-07, |
|
"logits/chosen": -1.5683923959732056, |
|
"logits/rejected": -1.4021813869476318, |
|
"logps/chosen": -327.93804931640625, |
|
"logps/rejected": -343.5317077636719, |
|
"loss": -1.6055, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -7.430581569671631, |
|
"rewards/margins": 1.1966545581817627, |
|
"rewards/rejected": -8.62723445892334, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.3260703363914372e-07, |
|
"logits/chosen": -1.631715178489685, |
|
"logits/rejected": -1.5181890726089478, |
|
"logps/chosen": -370.483642578125, |
|
"logps/rejected": -336.6531677246094, |
|
"loss": -1.8198, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -6.040884494781494, |
|
"rewards/margins": 3.5697269439697266, |
|
"rewards/rejected": -9.610611915588379, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3069571865443425e-07, |
|
"logits/chosen": -1.4669805765151978, |
|
"logits/rejected": -1.3994741439819336, |
|
"logps/chosen": -295.02496337890625, |
|
"logps/rejected": -321.65814208984375, |
|
"loss": -2.0272, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -6.588179588317871, |
|
"rewards/margins": 2.978868007659912, |
|
"rewards/rejected": -9.567047119140625, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.2878440366972476e-07, |
|
"logits/chosen": -1.6681219339370728, |
|
"logits/rejected": -1.491996169090271, |
|
"logps/chosen": -338.78729248046875, |
|
"logps/rejected": -374.81634521484375, |
|
"loss": -1.8513, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -6.465402126312256, |
|
"rewards/margins": 2.903998851776123, |
|
"rewards/rejected": -9.369401931762695, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.268730886850153e-07, |
|
"logits/chosen": -1.4729211330413818, |
|
"logits/rejected": -1.1880760192871094, |
|
"logps/chosen": -334.2304992675781, |
|
"logps/rejected": -305.21368408203125, |
|
"loss": -2.0159, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -7.080233573913574, |
|
"rewards/margins": 2.117523431777954, |
|
"rewards/rejected": -9.197757720947266, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.249617737003058e-07, |
|
"logits/chosen": -1.563912034034729, |
|
"logits/rejected": -1.5858089923858643, |
|
"logps/chosen": -378.62396240234375, |
|
"logps/rejected": -377.3148193359375, |
|
"loss": -1.9999, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -8.893011093139648, |
|
"rewards/margins": 2.892535924911499, |
|
"rewards/rejected": -11.785547256469727, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.2305045871559633e-07, |
|
"logits/chosen": -1.392828345298767, |
|
"logits/rejected": -1.3615916967391968, |
|
"logps/chosen": -361.71051025390625, |
|
"logps/rejected": -335.33673095703125, |
|
"loss": -2.324, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -8.41543197631836, |
|
"rewards/margins": 2.103468418121338, |
|
"rewards/rejected": -10.518899917602539, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2113914373088686e-07, |
|
"logits/chosen": -1.4968210458755493, |
|
"logits/rejected": -1.4618072509765625, |
|
"logps/chosen": -382.20367431640625, |
|
"logps/rejected": -378.17901611328125, |
|
"loss": -2.2244, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -7.971160888671875, |
|
"rewards/margins": 4.1811017990112305, |
|
"rewards/rejected": -12.152261734008789, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1922782874617736e-07, |
|
"logits/chosen": -1.37467360496521, |
|
"logits/rejected": -1.3709341287612915, |
|
"logps/chosen": -313.5454406738281, |
|
"logps/rejected": -367.4576721191406, |
|
"loss": -1.9016, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -8.650922775268555, |
|
"rewards/margins": 2.388150691986084, |
|
"rewards/rejected": -11.039073944091797, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1731651376146787e-07, |
|
"logits/chosen": -1.4017971754074097, |
|
"logits/rejected": -1.2825329303741455, |
|
"logps/chosen": -311.17572021484375, |
|
"logps/rejected": -345.6313171386719, |
|
"loss": -1.8086, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -8.644105911254883, |
|
"rewards/margins": 3.1263985633850098, |
|
"rewards/rejected": -11.77050495147705, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.154051987767584e-07, |
|
"logits/chosen": -1.5087705850601196, |
|
"logits/rejected": -1.3651801347732544, |
|
"logps/chosen": -351.78582763671875, |
|
"logps/rejected": -337.21295166015625, |
|
"loss": -1.5194, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -8.838014602661133, |
|
"rewards/margins": 3.8112175464630127, |
|
"rewards/rejected": -12.64923095703125, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.134938837920489e-07, |
|
"logits/chosen": -1.2244422435760498, |
|
"logits/rejected": -1.352123498916626, |
|
"logps/chosen": -281.91949462890625, |
|
"logps/rejected": -307.07867431640625, |
|
"loss": -1.8547, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -8.937861442565918, |
|
"rewards/margins": 3.2620761394500732, |
|
"rewards/rejected": -12.19993782043457, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1158256880733944e-07, |
|
"logits/chosen": -1.3861466646194458, |
|
"logits/rejected": -1.3692476749420166, |
|
"logps/chosen": -379.1508483886719, |
|
"logps/rejected": -348.1292724609375, |
|
"loss": -2.9603, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -8.26054859161377, |
|
"rewards/margins": 5.145617485046387, |
|
"rewards/rejected": -13.406166076660156, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.0967125382262994e-07, |
|
"logits/chosen": -1.2910804748535156, |
|
"logits/rejected": -1.3471043109893799, |
|
"logps/chosen": -359.8387451171875, |
|
"logps/rejected": -327.9253845214844, |
|
"loss": -1.6607, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -9.164748191833496, |
|
"rewards/margins": 1.6195499897003174, |
|
"rewards/rejected": -10.784297943115234, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0775993883792048e-07, |
|
"logits/chosen": -1.4032940864562988, |
|
"logits/rejected": -1.253796100616455, |
|
"logps/chosen": -357.06549072265625, |
|
"logps/rejected": -318.9412536621094, |
|
"loss": -2.9627, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -7.581735134124756, |
|
"rewards/margins": 3.610663652420044, |
|
"rewards/rejected": -11.192397117614746, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.05848623853211e-07, |
|
"logits/chosen": -1.4505128860473633, |
|
"logits/rejected": -1.4566118717193604, |
|
"logps/chosen": -400.2586975097656, |
|
"logps/rejected": -357.13458251953125, |
|
"loss": -2.4791, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -10.725576400756836, |
|
"rewards/margins": 2.9340341091156006, |
|
"rewards/rejected": -13.6596097946167, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0393730886850151e-07, |
|
"logits/chosen": -1.4870072603225708, |
|
"logits/rejected": -1.2949211597442627, |
|
"logps/chosen": -384.02947998046875, |
|
"logps/rejected": -346.6249084472656, |
|
"loss": -1.9201, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -9.375011444091797, |
|
"rewards/margins": 3.3913631439208984, |
|
"rewards/rejected": -12.766374588012695, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0202599388379205e-07, |
|
"logits/chosen": -1.5045329332351685, |
|
"logits/rejected": -1.3934712409973145, |
|
"logps/chosen": -375.15155029296875, |
|
"logps/rejected": -420.40283203125, |
|
"loss": -2.6807, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -10.547384262084961, |
|
"rewards/margins": 4.163573265075684, |
|
"rewards/rejected": -14.710957527160645, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.0011467889908258e-07, |
|
"logits/chosen": -1.5281354188919067, |
|
"logits/rejected": -1.2871553897857666, |
|
"logps/chosen": -395.02471923828125, |
|
"logps/rejected": -297.695068359375, |
|
"loss": -2.9446, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -10.007207870483398, |
|
"rewards/margins": 2.725371837615967, |
|
"rewards/rejected": -12.732580184936523, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9820336391437308e-07, |
|
"logits/chosen": -1.310064673423767, |
|
"logits/rejected": -1.3744899034500122, |
|
"logps/chosen": -344.2419738769531, |
|
"logps/rejected": -346.10430908203125, |
|
"loss": -2.1029, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -9.30712604522705, |
|
"rewards/margins": 4.580069541931152, |
|
"rewards/rejected": -13.88719654083252, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9629204892966362e-07, |
|
"logits/chosen": -1.4468410015106201, |
|
"logits/rejected": -1.3818161487579346, |
|
"logps/chosen": -382.7313232421875, |
|
"logps/rejected": -351.123291015625, |
|
"loss": -3.6235, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -10.61854362487793, |
|
"rewards/margins": 3.927306652069092, |
|
"rewards/rejected": -14.545849800109863, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.943807339449541e-07, |
|
"logits/chosen": -1.280505895614624, |
|
"logits/rejected": -1.126263976097107, |
|
"logps/chosen": -361.83258056640625, |
|
"logps/rejected": -395.9088439941406, |
|
"loss": -2.9208, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -10.34544849395752, |
|
"rewards/margins": 5.857726097106934, |
|
"rewards/rejected": -16.20317268371582, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.9246941896024463e-07, |
|
"logits/chosen": -1.300330400466919, |
|
"logits/rejected": -1.2540452480316162, |
|
"logps/chosen": -374.8466796875, |
|
"logps/rejected": -399.75933837890625, |
|
"loss": -2.3092, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -12.081557273864746, |
|
"rewards/margins": 3.7616398334503174, |
|
"rewards/rejected": -15.8431978225708, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9055810397553516e-07, |
|
"logits/chosen": -1.3129085302352905, |
|
"logits/rejected": -1.266152262687683, |
|
"logps/chosen": -376.607666015625, |
|
"logps/rejected": -378.1551513671875, |
|
"loss": -2.5453, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -12.030994415283203, |
|
"rewards/margins": 3.7824196815490723, |
|
"rewards/rejected": -15.813413619995117, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8864678899082566e-07, |
|
"logits/chosen": -1.3077343702316284, |
|
"logits/rejected": -1.3403010368347168, |
|
"logps/chosen": -395.0058288574219, |
|
"logps/rejected": -348.19683837890625, |
|
"loss": -2.6208, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -12.53403091430664, |
|
"rewards/margins": 2.729905605316162, |
|
"rewards/rejected": -15.263936042785645, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.867354740061162e-07, |
|
"logits/chosen": -1.2000598907470703, |
|
"logits/rejected": -1.1860705614089966, |
|
"logps/chosen": -377.95538330078125, |
|
"logps/rejected": -394.8722229003906, |
|
"loss": -2.3739, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -12.864456176757812, |
|
"rewards/margins": 4.33025598526001, |
|
"rewards/rejected": -17.194711685180664, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -1.5908974409103394, |
|
"eval_logits/rejected": -1.4396800994873047, |
|
"eval_logps/chosen": -418.98797607421875, |
|
"eval_logps/rejected": -413.8171691894531, |
|
"eval_loss": -3.0139997005462646, |
|
"eval_rewards/accuracies": 0.658730149269104, |
|
"eval_rewards/chosen": -12.91853141784668, |
|
"eval_rewards/margins": 4.969918251037598, |
|
"eval_rewards/rejected": -17.888450622558594, |
|
"eval_runtime": 237.8807, |
|
"eval_samples_per_second": 8.408, |
|
"eval_steps_per_second": 0.265, |
|
"step": 1938 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8482415902140673e-07, |
|
"logits/chosen": -1.3271663188934326, |
|
"logits/rejected": -1.3011524677276611, |
|
"logps/chosen": -392.82757568359375, |
|
"logps/rejected": -392.5997314453125, |
|
"loss": -2.8487, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -13.525858879089355, |
|
"rewards/margins": 4.043676376342773, |
|
"rewards/rejected": -17.569536209106445, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.8291284403669723e-07, |
|
"logits/chosen": -1.2705609798431396, |
|
"logits/rejected": -1.3038126230239868, |
|
"logps/chosen": -373.14031982421875, |
|
"logps/rejected": -386.13677978515625, |
|
"loss": -3.4669, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -12.79985237121582, |
|
"rewards/margins": 3.9696388244628906, |
|
"rewards/rejected": -16.76949119567871, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8100152905198777e-07, |
|
"logits/chosen": -1.061727523803711, |
|
"logits/rejected": -1.110812783241272, |
|
"logps/chosen": -381.92529296875, |
|
"logps/rejected": -418.325927734375, |
|
"loss": -2.5351, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -12.154891967773438, |
|
"rewards/margins": 2.9344310760498047, |
|
"rewards/rejected": -15.089323043823242, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7909021406727827e-07, |
|
"logits/chosen": -1.107367992401123, |
|
"logits/rejected": -1.453131914138794, |
|
"logps/chosen": -374.98504638671875, |
|
"logps/rejected": -365.43121337890625, |
|
"loss": -3.2083, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -11.676986694335938, |
|
"rewards/margins": 4.082896709442139, |
|
"rewards/rejected": -15.75988483428955, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.771788990825688e-07, |
|
"logits/chosen": -1.3608293533325195, |
|
"logits/rejected": -1.06710684299469, |
|
"logps/chosen": -423.4046325683594, |
|
"logps/rejected": -425.17681884765625, |
|
"loss": -2.8719, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -14.271319389343262, |
|
"rewards/margins": 4.651618957519531, |
|
"rewards/rejected": -18.92293930053711, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.7526758409785934e-07, |
|
"logits/chosen": -1.4061094522476196, |
|
"logits/rejected": -1.2359219789505005, |
|
"logps/chosen": -473.62841796875, |
|
"logps/rejected": -462.7413024902344, |
|
"loss": -3.7745, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -17.31951332092285, |
|
"rewards/margins": 4.680100440979004, |
|
"rewards/rejected": -21.99961280822754, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7335626911314984e-07, |
|
"logits/chosen": -1.2134959697723389, |
|
"logits/rejected": -0.9500266909599304, |
|
"logps/chosen": -372.77880859375, |
|
"logps/rejected": -378.95703125, |
|
"loss": -3.7006, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -15.280550003051758, |
|
"rewards/margins": 3.6492176055908203, |
|
"rewards/rejected": -18.929767608642578, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7144495412844037e-07, |
|
"logits/chosen": -1.3106526136398315, |
|
"logits/rejected": -1.1043154001235962, |
|
"logps/chosen": -463.447021484375, |
|
"logps/rejected": -458.7850036621094, |
|
"loss": -2.2585, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -16.96882438659668, |
|
"rewards/margins": 5.767825603485107, |
|
"rewards/rejected": -22.736652374267578, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.6953363914373088e-07, |
|
"logits/chosen": -1.373578667640686, |
|
"logits/rejected": -1.0678809881210327, |
|
"logps/chosen": -457.8382873535156, |
|
"logps/rejected": -414.07177734375, |
|
"loss": -2.567, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -16.41115951538086, |
|
"rewards/margins": 3.8120930194854736, |
|
"rewards/rejected": -20.223255157470703, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6762232415902138e-07, |
|
"logits/chosen": -1.1989551782608032, |
|
"logits/rejected": -1.2645841836929321, |
|
"logps/chosen": -400.55767822265625, |
|
"logps/rejected": -395.439208984375, |
|
"loss": -2.5975, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -14.613690376281738, |
|
"rewards/margins": 3.9228732585906982, |
|
"rewards/rejected": -18.536563873291016, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6571100917431192e-07, |
|
"logits/chosen": -1.3490411043167114, |
|
"logits/rejected": -1.1970597505569458, |
|
"logps/chosen": -399.5980224609375, |
|
"logps/rejected": -373.1912841796875, |
|
"loss": -2.902, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -13.886739730834961, |
|
"rewards/margins": 3.5066256523132324, |
|
"rewards/rejected": -17.39336585998535, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6379969418960242e-07, |
|
"logits/chosen": -1.2504427433013916, |
|
"logits/rejected": -1.1676826477050781, |
|
"logps/chosen": -489.55914306640625, |
|
"logps/rejected": -456.7439880371094, |
|
"loss": -3.6402, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -17.439693450927734, |
|
"rewards/margins": 2.561140537261963, |
|
"rewards/rejected": -20.000835418701172, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6188837920489295e-07, |
|
"logits/chosen": -1.4094626903533936, |
|
"logits/rejected": -0.9949380159378052, |
|
"logps/chosen": -461.00286865234375, |
|
"logps/rejected": -403.776123046875, |
|
"loss": -3.295, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -16.868534088134766, |
|
"rewards/margins": 2.4951887130737305, |
|
"rewards/rejected": -19.36372184753418, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5997706422018349e-07, |
|
"logits/chosen": -1.5532751083374023, |
|
"logits/rejected": -1.1935482025146484, |
|
"logps/chosen": -410.2151794433594, |
|
"logps/rejected": -497.067626953125, |
|
"loss": -3.6557, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -14.347163200378418, |
|
"rewards/margins": 6.878281593322754, |
|
"rewards/rejected": -21.225446701049805, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.58065749235474e-07, |
|
"logits/chosen": -1.2084702253341675, |
|
"logits/rejected": -1.1066341400146484, |
|
"logps/chosen": -507.550537109375, |
|
"logps/rejected": -450.25341796875, |
|
"loss": -3.175, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -17.20359992980957, |
|
"rewards/margins": 2.5353291034698486, |
|
"rewards/rejected": -19.738927841186523, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5615443425076452e-07, |
|
"logits/chosen": -1.0724289417266846, |
|
"logits/rejected": -1.0922993421554565, |
|
"logps/chosen": -402.9586486816406, |
|
"logps/rejected": -448.19830322265625, |
|
"loss": -3.6034, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -14.504638671875, |
|
"rewards/margins": 8.439682006835938, |
|
"rewards/rejected": -22.944318771362305, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5424311926605506e-07, |
|
"logits/chosen": -0.9919137954711914, |
|
"logits/rejected": -0.9210994839668274, |
|
"logps/chosen": -369.6788024902344, |
|
"logps/rejected": -405.4874572753906, |
|
"loss": -4.0197, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -14.22148323059082, |
|
"rewards/margins": 8.295201301574707, |
|
"rewards/rejected": -22.51668357849121, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.5233180428134556e-07, |
|
"logits/chosen": -1.2452681064605713, |
|
"logits/rejected": -1.1532270908355713, |
|
"logps/chosen": -423.7035217285156, |
|
"logps/rejected": -444.843017578125, |
|
"loss": -4.0177, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -15.260812759399414, |
|
"rewards/margins": 7.597034454345703, |
|
"rewards/rejected": -22.857845306396484, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.504204892966361e-07, |
|
"logits/chosen": -1.2690461874008179, |
|
"logits/rejected": -1.036007046699524, |
|
"logps/chosen": -481.8981018066406, |
|
"logps/rejected": -458.9730529785156, |
|
"loss": -4.7473, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -18.875347137451172, |
|
"rewards/margins": 3.536944627761841, |
|
"rewards/rejected": -22.412288665771484, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.485091743119266e-07, |
|
"logits/chosen": -1.2708927392959595, |
|
"logits/rejected": -0.9568039178848267, |
|
"logps/chosen": -407.5409851074219, |
|
"logps/rejected": -469.5133361816406, |
|
"loss": -4.6317, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -15.849174499511719, |
|
"rewards/margins": 7.314669609069824, |
|
"rewards/rejected": -23.16384506225586, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.465978593272171e-07, |
|
"logits/chosen": -0.9912996292114258, |
|
"logits/rejected": -0.8085284233093262, |
|
"logps/chosen": -353.92413330078125, |
|
"logps/rejected": -355.982666015625, |
|
"loss": -4.0689, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -15.852828979492188, |
|
"rewards/margins": 5.281838417053223, |
|
"rewards/rejected": -21.134668350219727, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4468654434250764e-07, |
|
"logits/chosen": -1.2438325881958008, |
|
"logits/rejected": -1.1221582889556885, |
|
"logps/chosen": -519.1368408203125, |
|
"logps/rejected": -489.5020446777344, |
|
"loss": -2.5507, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -19.783428192138672, |
|
"rewards/margins": 1.9117088317871094, |
|
"rewards/rejected": -21.69513702392578, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4277522935779814e-07, |
|
"logits/chosen": -1.0609955787658691, |
|
"logits/rejected": -1.2507171630859375, |
|
"logps/chosen": -440.3553161621094, |
|
"logps/rejected": -453.50323486328125, |
|
"loss": -3.4898, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -16.776865005493164, |
|
"rewards/margins": 4.883880615234375, |
|
"rewards/rejected": -21.660743713378906, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.4086391437308867e-07, |
|
"logits/chosen": -1.274261474609375, |
|
"logits/rejected": -1.1783298254013062, |
|
"logps/chosen": -472.8345642089844, |
|
"logps/rejected": -521.444580078125, |
|
"loss": -4.8646, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -17.88382339477539, |
|
"rewards/margins": 8.282121658325195, |
|
"rewards/rejected": -26.165943145751953, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.389525993883792e-07, |
|
"logits/chosen": -1.1477124691009521, |
|
"logits/rejected": -0.8995282053947449, |
|
"logps/chosen": -398.76043701171875, |
|
"logps/rejected": -418.26507568359375, |
|
"loss": -3.7138, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -17.033363342285156, |
|
"rewards/margins": 5.318907737731934, |
|
"rewards/rejected": -22.352272033691406, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.370412844036697e-07, |
|
"logits/chosen": -1.067368507385254, |
|
"logits/rejected": -1.2181618213653564, |
|
"logps/chosen": -437.30010986328125, |
|
"logps/rejected": -469.89056396484375, |
|
"loss": -3.5077, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -19.17410659790039, |
|
"rewards/margins": 4.917706489562988, |
|
"rewards/rejected": -24.091812133789062, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3512996941896024e-07, |
|
"logits/chosen": -1.0004960298538208, |
|
"logits/rejected": -0.8722925186157227, |
|
"logps/chosen": -415.56719970703125, |
|
"logps/rejected": -430.51934814453125, |
|
"loss": -4.4207, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -19.313739776611328, |
|
"rewards/margins": 4.984239101409912, |
|
"rewards/rejected": -24.297977447509766, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3321865443425075e-07, |
|
"logits/chosen": -1.0728847980499268, |
|
"logits/rejected": -0.7316335439682007, |
|
"logps/chosen": -489.6312561035156, |
|
"logps/rejected": -451.337158203125, |
|
"loss": -4.1332, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -22.230623245239258, |
|
"rewards/margins": 0.4879101812839508, |
|
"rewards/rejected": -22.71853256225586, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3130733944954128e-07, |
|
"logits/chosen": -1.31141197681427, |
|
"logits/rejected": -1.0552115440368652, |
|
"logps/chosen": -473.9842834472656, |
|
"logps/rejected": -515.6341552734375, |
|
"loss": -3.8823, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -18.287981033325195, |
|
"rewards/margins": 8.8409423828125, |
|
"rewards/rejected": -27.128925323486328, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.293960244648318e-07, |
|
"logits/chosen": -1.1148552894592285, |
|
"logits/rejected": -1.010851502418518, |
|
"logps/chosen": -501.09588623046875, |
|
"logps/rejected": -478.259033203125, |
|
"loss": -4.2715, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -20.409494400024414, |
|
"rewards/margins": 5.208123683929443, |
|
"rewards/rejected": -25.617618560791016, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2748470948012232e-07, |
|
"logits/chosen": -1.1082009077072144, |
|
"logits/rejected": -0.9660416841506958, |
|
"logps/chosen": -469.67169189453125, |
|
"logps/rejected": -531.001708984375, |
|
"loss": -5.0927, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -20.520383834838867, |
|
"rewards/margins": 7.39895486831665, |
|
"rewards/rejected": -27.91933822631836, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.2557339449541285e-07, |
|
"logits/chosen": -1.1804434061050415, |
|
"logits/rejected": -1.013977289199829, |
|
"logps/chosen": -464.8408203125, |
|
"logps/rejected": -479.40301513671875, |
|
"loss": -5.3276, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -18.352712631225586, |
|
"rewards/margins": 6.4782609939575195, |
|
"rewards/rejected": -24.830974578857422, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2366207951070336e-07, |
|
"logits/chosen": -1.1036585569381714, |
|
"logits/rejected": -1.1067498922348022, |
|
"logps/chosen": -482.696533203125, |
|
"logps/rejected": -480.6744689941406, |
|
"loss": -3.2585, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -21.47313117980957, |
|
"rewards/margins": 3.0878939628601074, |
|
"rewards/rejected": -24.561023712158203, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.217507645259939e-07, |
|
"logits/chosen": -1.1785879135131836, |
|
"logits/rejected": -0.8433502912521362, |
|
"logps/chosen": -448.43017578125, |
|
"logps/rejected": -455.5037536621094, |
|
"loss": -3.4348, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -17.98015594482422, |
|
"rewards/margins": 5.601017475128174, |
|
"rewards/rejected": -23.581167221069336, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.198394495412844e-07, |
|
"logits/chosen": -0.9618834257125854, |
|
"logits/rejected": -0.9488929510116577, |
|
"logps/chosen": -486.79248046875, |
|
"logps/rejected": -497.04498291015625, |
|
"loss": -5.1327, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -20.99383544921875, |
|
"rewards/margins": 5.7790398597717285, |
|
"rewards/rejected": -26.772876739501953, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1792813455657493e-07, |
|
"logits/chosen": -1.175091028213501, |
|
"logits/rejected": -0.9602692723274231, |
|
"logps/chosen": -455.3914489746094, |
|
"logps/rejected": -423.8038024902344, |
|
"loss": -5.0517, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -19.302087783813477, |
|
"rewards/margins": 2.1102347373962402, |
|
"rewards/rejected": -21.412322998046875, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1601681957186543e-07, |
|
"logits/chosen": -1.0566465854644775, |
|
"logits/rejected": -0.9404910802841187, |
|
"logps/chosen": -462.96478271484375, |
|
"logps/rejected": -497.4542541503906, |
|
"loss": -5.7946, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -18.82391357421875, |
|
"rewards/margins": 9.018692016601562, |
|
"rewards/rejected": -27.842605590820312, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1410550458715595e-07, |
|
"logits/chosen": -0.9795541763305664, |
|
"logits/rejected": -1.0114076137542725, |
|
"logps/chosen": -471.488037109375, |
|
"logps/rejected": -525.1002807617188, |
|
"loss": -5.3356, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -21.668289184570312, |
|
"rewards/margins": 7.146371364593506, |
|
"rewards/rejected": -28.814661026000977, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1219418960244648e-07, |
|
"logits/chosen": -1.0043725967407227, |
|
"logits/rejected": -0.814845085144043, |
|
"logps/chosen": -471.0302734375, |
|
"logps/rejected": -553.7667846679688, |
|
"loss": -7.8293, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -25.013063430786133, |
|
"rewards/margins": 5.8318376541137695, |
|
"rewards/rejected": -30.84490394592285, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.10282874617737e-07, |
|
"logits/chosen": -1.04874587059021, |
|
"logits/rejected": -0.8525232076644897, |
|
"logps/chosen": -503.905517578125, |
|
"logps/rejected": -522.6575927734375, |
|
"loss": -5.5289, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -27.28557777404785, |
|
"rewards/margins": 4.0987653732299805, |
|
"rewards/rejected": -31.38434410095215, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0837155963302752e-07, |
|
"logits/chosen": -0.9908869862556458, |
|
"logits/rejected": -0.810786247253418, |
|
"logps/chosen": -424.4300842285156, |
|
"logps/rejected": -541.6786499023438, |
|
"loss": -4.8461, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -21.23228645324707, |
|
"rewards/margins": 9.21428394317627, |
|
"rewards/rejected": -30.446569442749023, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0646024464831804e-07, |
|
"logits/chosen": -0.9769414067268372, |
|
"logits/rejected": -0.7807801961898804, |
|
"logps/chosen": -508.21539306640625, |
|
"logps/rejected": -505.65118408203125, |
|
"loss": -4.3914, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -23.276477813720703, |
|
"rewards/margins": 5.2235870361328125, |
|
"rewards/rejected": -28.500064849853516, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0454892966360856e-07, |
|
"logits/chosen": -0.9564868211746216, |
|
"logits/rejected": -0.8420296907424927, |
|
"logps/chosen": -462.8448791503906, |
|
"logps/rejected": -527.154541015625, |
|
"loss": -4.464, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -20.870206832885742, |
|
"rewards/margins": 10.262295722961426, |
|
"rewards/rejected": -31.13250160217285, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0263761467889908e-07, |
|
"logits/chosen": -0.8812466859817505, |
|
"logits/rejected": -0.8560865521430969, |
|
"logps/chosen": -485.3046875, |
|
"logps/rejected": -493.06463623046875, |
|
"loss": -4.3681, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -22.31712532043457, |
|
"rewards/margins": 3.8712782859802246, |
|
"rewards/rejected": -26.188400268554688, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.007262996941896e-07, |
|
"logits/chosen": -0.9851228594779968, |
|
"logits/rejected": -0.6262848973274231, |
|
"logps/chosen": -522.0337524414062, |
|
"logps/rejected": -517.8556518554688, |
|
"loss": -3.7686, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -26.0426025390625, |
|
"rewards/margins": 6.041103839874268, |
|
"rewards/rejected": -32.083702087402344, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.881498470948011e-08, |
|
"logits/chosen": -0.9182702898979187, |
|
"logits/rejected": -0.8262530565261841, |
|
"logps/chosen": -526.509521484375, |
|
"logps/rejected": -501.44390869140625, |
|
"loss": -4.2044, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -25.09255599975586, |
|
"rewards/margins": 4.793452739715576, |
|
"rewards/rejected": -29.88600730895996, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.690366972477065e-08, |
|
"logits/chosen": -1.020527958869934, |
|
"logits/rejected": -0.9827292561531067, |
|
"logps/chosen": -535.2392578125, |
|
"logps/rejected": -474.393310546875, |
|
"loss": -4.1335, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -26.45871925354004, |
|
"rewards/margins": -0.03600626066327095, |
|
"rewards/rejected": -26.42270851135254, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.499235474006116e-08, |
|
"logits/chosen": -1.0558453798294067, |
|
"logits/rejected": -0.9320958256721497, |
|
"logps/chosen": -537.9478759765625, |
|
"logps/rejected": -526.4865112304688, |
|
"loss": -5.638, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -22.561458587646484, |
|
"rewards/margins": 8.904586791992188, |
|
"rewards/rejected": -31.466039657592773, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.308103975535168e-08, |
|
"logits/chosen": -0.9652633666992188, |
|
"logits/rejected": -0.9594426155090332, |
|
"logps/chosen": -547.8624877929688, |
|
"logps/rejected": -563.6146240234375, |
|
"loss": -5.9731, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -25.674243927001953, |
|
"rewards/margins": 5.594969749450684, |
|
"rewards/rejected": -31.269210815429688, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.116972477064219e-08, |
|
"logits/chosen": -0.9045840501785278, |
|
"logits/rejected": -0.9294489622116089, |
|
"logps/chosen": -545.46923828125, |
|
"logps/rejected": -560.5089111328125, |
|
"loss": -4.2549, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -26.388031005859375, |
|
"rewards/margins": 3.495013475418091, |
|
"rewards/rejected": -29.883047103881836, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.925840978593272e-08, |
|
"logits/chosen": -1.0906932353973389, |
|
"logits/rejected": -0.8586239814758301, |
|
"logps/chosen": -426.898193359375, |
|
"logps/rejected": -520.27099609375, |
|
"loss": -5.0199, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -21.479185104370117, |
|
"rewards/margins": 6.4797844886779785, |
|
"rewards/rejected": -27.958969116210938, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.734709480122324e-08, |
|
"logits/chosen": -0.8275815844535828, |
|
"logits/rejected": -0.8221632242202759, |
|
"logps/chosen": -514.3942260742188, |
|
"logps/rejected": -599.9034423828125, |
|
"loss": -5.2968, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -26.718358993530273, |
|
"rewards/margins": 10.353950500488281, |
|
"rewards/rejected": -37.07230758666992, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.543577981651376e-08, |
|
"logits/chosen": -1.0432653427124023, |
|
"logits/rejected": -1.0401585102081299, |
|
"logps/chosen": -591.7424926757812, |
|
"logps/rejected": -566.1804809570312, |
|
"loss": -5.6217, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -23.220060348510742, |
|
"rewards/margins": 7.772289276123047, |
|
"rewards/rejected": -30.99234962463379, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.352446483180428e-08, |
|
"logits/chosen": -0.9903294444084167, |
|
"logits/rejected": -0.9184169769287109, |
|
"logps/chosen": -531.7313842773438, |
|
"logps/rejected": -534.6137084960938, |
|
"loss": -3.808, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -26.629840850830078, |
|
"rewards/margins": 6.476201057434082, |
|
"rewards/rejected": -33.106040954589844, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.161314984709481e-08, |
|
"logits/chosen": -1.09735107421875, |
|
"logits/rejected": -0.9943147897720337, |
|
"logps/chosen": -521.5565795898438, |
|
"logps/rejected": -527.12744140625, |
|
"loss": -4.6607, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -24.24032211303711, |
|
"rewards/margins": 6.096768379211426, |
|
"rewards/rejected": -30.33709144592285, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.970183486238531e-08, |
|
"logits/chosen": -0.9941670298576355, |
|
"logits/rejected": -0.8651212453842163, |
|
"logps/chosen": -524.2615356445312, |
|
"logps/rejected": -511.6361389160156, |
|
"loss": -5.4135, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -26.943328857421875, |
|
"rewards/margins": 3.2720611095428467, |
|
"rewards/rejected": -30.21539306640625, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.779051987767583e-08, |
|
"logits/chosen": -1.145651936531067, |
|
"logits/rejected": -1.0183385610580444, |
|
"logps/chosen": -548.2122192382812, |
|
"logps/rejected": -548.8924560546875, |
|
"loss": -7.1154, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -24.76950454711914, |
|
"rewards/margins": 7.515495300292969, |
|
"rewards/rejected": -32.284996032714844, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.587920489296635e-08, |
|
"logits/chosen": -1.0196164846420288, |
|
"logits/rejected": -1.0715177059173584, |
|
"logps/chosen": -547.984375, |
|
"logps/rejected": -582.05126953125, |
|
"loss": -6.2112, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -26.091842651367188, |
|
"rewards/margins": 9.326883316040039, |
|
"rewards/rejected": -35.41872787475586, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.396788990825688e-08, |
|
"logits/chosen": -1.106105923652649, |
|
"logits/rejected": -0.9713128209114075, |
|
"logps/chosen": -485.42083740234375, |
|
"logps/rejected": -594.6617431640625, |
|
"loss": -6.3354, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -22.46695327758789, |
|
"rewards/margins": 16.13218879699707, |
|
"rewards/rejected": -38.599143981933594, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.20565749235474e-08, |
|
"logits/chosen": -0.9968252182006836, |
|
"logits/rejected": -0.8192588090896606, |
|
"logps/chosen": -515.4625854492188, |
|
"logps/rejected": -511.76849365234375, |
|
"loss": -5.7957, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -26.7257022857666, |
|
"rewards/margins": 5.563019752502441, |
|
"rewards/rejected": -32.28872299194336, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.014525993883792e-08, |
|
"logits/chosen": -0.9310768246650696, |
|
"logits/rejected": -0.9076264500617981, |
|
"logps/chosen": -532.5977783203125, |
|
"logps/rejected": -534.395263671875, |
|
"loss": -6.54, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -25.927881240844727, |
|
"rewards/margins": 5.213220119476318, |
|
"rewards/rejected": -31.141101837158203, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.823394495412843e-08, |
|
"logits/chosen": -0.8654760122299194, |
|
"logits/rejected": -0.9172045588493347, |
|
"logps/chosen": -494.37005615234375, |
|
"logps/rejected": -517.6791381835938, |
|
"loss": -5.034, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -22.28108787536621, |
|
"rewards/margins": 8.954570770263672, |
|
"rewards/rejected": -31.235660552978516, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.632262996941895e-08, |
|
"logits/chosen": -0.8583803176879883, |
|
"logits/rejected": -1.0703628063201904, |
|
"logps/chosen": -545.6281127929688, |
|
"logps/rejected": -576.9746704101562, |
|
"loss": -6.1075, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -29.19540023803711, |
|
"rewards/margins": 6.0790910720825195, |
|
"rewards/rejected": -35.27449035644531, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.441131498470948e-08, |
|
"logits/chosen": -0.9567610621452332, |
|
"logits/rejected": -0.7643688917160034, |
|
"logps/chosen": -534.2616577148438, |
|
"logps/rejected": -558.9248657226562, |
|
"loss": -6.139, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -28.329565048217773, |
|
"rewards/margins": 6.173336982727051, |
|
"rewards/rejected": -34.50290298461914, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -0.973718523979187, |
|
"logits/rejected": -0.9818305969238281, |
|
"logps/chosen": -575.3428955078125, |
|
"logps/rejected": -579.9434814453125, |
|
"loss": -5.6596, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -26.38543701171875, |
|
"rewards/margins": 7.383551120758057, |
|
"rewards/rejected": -33.76898956298828, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.058868501529052e-08, |
|
"logits/chosen": -1.0347440242767334, |
|
"logits/rejected": -1.049570083618164, |
|
"logps/chosen": -574.5046997070312, |
|
"logps/rejected": -509.9305114746094, |
|
"loss": -5.3415, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -28.74808692932129, |
|
"rewards/margins": -0.5448096394538879, |
|
"rewards/rejected": -28.203277587890625, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.8677370030581035e-08, |
|
"logits/chosen": -1.0313562154769897, |
|
"logits/rejected": -0.7958860397338867, |
|
"logps/chosen": -527.613525390625, |
|
"logps/rejected": -525.542724609375, |
|
"loss": -6.3272, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -26.725732803344727, |
|
"rewards/margins": 5.203427314758301, |
|
"rewards/rejected": -31.92915916442871, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.6766055045871554e-08, |
|
"logits/chosen": -0.9836112260818481, |
|
"logits/rejected": -0.9386361241340637, |
|
"logps/chosen": -525.5948486328125, |
|
"logps/rejected": -588.316162109375, |
|
"loss": -7.7597, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -26.01352882385254, |
|
"rewards/margins": 9.247634887695312, |
|
"rewards/rejected": -35.26116180419922, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.485474006116208e-08, |
|
"logits/chosen": -1.163663387298584, |
|
"logits/rejected": -0.8112475275993347, |
|
"logps/chosen": -533.2066040039062, |
|
"logps/rejected": -540.348388671875, |
|
"loss": -5.5581, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -26.54342269897461, |
|
"rewards/margins": 6.053246021270752, |
|
"rewards/rejected": -32.59667205810547, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.294342507645259e-08, |
|
"logits/chosen": -0.9945865869522095, |
|
"logits/rejected": -0.8910030126571655, |
|
"logps/chosen": -593.0062866210938, |
|
"logps/rejected": -556.135009765625, |
|
"loss": -4.082, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -28.179473876953125, |
|
"rewards/margins": 6.279976844787598, |
|
"rewards/rejected": -34.459449768066406, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.1032110091743117e-08, |
|
"logits/chosen": -0.6196568012237549, |
|
"logits/rejected": -0.8046578168869019, |
|
"logps/chosen": -520.0942993164062, |
|
"logps/rejected": -537.9401245117188, |
|
"loss": -4.2575, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -30.394145965576172, |
|
"rewards/margins": 2.2538414001464844, |
|
"rewards/rejected": -32.64798355102539, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.9120795107033635e-08, |
|
"logits/chosen": -0.9934478998184204, |
|
"logits/rejected": -0.9696424603462219, |
|
"logps/chosen": -542.2866821289062, |
|
"logps/rejected": -604.997314453125, |
|
"loss": -7.0483, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -26.58272361755371, |
|
"rewards/margins": 10.307984352111816, |
|
"rewards/rejected": -36.890708923339844, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.7209480122324154e-08, |
|
"logits/chosen": -1.0938358306884766, |
|
"logits/rejected": -0.9137821197509766, |
|
"logps/chosen": -557.5704345703125, |
|
"logps/rejected": -692.9163818359375, |
|
"loss": -4.9773, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -29.691476821899414, |
|
"rewards/margins": 14.579462051391602, |
|
"rewards/rejected": -44.270938873291016, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.529816513761467e-08, |
|
"logits/chosen": -0.9120942950248718, |
|
"logits/rejected": -0.9001785516738892, |
|
"logps/chosen": -560.4894409179688, |
|
"logps/rejected": -654.8059692382812, |
|
"loss": -8.4192, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -25.794815063476562, |
|
"rewards/margins": 12.174165725708008, |
|
"rewards/rejected": -37.9689826965332, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.33868501529052e-08, |
|
"logits/chosen": -1.0795204639434814, |
|
"logits/rejected": -0.798618733882904, |
|
"logps/chosen": -502.0621643066406, |
|
"logps/rejected": -547.533203125, |
|
"loss": -6.4279, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -27.192279815673828, |
|
"rewards/margins": 8.032793998718262, |
|
"rewards/rejected": -35.225074768066406, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.147553516819572e-08, |
|
"logits/chosen": -0.8620451092720032, |
|
"logits/rejected": -0.8486016988754272, |
|
"logps/chosen": -493.64447021484375, |
|
"logps/rejected": -515.5201416015625, |
|
"loss": -7.8372, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -26.775470733642578, |
|
"rewards/margins": 3.5873656272888184, |
|
"rewards/rejected": -30.362834930419922, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.9564220183486236e-08, |
|
"logits/chosen": -0.9343770146369934, |
|
"logits/rejected": -0.7297960519790649, |
|
"logps/chosen": -554.4147338867188, |
|
"logps/rejected": -525.2438354492188, |
|
"loss": -9.0144, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -29.26637840270996, |
|
"rewards/margins": 4.40227746963501, |
|
"rewards/rejected": -33.668663024902344, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.7652905198776755e-08, |
|
"logits/chosen": -0.9890888333320618, |
|
"logits/rejected": -0.8765937685966492, |
|
"logps/chosen": -600.9437255859375, |
|
"logps/rejected": -544.9365234375, |
|
"loss": -5.8302, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -30.97121238708496, |
|
"rewards/margins": 1.8391153812408447, |
|
"rewards/rejected": -32.810325622558594, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.574159021406728e-08, |
|
"logits/chosen": -0.885094165802002, |
|
"logits/rejected": -1.0295897722244263, |
|
"logps/chosen": -576.9892578125, |
|
"logps/rejected": -676.5888671875, |
|
"loss": -3.6913, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -30.517303466796875, |
|
"rewards/margins": 8.666280746459961, |
|
"rewards/rejected": -39.1835823059082, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.383027522935779e-08, |
|
"logits/chosen": -0.9966446161270142, |
|
"logits/rejected": -0.8098469972610474, |
|
"logps/chosen": -550.4933471679688, |
|
"logps/rejected": -582.1474609375, |
|
"loss": -5.0871, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -28.010635375976562, |
|
"rewards/margins": 6.666192054748535, |
|
"rewards/rejected": -34.67682647705078, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.191896024464832e-08, |
|
"logits/chosen": -0.9043565988540649, |
|
"logits/rejected": -0.7680369019508362, |
|
"logps/chosen": -609.8352661132812, |
|
"logps/rejected": -591.0311279296875, |
|
"loss": -7.8269, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -32.64293670654297, |
|
"rewards/margins": 3.9754230976104736, |
|
"rewards/rejected": -36.618350982666016, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.0007645259938836e-08, |
|
"logits/chosen": -0.9044156074523926, |
|
"logits/rejected": -0.7647527456283569, |
|
"logps/chosen": -504.7789611816406, |
|
"logps/rejected": -613.888671875, |
|
"loss": -8.5506, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -28.10544204711914, |
|
"rewards/margins": 12.60815715789795, |
|
"rewards/rejected": -40.713600158691406, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.809633027522936e-08, |
|
"logits/chosen": -0.7312633395195007, |
|
"logits/rejected": -0.8145195245742798, |
|
"logps/chosen": -568.981689453125, |
|
"logps/rejected": -555.5551147460938, |
|
"loss": -8.1692, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -32.20295715332031, |
|
"rewards/margins": 2.2871875762939453, |
|
"rewards/rejected": -34.490150451660156, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.6185015290519877e-08, |
|
"logits/chosen": -0.692935049533844, |
|
"logits/rejected": -0.9012505412101746, |
|
"logps/chosen": -530.6353759765625, |
|
"logps/rejected": -661.496826171875, |
|
"loss": -7.2198, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -26.056262969970703, |
|
"rewards/margins": 15.568461418151855, |
|
"rewards/rejected": -41.62472152709961, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.4273700305810396e-08, |
|
"logits/chosen": -1.0233699083328247, |
|
"logits/rejected": -0.7373823523521423, |
|
"logps/chosen": -615.1844482421875, |
|
"logps/rejected": -660.0513305664062, |
|
"loss": -4.8984, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -30.922901153564453, |
|
"rewards/margins": 10.078287124633789, |
|
"rewards/rejected": -41.001190185546875, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.2362385321100918e-08, |
|
"logits/chosen": -1.081015944480896, |
|
"logits/rejected": -0.7994831800460815, |
|
"logps/chosen": -571.29150390625, |
|
"logps/rejected": -558.0635375976562, |
|
"loss": -7.6276, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -28.834667205810547, |
|
"rewards/margins": 5.563540458679199, |
|
"rewards/rejected": -34.39820861816406, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.0451070336391437e-08, |
|
"logits/chosen": -0.8437451124191284, |
|
"logits/rejected": -0.8097723722457886, |
|
"logps/chosen": -579.05615234375, |
|
"logps/rejected": -576.0593872070312, |
|
"loss": -7.1202, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -30.914493560791016, |
|
"rewards/margins": 7.180275917053223, |
|
"rewards/rejected": -38.094764709472656, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.8539755351681956e-08, |
|
"logits/chosen": -0.816103458404541, |
|
"logits/rejected": -0.7335480451583862, |
|
"logps/chosen": -546.3377685546875, |
|
"logps/rejected": -556.5323486328125, |
|
"loss": -9.8674, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -28.328052520751953, |
|
"rewards/margins": 3.8435778617858887, |
|
"rewards/rejected": -32.171630859375, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6628440366972478e-08, |
|
"logits/chosen": -0.994927704334259, |
|
"logits/rejected": -0.8127029538154602, |
|
"logps/chosen": -540.6372680664062, |
|
"logps/rejected": -544.7251586914062, |
|
"loss": -4.5174, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -25.344158172607422, |
|
"rewards/margins": 6.365922451019287, |
|
"rewards/rejected": -31.710086822509766, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4717125382262997e-08, |
|
"logits/chosen": -0.900621235370636, |
|
"logits/rejected": -0.9584394693374634, |
|
"logps/chosen": -547.0264892578125, |
|
"logps/rejected": -650.3952026367188, |
|
"loss": -7.3594, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -27.896032333374023, |
|
"rewards/margins": 12.844169616699219, |
|
"rewards/rejected": -40.740196228027344, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2805810397553517e-08, |
|
"logits/chosen": -0.9609657526016235, |
|
"logits/rejected": -0.9351837038993835, |
|
"logps/chosen": -620.4771118164062, |
|
"logps/rejected": -574.4600830078125, |
|
"loss": -4.3283, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -34.61457061767578, |
|
"rewards/margins": 0.11583442986011505, |
|
"rewards/rejected": -34.730403900146484, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0894495412844038e-08, |
|
"logits/chosen": -0.9170868992805481, |
|
"logits/rejected": -0.9585624933242798, |
|
"logps/chosen": -573.9271240234375, |
|
"logps/rejected": -615.289794921875, |
|
"loss": -9.5317, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -31.822189331054688, |
|
"rewards/margins": 6.956101894378662, |
|
"rewards/rejected": -38.77829360961914, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.983180428134555e-09, |
|
"logits/chosen": -0.9001976847648621, |
|
"logits/rejected": -0.7435283660888672, |
|
"logps/chosen": -546.4014892578125, |
|
"logps/rejected": -503.437255859375, |
|
"loss": -7.6183, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -28.65180015563965, |
|
"rewards/margins": 3.4450135231018066, |
|
"rewards/rejected": -32.0968132019043, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.071865443425076e-09, |
|
"logits/chosen": -0.9058972597122192, |
|
"logits/rejected": -0.6541125774383545, |
|
"logps/chosen": -654.9110107421875, |
|
"logps/rejected": -610.533447265625, |
|
"loss": -7.5332, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -37.84512710571289, |
|
"rewards/margins": 0.6990224123001099, |
|
"rewards/rejected": -38.544151306152344, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.1605504587155965e-09, |
|
"logits/chosen": -1.0033214092254639, |
|
"logits/rejected": -1.143033504486084, |
|
"logps/chosen": -586.54833984375, |
|
"logps/rejected": -587.2291259765625, |
|
"loss": -8.2379, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -30.552661895751953, |
|
"rewards/margins": 5.087256908416748, |
|
"rewards/rejected": -35.63991928100586, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.249235474006116e-09, |
|
"logits/chosen": -0.9559615850448608, |
|
"logits/rejected": -0.6920489072799683, |
|
"logps/chosen": -566.3262329101562, |
|
"logps/rejected": -614.7679443359375, |
|
"loss": -6.6734, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -29.60939598083496, |
|
"rewards/margins": 8.876012802124023, |
|
"rewards/rejected": -38.48540496826172, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.3379204892966359e-09, |
|
"logits/chosen": -0.7593010663986206, |
|
"logits/rejected": -0.7966611981391907, |
|
"logps/chosen": -579.7311401367188, |
|
"logps/rejected": -572.9598999023438, |
|
"loss": -5.7169, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -32.345096588134766, |
|
"rewards/margins": 4.745226860046387, |
|
"rewards/rejected": -37.09032440185547, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_logits/chosen": -1.2629982233047485, |
|
"eval_logits/rejected": -1.075066089630127, |
|
"eval_logps/chosen": -588.9970092773438, |
|
"eval_logps/rejected": -633.47216796875, |
|
"eval_loss": -7.541553497314453, |
|
"eval_rewards/accuracies": 0.6150793433189392, |
|
"eval_rewards/chosen": -29.919435501098633, |
|
"eval_rewards/margins": 9.934508323669434, |
|
"eval_rewards/rejected": -39.853946685791016, |
|
"eval_runtime": 238.4669, |
|
"eval_samples_per_second": 8.387, |
|
"eval_steps_per_second": 0.264, |
|
"step": 2907 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2907, |
|
"total_flos": 0.0, |
|
"train_loss": -1.932115889119454, |
|
"train_runtime": 45081.596, |
|
"train_samples_per_second": 4.124, |
|
"train_steps_per_second": 0.064 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2907, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|