|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1875, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.6595744680851065e-08, |
|
"logits/chosen": 0.4583740830421448, |
|
"logits/rejected": 0.45381295680999756, |
|
"logps/chosen": -403.16717529296875, |
|
"logps/rejected": -354.3865051269531, |
|
"loss": 0.1853, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.6595744680851066e-07, |
|
"logits/chosen": 0.18831893801689148, |
|
"logits/rejected": 0.16829822957515717, |
|
"logps/chosen": -401.88055419921875, |
|
"logps/rejected": -396.11865234375, |
|
"loss": 0.2155, |
|
"rewards/accuracies": 0.2638888955116272, |
|
"rewards/chosen": -0.000744127610232681, |
|
"rewards/margins": -8.914418140193447e-05, |
|
"rewards/rejected": -0.0006549834506586194, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.319148936170213e-07, |
|
"logits/chosen": 0.1669415831565857, |
|
"logits/rejected": 0.27017873525619507, |
|
"logps/chosen": -453.92071533203125, |
|
"logps/rejected": -443.1700744628906, |
|
"loss": 0.2137, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.0007682474097236991, |
|
"rewards/margins": -1.9114029782940634e-05, |
|
"rewards/rejected": -0.0007491334108635783, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.97872340425532e-07, |
|
"logits/chosen": 0.1750134825706482, |
|
"logits/rejected": 0.2582840919494629, |
|
"logps/chosen": -361.32965087890625, |
|
"logps/rejected": -338.42205810546875, |
|
"loss": 0.2148, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.0005828866851516068, |
|
"rewards/margins": 4.7791574615985155e-05, |
|
"rewards/rejected": -0.000630678201559931, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0638297872340427e-06, |
|
"logits/chosen": 0.1780398190021515, |
|
"logits/rejected": 0.23933863639831543, |
|
"logps/chosen": -418.8761291503906, |
|
"logps/rejected": -419.4820251464844, |
|
"loss": 0.205, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.0007866934174671769, |
|
"rewards/margins": -4.094879113836214e-05, |
|
"rewards/rejected": -0.0007457446190528572, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3297872340425533e-06, |
|
"logits/chosen": 0.21062365174293518, |
|
"logits/rejected": 0.3023103177547455, |
|
"logps/chosen": -369.01318359375, |
|
"logps/rejected": -368.9207458496094, |
|
"loss": 0.2261, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.0007744937902316451, |
|
"rewards/margins": -3.84082886739634e-05, |
|
"rewards/rejected": -0.0007360855233855546, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.595744680851064e-06, |
|
"logits/chosen": 0.21257944405078888, |
|
"logits/rejected": 0.23676128685474396, |
|
"logps/chosen": -401.72259521484375, |
|
"logps/rejected": -401.12982177734375, |
|
"loss": 0.21, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.0010942494263872504, |
|
"rewards/margins": 2.2952935978537425e-05, |
|
"rewards/rejected": -0.0011172023368999362, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8617021276595745e-06, |
|
"logits/chosen": 0.22530443966388702, |
|
"logits/rejected": 0.24157127737998962, |
|
"logps/chosen": -421.9361877441406, |
|
"logps/rejected": -450.1436462402344, |
|
"loss": 0.2112, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0015197412576526403, |
|
"rewards/margins": 2.4099141228361987e-05, |
|
"rewards/rejected": -0.0015438406262546778, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.1276595744680853e-06, |
|
"logits/chosen": 0.23238015174865723, |
|
"logits/rejected": 0.2601611614227295, |
|
"logps/chosen": -418.73187255859375, |
|
"logps/rejected": -398.03265380859375, |
|
"loss": 0.205, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.0016283972654491663, |
|
"rewards/margins": -1.6413705452578142e-05, |
|
"rewards/rejected": -0.0016119834035634995, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.393617021276596e-06, |
|
"logits/chosen": 0.17837639153003693, |
|
"logits/rejected": 0.24620842933654785, |
|
"logps/chosen": -392.7635192871094, |
|
"logps/rejected": -385.9835205078125, |
|
"loss": 0.2062, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.0008109696209430695, |
|
"rewards/margins": 8.667097426950932e-05, |
|
"rewards/rejected": -0.0008976406534202397, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.6595744680851065e-06, |
|
"logits/chosen": 0.22456176578998566, |
|
"logits/rejected": 0.2180750072002411, |
|
"logps/chosen": -411.5848693847656, |
|
"logps/rejected": -392.6322326660156, |
|
"loss": 0.2234, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.00039298724732361734, |
|
"rewards/margins": 0.00011892013571923599, |
|
"rewards/rejected": -0.0005119073903188109, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9255319148936174e-06, |
|
"logits/chosen": 0.19836413860321045, |
|
"logits/rejected": 0.3192578852176666, |
|
"logps/chosen": -410.68499755859375, |
|
"logps/rejected": -386.6772155761719, |
|
"loss": 0.2086, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.0007202932611107826, |
|
"rewards/margins": 0.00010950298747047782, |
|
"rewards/rejected": -0.0008297963067889214, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.191489361702128e-06, |
|
"logits/chosen": 0.1606016904115677, |
|
"logits/rejected": 0.28400760889053345, |
|
"logps/chosen": -401.4821472167969, |
|
"logps/rejected": -373.6825256347656, |
|
"loss": 0.2184, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.0019728371407836676, |
|
"rewards/margins": 0.0002594580873847008, |
|
"rewards/rejected": -0.0022322952281683683, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.457446808510639e-06, |
|
"logits/chosen": 0.22675403952598572, |
|
"logits/rejected": 0.2259739637374878, |
|
"logps/chosen": -432.01788330078125, |
|
"logps/rejected": -415.99237060546875, |
|
"loss": 0.2144, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0032550902105867863, |
|
"rewards/margins": 0.000516787578817457, |
|
"rewards/rejected": -0.0037718776147812605, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.723404255319149e-06, |
|
"logits/chosen": 0.13569238781929016, |
|
"logits/rejected": 0.23111942410469055, |
|
"logps/chosen": -416.4497985839844, |
|
"logps/rejected": -402.7656555175781, |
|
"loss": 0.202, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.006250211503356695, |
|
"rewards/margins": 0.0006005663308314979, |
|
"rewards/rejected": -0.006850778125226498, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.98936170212766e-06, |
|
"logits/chosen": 0.15672233700752258, |
|
"logits/rejected": 0.2764233350753784, |
|
"logps/chosen": -441.35137939453125, |
|
"logps/rejected": -409.206787109375, |
|
"loss": 0.2094, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.008352077566087246, |
|
"rewards/margins": 0.000567329756449908, |
|
"rewards/rejected": -0.008919407613575459, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.255319148936171e-06, |
|
"logits/chosen": 0.17575426399707794, |
|
"logits/rejected": 0.3381495475769043, |
|
"logps/chosen": -468.0437927246094, |
|
"logps/rejected": -437.7367248535156, |
|
"loss": 0.2069, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.011829295195639133, |
|
"rewards/margins": 0.0006891209632158279, |
|
"rewards/rejected": -0.012518415227532387, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.521276595744681e-06, |
|
"logits/chosen": 0.2566456198692322, |
|
"logits/rejected": 0.21506217122077942, |
|
"logps/chosen": -443.35540771484375, |
|
"logps/rejected": -438.29876708984375, |
|
"loss": 0.224, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.013920286670327187, |
|
"rewards/margins": 0.0008589512435719371, |
|
"rewards/rejected": -0.014779238030314445, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.787234042553192e-06, |
|
"logits/chosen": 0.1587153822183609, |
|
"logits/rejected": 0.346591055393219, |
|
"logps/chosen": -423.898681640625, |
|
"logps/rejected": -416.0426330566406, |
|
"loss": 0.2208, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.01828654855489731, |
|
"rewards/margins": 0.0014178925193846226, |
|
"rewards/rejected": -0.019704440608620644, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999982660399688e-06, |
|
"logits/chosen": 0.19637706875801086, |
|
"logits/rejected": 0.13249197602272034, |
|
"logps/chosen": -401.75360107421875, |
|
"logps/rejected": -391.2793884277344, |
|
"loss": 0.2078, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.023105399683117867, |
|
"rewards/margins": 0.0009698948706500232, |
|
"rewards/rejected": -0.02407529577612877, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.99937579964398e-06, |
|
"logits/chosen": 0.12021654844284058, |
|
"logits/rejected": 0.22755944728851318, |
|
"logps/chosen": -438.9696350097656, |
|
"logps/rejected": -436.67559814453125, |
|
"loss": 0.212, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0280294306576252, |
|
"rewards/margins": 0.001816231175325811, |
|
"rewards/rejected": -0.02984566055238247, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9979021993870645e-06, |
|
"logits/chosen": 0.17782853543758392, |
|
"logits/rejected": 0.1260356307029724, |
|
"logps/chosen": -428.07281494140625, |
|
"logps/rejected": -423.8680725097656, |
|
"loss": 0.2116, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.03266894817352295, |
|
"rewards/margins": 0.0038566484581679106, |
|
"rewards/rejected": -0.03652559593319893, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995562370647553e-06, |
|
"logits/chosen": 0.15141043066978455, |
|
"logits/rejected": 0.21045760810375214, |
|
"logps/chosen": -441.1923828125, |
|
"logps/rejected": -440.4317321777344, |
|
"loss": 0.2056, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.03683360293507576, |
|
"rewards/margins": 0.00403643399477005, |
|
"rewards/rejected": -0.04087003692984581, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.992357124836838e-06, |
|
"logits/chosen": 0.1319437474012375, |
|
"logits/rejected": 0.16590853035449982, |
|
"logps/chosen": -463.49951171875, |
|
"logps/rejected": -463.0955505371094, |
|
"loss": 0.2099, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.04471432790160179, |
|
"rewards/margins": 0.004302737768739462, |
|
"rewards/rejected": -0.04901706799864769, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9882875734777044e-06, |
|
"logits/chosen": 0.10973749309778214, |
|
"logits/rejected": 0.14370934665203094, |
|
"logps/chosen": -432.8614807128906, |
|
"logps/rejected": -412.8258361816406, |
|
"loss": 0.2081, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.048478107899427414, |
|
"rewards/margins": 0.0035480097867548466, |
|
"rewards/rejected": -0.052026115357875824, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.983355127818882e-06, |
|
"logits/chosen": 0.14486494660377502, |
|
"logits/rejected": 0.13507609069347382, |
|
"logps/chosen": -425.30743408203125, |
|
"logps/rejected": -446.130615234375, |
|
"loss": 0.1997, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.05807062238454819, |
|
"rewards/margins": 0.0065963054075837135, |
|
"rewards/rejected": -0.06466692686080933, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.977561498345639e-06, |
|
"logits/chosen": 0.18454475700855255, |
|
"logits/rejected": 0.16011472046375275, |
|
"logps/chosen": -497.3888244628906, |
|
"logps/rejected": -488.9383850097656, |
|
"loss": 0.1955, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.07542432844638824, |
|
"rewards/margins": 0.005581502337008715, |
|
"rewards/rejected": -0.08100582659244537, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970908694186624e-06, |
|
"logits/chosen": 0.04981505125761032, |
|
"logits/rejected": 0.11929504573345184, |
|
"logps/chosen": -475.73687744140625, |
|
"logps/rejected": -471.32421875, |
|
"loss": 0.2008, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.07895161211490631, |
|
"rewards/margins": 0.011724306270480156, |
|
"rewards/rejected": -0.09067590534687042, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9633990224171305e-06, |
|
"logits/chosen": 0.09068510681390762, |
|
"logits/rejected": 0.17252135276794434, |
|
"logps/chosen": -545.042724609375, |
|
"logps/rejected": -578.0839233398438, |
|
"loss": 0.189, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.10266657918691635, |
|
"rewards/margins": 0.02503531612455845, |
|
"rewards/rejected": -0.12770189344882965, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.955035087259046e-06, |
|
"logits/chosen": -0.0076313503086566925, |
|
"logits/rejected": 0.0826013833284378, |
|
"logps/chosen": -509.309326171875, |
|
"logps/rejected": -542.4077758789062, |
|
"loss": 0.1898, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.12836064398288727, |
|
"rewards/margins": 0.03418760746717453, |
|
"rewards/rejected": -0.1625482589006424, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.945819789177756e-06, |
|
"logits/chosen": -0.011387845501303673, |
|
"logits/rejected": 0.058963656425476074, |
|
"logps/chosen": -550.4841918945312, |
|
"logps/rejected": -566.2930297851562, |
|
"loss": 0.1994, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1419098824262619, |
|
"rewards/margins": 0.03765057772397995, |
|
"rewards/rejected": -0.17956045269966125, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.935756323876306e-06, |
|
"logits/chosen": 0.030229881405830383, |
|
"logits/rejected": 0.00785739440470934, |
|
"logps/chosen": -546.796875, |
|
"logps/rejected": -585.1295776367188, |
|
"loss": 0.1988, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.15364839136600494, |
|
"rewards/margins": 0.04681529477238655, |
|
"rewards/rejected": -0.2004636526107788, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.924848181187199e-06, |
|
"logits/chosen": 0.09916579723358154, |
|
"logits/rejected": 0.03708130493760109, |
|
"logps/chosen": -534.3087158203125, |
|
"logps/rejected": -577.957275390625, |
|
"loss": 0.176, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.13466373085975647, |
|
"rewards/margins": 0.046590112149715424, |
|
"rewards/rejected": -0.1812538504600525, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.913099143862173e-06, |
|
"logits/chosen": 0.09245137125253677, |
|
"logits/rejected": -0.01894455775618553, |
|
"logps/chosen": -570.8424682617188, |
|
"logps/rejected": -633.2811279296875, |
|
"loss": 0.1982, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.1720808893442154, |
|
"rewards/margins": 0.05095798522233963, |
|
"rewards/rejected": -0.22303888201713562, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.900513286260416e-06, |
|
"logits/chosen": 0.06517922878265381, |
|
"logits/rejected": 0.05662886053323746, |
|
"logps/chosen": -556.5380859375, |
|
"logps/rejected": -572.91943359375, |
|
"loss": 0.1967, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.1459980458021164, |
|
"rewards/margins": 0.031131967902183533, |
|
"rewards/rejected": -0.17713001370429993, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.887094972935645e-06, |
|
"logits/chosen": 0.053707320243120193, |
|
"logits/rejected": 0.12972167134284973, |
|
"logps/chosen": -587.4563598632812, |
|
"logps/rejected": -619.8084716796875, |
|
"loss": 0.1789, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.14950796961784363, |
|
"rewards/margins": 0.027994930744171143, |
|
"rewards/rejected": -0.17750290036201477, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.87284885712256e-06, |
|
"logits/chosen": 0.05275138095021248, |
|
"logits/rejected": 0.13531816005706787, |
|
"logps/chosen": -553.1101684570312, |
|
"logps/rejected": -604.1668701171875, |
|
"loss": 0.1926, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.13555410504341125, |
|
"rewards/margins": 0.0414896085858345, |
|
"rewards/rejected": -0.17704370617866516, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.857779879123181e-06, |
|
"logits/chosen": 0.10553497076034546, |
|
"logits/rejected": 0.027261802926659584, |
|
"logps/chosen": -506.9090881347656, |
|
"logps/rejected": -567.632568359375, |
|
"loss": 0.1952, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.12961818277835846, |
|
"rewards/margins": 0.0423763282597065, |
|
"rewards/rejected": -0.17199452221393585, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.841893264593643e-06, |
|
"logits/chosen": -0.021337047219276428, |
|
"logits/rejected": 0.10264651477336884, |
|
"logps/chosen": -569.1201171875, |
|
"logps/rejected": -573.4818115234375, |
|
"loss": 0.1775, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.15105991065502167, |
|
"rewards/margins": 0.04063944146037102, |
|
"rewards/rejected": -0.19169935584068298, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.825194522732023e-06, |
|
"logits/chosen": -0.007825059816241264, |
|
"logits/rejected": -0.015095917508006096, |
|
"logps/chosen": -517.6556396484375, |
|
"logps/rejected": -564.65673828125, |
|
"loss": 0.1934, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.15630927681922913, |
|
"rewards/margins": 0.03676559031009674, |
|
"rewards/rejected": -0.19307485222816467, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.807689444367853e-06, |
|
"logits/chosen": 0.03950309008359909, |
|
"logits/rejected": 0.025178443640470505, |
|
"logps/chosen": -613.1845703125, |
|
"logps/rejected": -667.6192016601562, |
|
"loss": 0.1839, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.16962917149066925, |
|
"rewards/margins": 0.061695653945207596, |
|
"rewards/rejected": -0.23132482171058655, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.78938409995396e-06, |
|
"logits/chosen": -0.019574914127588272, |
|
"logits/rejected": 0.10981860011816025, |
|
"logps/chosen": -587.1323852539062, |
|
"logps/rejected": -607.53955078125, |
|
"loss": 0.1968, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.1764066517353058, |
|
"rewards/margins": 0.03810610622167587, |
|
"rewards/rejected": -0.21451278030872345, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.770284837461342e-06, |
|
"logits/chosen": -0.016084378585219383, |
|
"logits/rejected": 0.11264481395483017, |
|
"logps/chosen": -570.8071899414062, |
|
"logps/rejected": -621.8716430664062, |
|
"loss": 0.2007, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.16467757523059845, |
|
"rewards/margins": 0.039321959018707275, |
|
"rewards/rejected": -0.20399951934814453, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7503982801778015e-06, |
|
"logits/chosen": 0.05447696894407272, |
|
"logits/rejected": 0.03223523125052452, |
|
"logps/chosen": -529.8160400390625, |
|
"logps/rejected": -550.2005004882812, |
|
"loss": 0.1846, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.13065846264362335, |
|
"rewards/margins": 0.04571721702814102, |
|
"rewards/rejected": -0.17637568712234497, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.729731324411104e-06, |
|
"logits/chosen": -0.021515950560569763, |
|
"logits/rejected": 0.03140898048877716, |
|
"logps/chosen": -578.0147094726562, |
|
"logps/rejected": -585.7742919921875, |
|
"loss": 0.1984, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.15668722987174988, |
|
"rewards/margins": 0.035644322633743286, |
|
"rewards/rejected": -0.19233153760433197, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7082911370974645e-06, |
|
"logits/chosen": -0.012143002822995186, |
|
"logits/rejected": -0.0082742003723979, |
|
"logps/chosen": -486.759521484375, |
|
"logps/rejected": -521.0432739257812, |
|
"loss": 0.199, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.127678781747818, |
|
"rewards/margins": 0.03519933670759201, |
|
"rewards/rejected": -0.1628781259059906, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.68608515331618e-06, |
|
"logits/chosen": -0.023242251947522163, |
|
"logits/rejected": 0.003186366055160761, |
|
"logps/chosen": -485.2528381347656, |
|
"logps/rejected": -513.8176879882812, |
|
"loss": 0.1925, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.11917861551046371, |
|
"rewards/margins": 0.047842614352703094, |
|
"rewards/rejected": -0.167021244764328, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.663121073711269e-06, |
|
"logits/chosen": 0.01416336465626955, |
|
"logits/rejected": -0.06039174646139145, |
|
"logps/chosen": -549.2015380859375, |
|
"logps/rejected": -583.758544921875, |
|
"loss": 0.1822, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.14279165863990784, |
|
"rewards/margins": 0.038277558982372284, |
|
"rewards/rejected": -0.18106922507286072, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.63940686182103e-06, |
|
"logits/chosen": 0.01400854904204607, |
|
"logits/rejected": 0.10263659805059433, |
|
"logps/chosen": -515.437255859375, |
|
"logps/rejected": -564.8402099609375, |
|
"loss": 0.1976, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.13872714340686798, |
|
"rewards/margins": 0.03997962549328804, |
|
"rewards/rejected": -0.17870678007602692, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.614950741316425e-06, |
|
"logits/chosen": -0.06926377862691879, |
|
"logits/rejected": 0.03542783483862877, |
|
"logps/chosen": -540.4765625, |
|
"logps/rejected": -549.0443115234375, |
|
"loss": 0.1897, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.13047516345977783, |
|
"rewards/margins": 0.03688601404428482, |
|
"rewards/rejected": -0.16736117005348206, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.589761193149254e-06, |
|
"logits/chosen": -0.055144570767879486, |
|
"logits/rejected": 0.00845063291490078, |
|
"logps/chosen": -540.0427856445312, |
|
"logps/rejected": -555.8956298828125, |
|
"loss": 0.1863, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.14786241948604584, |
|
"rewards/margins": 0.03734371438622475, |
|
"rewards/rejected": -0.18520613014698029, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.563846952611112e-06, |
|
"logits/chosen": -0.06796270608901978, |
|
"logits/rejected": -0.021189894527196884, |
|
"logps/chosen": -538.6000366210938, |
|
"logps/rejected": -609.37353515625, |
|
"loss": 0.1812, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.14661245048046112, |
|
"rewards/margins": 0.0674336701631546, |
|
"rewards/rejected": -0.2140461504459381, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.537217006304141e-06, |
|
"logits/chosen": -0.13910801708698273, |
|
"logits/rejected": 0.04259239882230759, |
|
"logps/chosen": -555.0911865234375, |
|
"logps/rejected": -552.6492919921875, |
|
"loss": 0.1901, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.15548577904701233, |
|
"rewards/margins": 0.03300771862268448, |
|
"rewards/rejected": -0.18849347531795502, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.50988058902464e-06, |
|
"logits/chosen": -0.047577690333127975, |
|
"logits/rejected": 0.034965887665748596, |
|
"logps/chosen": -664.0265502929688, |
|
"logps/rejected": -692.3294677734375, |
|
"loss": 0.1941, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.1734737604856491, |
|
"rewards/margins": 0.04430503025650978, |
|
"rewards/rejected": -0.21777880191802979, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.481847180560593e-06, |
|
"logits/chosen": -0.013295474462211132, |
|
"logits/rejected": -0.07179677486419678, |
|
"logps/chosen": -540.1502685546875, |
|
"logps/rejected": -567.1649169921875, |
|
"loss": 0.1972, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1351390928030014, |
|
"rewards/margins": 0.03879848122596741, |
|
"rewards/rejected": -0.1739375740289688, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.453126502404253e-06, |
|
"logits/chosen": -0.10417531430721283, |
|
"logits/rejected": 0.025454232469201088, |
|
"logps/chosen": -571.4408569335938, |
|
"logps/rejected": -581.1018676757812, |
|
"loss": 0.1785, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.13031336665153503, |
|
"rewards/margins": 0.04332467168569565, |
|
"rewards/rejected": -0.17363804578781128, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.423728514380892e-06, |
|
"logits/chosen": -0.09106893092393875, |
|
"logits/rejected": 0.05246344953775406, |
|
"logps/chosen": -590.4605712890625, |
|
"logps/rejected": -646.8682861328125, |
|
"loss": 0.1797, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.16008590161800385, |
|
"rewards/margins": 0.03608149290084839, |
|
"rewards/rejected": -0.19616740942001343, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.393663411194918e-06, |
|
"logits/chosen": -0.00037176310434006155, |
|
"logits/rejected": -0.05788201093673706, |
|
"logps/chosen": -603.2279663085938, |
|
"logps/rejected": -637.8117065429688, |
|
"loss": 0.2042, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.15948781371116638, |
|
"rewards/margins": 0.039536003023386, |
|
"rewards/rejected": -0.19902381300926208, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.362941618894523e-06, |
|
"logits/chosen": -0.06197139620780945, |
|
"logits/rejected": -0.04511360824108124, |
|
"logps/chosen": -502.40704345703125, |
|
"logps/rejected": -549.5247802734375, |
|
"loss": 0.1904, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.1260320395231247, |
|
"rewards/margins": 0.04073121398687363, |
|
"rewards/rejected": -0.1667632758617401, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.331573791256116e-06, |
|
"logits/chosen": -0.03825578838586807, |
|
"logits/rejected": 0.00044789613457396626, |
|
"logps/chosen": -474.03973388671875, |
|
"logps/rejected": -531.3597412109375, |
|
"loss": 0.1772, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.11952260881662369, |
|
"rewards/margins": 0.05453835800290108, |
|
"rewards/rejected": -0.17406097054481506, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.299570806089786e-06, |
|
"logits/chosen": -0.06856271624565125, |
|
"logits/rejected": -0.03950439766049385, |
|
"logps/chosen": -530.919189453125, |
|
"logps/rejected": -575.9044189453125, |
|
"loss": 0.1848, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1305655688047409, |
|
"rewards/margins": 0.04401278868317604, |
|
"rewards/rejected": -0.17457836866378784, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.266943761467057e-06, |
|
"logits/chosen": -0.008585452102124691, |
|
"logits/rejected": -0.08859982341527939, |
|
"logps/chosen": -561.964599609375, |
|
"logps/rejected": -607.6642456054688, |
|
"loss": 0.1815, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.13443836569786072, |
|
"rewards/margins": 0.061653982847929, |
|
"rewards/rejected": -0.19609235227108002, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.233703971872287e-06, |
|
"logits/chosen": -0.11785644292831421, |
|
"logits/rejected": 0.07683457434177399, |
|
"logps/chosen": -580.4694213867188, |
|
"logps/rejected": -604.2586669921875, |
|
"loss": 0.1988, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.14866627752780914, |
|
"rewards/margins": 0.03873666003346443, |
|
"rewards/rejected": -0.18740293383598328, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1998629642789925e-06, |
|
"logits/chosen": -0.08321277797222137, |
|
"logits/rejected": -0.059583961963653564, |
|
"logps/chosen": -520.8612060546875, |
|
"logps/rejected": -594.0638427734375, |
|
"loss": 0.1774, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.1341467797756195, |
|
"rewards/margins": 0.05918589234352112, |
|
"rewards/rejected": -0.19333268702030182, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.165432474152505e-06, |
|
"logits/chosen": -0.09531132876873016, |
|
"logits/rejected": -0.0270084198564291, |
|
"logps/chosen": -528.1355590820312, |
|
"logps/rejected": -581.7833251953125, |
|
"loss": 0.1745, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.13981810212135315, |
|
"rewards/margins": 0.07118718326091766, |
|
"rewards/rejected": -0.211005300283432, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.130424441380308e-06, |
|
"logits/chosen": -0.10320959240198135, |
|
"logits/rejected": -0.019093522801995277, |
|
"logps/chosen": -571.685791015625, |
|
"logps/rejected": -591.3698120117188, |
|
"loss": 0.2015, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.16250738501548767, |
|
"rewards/margins": 0.04821362346410751, |
|
"rewards/rejected": -0.21072101593017578, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.09485100613151e-06, |
|
"logits/chosen": -0.11241753399372101, |
|
"logits/rejected": -0.032273612916469574, |
|
"logps/chosen": -624.3687133789062, |
|
"logps/rejected": -671.1177368164062, |
|
"loss": 0.1973, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.17460055649280548, |
|
"rewards/margins": 0.044025253504514694, |
|
"rewards/rejected": -0.21862581372261047, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.058724504646834e-06, |
|
"logits/chosen": -0.029143745079636574, |
|
"logits/rejected": -0.060622453689575195, |
|
"logps/chosen": -519.5960083007812, |
|
"logps/rejected": -589.2883911132812, |
|
"loss": 0.1632, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1383863389492035, |
|
"rewards/margins": 0.062351007014513016, |
|
"rewards/rejected": -0.2007373571395874, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.022057464960632e-06, |
|
"logits/chosen": -0.06448385864496231, |
|
"logits/rejected": -0.04329472780227661, |
|
"logps/chosen": -536.9004516601562, |
|
"logps/rejected": -563.5870361328125, |
|
"loss": 0.1866, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.14495059847831726, |
|
"rewards/margins": 0.03540947288274765, |
|
"rewards/rejected": -0.1803600788116455, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.984862602556383e-06, |
|
"logits/chosen": -0.004617185331881046, |
|
"logits/rejected": -0.01678011380136013, |
|
"logps/chosen": -555.3660278320312, |
|
"logps/rejected": -594.7876586914062, |
|
"loss": 0.182, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.13744883239269257, |
|
"rewards/margins": 0.038119856268167496, |
|
"rewards/rejected": -0.17556868493556976, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.947152815957187e-06, |
|
"logits/chosen": -0.11041183769702911, |
|
"logits/rejected": -0.09442894160747528, |
|
"logps/chosen": -475.54583740234375, |
|
"logps/rejected": -500.1151428222656, |
|
"loss": 0.1817, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.11575014889240265, |
|
"rewards/margins": 0.033661358058452606, |
|
"rewards/rejected": -0.14941151440143585, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.908941182252785e-06, |
|
"logits/chosen": -0.10789042711257935, |
|
"logits/rejected": -0.014411434531211853, |
|
"logps/chosen": -543.808837890625, |
|
"logps/rejected": -581.8538818359375, |
|
"loss": 0.1886, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.1326008141040802, |
|
"rewards/margins": 0.03887839615345001, |
|
"rewards/rejected": -0.17147919535636902, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.8702409525646535e-06, |
|
"logits/chosen": -0.10680530220270157, |
|
"logits/rejected": -0.07425309717655182, |
|
"logps/chosen": -522.1688842773438, |
|
"logps/rejected": -554.4418334960938, |
|
"loss": 0.1864, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.12628528475761414, |
|
"rewards/margins": 0.04535987228155136, |
|
"rewards/rejected": -0.1716451495885849, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8310655474507495e-06, |
|
"logits/chosen": -0.10503558814525604, |
|
"logits/rejected": -0.013972464017570019, |
|
"logps/chosen": -550.2520751953125, |
|
"logps/rejected": -571.8939208984375, |
|
"loss": 0.1934, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.14120154082775116, |
|
"rewards/margins": 0.0461118221282959, |
|
"rewards/rejected": -0.18731336295604706, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.7914285522515002e-06, |
|
"logits/chosen": -0.05753170698881149, |
|
"logits/rejected": -0.05392669886350632, |
|
"logps/chosen": -513.2532348632812, |
|
"logps/rejected": -580.1729736328125, |
|
"loss": 0.186, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.13195370137691498, |
|
"rewards/margins": 0.04442184790968895, |
|
"rewards/rejected": -0.17637556791305542, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.751343712378639e-06, |
|
"logits/chosen": -0.03736535459756851, |
|
"logits/rejected": -0.001959963236004114, |
|
"logps/chosen": -531.0137939453125, |
|
"logps/rejected": -591.2086181640625, |
|
"loss": 0.1881, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1330515593290329, |
|
"rewards/margins": 0.05102803185582161, |
|
"rewards/rejected": -0.1840795874595642, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.710824928548546e-06, |
|
"logits/chosen": -0.10402516275644302, |
|
"logits/rejected": -0.04912823066115379, |
|
"logps/chosen": -558.085205078125, |
|
"logps/rejected": -588.9461669921875, |
|
"loss": 0.2009, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.13824841380119324, |
|
"rewards/margins": 0.03640920668840408, |
|
"rewards/rejected": -0.17465761303901672, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6698862519617225e-06, |
|
"logits/chosen": -0.06400999426841736, |
|
"logits/rejected": -0.06958254426717758, |
|
"logps/chosen": -541.27099609375, |
|
"logps/rejected": -621.9313354492188, |
|
"loss": 0.1797, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.12520474195480347, |
|
"rewards/margins": 0.06592214107513428, |
|
"rewards/rejected": -0.19112688302993774, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6285418794300793e-06, |
|
"logits/chosen": -0.13884581625461578, |
|
"logits/rejected": -0.11223921924829483, |
|
"logps/chosen": -552.3884887695312, |
|
"logps/rejected": -589.5878295898438, |
|
"loss": 0.1734, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.14118912816047668, |
|
"rewards/margins": 0.051578063517808914, |
|
"rewards/rejected": -0.1927671879529953, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5868061484537365e-06, |
|
"logits/chosen": -0.07228229939937592, |
|
"logits/rejected": 0.038021642714738846, |
|
"logps/chosen": -565.1091918945312, |
|
"logps/rejected": -591.867431640625, |
|
"loss": 0.2045, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.1505979746580124, |
|
"rewards/margins": 0.04035702347755432, |
|
"rewards/rejected": -0.1909550130367279, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5446935322490285e-06, |
|
"logits/chosen": -0.04605743661522865, |
|
"logits/rejected": 0.011277568526566029, |
|
"logps/chosen": -562.3070678710938, |
|
"logps/rejected": -597.06201171875, |
|
"loss": 0.1925, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.14476799964904785, |
|
"rewards/margins": 0.04444306343793869, |
|
"rewards/rejected": -0.18921108543872833, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.502218634729447e-06, |
|
"logits/chosen": -0.06661860644817352, |
|
"logits/rejected": -0.13232673704624176, |
|
"logps/chosen": -506.79315185546875, |
|
"logps/rejected": -562.1184692382812, |
|
"loss": 0.1726, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.12148015201091766, |
|
"rewards/margins": 0.05448601767420769, |
|
"rewards/rejected": -0.17596617341041565, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.459396185441265e-06, |
|
"logits/chosen": -0.13019916415214539, |
|
"logits/rejected": -0.08353747427463531, |
|
"logps/chosen": -481.433837890625, |
|
"logps/rejected": -490.4483947753906, |
|
"loss": 0.1862, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.11900192499160767, |
|
"rewards/margins": 0.037384189665317535, |
|
"rewards/rejected": -0.1563861072063446, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4162410344555834e-06, |
|
"logits/chosen": -0.0683845803141594, |
|
"logits/rejected": -0.03088710829615593, |
|
"logps/chosen": -492.447021484375, |
|
"logps/rejected": -564.9397583007812, |
|
"loss": 0.1862, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.13119229674339294, |
|
"rewards/margins": 0.05578438565135002, |
|
"rewards/rejected": -0.18697668612003326, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3727681472185937e-06, |
|
"logits/chosen": -0.09018312394618988, |
|
"logits/rejected": -0.10359557718038559, |
|
"logps/chosen": -537.9823608398438, |
|
"logps/rejected": -594.79443359375, |
|
"loss": 0.1849, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.13429854810237885, |
|
"rewards/margins": 0.06120805814862251, |
|
"rewards/rejected": -0.19550660252571106, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3289925993618217e-06, |
|
"logits/chosen": -0.18850287795066833, |
|
"logits/rejected": -0.04869599640369415, |
|
"logps/chosen": -496.01007080078125, |
|
"logps/rejected": -560.2242431640625, |
|
"loss": 0.1825, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.12265129387378693, |
|
"rewards/margins": 0.05625399202108383, |
|
"rewards/rejected": -0.17890527844429016, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2849295714741643e-06, |
|
"logits/chosen": -0.19866463541984558, |
|
"logits/rejected": -0.004881598986685276, |
|
"logps/chosen": -532.1676635742188, |
|
"logps/rejected": -571.507568359375, |
|
"loss": 0.1791, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.12184770405292511, |
|
"rewards/margins": 0.05913316085934639, |
|
"rewards/rejected": -0.18098084628582, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2405943438375287e-06, |
|
"logits/chosen": -0.2018626630306244, |
|
"logits/rejected": -0.10594689846038818, |
|
"logps/chosen": -488.4137268066406, |
|
"logps/rejected": -556.1409912109375, |
|
"loss": 0.1846, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.12942823767662048, |
|
"rewards/margins": 0.05433051660656929, |
|
"rewards/rejected": -0.18375876545906067, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1960022911279036e-06, |
|
"logits/chosen": -0.16193589568138123, |
|
"logits/rejected": -0.2137628048658371, |
|
"logps/chosen": -484.3509826660156, |
|
"logps/rejected": -553.5465698242188, |
|
"loss": 0.1964, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.12599757313728333, |
|
"rewards/margins": 0.049567125737667084, |
|
"rewards/rejected": -0.1755646914243698, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1511688770836844e-06, |
|
"logits/chosen": -0.01477061491459608, |
|
"logits/rejected": -0.20513947308063507, |
|
"logps/chosen": -559.4446411132812, |
|
"logps/rejected": -626.2364501953125, |
|
"loss": 0.171, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.13351976871490479, |
|
"rewards/margins": 0.07253735512495041, |
|
"rewards/rejected": -0.2060571163892746, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1061096491431307e-06, |
|
"logits/chosen": -0.23158374428749084, |
|
"logits/rejected": -0.15197685360908508, |
|
"logps/chosen": -511.8590393066406, |
|
"logps/rejected": -550.5593872070312, |
|
"loss": 0.2039, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.11726844310760498, |
|
"rewards/margins": 0.04829251766204834, |
|
"rewards/rejected": -0.16556094586849213, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0608402330527796e-06, |
|
"logits/chosen": -0.17909975349903107, |
|
"logits/rejected": -0.11886115372180939, |
|
"logps/chosen": -516.4205932617188, |
|
"logps/rejected": -559.4550170898438, |
|
"loss": 0.1824, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.1072378158569336, |
|
"rewards/margins": 0.04554293677210808, |
|
"rewards/rejected": -0.15278074145317078, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0153763274487176e-06, |
|
"logits/chosen": -0.14633020758628845, |
|
"logits/rejected": -0.08009175956249237, |
|
"logps/chosen": -497.86724853515625, |
|
"logps/rejected": -534.5050659179688, |
|
"loss": 0.1581, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.09393012523651123, |
|
"rewards/margins": 0.05776657536625862, |
|
"rewards/rejected": -0.15169671177864075, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9697336984125683e-06, |
|
"logits/chosen": -0.17566119134426117, |
|
"logits/rejected": -0.08242344111204147, |
|
"logps/chosen": -552.6607055664062, |
|
"logps/rejected": -587.2052612304688, |
|
"loss": 0.1719, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.09468172490596771, |
|
"rewards/margins": 0.05334918573498726, |
|
"rewards/rejected": -0.14803092181682587, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.923928174004094e-06, |
|
"logits/chosen": -0.05346371978521347, |
|
"logits/rejected": -0.1932278424501419, |
|
"logps/chosen": -535.7907104492188, |
|
"logps/rejected": -599.4446411132812, |
|
"loss": 0.1681, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.09776908159255981, |
|
"rewards/margins": 0.05439938232302666, |
|
"rewards/rejected": -0.15216846764087677, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8779756387723036e-06, |
|
"logits/chosen": -0.17646734416484833, |
|
"logits/rejected": -0.10943827778100967, |
|
"logps/chosen": -508.94232177734375, |
|
"logps/rejected": -568.2824096679688, |
|
"loss": 0.1889, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.10447195917367935, |
|
"rewards/margins": 0.05140892416238785, |
|
"rewards/rejected": -0.1558808833360672, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.831892028246968e-06, |
|
"logits/chosen": -0.08494242280721664, |
|
"logits/rejected": -0.18166685104370117, |
|
"logps/chosen": -509.8265686035156, |
|
"logps/rejected": -575.4097900390625, |
|
"loss": 0.1796, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.09938603639602661, |
|
"rewards/margins": 0.06175467371940613, |
|
"rewards/rejected": -0.16114071011543274, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7856933234124617e-06, |
|
"logits/chosen": -0.11445019394159317, |
|
"logits/rejected": -0.12723931670188904, |
|
"logps/chosen": -484.7312927246094, |
|
"logps/rejected": -564.54345703125, |
|
"loss": 0.1861, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.10803971439599991, |
|
"rewards/margins": 0.055267833173274994, |
|
"rewards/rejected": -0.1633075475692749, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7393955451658387e-06, |
|
"logits/chosen": -0.14208866655826569, |
|
"logits/rejected": -0.020831745117902756, |
|
"logps/chosen": -528.3738403320312, |
|
"logps/rejected": -538.9185180664062, |
|
"loss": 0.1867, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.11705558001995087, |
|
"rewards/margins": 0.04653087258338928, |
|
"rewards/rejected": -0.16358645260334015, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6930147487610667e-06, |
|
"logits/chosen": -0.21168622374534607, |
|
"logits/rejected": -0.10795080661773682, |
|
"logps/chosen": -551.8548583984375, |
|
"logps/rejected": -576.697998046875, |
|
"loss": 0.1936, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.12270279228687286, |
|
"rewards/margins": 0.041682593524456024, |
|
"rewards/rejected": -0.1643853634595871, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6465670182413487e-06, |
|
"logits/chosen": -0.08504590392112732, |
|
"logits/rejected": -0.11864916980266571, |
|
"logps/chosen": -531.6979370117188, |
|
"logps/rejected": -553.0827026367188, |
|
"loss": 0.1765, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.107094407081604, |
|
"rewards/margins": 0.04743362218141556, |
|
"rewards/rejected": -0.15452802181243896, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6000684608614594e-06, |
|
"logits/chosen": -0.08871813118457794, |
|
"logits/rejected": -0.20158669352531433, |
|
"logps/chosen": -532.0164794921875, |
|
"logps/rejected": -609.779052734375, |
|
"loss": 0.1869, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.10916718095541, |
|
"rewards/margins": 0.06110434979200363, |
|
"rewards/rejected": -0.17027154564857483, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5535352015020338e-06, |
|
"logits/chosen": -0.13045457005500793, |
|
"logits/rejected": -0.15182146430015564, |
|
"logps/chosen": -511.3404846191406, |
|
"logps/rejected": -542.553466796875, |
|
"loss": 0.1791, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.10936908423900604, |
|
"rewards/margins": 0.04454661160707474, |
|
"rewards/rejected": -0.15391568839550018, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.506983377077741e-06, |
|
"logits/chosen": -0.10655238479375839, |
|
"logits/rejected": -0.07288862764835358, |
|
"logps/chosen": -545.8250732421875, |
|
"logps/rejected": -581.5811767578125, |
|
"loss": 0.1716, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.11866410076618195, |
|
"rewards/margins": 0.06786342710256577, |
|
"rewards/rejected": -0.18652752041816711, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.460429130941289e-06, |
|
"logits/chosen": -0.18012507259845734, |
|
"logits/rejected": -0.11784622818231583, |
|
"logps/chosen": -466.735107421875, |
|
"logps/rejected": -501.16302490234375, |
|
"loss": 0.1912, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.10379817336797714, |
|
"rewards/margins": 0.051670271903276443, |
|
"rewards/rejected": -0.15546846389770508, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.413888607285192e-06, |
|
"logits/chosen": -0.20981307327747345, |
|
"logits/rejected": -0.1565089374780655, |
|
"logps/chosen": -503.9241638183594, |
|
"logps/rejected": -531.1654052734375, |
|
"loss": 0.2048, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.11265435069799423, |
|
"rewards/margins": 0.036974333226680756, |
|
"rewards/rejected": -0.14962869882583618, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.367377945543249e-06, |
|
"logits/chosen": -0.17128372192382812, |
|
"logits/rejected": -0.1380850225687027, |
|
"logps/chosen": -493.03936767578125, |
|
"logps/rejected": -539.0289306640625, |
|
"loss": 0.194, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.10857043415307999, |
|
"rewards/margins": 0.04022621735930443, |
|
"rewards/rejected": -0.14879664778709412, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.320913274793676e-06, |
|
"logits/chosen": -0.14254216849803925, |
|
"logits/rejected": -0.13331882655620575, |
|
"logps/chosen": -505.95428466796875, |
|
"logps/rejected": -575.7135009765625, |
|
"loss": 0.1834, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.11139512062072754, |
|
"rewards/margins": 0.05586816743016243, |
|
"rewards/rejected": -0.16726329922676086, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.27451070816582e-06, |
|
"logits/chosen": -0.1611151099205017, |
|
"logits/rejected": -0.06889496743679047, |
|
"logps/chosen": -582.8988037109375, |
|
"logps/rejected": -629.1710205078125, |
|
"loss": 0.1726, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11593208461999893, |
|
"rewards/margins": 0.06972763687372208, |
|
"rewards/rejected": -0.1856597363948822, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.228186337252414e-06, |
|
"logits/chosen": -0.22832027077674866, |
|
"logits/rejected": -0.03726217523217201, |
|
"logps/chosen": -508.06768798828125, |
|
"logps/rejected": -514.2000732421875, |
|
"loss": 0.1795, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.10370014607906342, |
|
"rewards/margins": 0.04398902878165245, |
|
"rewards/rejected": -0.14768919348716736, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1819562265292946e-06, |
|
"logits/chosen": -0.20967726409435272, |
|
"logits/rejected": -0.1568584442138672, |
|
"logps/chosen": -568.8887329101562, |
|
"logps/rejected": -614.3547973632812, |
|
"loss": 0.1752, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.10962442308664322, |
|
"rewards/margins": 0.060089100152254105, |
|
"rewards/rejected": -0.16971352696418762, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1358364077845236e-06, |
|
"logits/chosen": -0.23702092468738556, |
|
"logits/rejected": -0.054711341857910156, |
|
"logps/chosen": -569.4314575195312, |
|
"logps/rejected": -610.4983520507812, |
|
"loss": 0.1861, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.13101813197135925, |
|
"rewards/margins": 0.043327562510967255, |
|
"rewards/rejected": -0.1743457019329071, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.089842874558849e-06, |
|
"logits/chosen": -0.13204485177993774, |
|
"logits/rejected": -0.18103663623332977, |
|
"logps/chosen": -501.7928771972656, |
|
"logps/rejected": -548.9622192382812, |
|
"loss": 0.1908, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.10908225923776627, |
|
"rewards/margins": 0.034237340092659, |
|
"rewards/rejected": -0.14331960678100586, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0439915765994242e-06, |
|
"logits/chosen": -0.178396537899971, |
|
"logits/rejected": -0.2268516570329666, |
|
"logps/chosen": -512.7816772460938, |
|
"logps/rejected": -568.0106811523438, |
|
"loss": 0.1771, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.11387573182582855, |
|
"rewards/margins": 0.05218449980020523, |
|
"rewards/rejected": -0.16606023907661438, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9982984143287186e-06, |
|
"logits/chosen": -0.18940022587776184, |
|
"logits/rejected": -0.2560541033744812, |
|
"logps/chosen": -531.294677734375, |
|
"logps/rejected": -585.197509765625, |
|
"loss": 0.1857, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.10909552872180939, |
|
"rewards/margins": 0.05390959978103638, |
|
"rewards/rejected": -0.16300514340400696, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.95277923333053e-06, |
|
"logits/chosen": -0.17567074298858643, |
|
"logits/rejected": -0.08641554415225983, |
|
"logps/chosen": -553.5035400390625, |
|
"logps/rejected": -660.8178100585938, |
|
"loss": 0.1567, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.10895760357379913, |
|
"rewards/margins": 0.08049053698778152, |
|
"rewards/rejected": -0.18944814801216125, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9074498188550156e-06, |
|
"logits/chosen": -0.23474335670471191, |
|
"logits/rejected": -0.10942939668893814, |
|
"logps/chosen": -514.3484497070312, |
|
"logps/rejected": -560.9384765625, |
|
"loss": 0.1795, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.10143520683050156, |
|
"rewards/margins": 0.06253394484519958, |
|
"rewards/rejected": -0.16396915912628174, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.862325890344643e-06, |
|
"logits/chosen": -0.20588059723377228, |
|
"logits/rejected": -0.11454887688159943, |
|
"logps/chosen": -492.59356689453125, |
|
"logps/rejected": -502.0108947753906, |
|
"loss": 0.1959, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.11700236797332764, |
|
"rewards/margins": 0.039218030869960785, |
|
"rewards/rejected": -0.15622040629386902, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.817423095982972e-06, |
|
"logits/chosen": -0.1135745421051979, |
|
"logits/rejected": -0.21998748183250427, |
|
"logps/chosen": -511.43975830078125, |
|
"logps/rejected": -591.8009033203125, |
|
"loss": 0.1752, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.11385758221149445, |
|
"rewards/margins": 0.062401823699474335, |
|
"rewards/rejected": -0.17625939846038818, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7727570072681293e-06, |
|
"logits/chosen": -0.1687012016773224, |
|
"logits/rejected": -0.09966389834880829, |
|
"logps/chosen": -462.24664306640625, |
|
"logps/rejected": -541.049072265625, |
|
"loss": 0.1691, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.11714209616184235, |
|
"rewards/margins": 0.0646623969078064, |
|
"rewards/rejected": -0.18180450797080994, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7283431136128961e-06, |
|
"logits/chosen": -0.246677964925766, |
|
"logits/rejected": -0.1048688292503357, |
|
"logps/chosen": -603.5682983398438, |
|
"logps/rejected": -627.1761474609375, |
|
"loss": 0.1809, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.14629192650318146, |
|
"rewards/margins": 0.04966907575726509, |
|
"rewards/rejected": -0.19596099853515625, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6841968169732478e-06, |
|
"logits/chosen": -0.19738665223121643, |
|
"logits/rejected": -0.19479918479919434, |
|
"logps/chosen": -532.4442138671875, |
|
"logps/rejected": -585.4338989257812, |
|
"loss": 0.182, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.12195596843957901, |
|
"rewards/margins": 0.06082000583410263, |
|
"rewards/rejected": -0.18277597427368164, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6403334265072284e-06, |
|
"logits/chosen": -0.25693798065185547, |
|
"logits/rejected": -0.07481320202350616, |
|
"logps/chosen": -533.8644409179688, |
|
"logps/rejected": -563.401123046875, |
|
"loss": 0.197, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.12102751433849335, |
|
"rewards/margins": 0.05077686160802841, |
|
"rewards/rejected": -0.17180435359477997, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5967681532660066e-06, |
|
"logits/chosen": -0.2309873402118683, |
|
"logits/rejected": -0.10137152671813965, |
|
"logps/chosen": -551.7896728515625, |
|
"logps/rejected": -624.3748779296875, |
|
"loss": 0.19, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.11541406065225601, |
|
"rewards/margins": 0.06133885309100151, |
|
"rewards/rejected": -0.17675292491912842, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5535161049189463e-06, |
|
"logits/chosen": -0.2246209681034088, |
|
"logits/rejected": -0.13239261507987976, |
|
"logps/chosen": -519.9109497070312, |
|
"logps/rejected": -560.1650390625, |
|
"loss": 0.1923, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1011136993765831, |
|
"rewards/margins": 0.05040975660085678, |
|
"rewards/rejected": -0.15152345597743988, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5105922805145356e-06, |
|
"logits/chosen": -0.16264840960502625, |
|
"logits/rejected": -0.17924869060516357, |
|
"logps/chosen": -568.4891967773438, |
|
"logps/rejected": -606.84375, |
|
"loss": 0.1893, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.11496226489543915, |
|
"rewards/margins": 0.05937162786722183, |
|
"rewards/rejected": -0.17433388531208038, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4680115652789823e-06, |
|
"logits/chosen": -0.23676082491874695, |
|
"logits/rejected": -0.10287532955408096, |
|
"logps/chosen": -539.798095703125, |
|
"logps/rejected": -559.6898193359375, |
|
"loss": 0.1844, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.103573277592659, |
|
"rewards/margins": 0.044529445469379425, |
|
"rewards/rejected": -0.14810271561145782, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4257887254542767e-06, |
|
"logits/chosen": -0.16643217206001282, |
|
"logits/rejected": -0.20663738250732422, |
|
"logps/chosen": -477.3910217285156, |
|
"logps/rejected": -546.283203125, |
|
"loss": 0.1722, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.09414063394069672, |
|
"rewards/margins": 0.0609886460006237, |
|
"rewards/rejected": -0.15512928366661072, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3839384031775227e-06, |
|
"logits/chosen": -0.2298511266708374, |
|
"logits/rejected": -0.11243085563182831, |
|
"logps/chosen": -501.55517578125, |
|
"logps/rejected": -560.3333740234375, |
|
"loss": 0.2001, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.10863594710826874, |
|
"rewards/margins": 0.04755181074142456, |
|
"rewards/rejected": -0.1561877578496933, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.342475111403298e-06, |
|
"logits/chosen": -0.24995306134223938, |
|
"logits/rejected": -0.09091716259717941, |
|
"logps/chosen": -546.91455078125, |
|
"logps/rejected": -573.63134765625, |
|
"loss": 0.1758, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.09987141191959381, |
|
"rewards/margins": 0.051563430577516556, |
|
"rewards/rejected": -0.15143482387065887, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3014132288708209e-06, |
|
"logits/chosen": -0.20659741759300232, |
|
"logits/rejected": -0.1793586015701294, |
|
"logps/chosen": -461.78741455078125, |
|
"logps/rejected": -546.7547607421875, |
|
"loss": 0.1772, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.09007732570171356, |
|
"rewards/margins": 0.06504637002944946, |
|
"rewards/rejected": -0.15512368083000183, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2607669951176549e-06, |
|
"logits/chosen": -0.16400612890720367, |
|
"logits/rejected": -0.10733946412801743, |
|
"logps/chosen": -477.58673095703125, |
|
"logps/rejected": -483.0821838378906, |
|
"loss": 0.1908, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.09252551943063736, |
|
"rewards/margins": 0.03763090446591377, |
|
"rewards/rejected": -0.13015642762184143, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2205505055416891e-06, |
|
"logits/chosen": -0.23552432656288147, |
|
"logits/rejected": -0.05100318789482117, |
|
"logps/chosen": -508.714111328125, |
|
"logps/rejected": -520.7734375, |
|
"loss": 0.1903, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.10599436610937119, |
|
"rewards/margins": 0.04144153743982315, |
|
"rewards/rejected": -0.14743590354919434, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1807777065131002e-06, |
|
"logits/chosen": -0.23125293850898743, |
|
"logits/rejected": -0.17557989060878754, |
|
"logps/chosen": -544.9722900390625, |
|
"logps/rejected": -586.7325439453125, |
|
"loss": 0.1788, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.10128601640462875, |
|
"rewards/margins": 0.07126955687999725, |
|
"rewards/rejected": -0.1725555956363678, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1414623905380012e-06, |
|
"logits/chosen": -0.23312774300575256, |
|
"logits/rejected": -0.14277367293834686, |
|
"logps/chosen": -485.92535400390625, |
|
"logps/rejected": -506.35205078125, |
|
"loss": 0.194, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.09930580854415894, |
|
"rewards/margins": 0.03314018249511719, |
|
"rewards/rejected": -0.13244597613811493, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1026181914754388e-06, |
|
"logits/chosen": -0.18930380046367645, |
|
"logits/rejected": -0.14925286173820496, |
|
"logps/chosen": -466.244140625, |
|
"logps/rejected": -539.2670288085938, |
|
"loss": 0.1836, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.09682577848434448, |
|
"rewards/margins": 0.0532786026597023, |
|
"rewards/rejected": -0.1501043736934662, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0642585798094136e-06, |
|
"logits/chosen": -0.19082944095134735, |
|
"logits/rejected": -0.18094971776008606, |
|
"logps/chosen": -499.90362548828125, |
|
"logps/rejected": -542.33251953125, |
|
"loss": 0.1693, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.09551337361335754, |
|
"rewards/margins": 0.051950085908174515, |
|
"rewards/rejected": -0.14746347069740295, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0263968579775522e-06, |
|
"logits/chosen": -0.19364750385284424, |
|
"logits/rejected": -0.22844457626342773, |
|
"logps/chosen": -481.7339782714844, |
|
"logps/rejected": -537.8257446289062, |
|
"loss": 0.173, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.10904572159051895, |
|
"rewards/margins": 0.05478460714221001, |
|
"rewards/rejected": -0.16383032500743866, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.89046155758058e-07, |
|
"logits/chosen": -0.1437099277973175, |
|
"logits/rejected": -0.1075567975640297, |
|
"logps/chosen": -463.90985107421875, |
|
"logps/rejected": -566.3432006835938, |
|
"loss": 0.1896, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.09277530014514923, |
|
"rewards/margins": 0.0687888115644455, |
|
"rewards/rejected": -0.16156412661075592, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.52219425716534e-07, |
|
"logits/chosen": -0.242875337600708, |
|
"logits/rejected": -0.16699561476707458, |
|
"logps/chosen": -552.685302734375, |
|
"logps/rejected": -590.0584106445312, |
|
"loss": 0.1848, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.11167445033788681, |
|
"rewards/margins": 0.04991639405488968, |
|
"rewards/rejected": -0.1615908443927765, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.15929438714262e-07, |
|
"logits/chosen": -0.11828355491161346, |
|
"logits/rejected": -0.20383377373218536, |
|
"logps/chosen": -546.3382568359375, |
|
"logps/rejected": -630.2655029296875, |
|
"loss": 0.1791, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.10707902908325195, |
|
"rewards/margins": 0.07424376159906387, |
|
"rewards/rejected": -0.18132279813289642, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.801887794794911e-07, |
|
"logits/chosen": -0.14532892405986786, |
|
"logits/rejected": -0.23003335297107697, |
|
"logps/chosen": -478.4186096191406, |
|
"logps/rejected": -552.7274169921875, |
|
"loss": 0.1775, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.09584512561559677, |
|
"rewards/margins": 0.06631821393966675, |
|
"rewards/rejected": -0.16216334700584412, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.450098422432787e-07, |
|
"logits/chosen": -0.20046038925647736, |
|
"logits/rejected": -0.15967608988285065, |
|
"logps/chosen": -544.1749267578125, |
|
"logps/rejected": -586.116943359375, |
|
"loss": 0.1792, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.11548880487680435, |
|
"rewards/margins": 0.05985084921121597, |
|
"rewards/rejected": -0.17533965408802032, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.104048264413858e-07, |
|
"logits/chosen": -0.19410791993141174, |
|
"logits/rejected": -0.23477724194526672, |
|
"logps/chosen": -451.70941162109375, |
|
"logps/rejected": -480.71881103515625, |
|
"loss": 0.1821, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.09721177071332932, |
|
"rewards/margins": 0.04050077125430107, |
|
"rewards/rejected": -0.1377125382423401, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.763857324837321e-07, |
|
"logits/chosen": -0.1642087996006012, |
|
"logits/rejected": -0.230143740773201, |
|
"logps/chosen": -522.2006225585938, |
|
"logps/rejected": -582.2677612304688, |
|
"loss": 0.1954, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.11323000490665436, |
|
"rewards/margins": 0.06264480203390121, |
|
"rewards/rejected": -0.17587482929229736, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.429643575928605e-07, |
|
"logits/chosen": -0.2773049473762512, |
|
"logits/rejected": -0.1615469753742218, |
|
"logps/chosen": -547.8629760742188, |
|
"logps/rejected": -587.5028076171875, |
|
"loss": 0.1817, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.11454708874225616, |
|
"rewards/margins": 0.0604601725935936, |
|
"rewards/rejected": -0.17500726878643036, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.101522917128709e-07, |
|
"logits/chosen": -0.16052532196044922, |
|
"logits/rejected": -0.25322675704956055, |
|
"logps/chosen": -515.0181884765625, |
|
"logps/rejected": -600.516357421875, |
|
"loss": 0.1766, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12128403037786484, |
|
"rewards/margins": 0.06581829488277435, |
|
"rewards/rejected": -0.1871023178100586, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.779609134902312e-07, |
|
"logits/chosen": -0.2754663825035095, |
|
"logits/rejected": -0.19558298587799072, |
|
"logps/chosen": -479.9947204589844, |
|
"logps/rejected": -522.5654296875, |
|
"loss": 0.1816, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.10307195037603378, |
|
"rewards/margins": 0.05991575866937637, |
|
"rewards/rejected": -0.16298770904541016, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.464013863278629e-07, |
|
"logits/chosen": -0.3147231340408325, |
|
"logits/rejected": -0.12713433802127838, |
|
"logps/chosen": -576.8265380859375, |
|
"logps/rejected": -582.334716796875, |
|
"loss": 0.2017, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.13038143515586853, |
|
"rewards/margins": 0.05221244692802429, |
|
"rewards/rejected": -0.18259385228157043, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.154846545138696e-07, |
|
"logits/chosen": -0.20450320839881897, |
|
"logits/rejected": -0.2868250906467438, |
|
"logps/chosen": -535.9012451171875, |
|
"logps/rejected": -593.1867065429688, |
|
"loss": 0.1831, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.11613848060369492, |
|
"rewards/margins": 0.05925974249839783, |
|
"rewards/rejected": -0.17539823055267334, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.852214394262515e-07, |
|
"logits/chosen": -0.20174920558929443, |
|
"logits/rejected": -0.17505855858325958, |
|
"logps/chosen": -544.5908813476562, |
|
"logps/rejected": -570.7057495117188, |
|
"loss": 0.1812, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.12965530157089233, |
|
"rewards/margins": 0.03963715583086014, |
|
"rewards/rejected": -0.16929244995117188, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.556222358149191e-07, |
|
"logits/chosen": -0.17067115008831024, |
|
"logits/rejected": -0.23377446830272675, |
|
"logps/chosen": -544.231201171875, |
|
"logps/rejected": -599.497802734375, |
|
"loss": 0.1781, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.11236411333084106, |
|
"rewards/margins": 0.05633332207798958, |
|
"rewards/rejected": -0.16869743168354034, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.266973081622992e-07, |
|
"logits/chosen": -0.2045322209596634, |
|
"logits/rejected": -0.19451120495796204, |
|
"logps/chosen": -543.3025512695312, |
|
"logps/rejected": -558.47119140625, |
|
"loss": 0.1912, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.12296583503484726, |
|
"rewards/margins": 0.03358592838048935, |
|
"rewards/rejected": -0.1565517634153366, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.984566871237942e-07, |
|
"logits/chosen": -0.23366038501262665, |
|
"logits/rejected": -0.2533060610294342, |
|
"logps/chosen": -494.28082275390625, |
|
"logps/rejected": -536.08251953125, |
|
"loss": 0.1975, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.11815425008535385, |
|
"rewards/margins": 0.03765954449772835, |
|
"rewards/rejected": -0.1558137983083725, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.709101660493251e-07, |
|
"logits/chosen": -0.25420263409614563, |
|
"logits/rejected": -0.10746750980615616, |
|
"logps/chosen": -496.64349365234375, |
|
"logps/rejected": -555.2488403320312, |
|
"loss": 0.1853, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.10732255131006241, |
|
"rewards/margins": 0.05255774408578873, |
|
"rewards/rejected": -0.15988029539585114, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.440672975871743e-07, |
|
"logits/chosen": -0.230036661028862, |
|
"logits/rejected": -0.16146495938301086, |
|
"logps/chosen": -498.93365478515625, |
|
"logps/rejected": -519.977783203125, |
|
"loss": 0.179, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.09960173070430756, |
|
"rewards/margins": 0.04401581361889839, |
|
"rewards/rejected": -0.14361754059791565, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1793739037129134e-07, |
|
"logits/chosen": -0.14786578714847565, |
|
"logits/rejected": -0.1643018275499344, |
|
"logps/chosen": -516.0054931640625, |
|
"logps/rejected": -543.8809814453125, |
|
"loss": 0.1974, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.10642917454242706, |
|
"rewards/margins": 0.05054362863302231, |
|
"rewards/rejected": -0.15697282552719116, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9252950579322405e-07, |
|
"logits/chosen": -0.26063111424446106, |
|
"logits/rejected": -0.20259733498096466, |
|
"logps/chosen": -477.3902893066406, |
|
"logps/rejected": -544.326416015625, |
|
"loss": 0.1805, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.11126448959112167, |
|
"rewards/margins": 0.053704213351011276, |
|
"rewards/rejected": -0.16496869921684265, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6785245485978864e-07, |
|
"logits/chosen": -0.19165876507759094, |
|
"logits/rejected": -0.2015565186738968, |
|
"logps/chosen": -514.6396484375, |
|
"logps/rejected": -582.6300659179688, |
|
"loss": 0.1798, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.11115659773349762, |
|
"rewards/margins": 0.05640328675508499, |
|
"rewards/rejected": -0.1675598919391632, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.43914795137566e-07, |
|
"logits/chosen": -0.24871298670768738, |
|
"logits/rejected": -0.2329408824443817, |
|
"logps/chosen": -527.1490478515625, |
|
"logps/rejected": -558.3999633789062, |
|
"loss": 0.1913, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.10850969702005386, |
|
"rewards/margins": 0.04025677964091301, |
|
"rewards/rejected": -0.14876648783683777, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.207248277852901e-07, |
|
"logits/chosen": -0.1298540085554123, |
|
"logits/rejected": -0.22532661259174347, |
|
"logps/chosen": -549.50390625, |
|
"logps/rejected": -626.0857543945312, |
|
"loss": 0.1837, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.13459596037864685, |
|
"rewards/margins": 0.053374581038951874, |
|
"rewards/rejected": -0.18797054886817932, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9829059467515074e-07, |
|
"logits/chosen": -0.1630309522151947, |
|
"logits/rejected": -0.2934538722038269, |
|
"logps/chosen": -513.16943359375, |
|
"logps/rejected": -542.3517456054688, |
|
"loss": 0.1856, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1167718768119812, |
|
"rewards/margins": 0.04849160462617874, |
|
"rewards/rejected": -0.16526347398757935, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.766198756040153e-07, |
|
"logits/chosen": -0.24708867073059082, |
|
"logits/rejected": -0.21414759755134583, |
|
"logps/chosen": -569.2642822265625, |
|
"logps/rejected": -592.0335693359375, |
|
"loss": 0.1832, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.1170465499162674, |
|
"rewards/margins": 0.04982801154255867, |
|
"rewards/rejected": -0.16687455773353577, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.5572018559553155e-07, |
|
"logits/chosen": -0.2323312759399414, |
|
"logits/rejected": -0.0636955201625824, |
|
"logps/chosen": -530.3974609375, |
|
"logps/rejected": -546.2402954101562, |
|
"loss": 0.196, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.10811781883239746, |
|
"rewards/margins": 0.05536898970603943, |
|
"rewards/rejected": -0.1634868085384369, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3559877229404864e-07, |
|
"logits/chosen": -0.2858187258243561, |
|
"logits/rejected": -0.16766619682312012, |
|
"logps/chosen": -497.91876220703125, |
|
"logps/rejected": -530.9742431640625, |
|
"loss": 0.1815, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.09212108701467514, |
|
"rewards/margins": 0.06050460413098335, |
|
"rewards/rejected": -0.1526256948709488, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1626261345126576e-07, |
|
"logits/chosen": -0.18948575854301453, |
|
"logits/rejected": -0.2522360682487488, |
|
"logps/chosen": -498.93951416015625, |
|
"logps/rejected": -548.31787109375, |
|
"loss": 0.1875, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.10647443681955338, |
|
"rewards/margins": 0.06186903268098831, |
|
"rewards/rejected": -0.1683434695005417, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.9771841450646505e-07, |
|
"logits/chosen": -0.20894873142242432, |
|
"logits/rejected": -0.07307926565408707, |
|
"logps/chosen": -496.451416015625, |
|
"logps/rejected": -536.22119140625, |
|
"loss": 0.1744, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.10921342670917511, |
|
"rewards/margins": 0.05700286105275154, |
|
"rewards/rejected": -0.16621626913547516, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.7997260626118758e-07, |
|
"logits/chosen": -0.2548653483390808, |
|
"logits/rejected": -0.2179504930973053, |
|
"logps/chosen": -520.787841796875, |
|
"logps/rejected": -576.6990966796875, |
|
"loss": 0.1716, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.10316022485494614, |
|
"rewards/margins": 0.06640519946813583, |
|
"rewards/rejected": -0.16956540942192078, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6303134264914365e-07, |
|
"logits/chosen": -0.14907491207122803, |
|
"logits/rejected": -0.18132783472537994, |
|
"logps/chosen": -493.564697265625, |
|
"logps/rejected": -568.8427734375, |
|
"loss": 0.1922, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.11788590997457504, |
|
"rewards/margins": 0.05090482905507088, |
|
"rewards/rejected": -0.16879074275493622, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.469004986021355e-07, |
|
"logits/chosen": -0.2242707461118698, |
|
"logits/rejected": -0.16642411053180695, |
|
"logps/chosen": -527.6796875, |
|
"logps/rejected": -568.4700927734375, |
|
"loss": 0.1721, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.10909800231456757, |
|
"rewards/margins": 0.05576147884130478, |
|
"rewards/rejected": -0.16485948860645294, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.315856680127367e-07, |
|
"logits/chosen": -0.1916467398405075, |
|
"logits/rejected": -0.21406717598438263, |
|
"logps/chosen": -564.77880859375, |
|
"logps/rejected": -602.3505859375, |
|
"loss": 0.1906, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.12277360260486603, |
|
"rewards/margins": 0.04569784551858902, |
|
"rewards/rejected": -0.16847144067287445, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1709216179442817e-07, |
|
"logits/chosen": -0.20755720138549805, |
|
"logits/rejected": -0.1797691434621811, |
|
"logps/chosen": -478.31634521484375, |
|
"logps/rejected": -570.5222778320312, |
|
"loss": 0.1803, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.10136885941028595, |
|
"rewards/margins": 0.07469947636127472, |
|
"rewards/rejected": -0.17606833577156067, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0342500603986421e-07, |
|
"logits/chosen": -0.15231382846832275, |
|
"logits/rejected": -0.20254746079444885, |
|
"logps/chosen": -493.1875915527344, |
|
"logps/rejected": -561.9268188476562, |
|
"loss": 0.1827, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.10837433487176895, |
|
"rewards/margins": 0.05267762392759323, |
|
"rewards/rejected": -0.16105195879936218, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.058894027791643e-08, |
|
"logits/chosen": -0.33545562624931335, |
|
"logits/rejected": -0.14177361130714417, |
|
"logps/chosen": -489.1234436035156, |
|
"logps/rejected": -541.3236694335938, |
|
"loss": 0.1817, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.10399062931537628, |
|
"rewards/margins": 0.052609704434871674, |
|
"rewards/rejected": -0.15660032629966736, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.858841583008592e-08, |
|
"logits/chosen": -0.18932399153709412, |
|
"logits/rejected": -0.16892239451408386, |
|
"logps/chosen": -535.3873291015625, |
|
"logps/rejected": -541.3253173828125, |
|
"loss": 0.1843, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.11377620697021484, |
|
"rewards/margins": 0.04663427174091339, |
|
"rewards/rejected": -0.16041049361228943, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.742759426686313e-08, |
|
"logits/chosen": -0.171333447098732, |
|
"logits/rejected": -0.1328922063112259, |
|
"logps/chosen": -491.8202209472656, |
|
"logps/rejected": -543.0697021484375, |
|
"loss": 0.1924, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1006278246641159, |
|
"rewards/margins": 0.05380718782544136, |
|
"rewards/rejected": -0.15443500876426697, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.7110345964571104e-08, |
|
"logits/chosen": -0.24088236689567566, |
|
"logits/rejected": -0.15874245762825012, |
|
"logps/chosen": -508.15496826171875, |
|
"logps/rejected": -554.2071533203125, |
|
"loss": 0.1761, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.10783319175243378, |
|
"rewards/margins": 0.05231726914644241, |
|
"rewards/rejected": -0.1601504534482956, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.764024876318357e-08, |
|
"logits/chosen": -0.35891515016555786, |
|
"logits/rejected": -0.10517171770334244, |
|
"logps/chosen": -558.290771484375, |
|
"logps/rejected": -577.0843505859375, |
|
"loss": 0.1852, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.1132727861404419, |
|
"rewards/margins": 0.04631791263818741, |
|
"rewards/rejected": -0.1595907062292099, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.902058672559633e-08, |
|
"logits/chosen": -0.25900477170944214, |
|
"logits/rejected": -0.2353241741657257, |
|
"logps/chosen": -511.06231689453125, |
|
"logps/rejected": -571.1373291015625, |
|
"loss": 0.181, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1033167615532875, |
|
"rewards/margins": 0.05643168091773987, |
|
"rewards/rejected": -0.15974844992160797, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.125434899876933e-08, |
|
"logits/chosen": -0.25551778078079224, |
|
"logits/rejected": -0.1563359797000885, |
|
"logps/chosen": -518.1477661132812, |
|
"logps/rejected": -544.9768676757812, |
|
"loss": 0.1796, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.1060783639550209, |
|
"rewards/margins": 0.061492882668972015, |
|
"rewards/rejected": -0.16757124662399292, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4344228777145873e-08, |
|
"logits/chosen": -0.306395947933197, |
|
"logits/rejected": -0.20862647891044617, |
|
"logps/chosen": -527.55908203125, |
|
"logps/rejected": -568.0324096679688, |
|
"loss": 0.1903, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.11323089897632599, |
|
"rewards/margins": 0.051735300570726395, |
|
"rewards/rejected": -0.16496619582176208, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.829262236869772e-08, |
|
"logits/chosen": -0.2206902951002121, |
|
"logits/rejected": -0.22108717262744904, |
|
"logps/chosen": -513.4564208984375, |
|
"logps/rejected": -591.93701171875, |
|
"loss": 0.1774, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.10885292291641235, |
|
"rewards/margins": 0.06501305848360062, |
|
"rewards/rejected": -0.17386598885059357, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3101628363929586e-08, |
|
"logits/chosen": -0.11251676082611084, |
|
"logits/rejected": -0.19971203804016113, |
|
"logps/chosen": -483.9027404785156, |
|
"logps/rejected": -568.6507568359375, |
|
"loss": 0.1638, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0930183082818985, |
|
"rewards/margins": 0.07240499556064606, |
|
"rewards/rejected": -0.16542330384254456, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.773046908123195e-09, |
|
"logits/chosen": -0.10743804275989532, |
|
"logits/rejected": -0.14361344277858734, |
|
"logps/chosen": -578.63671875, |
|
"logps/rejected": -649.34423828125, |
|
"loss": 0.1712, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.12350692600011826, |
|
"rewards/margins": 0.06949726492166519, |
|
"rewards/rejected": -0.19300420582294464, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.308379077080817e-09, |
|
"logits/chosen": -0.2110254317522049, |
|
"logits/rejected": -0.14409136772155762, |
|
"logps/chosen": -484.0826110839844, |
|
"logps/rejected": -534.74755859375, |
|
"loss": 0.1695, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.09997762739658356, |
|
"rewards/margins": 0.05725119262933731, |
|
"rewards/rejected": -0.15722879767417908, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7088263565760996e-09, |
|
"logits/chosen": -0.2690781056880951, |
|
"logits/rejected": -0.10777749866247177, |
|
"logps/chosen": -531.328125, |
|
"logps/rejected": -558.0218505859375, |
|
"loss": 0.192, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.11898232996463776, |
|
"rewards/margins": 0.04323791339993477, |
|
"rewards/rejected": -0.16222023963928223, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.752902257023633e-10, |
|
"logits/chosen": -0.2458486258983612, |
|
"logits/rejected": -0.14588430523872375, |
|
"logps/chosen": -481.7171936035156, |
|
"logps/rejected": -523.7376708984375, |
|
"loss": 0.1744, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.10408926010131836, |
|
"rewards/margins": 0.047747764736413956, |
|
"rewards/rejected": -0.15183702111244202, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.083718442532189e-10, |
|
"logits/chosen": -0.2206140011548996, |
|
"logits/rejected": -0.13354091346263885, |
|
"logps/chosen": -543.6251220703125, |
|
"logps/rejected": -563.88330078125, |
|
"loss": 0.1818, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.11700856685638428, |
|
"rewards/margins": 0.05272556096315384, |
|
"rewards/rejected": -0.1697341352701187, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1875, |
|
"total_flos": 0.0, |
|
"train_loss": 0.016774978733062745, |
|
"train_runtime": 1063.735, |
|
"train_samples_per_second": 28.203, |
|
"train_steps_per_second": 1.763 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1875, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|