{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9968602825745683, |
|
"eval_steps": 100, |
|
"global_step": 954, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.6315789473684213e-07, |
|
"logits/chosen": 0.8826487064361572, |
|
"logits/rejected": 0.921362042427063, |
|
"logps/chosen": -36.58121871948242, |
|
"logps/rejected": -54.902320861816406, |
|
"loss": 0.01, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.631578947368421e-06, |
|
"logits/chosen": 0.8915393352508545, |
|
"logits/rejected": 0.8742258548736572, |
|
"logps/chosen": -87.77196502685547, |
|
"logps/rejected": -96.38507843017578, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 0.2569444477558136, |
|
"rewards/chosen": 0.0003006549668498337, |
|
"rewards/margins": 0.0004423653008416295, |
|
"rewards/rejected": -0.00014171031943988055, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.999578104083307e-06, |
|
"logits/chosen": 0.7802013158798218, |
|
"logits/rejected": 0.8469624519348145, |
|
"logps/chosen": -91.75413513183594, |
|
"logps/rejected": -85.1173095703125, |
|
"loss": 0.01, |
|
"rewards/accuracies": 0.24375000596046448, |
|
"rewards/chosen": 3.31846640619915e-05, |
|
"rewards/margins": -0.00015664812235627323, |
|
"rewards/rejected": 0.00018983279005624354, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.949122667718935e-06, |
|
"logits/chosen": 0.8652933835983276, |
|
"logits/rejected": 0.848902702331543, |
|
"logps/chosen": -85.29698944091797, |
|
"logps/rejected": -78.0544204711914, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": 0.0002226830692961812, |
|
"rewards/margins": 0.00038711068918928504, |
|
"rewards/rejected": -0.00016442763444501907, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.8162351680370046e-06, |
|
"logits/chosen": 0.7649837136268616, |
|
"logits/rejected": 0.841802716255188, |
|
"logps/chosen": -122.76881408691406, |
|
"logps/rejected": -108.8086166381836, |
|
"loss": 0.01, |
|
"rewards/accuracies": 0.26875001192092896, |
|
"rewards/chosen": -0.000362707010935992, |
|
"rewards/margins": -7.97106167738093e-06, |
|
"rewards/rejected": -0.0003547359665390104, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.605388304968915e-06, |
|
"logits/chosen": 0.8386613130569458, |
|
"logits/rejected": 0.8677732348442078, |
|
"logps/chosen": -65.77490997314453, |
|
"logps/rejected": -71.66779327392578, |
|
"loss": 0.01, |
|
"rewards/accuracies": 0.21250000596046448, |
|
"rewards/chosen": -0.00019812444224953651, |
|
"rewards/margins": -0.00015129637904465199, |
|
"rewards/rejected": -4.682801591116004e-05, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.323678718546552e-06, |
|
"logits/chosen": 0.8152744174003601, |
|
"logits/rejected": 0.8866288065910339, |
|
"logps/chosen": -117.6309814453125, |
|
"logps/rejected": -110.9274673461914, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 0.26875001192092896, |
|
"rewards/chosen": 2.0224386389600113e-05, |
|
"rewards/margins": -1.7978531104745343e-05, |
|
"rewards/rejected": 3.820297933998518e-05, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.980588131662451e-06, |
|
"logits/chosen": 0.8049997091293335, |
|
"logits/rejected": 0.8617580533027649, |
|
"logps/chosen": -82.61628723144531, |
|
"logps/rejected": -83.92156982421875, |
|
"loss": 0.01, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": 0.00046920054592192173, |
|
"rewards/margins": 0.000604915083386004, |
|
"rewards/rejected": -0.00013571445015259087, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.5876642162051833e-06, |
|
"logits/chosen": 0.7660447955131531, |
|
"logits/rejected": 0.8296969532966614, |
|
"logps/chosen": -105.82564544677734, |
|
"logps/rejected": -104.93913269042969, |
|
"loss": 0.01, |
|
"rewards/accuracies": 0.23749999701976776, |
|
"rewards/chosen": -0.0001158899613074027, |
|
"rewards/margins": -0.00039075990207493305, |
|
"rewards/rejected": 0.0002748699625954032, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.1581319239114983e-06, |
|
"logits/chosen": 0.8339036107063293, |
|
"logits/rejected": 0.8548374176025391, |
|
"logps/chosen": -77.63902282714844, |
|
"logps/rejected": -90.03132629394531, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": 0.0005246674409136176, |
|
"rewards/margins": 0.0012112573022022843, |
|
"rewards/rejected": -0.0006865898030810058, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.7064483636808314e-06, |
|
"logits/chosen": 0.7685378789901733, |
|
"logits/rejected": 0.8903138041496277, |
|
"logps/chosen": -91.08631896972656, |
|
"logps/rejected": -101.58308410644531, |
|
"loss": 0.01, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.00034642056562006474, |
|
"rewards/margins": 0.00024903417215682566, |
|
"rewards/rejected": -0.0005954547086730599, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_logits/chosen": 0.8011398911476135, |
|
"eval_logits/rejected": 0.8187842965126038, |
|
"eval_logps/chosen": -91.76709747314453, |
|
"eval_logps/rejected": -94.26233673095703, |
|
"eval_loss": 0.009981811977922916, |
|
"eval_rewards/accuracies": 0.25, |
|
"eval_rewards/chosen": -0.00039719167398288846, |
|
"eval_rewards/margins": 0.00025612558238208294, |
|
"eval_rewards/rejected": -0.0006533172563649714, |
|
"eval_runtime": 274.2663, |
|
"eval_samples_per_second": 7.292, |
|
"eval_steps_per_second": 0.456, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.996716052911017e-06, |
|
"logits/chosen": 0.807642936706543, |
|
"logits/rejected": 0.8303602337837219, |
|
"logps/chosen": -110.20500183105469, |
|
"logps/rejected": -96.52973937988281, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.0005453795311041176, |
|
"rewards/margins": 0.0005892693297937512, |
|
"rewards/rejected": -0.0011346489191055298, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.9903533134293035e-06, |
|
"logits/chosen": 0.8457789421081543, |
|
"logits/rejected": 0.9221089482307434, |
|
"logps/chosen": -84.42936706542969, |
|
"logps/rejected": -80.35577392578125, |
|
"loss": 0.01, |
|
"rewards/accuracies": 0.2874999940395355, |
|
"rewards/chosen": -0.0005200408631935716, |
|
"rewards/margins": -2.1466799807967618e-05, |
|
"rewards/rejected": -0.0004985741106793284, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.9806521797692184e-06, |
|
"logits/chosen": 0.7757605910301208, |
|
"logits/rejected": 0.8338298797607422, |
|
"logps/chosen": -91.54916381835938, |
|
"logps/rejected": -80.19969177246094, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.00031306734308600426, |
|
"rewards/margins": 0.0007860729238018394, |
|
"rewards/rejected": -0.0010991402668878436, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.967625656594782e-06, |
|
"logits/chosen": 0.8086369633674622, |
|
"logits/rejected": 0.8897687792778015, |
|
"logps/chosen": -102.42750549316406, |
|
"logps/rejected": -121.26502990722656, |
|
"loss": 0.01, |
|
"rewards/accuracies": 0.2874999940395355, |
|
"rewards/chosen": -0.0017349247355014086, |
|
"rewards/margins": -0.0009382988209836185, |
|
"rewards/rejected": -0.0007966257398948073, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.95129120635556e-06, |
|
"logits/chosen": 0.8270760774612427, |
|
"logits/rejected": 0.8866473436355591, |
|
"logps/chosen": -98.09243774414062, |
|
"logps/rejected": -75.63480377197266, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.0008643465116620064, |
|
"rewards/margins": 0.0005676061264239252, |
|
"rewards/rejected": -0.0014319528127089143, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.93167072587771e-06, |
|
"logits/chosen": 0.7157760858535767, |
|
"logits/rejected": 0.7634655237197876, |
|
"logps/chosen": -80.0224609375, |
|
"logps/rejected": -73.7667236328125, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 0.26875001192092896, |
|
"rewards/chosen": -0.0008890128810890019, |
|
"rewards/margins": 0.0006957318400964141, |
|
"rewards/rejected": -0.001584744779393077, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.908790517010637e-06, |
|
"logits/chosen": 0.8680498003959656, |
|
"logits/rejected": 0.9332998394966125, |
|
"logps/chosen": -72.52728271484375, |
|
"logps/rejected": -81.39039611816406, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": -0.0010830673854798079, |
|
"rewards/margins": 0.0011036808136850595, |
|
"rewards/rejected": -0.0021867481991648674, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.882681251368549e-06, |
|
"logits/chosen": 0.7853900790214539, |
|
"logits/rejected": 0.8009947538375854, |
|
"logps/chosen": -98.69686889648438, |
|
"logps/rejected": -94.05746459960938, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.0003774884680751711, |
|
"rewards/margins": 0.0011976181995123625, |
|
"rewards/rejected": -0.001575106754899025, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.853377929214243e-06, |
|
"logits/chosen": 0.7380321621894836, |
|
"logits/rejected": 0.8313556909561157, |
|
"logps/chosen": -97.42859649658203, |
|
"logps/rejected": -107.14707946777344, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.0015280761290341616, |
|
"rewards/margins": 0.0013618851080536842, |
|
"rewards/rejected": -0.0028899614699184895, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.8209198325401815e-06, |
|
"logits/chosen": 0.7754073143005371, |
|
"logits/rejected": 0.8368635177612305, |
|
"logps/chosen": -77.53565216064453, |
|
"logps/rejected": -86.88373565673828, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.0016312580555677414, |
|
"rewards/margins": 0.002017855178564787, |
|
"rewards/rejected": -0.0036491132341325283, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": 0.79280686378479, |
|
"eval_logits/rejected": 0.8107121586799622, |
|
"eval_logps/chosen": -91.90320587158203, |
|
"eval_logps/rejected": -94.51907348632812, |
|
"eval_loss": 0.009800148196518421, |
|
"eval_rewards/accuracies": 0.3059999942779541, |
|
"eval_rewards/chosen": -0.0017583017470315099, |
|
"eval_rewards/margins": 0.0014622843591496348, |
|
"eval_rewards/rejected": -0.0032205861061811447, |
|
"eval_runtime": 274.5029, |
|
"eval_samples_per_second": 7.286, |
|
"eval_steps_per_second": 0.455, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.785350472409792e-06, |
|
"logits/chosen": 0.8355112075805664, |
|
"logits/rejected": 0.8323017358779907, |
|
"logps/chosen": -93.525146484375, |
|
"logps/rejected": -86.69541931152344, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": -0.002141474513337016, |
|
"rewards/margins": 0.0012898927088826895, |
|
"rewards/rejected": -0.0034313672222197056, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.746717530629565e-06, |
|
"logits/chosen": 0.7978548407554626, |
|
"logits/rejected": 0.8422772288322449, |
|
"logps/chosen": -100.98792266845703, |
|
"logps/rejected": -95.15986633300781, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.0021895477548241615, |
|
"rewards/margins": 0.0018103765323758125, |
|
"rewards/rejected": -0.003999924287199974, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.7050727958301505e-06, |
|
"logits/chosen": 0.8680189251899719, |
|
"logits/rejected": 0.8407084345817566, |
|
"logps/chosen": -88.7295913696289, |
|
"logps/rejected": -93.2841796875, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.0033182327169924974, |
|
"rewards/margins": 0.001298791728913784, |
|
"rewards/rejected": -0.004617024213075638, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.660472094042121e-06, |
|
"logits/chosen": 0.7951023578643799, |
|
"logits/rejected": 0.8105288743972778, |
|
"logps/chosen": -115.34117126464844, |
|
"logps/rejected": -101.9451904296875, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.002927349414676428, |
|
"rewards/margins": 0.0025170063599944115, |
|
"rewards/rejected": -0.005444356240332127, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.612975213859487e-06, |
|
"logits/chosen": 0.7722874879837036, |
|
"logits/rejected": 0.836758017539978, |
|
"logps/chosen": -116.63499450683594, |
|
"logps/rejected": -107.69172668457031, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.002974079456180334, |
|
"rewards/margins": 0.002990330569446087, |
|
"rewards/rejected": -0.005964409559965134, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5626458262912745e-06, |
|
"logits/chosen": 0.7915774583816528, |
|
"logits/rejected": 0.8718246221542358, |
|
"logps/chosen": -99.33454132080078, |
|
"logps/rejected": -88.96430969238281, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.00338442693464458, |
|
"rewards/margins": 0.00286421668715775, |
|
"rewards/rejected": -0.006248644087463617, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.509551399408598e-06, |
|
"logits/chosen": 0.886553168296814, |
|
"logits/rejected": 0.9329681396484375, |
|
"logps/chosen": -114.57804870605469, |
|
"logps/rejected": -83.83116149902344, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.003462123218923807, |
|
"rewards/margins": 0.003769775154069066, |
|
"rewards/rejected": -0.007231898605823517, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.453763107901676e-06, |
|
"logits/chosen": 0.8362187147140503, |
|
"logits/rejected": 0.8351699113845825, |
|
"logps/chosen": -121.24173736572266, |
|
"logps/rejected": -109.759521484375, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.004291792865842581, |
|
"rewards/margins": 0.0028506286907941103, |
|
"rewards/rejected": -0.007142421789467335, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.3953557376679856e-06, |
|
"logits/chosen": 0.820317268371582, |
|
"logits/rejected": 0.8414691686630249, |
|
"logps/chosen": -95.08177185058594, |
|
"logps/rejected": -92.17173767089844, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.006106963846832514, |
|
"rewards/margins": 0.0012445768807083368, |
|
"rewards/rejected": -0.007351540960371494, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.33440758555951e-06, |
|
"logits/chosen": 0.7827913165092468, |
|
"logits/rejected": 0.8337699174880981, |
|
"logps/chosen": -79.63487243652344, |
|
"logps/rejected": -88.0806884765625, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.0031589865684509277, |
|
"rewards/margins": 0.0048677194863557816, |
|
"rewards/rejected": -0.00802670605480671, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": 0.7800281643867493, |
|
"eval_logits/rejected": 0.7981991171836853, |
|
"eval_logps/chosen": -92.30917358398438, |
|
"eval_logps/rejected": -95.08193969726562, |
|
"eval_loss": 0.009581359103322029, |
|
"eval_rewards/accuracies": 0.3059999942779541, |
|
"eval_rewards/chosen": -0.005818030331283808, |
|
"eval_rewards/margins": 0.0030311874579638243, |
|
"eval_rewards/rejected": -0.008849218487739563, |
|
"eval_runtime": 274.4906, |
|
"eval_samples_per_second": 7.286, |
|
"eval_steps_per_second": 0.455, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.2710003544234255e-06, |
|
"logits/chosen": 0.7880622744560242, |
|
"logits/rejected": 0.8204299211502075, |
|
"logps/chosen": -86.09496307373047, |
|
"logps/rejected": -87.77622985839844, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.006854506675153971, |
|
"rewards/margins": 0.0034410678781569004, |
|
"rewards/rejected": -0.010295574553310871, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.205219043576955e-06, |
|
"logits/chosen": 0.7853392958641052, |
|
"logits/rejected": 0.8362796902656555, |
|
"logps/chosen": -88.15892028808594, |
|
"logps/rejected": -91.532470703125, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0067759170196950436, |
|
"rewards/margins": 0.003778536571189761, |
|
"rewards/rejected": -0.010554454289376736, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.137151834863213e-06, |
|
"logits/chosen": 0.7346684336662292, |
|
"logits/rejected": 0.7727020382881165, |
|
"logps/chosen": -101.71944427490234, |
|
"logps/rejected": -102.30587005615234, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.007574302610009909, |
|
"rewards/margins": 0.004506202414631844, |
|
"rewards/rejected": -0.012080504558980465, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.066889974440757e-06, |
|
"logits/chosen": 0.7809281945228577, |
|
"logits/rejected": 0.8105946779251099, |
|
"logps/chosen": -83.26361846923828, |
|
"logps/rejected": -73.31196594238281, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.004678535740822554, |
|
"rewards/margins": 0.005057544447481632, |
|
"rewards/rejected": -0.009736080653965473, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.994527650465352e-06, |
|
"logits/chosen": 0.8511131405830383, |
|
"logits/rejected": 0.8692939877510071, |
|
"logps/chosen": -89.02024841308594, |
|
"logps/rejected": -83.25767517089844, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.009549392387270927, |
|
"rewards/margins": 0.004139441065490246, |
|
"rewards/rejected": -0.013688832521438599, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.92016186682789e-06, |
|
"logits/chosen": 0.7794450521469116, |
|
"logits/rejected": 0.7929707765579224, |
|
"logps/chosen": -95.35264587402344, |
|
"logps/rejected": -99.0013198852539, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.009752290323376656, |
|
"rewards/margins": 0.005257262382656336, |
|
"rewards/rejected": -0.015009550377726555, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.843892313117724e-06, |
|
"logits/chosen": 0.8562378883361816, |
|
"logits/rejected": 0.8364348411560059, |
|
"logps/chosen": -108.0776596069336, |
|
"logps/rejected": -104.81854248046875, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.008355715312063694, |
|
"rewards/margins": 0.0053038811311125755, |
|
"rewards/rejected": -0.01365959644317627, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.7658212309857576e-06, |
|
"logits/chosen": 0.7989660501480103, |
|
"logits/rejected": 0.8410453796386719, |
|
"logps/chosen": -102.3397216796875, |
|
"logps/rejected": -88.8924789428711, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.008961253799498081, |
|
"rewards/margins": 0.006044700741767883, |
|
"rewards/rejected": -0.015005955472588539, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.686053277086401e-06, |
|
"logits/chosen": 0.7839023470878601, |
|
"logits/rejected": 0.8361980319023132, |
|
"logps/chosen": -110.81888580322266, |
|
"logps/rejected": -100.21189880371094, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.007862605154514313, |
|
"rewards/margins": 0.007173668593168259, |
|
"rewards/rejected": -0.015036274679005146, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.604695382782159e-06, |
|
"logits/chosen": 0.7966210246086121, |
|
"logits/rejected": 0.82738196849823, |
|
"logps/chosen": -100.70481872558594, |
|
"logps/rejected": -103.6056137084961, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.01019311510026455, |
|
"rewards/margins": 0.006234516389667988, |
|
"rewards/rejected": -0.01642763242125511, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": 0.7564685344696045, |
|
"eval_logits/rejected": 0.7753021121025085, |
|
"eval_logps/chosen": -92.824951171875, |
|
"eval_logps/rejected": -95.7641830444336, |
|
"eval_loss": 0.009432977996766567, |
|
"eval_rewards/accuracies": 0.33399999141693115, |
|
"eval_rewards/chosen": -0.010975906625390053, |
|
"eval_rewards/margins": 0.004695890471339226, |
|
"eval_rewards/rejected": -0.01567179709672928, |
|
"eval_runtime": 274.283, |
|
"eval_samples_per_second": 7.292, |
|
"eval_steps_per_second": 0.456, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5218566107988872e-06, |
|
"logits/chosen": 0.7925983667373657, |
|
"logits/rejected": 0.7887567281723022, |
|
"logps/chosen": -96.2036361694336, |
|
"logps/rejected": -81.12339782714844, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.011769723147153854, |
|
"rewards/margins": 0.004597696475684643, |
|
"rewards/rejected": -0.016367420554161072, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.437648009023905e-06, |
|
"logits/chosen": 0.7313151359558105, |
|
"logits/rejected": 0.791450560092926, |
|
"logps/chosen": -96.06840515136719, |
|
"logps/rejected": -98.45843505859375, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.012706448324024677, |
|
"rewards/margins": 0.004676566459238529, |
|
"rewards/rejected": -0.017383014783263206, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.352182461642929e-06, |
|
"logits/chosen": 0.836865246295929, |
|
"logits/rejected": 0.8605507612228394, |
|
"logps/chosen": -83.78216552734375, |
|
"logps/rejected": -90.726806640625, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.010321888141334057, |
|
"rewards/margins": 0.0064377314411103725, |
|
"rewards/rejected": -0.01675962097942829, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.265574537815398e-06, |
|
"logits/chosen": 0.7546414136886597, |
|
"logits/rejected": 0.8063668012619019, |
|
"logps/chosen": -110.22758483886719, |
|
"logps/rejected": -82.65476989746094, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.009618332609534264, |
|
"rewards/margins": 0.007489732000976801, |
|
"rewards/rejected": -0.017108064144849777, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.177940338091043e-06, |
|
"logits/chosen": 0.7461640238761902, |
|
"logits/rejected": 0.8213142156600952, |
|
"logps/chosen": -81.55519104003906, |
|
"logps/rejected": -72.42239379882812, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.009043378755450249, |
|
"rewards/margins": 0.005746514070779085, |
|
"rewards/rejected": -0.014789892360568047, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.089397338773569e-06, |
|
"logits/chosen": 0.7719414830207825, |
|
"logits/rejected": 0.8252090215682983, |
|
"logps/chosen": -106.2469482421875, |
|
"logps/rejected": -88.18677520751953, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": -0.009636683389544487, |
|
"rewards/margins": 0.008729028515517712, |
|
"rewards/rejected": -0.018365712836384773, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.0000642344401115e-06, |
|
"logits/chosen": 0.8243037462234497, |
|
"logits/rejected": 0.8203719854354858, |
|
"logps/chosen": -116.20052337646484, |
|
"logps/rejected": -111.63053131103516, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.01138549018651247, |
|
"rewards/margins": 0.00896347127854824, |
|
"rewards/rejected": -0.020348962396383286, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.9100607788275547e-06, |
|
"logits/chosen": 0.8390713930130005, |
|
"logits/rejected": 0.8485578298568726, |
|
"logps/chosen": -76.21663665771484, |
|
"logps/rejected": -66.13563537597656, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 0.26249998807907104, |
|
"rewards/chosen": -0.011686747893691063, |
|
"rewards/margins": 0.003907538950443268, |
|
"rewards/rejected": -0.01559428684413433, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.8195076242990124e-06, |
|
"logits/chosen": 0.781129777431488, |
|
"logits/rejected": 0.8074111938476562, |
|
"logps/chosen": -93.35603332519531, |
|
"logps/rejected": -100.03508758544922, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.014726340770721436, |
|
"rewards/margins": 0.004594289697706699, |
|
"rewards/rejected": -0.01932062767446041, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.72852616010567e-06, |
|
"logits/chosen": 0.8007243871688843, |
|
"logits/rejected": 0.7850581407546997, |
|
"logps/chosen": -102.482177734375, |
|
"logps/rejected": -102.58174133300781, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.012946246191859245, |
|
"rewards/margins": 0.009540179744362831, |
|
"rewards/rejected": -0.022486427798867226, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_logits/chosen": 0.7492075562477112, |
|
"eval_logits/rejected": 0.7678821682929993, |
|
"eval_logps/chosen": -93.04632568359375, |
|
"eval_logps/rejected": -96.11502838134766, |
|
"eval_loss": 0.009345741011202335, |
|
"eval_rewards/accuracies": 0.3400000035762787, |
|
"eval_rewards/chosen": -0.013189575634896755, |
|
"eval_rewards/margins": 0.0059906188398599625, |
|
"eval_rewards/rejected": -0.019180195406079292, |
|
"eval_runtime": 272.2568, |
|
"eval_samples_per_second": 7.346, |
|
"eval_steps_per_second": 0.459, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.637238349660819e-06, |
|
"logits/chosen": 0.8036779165267944, |
|
"logits/rejected": 0.8043034672737122, |
|
"logps/chosen": -92.55281066894531, |
|
"logps/rejected": -90.49037170410156, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.014594177715480328, |
|
"rewards/margins": 0.005402697250247002, |
|
"rewards/rejected": -0.019996874034404755, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.5457665670441937e-06, |
|
"logits/chosen": 0.8009434938430786, |
|
"logits/rejected": 0.8440135717391968, |
|
"logps/chosen": -103.445068359375, |
|
"logps/rejected": -94.67227935791016, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.012103055603802204, |
|
"rewards/margins": 0.008832341991364956, |
|
"rewards/rejected": -0.02093539759516716, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.4542334329558075e-06, |
|
"logits/chosen": 0.7876957058906555, |
|
"logits/rejected": 0.8697893023490906, |
|
"logps/chosen": -108.23606872558594, |
|
"logps/rejected": -90.39962768554688, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.012823512777686119, |
|
"rewards/margins": 0.00771692767739296, |
|
"rewards/rejected": -0.02054044045507908, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.3627616503391813e-06, |
|
"logits/chosen": 0.7362729907035828, |
|
"logits/rejected": 0.7685662508010864, |
|
"logps/chosen": -103.24140930175781, |
|
"logps/rejected": -88.57923889160156, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.013577492907643318, |
|
"rewards/margins": 0.006500400602817535, |
|
"rewards/rejected": -0.020077891647815704, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.271473839894331e-06, |
|
"logits/chosen": 0.727995753288269, |
|
"logits/rejected": 0.7580054402351379, |
|
"logps/chosen": -92.93126678466797, |
|
"logps/rejected": -84.96773529052734, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.012479904107749462, |
|
"rewards/margins": 0.006294636521488428, |
|
"rewards/rejected": -0.018774541094899178, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.1804923757009885e-06, |
|
"logits/chosen": 0.7571858167648315, |
|
"logits/rejected": 0.8027281761169434, |
|
"logps/chosen": -117.69078063964844, |
|
"logps/rejected": -117.47624206542969, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.014920057728886604, |
|
"rewards/margins": 0.00889978464692831, |
|
"rewards/rejected": -0.02381983958184719, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.089939221172446e-06, |
|
"logits/chosen": 0.7476423382759094, |
|
"logits/rejected": 0.7867797017097473, |
|
"logps/chosen": -96.03732299804688, |
|
"logps/rejected": -93.63196563720703, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.014638627879321575, |
|
"rewards/margins": 0.007565396372228861, |
|
"rewards/rejected": -0.022204022854566574, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.9999357655598894e-06, |
|
"logits/chosen": 0.7799301147460938, |
|
"logits/rejected": 0.7575241327285767, |
|
"logps/chosen": -106.30534362792969, |
|
"logps/rejected": -101.278564453125, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.015909461304545403, |
|
"rewards/margins": 0.0034894272685050964, |
|
"rewards/rejected": -0.01939888671040535, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.9106026612264316e-06, |
|
"logits/chosen": 0.8206084370613098, |
|
"logits/rejected": 0.8035517930984497, |
|
"logps/chosen": -82.16791534423828, |
|
"logps/rejected": -81.92047882080078, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.015170453116297722, |
|
"rewards/margins": 0.006062434054911137, |
|
"rewards/rejected": -0.021232888102531433, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.8220596619089576e-06, |
|
"logits/chosen": 0.7637159824371338, |
|
"logits/rejected": 0.7464240789413452, |
|
"logps/chosen": -107.59675598144531, |
|
"logps/rejected": -108.54317474365234, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.016024868935346603, |
|
"rewards/margins": 0.006444328930228949, |
|
"rewards/rejected": -0.02246919646859169, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_logits/chosen": 0.7382512092590332, |
|
"eval_logits/rejected": 0.7577717900276184, |
|
"eval_logps/chosen": -93.16767120361328, |
|
"eval_logps/rejected": -96.26305389404297, |
|
"eval_loss": 0.00932270660996437, |
|
"eval_rewards/accuracies": 0.3440000116825104, |
|
"eval_rewards/chosen": -0.014403086155653, |
|
"eval_rewards/margins": 0.006257344502955675, |
|
"eval_rewards/rejected": -0.020660430192947388, |
|
"eval_runtime": 272.2545, |
|
"eval_samples_per_second": 7.346, |
|
"eval_steps_per_second": 0.459, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.7344254621846018e-06, |
|
"logits/chosen": 0.7539538145065308, |
|
"logits/rejected": 0.773404598236084, |
|
"logps/chosen": -114.31193542480469, |
|
"logps/rejected": -104.0637435913086, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.013692615553736687, |
|
"rewards/margins": 0.007563846651464701, |
|
"rewards/rejected": -0.02125646360218525, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.647817538357072e-06, |
|
"logits/chosen": 0.7876811027526855, |
|
"logits/rejected": 0.8452059626579285, |
|
"logps/chosen": -104.75425720214844, |
|
"logps/rejected": -99.91309356689453, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.016282986849546432, |
|
"rewards/margins": 0.007685069926083088, |
|
"rewards/rejected": -0.023968055844306946, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.5623519909760953e-06, |
|
"logits/chosen": 0.7141858339309692, |
|
"logits/rejected": 0.7543732523918152, |
|
"logps/chosen": -94.53080749511719, |
|
"logps/rejected": -90.2872085571289, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.014917632564902306, |
|
"rewards/margins": 0.005212970077991486, |
|
"rewards/rejected": -0.02013060264289379, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.4781433892011132e-06, |
|
"logits/chosen": 0.7889922261238098, |
|
"logits/rejected": 0.8183968663215637, |
|
"logps/chosen": -90.72172546386719, |
|
"logps/rejected": -97.16673278808594, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.013522952795028687, |
|
"rewards/margins": 0.00625986885279417, |
|
"rewards/rejected": -0.019782820716500282, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.3953046172178413e-06, |
|
"logits/chosen": 0.7950395345687866, |
|
"logits/rejected": 0.851498007774353, |
|
"logps/chosen": -101.07930755615234, |
|
"logps/rejected": -114.48579406738281, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.016296306625008583, |
|
"rewards/margins": 0.008597773499786854, |
|
"rewards/rejected": -0.024894079193472862, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.3139467229135999e-06, |
|
"logits/chosen": 0.7387314438819885, |
|
"logits/rejected": 0.7517582178115845, |
|
"logps/chosen": -80.6947250366211, |
|
"logps/rejected": -86.94942474365234, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.01321180909872055, |
|
"rewards/margins": 0.0073172166012227535, |
|
"rewards/rejected": -0.02052902616560459, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.2341787690142436e-06, |
|
"logits/chosen": 0.6672351956367493, |
|
"logits/rejected": 0.6918442249298096, |
|
"logps/chosen": -115.32958984375, |
|
"logps/rejected": -96.97526550292969, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.014516430906951427, |
|
"rewards/margins": 0.009004795923829079, |
|
"rewards/rejected": -0.02352122589945793, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.1561076868822756e-06, |
|
"logits/chosen": 0.6968938708305359, |
|
"logits/rejected": 0.728511393070221, |
|
"logps/chosen": -76.32088470458984, |
|
"logps/rejected": -81.96578979492188, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.015820499509572983, |
|
"rewards/margins": 0.004979898687452078, |
|
"rewards/rejected": -0.0208003968000412, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.079838133172111e-06, |
|
"logits/chosen": 0.7537049055099487, |
|
"logits/rejected": 0.8117996454238892, |
|
"logps/chosen": -94.59342956542969, |
|
"logps/rejected": -80.20372009277344, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.014598280191421509, |
|
"rewards/margins": 0.008172960951924324, |
|
"rewards/rejected": -0.022771239280700684, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.0054723495346484e-06, |
|
"logits/chosen": 0.7987161874771118, |
|
"logits/rejected": 0.8007674217224121, |
|
"logps/chosen": -105.23921203613281, |
|
"logps/rejected": -97.75834655761719, |
|
"loss": 0.009, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.017922451719641685, |
|
"rewards/margins": 0.008403345942497253, |
|
"rewards/rejected": -0.026325801387429237, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_logits/chosen": 0.7355116009712219, |
|
"eval_logits/rejected": 0.7544857859611511, |
|
"eval_logps/chosen": -93.24905395507812, |
|
"eval_logps/rejected": -96.31977844238281, |
|
"eval_loss": 0.009318255819380283, |
|
"eval_rewards/accuracies": 0.3479999899864197, |
|
"eval_rewards/chosen": -0.015216803178191185, |
|
"eval_rewards/margins": 0.006010868586599827, |
|
"eval_rewards/rejected": -0.021227672696113586, |
|
"eval_runtime": 272.5589, |
|
"eval_samples_per_second": 7.338, |
|
"eval_steps_per_second": 0.459, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 9.331100255592437e-07, |
|
"logits/chosen": 0.7550870180130005, |
|
"logits/rejected": 0.8333718180656433, |
|
"logps/chosen": -88.22233581542969, |
|
"logps/rejected": -81.27799224853516, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.01261846162378788, |
|
"rewards/margins": 0.007095014210790396, |
|
"rewards/rejected": -0.019713478162884712, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 8.628481651367876e-07, |
|
"logits/chosen": 0.7663224339485168, |
|
"logits/rejected": 0.8483554720878601, |
|
"logps/chosen": -112.94798278808594, |
|
"logps/rejected": -100.04948425292969, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.014603344723582268, |
|
"rewards/margins": 0.008872651495039463, |
|
"rewards/rejected": -0.023475993424654007, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 7.947809564230446e-07, |
|
"logits/chosen": 0.7916958928108215, |
|
"logits/rejected": 0.8508380651473999, |
|
"logps/chosen": -96.55794525146484, |
|
"logps/rejected": -95.86845397949219, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.017020542174577713, |
|
"rewards/margins": 0.006455309689044952, |
|
"rewards/rejected": -0.023475851863622665, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 7.289996455765749e-07, |
|
"logits/chosen": 0.716216504573822, |
|
"logits/rejected": 0.7626134157180786, |
|
"logps/chosen": -87.54243469238281, |
|
"logps/rejected": -81.1460952758789, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.012193633243441582, |
|
"rewards/margins": 0.004410223104059696, |
|
"rewards/rejected": -0.016603857278823853, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 6.655924144404907e-07, |
|
"logits/chosen": 0.7234289050102234, |
|
"logits/rejected": 0.7377198338508606, |
|
"logps/chosen": -89.62705993652344, |
|
"logps/rejected": -86.00987243652344, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.012271616607904434, |
|
"rewards/margins": 0.00808991864323616, |
|
"rewards/rejected": -0.020361537113785744, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 6.046442623320145e-07, |
|
"logits/chosen": 0.7715792655944824, |
|
"logits/rejected": 0.803941547870636, |
|
"logps/chosen": -109.85160827636719, |
|
"logps/rejected": -105.02885437011719, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.018752435222268105, |
|
"rewards/margins": 0.005930652376264334, |
|
"rewards/rejected": -0.024683088064193726, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5.462368920983249e-07, |
|
"logits/chosen": 0.7687041759490967, |
|
"logits/rejected": 0.7833288311958313, |
|
"logps/chosen": -85.72100067138672, |
|
"logps/rejected": -73.33113098144531, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.013071869499981403, |
|
"rewards/margins": 0.006707571446895599, |
|
"rewards/rejected": -0.019779440015554428, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.904486005914027e-07, |
|
"logits/chosen": 0.7501081228256226, |
|
"logits/rejected": 0.7631546258926392, |
|
"logps/chosen": -118.79168701171875, |
|
"logps/rejected": -109.3132095336914, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.01289316825568676, |
|
"rewards/margins": 0.010348210111260414, |
|
"rewards/rejected": -0.023241376504302025, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.373541737087264e-07, |
|
"logits/chosen": 0.7186606526374817, |
|
"logits/rejected": 0.7436104416847229, |
|
"logps/chosen": -101.85713958740234, |
|
"logps/rejected": -97.64336395263672, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": -0.015154453925788403, |
|
"rewards/margins": 0.006247458979487419, |
|
"rewards/rejected": -0.021401915699243546, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.8702478614051353e-07, |
|
"logits/chosen": 0.7555572986602783, |
|
"logits/rejected": 0.7140852808952332, |
|
"logps/chosen": -100.46399688720703, |
|
"logps/rejected": -92.70053100585938, |
|
"loss": 0.009, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.013208119198679924, |
|
"rewards/margins": 0.012039312161505222, |
|
"rewards/rejected": -0.02524743042886257, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_logits/chosen": 0.732753574848175, |
|
"eval_logits/rejected": 0.7523351311683655, |
|
"eval_logps/chosen": -93.27485656738281, |
|
"eval_logps/rejected": -96.37907409667969, |
|
"eval_loss": 0.009285110980272293, |
|
"eval_rewards/accuracies": 0.34200000762939453, |
|
"eval_rewards/chosen": -0.01547484565526247, |
|
"eval_rewards/margins": 0.006345819681882858, |
|
"eval_rewards/rejected": -0.021820668131113052, |
|
"eval_runtime": 272.3597, |
|
"eval_samples_per_second": 7.343, |
|
"eval_steps_per_second": 0.459, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.3952790595787986e-07, |
|
"logits/chosen": 0.8058096170425415, |
|
"logits/rejected": 0.8189395666122437, |
|
"logps/chosen": -116.7092514038086, |
|
"logps/rejected": -103.36915588378906, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.016209406778216362, |
|
"rewards/margins": 0.006806619465351105, |
|
"rewards/rejected": -0.023016026243567467, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.9492720416985004e-07, |
|
"logits/chosen": 0.6810709238052368, |
|
"logits/rejected": 0.6950341463088989, |
|
"logps/chosen": -95.54825592041016, |
|
"logps/rejected": -99.33860778808594, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.014285333454608917, |
|
"rewards/margins": 0.007627467624843121, |
|
"rewards/rejected": -0.021912802010774612, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.5328246937043526e-07, |
|
"logits/chosen": 0.756773829460144, |
|
"logits/rejected": 0.7951828837394714, |
|
"logps/chosen": -96.31170654296875, |
|
"logps/rejected": -100.44956970214844, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.016868023201823235, |
|
"rewards/margins": 0.008540956303477287, |
|
"rewards/rejected": -0.025408979505300522, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.1464952759020857e-07, |
|
"logits/chosen": 0.6711292266845703, |
|
"logits/rejected": 0.7201661467552185, |
|
"logps/chosen": -95.08512878417969, |
|
"logps/rejected": -97.06068420410156, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": -0.014699302613735199, |
|
"rewards/margins": 0.005744755733758211, |
|
"rewards/rejected": -0.020444059744477272, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.790801674598186e-07, |
|
"logits/chosen": 0.7383053302764893, |
|
"logits/rejected": 0.7714890837669373, |
|
"logps/chosen": -124.93879699707031, |
|
"logps/rejected": -99.6561050415039, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.015930239111185074, |
|
"rewards/margins": 0.009742177091538906, |
|
"rewards/rejected": -0.025672415271401405, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.4662207078575685e-07, |
|
"logits/chosen": 0.7659719586372375, |
|
"logits/rejected": 0.7763293385505676, |
|
"logps/chosen": -90.79667663574219, |
|
"logps/rejected": -88.97042083740234, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.0146828293800354, |
|
"rewards/margins": 0.010549478232860565, |
|
"rewards/rejected": -0.025232309475541115, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.1731874863145143e-07, |
|
"logits/chosen": 0.7607251405715942, |
|
"logits/rejected": 0.7975329160690308, |
|
"logps/chosen": -93.11439514160156, |
|
"logps/rejected": -90.27216339111328, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.01333933137357235, |
|
"rewards/margins": 0.008563781157135963, |
|
"rewards/rejected": -0.021903112530708313, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 9.120948298936422e-08, |
|
"logits/chosen": 0.7698312997817993, |
|
"logits/rejected": 0.8300016522407532, |
|
"logps/chosen": -88.53062438964844, |
|
"logps/rejected": -85.57463073730469, |
|
"loss": 0.0088, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.012708066031336784, |
|
"rewards/margins": 0.008814197964966297, |
|
"rewards/rejected": -0.021522263064980507, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 6.832927412229017e-08, |
|
"logits/chosen": 0.7292782068252563, |
|
"logits/rejected": 0.7688521146774292, |
|
"logps/chosen": -91.70903015136719, |
|
"logps/rejected": -94.77793884277344, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.014614684507250786, |
|
"rewards/margins": 0.009316334500908852, |
|
"rewards/rejected": -0.023931019008159637, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 4.870879364444109e-08, |
|
"logits/chosen": 0.7349046468734741, |
|
"logits/rejected": 0.7809796333312988, |
|
"logps/chosen": -88.32861328125, |
|
"logps/rejected": -87.64916229248047, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.014183936640620232, |
|
"rewards/margins": 0.008784374222159386, |
|
"rewards/rejected": -0.022968310862779617, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_logits/chosen": 0.7319999933242798, |
|
"eval_logits/rejected": 0.75148606300354, |
|
"eval_logps/chosen": -93.28408813476562, |
|
"eval_logps/rejected": -96.38092041015625, |
|
"eval_loss": 0.009287585504353046, |
|
"eval_rewards/accuracies": 0.3479999899864197, |
|
"eval_rewards/chosen": -0.015567170456051826, |
|
"eval_rewards/margins": 0.00627197464928031, |
|
"eval_rewards/rejected": -0.021839145570993423, |
|
"eval_runtime": 272.272, |
|
"eval_samples_per_second": 7.346, |
|
"eval_steps_per_second": 0.459, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.237434340521789e-08, |
|
"logits/chosen": 0.7097499966621399, |
|
"logits/rejected": 0.781818687915802, |
|
"logps/chosen": -101.80133056640625, |
|
"logps/rejected": -103.1551742553711, |
|
"loss": 0.009, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.015344848856329918, |
|
"rewards/margins": 0.008192854933440685, |
|
"rewards/rejected": -0.023537704721093178, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.93478202307823e-08, |
|
"logits/chosen": 0.7037514448165894, |
|
"logits/rejected": 0.7321338653564453, |
|
"logps/chosen": -97.42137145996094, |
|
"logps/rejected": -94.04901123046875, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.013967466540634632, |
|
"rewards/margins": 0.005121930036693811, |
|
"rewards/rejected": -0.01908939704298973, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 9.646686570697062e-09, |
|
"logits/chosen": 0.7186129093170166, |
|
"logits/rejected": 0.8163198232650757, |
|
"logps/chosen": -103.3648681640625, |
|
"logps/rejected": -99.19805145263672, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.012637853622436523, |
|
"rewards/margins": 0.008680110797286034, |
|
"rewards/rejected": -0.021317964419722557, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.283947088983663e-09, |
|
"logits/chosen": 0.7728757858276367, |
|
"logits/rejected": 0.7769932150840759, |
|
"logps/chosen": -98.65019989013672, |
|
"logps/rejected": -108.49007415771484, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.018378112465143204, |
|
"rewards/margins": 0.008477538824081421, |
|
"rewards/rejected": -0.026855653151869774, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.681312309735229e-10, |
|
"logits/chosen": 0.8041951060295105, |
|
"logits/rejected": 0.818127453327179, |
|
"logps/chosen": -92.90714263916016, |
|
"logps/rejected": -102.76998138427734, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.014978660270571709, |
|
"rewards/margins": 0.006398401223123074, |
|
"rewards/rejected": -0.021377062425017357, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 954, |
|
"total_flos": 0.0, |
|
"train_loss": 0.008370190804172112, |
|
"train_runtime": 12459.0229, |
|
"train_samples_per_second": 4.907, |
|
"train_steps_per_second": 0.077 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 954, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |