|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994340690435767, |
|
"eval_steps": 100, |
|
"global_step": 883, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.617977528089887e-09, |
|
"logits/chosen": -2.7943434715270996, |
|
"logits/rejected": -2.817823886871338, |
|
"logps/chosen": -334.107666015625, |
|
"logps/rejected": -197.05621337890625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.617977528089887e-08, |
|
"logits/chosen": -2.833451271057129, |
|
"logits/rejected": -2.7827768325805664, |
|
"logps/chosen": -323.80584716796875, |
|
"logps/rejected": -189.39964294433594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": 0.0005755923339165747, |
|
"rewards/margins": 0.0003566421801224351, |
|
"rewards/rejected": 0.00021895011013839394, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1235955056179774e-07, |
|
"logits/chosen": -2.778655767440796, |
|
"logits/rejected": -2.7627151012420654, |
|
"logps/chosen": -323.3365783691406, |
|
"logps/rejected": -168.40744018554688, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0010369193041697145, |
|
"rewards/margins": 0.0018870027270168066, |
|
"rewards/rejected": -0.0008500836556777358, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6853932584269663e-07, |
|
"logits/chosen": -2.7871737480163574, |
|
"logits/rejected": -2.7326064109802246, |
|
"logps/chosen": -305.997314453125, |
|
"logps/rejected": -180.06800842285156, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.009164368733763695, |
|
"rewards/margins": 0.015919247642159462, |
|
"rewards/rejected": -0.006754877977073193, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.2471910112359549e-07, |
|
"logits/chosen": -2.7199320793151855, |
|
"logits/rejected": -2.711822032928467, |
|
"logps/chosen": -314.8984680175781, |
|
"logps/rejected": -178.45077514648438, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.034292496740818024, |
|
"rewards/margins": 0.06667112559080124, |
|
"rewards/rejected": -0.03237862139940262, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8089887640449437e-07, |
|
"logits/chosen": -2.6660404205322266, |
|
"logits/rejected": -2.6608872413635254, |
|
"logps/chosen": -340.89056396484375, |
|
"logps/rejected": -192.22543334960938, |
|
"loss": 0.6387, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.0421828031539917, |
|
"rewards/margins": 0.14488723874092102, |
|
"rewards/rejected": -0.10270445048809052, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3707865168539325e-07, |
|
"logits/chosen": -2.6621761322021484, |
|
"logits/rejected": -2.6332790851593018, |
|
"logps/chosen": -290.0724182128906, |
|
"logps/rejected": -199.76377868652344, |
|
"loss": 0.6192, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.03800051286816597, |
|
"rewards/margins": 0.1342642456293106, |
|
"rewards/rejected": -0.17226476967334747, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.9325842696629214e-07, |
|
"logits/chosen": -2.5926709175109863, |
|
"logits/rejected": -2.5758044719696045, |
|
"logps/chosen": -318.26446533203125, |
|
"logps/rejected": -217.8231964111328, |
|
"loss": 0.5872, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.061497531831264496, |
|
"rewards/margins": 0.23234911262989044, |
|
"rewards/rejected": -0.29384663701057434, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4943820224719097e-07, |
|
"logits/chosen": -2.561908483505249, |
|
"logits/rejected": -2.5379791259765625, |
|
"logps/chosen": -396.86993408203125, |
|
"logps/rejected": -253.50143432617188, |
|
"loss": 0.5579, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.01939094439148903, |
|
"rewards/margins": 0.49067315459251404, |
|
"rewards/rejected": -0.5100641250610352, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999980431020109e-07, |
|
"logits/chosen": -2.5810797214508057, |
|
"logits/rejected": -2.5567336082458496, |
|
"logps/chosen": -380.4464416503906, |
|
"logps/rejected": -262.82904052734375, |
|
"loss": 0.5455, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.24769897758960724, |
|
"rewards/margins": 0.5812320709228516, |
|
"rewards/rejected": -0.8289310336112976, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997632524101301e-07, |
|
"logits/chosen": -2.6055984497070312, |
|
"logits/rejected": -2.5864219665527344, |
|
"logps/chosen": -367.29071044921875, |
|
"logps/rejected": -280.4869079589844, |
|
"loss": 0.5392, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.2197243869304657, |
|
"rewards/margins": 0.5160216689109802, |
|
"rewards/rejected": -0.7357459664344788, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_logits/chosen": -2.590785264968872, |
|
"eval_logits/rejected": -2.5756187438964844, |
|
"eval_logps/chosen": -322.57501220703125, |
|
"eval_logps/rejected": -351.5352478027344, |
|
"eval_loss": 0.6285832524299622, |
|
"eval_rewards/accuracies": 0.65234375, |
|
"eval_rewards/chosen": -0.6553537845611572, |
|
"eval_rewards/margins": 0.2864663004875183, |
|
"eval_rewards/rejected": -0.9418200850486755, |
|
"eval_runtime": 53.1932, |
|
"eval_samples_per_second": 37.599, |
|
"eval_steps_per_second": 0.602, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.991375032514749e-07, |
|
"logits/chosen": -2.5533313751220703, |
|
"logits/rejected": -2.5264110565185547, |
|
"logps/chosen": -363.4510498046875, |
|
"logps/rejected": -284.7992248535156, |
|
"loss": 0.5232, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3205306828022003, |
|
"rewards/margins": 0.6818863749504089, |
|
"rewards/rejected": -1.002416968345642, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.98121775121344e-07, |
|
"logits/chosen": -2.6315197944641113, |
|
"logits/rejected": -2.5978212356567383, |
|
"logps/chosen": -410.644775390625, |
|
"logps/rejected": -323.01190185546875, |
|
"loss": 0.4994, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.27390944957733154, |
|
"rewards/margins": 0.8327864408493042, |
|
"rewards/rejected": -1.1066958904266357, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.96717657955441e-07, |
|
"logits/chosen": -2.59904408454895, |
|
"logits/rejected": -2.5410983562469482, |
|
"logps/chosen": -416.3720703125, |
|
"logps/rejected": -325.9648132324219, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.40370965003967285, |
|
"rewards/margins": 0.9006286859512329, |
|
"rewards/rejected": -1.3043382167816162, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.949273496411216e-07, |
|
"logits/chosen": -2.545508861541748, |
|
"logits/rejected": -2.5205612182617188, |
|
"logps/chosen": -379.17767333984375, |
|
"logps/rejected": -337.29962158203125, |
|
"loss": 0.4954, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.451716810464859, |
|
"rewards/margins": 0.8486088514328003, |
|
"rewards/rejected": -1.300325632095337, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.927536525770046e-07, |
|
"logits/chosen": -2.5130438804626465, |
|
"logits/rejected": -2.487233877182007, |
|
"logps/chosen": -423.2710876464844, |
|
"logps/rejected": -352.5829772949219, |
|
"loss": 0.4976, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.5199152231216431, |
|
"rewards/margins": 1.0131314992904663, |
|
"rewards/rejected": -1.5330466032028198, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.901999692863326e-07, |
|
"logits/chosen": -2.520357847213745, |
|
"logits/rejected": -2.4684462547302246, |
|
"logps/chosen": -498.07098388671875, |
|
"logps/rejected": -388.2645263671875, |
|
"loss": 0.463, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.5807570219039917, |
|
"rewards/margins": 1.1767116785049438, |
|
"rewards/rejected": -1.757468581199646, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.872702970909464e-07, |
|
"logits/chosen": -2.345059633255005, |
|
"logits/rejected": -2.281158924102783, |
|
"logps/chosen": -455.2555236816406, |
|
"logps/rejected": -373.2399597167969, |
|
"loss": 0.4471, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.8019993901252747, |
|
"rewards/margins": 1.065147042274475, |
|
"rewards/rejected": -1.8671462535858154, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.839692218542131e-07, |
|
"logits/chosen": -2.167600631713867, |
|
"logits/rejected": -2.1524620056152344, |
|
"logps/chosen": -445.18963623046875, |
|
"logps/rejected": -420.07354736328125, |
|
"loss": 0.4607, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.5789515972137451, |
|
"rewards/margins": 0.9403783082962036, |
|
"rewards/rejected": -2.5193300247192383, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.803019108026997e-07, |
|
"logits/chosen": -2.0659067630767822, |
|
"logits/rejected": -2.0179924964904785, |
|
"logps/chosen": -446.5098571777344, |
|
"logps/rejected": -408.96685791015625, |
|
"loss": 0.4605, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.1547313928604126, |
|
"rewards/margins": 1.099097490310669, |
|
"rewards/rejected": -2.253828525543213, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7627410443782887e-07, |
|
"logits/chosen": -1.9613704681396484, |
|
"logits/rejected": -1.9336235523223877, |
|
"logps/chosen": -434.38311767578125, |
|
"logps/rejected": -421.72308349609375, |
|
"loss": 0.4524, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.2233312129974365, |
|
"rewards/margins": 1.0944594144821167, |
|
"rewards/rejected": -2.3177905082702637, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_logits/chosen": -1.9878398180007935, |
|
"eval_logits/rejected": -1.9677612781524658, |
|
"eval_logps/chosen": -405.3453674316406, |
|
"eval_logps/rejected": -474.3326721191406, |
|
"eval_loss": 0.5474696755409241, |
|
"eval_rewards/accuracies": 0.72265625, |
|
"eval_rewards/chosen": -1.4830571413040161, |
|
"eval_rewards/margins": 0.6867368221282959, |
|
"eval_rewards/rejected": -2.1697940826416016, |
|
"eval_runtime": 53.0465, |
|
"eval_samples_per_second": 37.703, |
|
"eval_steps_per_second": 0.603, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7189210755018034e-07, |
|
"logits/chosen": -1.916168212890625, |
|
"logits/rejected": -1.849001169204712, |
|
"logps/chosen": -497.56134033203125, |
|
"logps/rejected": -451.7841796875, |
|
"loss": 0.4423, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2633593082427979, |
|
"rewards/margins": 1.236897587776184, |
|
"rewards/rejected": -2.5002567768096924, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.671627793504988e-07, |
|
"logits/chosen": -1.965778112411499, |
|
"logits/rejected": -1.8829681873321533, |
|
"logps/chosen": -516.19921875, |
|
"logps/rejected": -489.0526428222656, |
|
"loss": 0.4306, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.368606686592102, |
|
"rewards/margins": 1.4575475454330444, |
|
"rewards/rejected": -2.8261542320251465, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6209352273286095e-07, |
|
"logits/chosen": -1.8527837991714478, |
|
"logits/rejected": -1.7781047821044922, |
|
"logps/chosen": -492.2167053222656, |
|
"logps/rejected": -515.4146728515625, |
|
"loss": 0.4315, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5558216571807861, |
|
"rewards/margins": 1.2494769096374512, |
|
"rewards/rejected": -2.8052985668182373, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.56692272686805e-07, |
|
"logits/chosen": -1.8593418598175049, |
|
"logits/rejected": -1.7763780355453491, |
|
"logps/chosen": -473.20245361328125, |
|
"logps/rejected": -463.26849365234375, |
|
"loss": 0.4462, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.5299947261810303, |
|
"rewards/margins": 1.291903018951416, |
|
"rewards/rejected": -2.8218979835510254, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5096748387656326e-07, |
|
"logits/chosen": -1.6604913473129272, |
|
"logits/rejected": -1.5300872325897217, |
|
"logps/chosen": -527.0318603515625, |
|
"logps/rejected": -502.64129638671875, |
|
"loss": 0.4618, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.062129497528076, |
|
"rewards/margins": 1.1275193691253662, |
|
"rewards/rejected": -3.1896486282348633, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4492811740683877e-07, |
|
"logits/chosen": -1.5592234134674072, |
|
"logits/rejected": -1.3744081258773804, |
|
"logps/chosen": -491.737548828125, |
|
"logps/rejected": -486.6441345214844, |
|
"loss": 0.4473, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.907570481300354, |
|
"rewards/margins": 1.1632691621780396, |
|
"rewards/rejected": -3.0708391666412354, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3858362679584354e-07, |
|
"logits/chosen": -1.5746996402740479, |
|
"logits/rejected": -1.2380870580673218, |
|
"logps/chosen": -457.90753173828125, |
|
"logps/rejected": -446.56683349609375, |
|
"loss": 0.4103, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.1260545253753662, |
|
"rewards/margins": 1.6088136434555054, |
|
"rewards/rejected": -2.734868288040161, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3194394317755245e-07, |
|
"logits/chosen": -1.3573920726776123, |
|
"logits/rejected": -1.0481122732162476, |
|
"logps/chosen": -512.2153930664062, |
|
"logps/rejected": -469.4527893066406, |
|
"loss": 0.4381, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.6660184860229492, |
|
"rewards/margins": 1.3182036876678467, |
|
"rewards/rejected": -2.984222173690796, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2501945975633914e-07, |
|
"logits/chosen": -1.5231261253356934, |
|
"logits/rejected": -1.2471725940704346, |
|
"logps/chosen": -508.29248046875, |
|
"logps/rejected": -447.50689697265625, |
|
"loss": 0.4364, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.4383156299591064, |
|
"rewards/margins": 1.2735927104949951, |
|
"rewards/rejected": -2.7119078636169434, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1782101553832405e-07, |
|
"logits/chosen": -1.4166069030761719, |
|
"logits/rejected": -1.1425375938415527, |
|
"logps/chosen": -467.41717529296875, |
|
"logps/rejected": -439.3959045410156, |
|
"loss": 0.3976, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.5179073810577393, |
|
"rewards/margins": 1.1478455066680908, |
|
"rewards/rejected": -2.665753126144409, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_logits/chosen": -1.18406081199646, |
|
"eval_logits/rejected": -0.9782991409301758, |
|
"eval_logps/chosen": -442.4473571777344, |
|
"eval_logps/rejected": -545.2501220703125, |
|
"eval_loss": 0.5194380879402161, |
|
"eval_rewards/accuracies": 0.76171875, |
|
"eval_rewards/chosen": -1.8540773391723633, |
|
"eval_rewards/margins": 1.0248912572860718, |
|
"eval_rewards/rejected": -2.8789682388305664, |
|
"eval_runtime": 53.0005, |
|
"eval_samples_per_second": 37.736, |
|
"eval_steps_per_second": 0.604, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.103598783649029e-07, |
|
"logits/chosen": -1.0781385898590088, |
|
"logits/rejected": -0.6068095564842224, |
|
"logps/chosen": -542.6256713867188, |
|
"logps/rejected": -505.87078857421875, |
|
"loss": 0.4248, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.779624342918396, |
|
"rewards/margins": 1.5406283140182495, |
|
"rewards/rejected": -3.3202528953552246, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.026477272750119e-07, |
|
"logits/chosen": -0.7725287079811096, |
|
"logits/rejected": -0.2756701111793518, |
|
"logps/chosen": -545.5137329101562, |
|
"logps/rejected": -528.4269409179688, |
|
"loss": 0.4226, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.142789602279663, |
|
"rewards/margins": 1.3576524257659912, |
|
"rewards/rejected": -3.500441789627075, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9469663422373864e-07, |
|
"logits/chosen": -0.9761560559272766, |
|
"logits/rejected": -0.6311030983924866, |
|
"logps/chosen": -517.2960205078125, |
|
"logps/rejected": -506.86328125, |
|
"loss": 0.4432, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8108386993408203, |
|
"rewards/margins": 1.37090265750885, |
|
"rewards/rejected": -3.181741237640381, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.865190451858954e-07, |
|
"logits/chosen": -0.865078330039978, |
|
"logits/rejected": -0.3488244414329529, |
|
"logps/chosen": -540.340087890625, |
|
"logps/rejected": -525.5319213867188, |
|
"loss": 0.43, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.597611904144287, |
|
"rewards/margins": 1.6154896020889282, |
|
"rewards/rejected": -3.213101625442505, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.781277606741327e-07, |
|
"logits/chosen": -1.0114878416061401, |
|
"logits/rejected": -0.7175018191337585, |
|
"logps/chosen": -450.4183654785156, |
|
"logps/rejected": -459.7533264160156, |
|
"loss": 0.4271, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.427380919456482, |
|
"rewards/margins": 1.2692419290542603, |
|
"rewards/rejected": -2.696622610092163, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6953591570208996e-07, |
|
"logits/chosen": -1.2963850498199463, |
|
"logits/rejected": -0.8218928575515747, |
|
"logps/chosen": -540.1664428710938, |
|
"logps/rejected": -555.89306640625, |
|
"loss": 0.4147, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.701040506362915, |
|
"rewards/margins": 1.8265488147735596, |
|
"rewards/rejected": -3.5275893211364746, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.607569592239452e-07, |
|
"logits/chosen": -1.0880775451660156, |
|
"logits/rejected": -0.6546664237976074, |
|
"logps/chosen": -559.7450561523438, |
|
"logps/rejected": -538.1546630859375, |
|
"loss": 0.4192, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.7286078929901123, |
|
"rewards/margins": 1.7532542943954468, |
|
"rewards/rejected": -3.4818618297576904, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.518046330825494e-07, |
|
"logits/chosen": -1.1186842918395996, |
|
"logits/rejected": -0.6067591905593872, |
|
"logps/chosen": -560.1696166992188, |
|
"logps/rejected": -522.5840454101562, |
|
"loss": 0.4349, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.8213374614715576, |
|
"rewards/margins": 1.5133308172225952, |
|
"rewards/rejected": -3.3346683979034424, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4269295049909713e-07, |
|
"logits/chosen": -1.1209189891815186, |
|
"logits/rejected": -0.7713836431503296, |
|
"logps/chosen": -473.28759765625, |
|
"logps/rejected": -484.11065673828125, |
|
"loss": 0.3979, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.7088820934295654, |
|
"rewards/margins": 1.425378441810608, |
|
"rewards/rejected": -3.1342601776123047, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3343617413800453e-07, |
|
"logits/chosen": -1.1869983673095703, |
|
"logits/rejected": -0.6728812456130981, |
|
"logps/chosen": -529.2347412109375, |
|
"logps/rejected": -498.1748962402344, |
|
"loss": 0.3892, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.649171233177185, |
|
"rewards/margins": 1.6641887426376343, |
|
"rewards/rejected": -3.3133597373962402, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_logits/chosen": -0.8579260110855103, |
|
"eval_logits/rejected": -0.6001935601234436, |
|
"eval_logps/chosen": -464.98876953125, |
|
"eval_logps/rejected": -575.0087280273438, |
|
"eval_loss": 0.5159767866134644, |
|
"eval_rewards/accuracies": 0.77734375, |
|
"eval_rewards/chosen": -2.079491376876831, |
|
"eval_rewards/margins": 1.0970630645751953, |
|
"eval_rewards/rejected": -3.1765542030334473, |
|
"eval_runtime": 53.0852, |
|
"eval_samples_per_second": 37.675, |
|
"eval_steps_per_second": 0.603, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2404879378132893e-07, |
|
"logits/chosen": -0.8699030876159668, |
|
"logits/rejected": -0.48875007033348083, |
|
"logps/chosen": -468.9755859375, |
|
"logps/rejected": -495.357421875, |
|
"loss": 0.4084, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.6207454204559326, |
|
"rewards/margins": 1.6607239246368408, |
|
"rewards/rejected": -3.2814698219299316, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1454550364767894e-07, |
|
"logits/chosen": -1.098257303237915, |
|
"logits/rejected": -0.709359347820282, |
|
"logps/chosen": -512.3826904296875, |
|
"logps/rejected": -523.95458984375, |
|
"loss": 0.4354, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.7999498844146729, |
|
"rewards/margins": 1.4373884201049805, |
|
"rewards/rejected": -3.2373383045196533, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.049411793911154e-07, |
|
"logits/chosen": -0.9810858964920044, |
|
"logits/rejected": -0.6282259821891785, |
|
"logps/chosen": -509.84368896484375, |
|
"logps/rejected": -517.60107421875, |
|
"loss": 0.3974, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.8548433780670166, |
|
"rewards/margins": 1.386967420578003, |
|
"rewards/rejected": -3.2418110370635986, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9525085481604914e-07, |
|
"logits/chosen": -0.6511877775192261, |
|
"logits/rejected": -0.07081355899572372, |
|
"logps/chosen": -509.661376953125, |
|
"logps/rejected": -524.3201904296875, |
|
"loss": 0.4151, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.8540523052215576, |
|
"rewards/margins": 1.5340583324432373, |
|
"rewards/rejected": -3.388110399246216, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.854896983445833e-07, |
|
"logits/chosen": -0.5572197437286377, |
|
"logits/rejected": 0.0708194151520729, |
|
"logps/chosen": -562.8184814453125, |
|
"logps/rejected": -528.6909790039062, |
|
"loss": 0.4329, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.886749267578125, |
|
"rewards/margins": 1.5286136865615845, |
|
"rewards/rejected": -3.415362596511841, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7567298927313654e-07, |
|
"logits/chosen": -0.8817178606987, |
|
"logits/rejected": -0.6781443357467651, |
|
"logps/chosen": -470.3621520996094, |
|
"logps/rejected": -495.33111572265625, |
|
"loss": 0.4137, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4939197301864624, |
|
"rewards/margins": 1.3965364694595337, |
|
"rewards/rejected": -2.890456199645996, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.658160938555123e-07, |
|
"logits/chosen": -0.900059700012207, |
|
"logits/rejected": -0.38037875294685364, |
|
"logps/chosen": -530.0759887695312, |
|
"logps/rejected": -549.6453857421875, |
|
"loss": 0.3727, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.6465427875518799, |
|
"rewards/margins": 1.722328543663025, |
|
"rewards/rejected": -3.3688716888427734, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.559344412498532e-07, |
|
"logits/chosen": -0.5834644436836243, |
|
"logits/rejected": -0.024540895596146584, |
|
"logps/chosen": -526.4287719726562, |
|
"logps/rejected": -525.1407470703125, |
|
"loss": 0.4301, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8217204809188843, |
|
"rewards/margins": 1.5024107694625854, |
|
"rewards/rejected": -3.3241310119628906, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.460434993671294e-07, |
|
"logits/chosen": -0.999637246131897, |
|
"logits/rejected": -0.7088354825973511, |
|
"logps/chosen": -467.12353515625, |
|
"logps/rejected": -472.5782775878906, |
|
"loss": 0.3968, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.5620094537734985, |
|
"rewards/margins": 1.4622641801834106, |
|
"rewards/rejected": -3.02427339553833, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.361587506589672e-07, |
|
"logits/chosen": -1.169862151145935, |
|
"logits/rejected": -0.6735583543777466, |
|
"logps/chosen": -547.8793334960938, |
|
"logps/rejected": -530.5782470703125, |
|
"loss": 0.3964, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.728243112564087, |
|
"rewards/margins": 1.6642783880233765, |
|
"rewards/rejected": -3.392521381378174, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_logits/chosen": -1.018913984298706, |
|
"eval_logits/rejected": -0.8011811375617981, |
|
"eval_logps/chosen": -476.00384521484375, |
|
"eval_logps/rejected": -588.1665649414062, |
|
"eval_loss": 0.49919986724853516, |
|
"eval_rewards/accuracies": 0.765625, |
|
"eval_rewards/chosen": -2.1896419525146484, |
|
"eval_rewards/margins": 1.11849045753479, |
|
"eval_rewards/rejected": -3.3081324100494385, |
|
"eval_runtime": 73.4341, |
|
"eval_samples_per_second": 27.235, |
|
"eval_steps_per_second": 0.436, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2629566788271613e-07, |
|
"logits/chosen": -1.1643812656402588, |
|
"logits/rejected": -0.6770884394645691, |
|
"logps/chosen": -498.718994140625, |
|
"logps/rejected": -513.8646240234375, |
|
"loss": 0.4072, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.7343899011611938, |
|
"rewards/margins": 1.711033582687378, |
|
"rewards/rejected": -3.4454236030578613, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1646968988169135e-07, |
|
"logits/chosen": -1.2519400119781494, |
|
"logits/rejected": -0.7656970620155334, |
|
"logps/chosen": -552.4429931640625, |
|
"logps/rejected": -580.3065185546875, |
|
"loss": 0.3859, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.9031795263290405, |
|
"rewards/margins": 1.720510721206665, |
|
"rewards/rejected": -3.623690366744995, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0669619741850232e-07, |
|
"logits/chosen": -1.166473388671875, |
|
"logits/rejected": -0.5304248929023743, |
|
"logps/chosen": -543.8204345703125, |
|
"logps/rejected": -517.4561767578125, |
|
"loss": 0.4265, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.9481357336044312, |
|
"rewards/margins": 1.5012562274932861, |
|
"rewards/rejected": -3.4493918418884277, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9699048909929518e-07, |
|
"logits/chosen": -1.3502863645553589, |
|
"logits/rejected": -0.972245991230011, |
|
"logps/chosen": -513.7689819335938, |
|
"logps/rejected": -506.95953369140625, |
|
"loss": 0.3917, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -1.8054568767547607, |
|
"rewards/margins": 1.3885786533355713, |
|
"rewards/rejected": -3.194035291671753, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8736775742659732e-07, |
|
"logits/chosen": -1.1914501190185547, |
|
"logits/rejected": -0.8519012331962585, |
|
"logps/chosen": -489.40234375, |
|
"logps/rejected": -517.2066040039062, |
|
"loss": 0.3892, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.6697397232055664, |
|
"rewards/margins": 1.5511436462402344, |
|
"rewards/rejected": -3.2208831310272217, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7784306501824616e-07, |
|
"logits/chosen": -1.1693607568740845, |
|
"logits/rejected": -0.501569926738739, |
|
"logps/chosen": -549.26220703125, |
|
"logps/rejected": -523.9554443359375, |
|
"loss": 0.4249, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.8511940240859985, |
|
"rewards/margins": 1.4909955263137817, |
|
"rewards/rejected": -3.342189311981201, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6843132102963025e-07, |
|
"logits/chosen": -1.1927831172943115, |
|
"logits/rejected": -0.8532694578170776, |
|
"logps/chosen": -539.3836669921875, |
|
"logps/rejected": -510.72637939453125, |
|
"loss": 0.3897, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.6676031351089478, |
|
"rewards/margins": 1.5155996084213257, |
|
"rewards/rejected": -3.1832027435302734, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.591472578161458e-07, |
|
"logits/chosen": -1.3109443187713623, |
|
"logits/rejected": -0.9485646486282349, |
|
"logps/chosen": -494.5043029785156, |
|
"logps/rejected": -489.75537109375, |
|
"loss": 0.4009, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.5513614416122437, |
|
"rewards/margins": 1.510770559310913, |
|
"rewards/rejected": -3.062131881713867, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5000540787240274e-07, |
|
"logits/chosen": -1.2452589273452759, |
|
"logits/rejected": -0.857632040977478, |
|
"logps/chosen": -504.5924377441406, |
|
"logps/rejected": -519.4934692382812, |
|
"loss": 0.3993, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.7807199954986572, |
|
"rewards/margins": 1.571396827697754, |
|
"rewards/rejected": -3.352116823196411, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.410200810842749e-07, |
|
"logits/chosen": -1.2575485706329346, |
|
"logits/rejected": -0.8479830622673035, |
|
"logps/chosen": -503.79388427734375, |
|
"logps/rejected": -516.9588012695312, |
|
"loss": 0.4149, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7127326726913452, |
|
"rewards/margins": 1.6169878244400024, |
|
"rewards/rejected": -3.3297202587127686, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": -1.2397898435592651, |
|
"eval_logits/rejected": -1.0526514053344727, |
|
"eval_logps/chosen": -477.6524658203125, |
|
"eval_logps/rejected": -589.7600708007812, |
|
"eval_loss": 0.4948367774486542, |
|
"eval_rewards/accuracies": 0.74609375, |
|
"eval_rewards/chosen": -2.2061285972595215, |
|
"eval_rewards/margins": 1.1179393529891968, |
|
"eval_rewards/rejected": -3.3240678310394287, |
|
"eval_runtime": 53.1159, |
|
"eval_samples_per_second": 37.654, |
|
"eval_steps_per_second": 0.602, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.322053423294041e-07, |
|
"logits/chosen": -1.256247639656067, |
|
"logits/rejected": -0.9272082448005676, |
|
"logps/chosen": -501.60675048828125, |
|
"logps/rejected": -531.3302001953125, |
|
"loss": 0.4028, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.7432218790054321, |
|
"rewards/margins": 1.7136541604995728, |
|
"rewards/rejected": -3.456876039505005, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2357498946121905e-07, |
|
"logits/chosen": -1.3026126623153687, |
|
"logits/rejected": -0.9675828218460083, |
|
"logps/chosen": -534.3182373046875, |
|
"logps/rejected": -527.3935546875, |
|
"loss": 0.4187, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8490867614746094, |
|
"rewards/margins": 1.5810914039611816, |
|
"rewards/rejected": -3.430178165435791, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1514253171093161e-07, |
|
"logits/chosen": -1.2203739881515503, |
|
"logits/rejected": -0.7822047472000122, |
|
"logps/chosen": -493.72821044921875, |
|
"logps/rejected": -506.88690185546875, |
|
"loss": 0.4051, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.6152998208999634, |
|
"rewards/margins": 1.5858867168426514, |
|
"rewards/rejected": -3.201186418533325, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0692116854131883e-07, |
|
"logits/chosen": -1.000585913658142, |
|
"logits/rejected": -0.7414053678512573, |
|
"logps/chosen": -493.9928283691406, |
|
"logps/rejected": -534.2704467773438, |
|
"loss": 0.3866, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7848412990570068, |
|
"rewards/margins": 1.534330129623413, |
|
"rewards/rejected": -3.319171905517578, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.89237689853889e-08, |
|
"logits/chosen": -0.9636529684066772, |
|
"logits/rejected": -0.6193439364433289, |
|
"logps/chosen": -499.71234130859375, |
|
"logps/rejected": -517.5823974609375, |
|
"loss": 0.394, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.829134225845337, |
|
"rewards/margins": 1.6988353729248047, |
|
"rewards/rejected": -3.5279693603515625, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.11628515022765e-08, |
|
"logits/chosen": -1.0789777040481567, |
|
"logits/rejected": -0.6399408578872681, |
|
"logps/chosen": -513.3380737304688, |
|
"logps/rejected": -544.7978515625, |
|
"loss": 0.3623, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.7524398565292358, |
|
"rewards/margins": 1.8130983114242554, |
|
"rewards/rejected": -3.565537929534912, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.365056438189486e-08, |
|
"logits/chosen": -1.0069674253463745, |
|
"logits/rejected": -0.5994616746902466, |
|
"logps/chosen": -542.05712890625, |
|
"logps/rejected": -564.6227416992188, |
|
"loss": 0.4122, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9871015548706055, |
|
"rewards/margins": 1.6281112432479858, |
|
"rewards/rejected": -3.6152126789093018, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.639866672902101e-08, |
|
"logits/chosen": -1.0949068069458008, |
|
"logits/rejected": -0.7090824246406555, |
|
"logps/chosen": -549.8911743164062, |
|
"logps/rejected": -559.15771484375, |
|
"loss": 0.4132, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.8535239696502686, |
|
"rewards/margins": 1.733758568763733, |
|
"rewards/rejected": -3.587282657623291, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.941851005657851e-08, |
|
"logits/chosen": -1.1339385509490967, |
|
"logits/rejected": -0.738599419593811, |
|
"logps/chosen": -494.1913146972656, |
|
"logps/rejected": -504.7791442871094, |
|
"loss": 0.3813, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.752722978591919, |
|
"rewards/margins": 1.4443397521972656, |
|
"rewards/rejected": -3.1970624923706055, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.272102051693051e-08, |
|
"logits/chosen": -1.2199567556381226, |
|
"logits/rejected": -0.9412355422973633, |
|
"logps/chosen": -552.1275024414062, |
|
"logps/rejected": -515.4296264648438, |
|
"loss": 0.4004, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.7129371166229248, |
|
"rewards/margins": 1.4609147310256958, |
|
"rewards/rejected": -3.1738522052764893, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": -1.0643391609191895, |
|
"eval_logits/rejected": -0.8519161343574524, |
|
"eval_logps/chosen": -474.26617431640625, |
|
"eval_logps/rejected": -593.8731079101562, |
|
"eval_loss": 0.49052032828330994, |
|
"eval_rewards/accuracies": 0.76953125, |
|
"eval_rewards/chosen": -2.1722652912139893, |
|
"eval_rewards/margins": 1.192933201789856, |
|
"eval_rewards/rejected": -3.3651983737945557, |
|
"eval_runtime": 53.0717, |
|
"eval_samples_per_second": 37.685, |
|
"eval_steps_per_second": 0.603, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.6316681798995844e-08, |
|
"logits/chosen": -1.0180628299713135, |
|
"logits/rejected": -0.7236673831939697, |
|
"logps/chosen": -491.35565185546875, |
|
"logps/rejected": -525.1397705078125, |
|
"loss": 0.3851, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7499040365219116, |
|
"rewards/margins": 1.7332220077514648, |
|
"rewards/rejected": -3.483126163482666, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.0215518717961256e-08, |
|
"logits/chosen": -1.0655405521392822, |
|
"logits/rejected": -0.6608148813247681, |
|
"logps/chosen": -525.560302734375, |
|
"logps/rejected": -529.1053466796875, |
|
"loss": 0.3984, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.8151214122772217, |
|
"rewards/margins": 1.7203428745269775, |
|
"rewards/rejected": -3.53546404838562, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.4427081523275925e-08, |
|
"logits/chosen": -1.0117073059082031, |
|
"logits/rejected": -0.715721607208252, |
|
"logps/chosen": -504.0994567871094, |
|
"logps/rejected": -539.0814208984375, |
|
"loss": 0.3756, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8760732412338257, |
|
"rewards/margins": 1.5654491186141968, |
|
"rewards/rejected": -3.4415221214294434, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.896043094949061e-08, |
|
"logits/chosen": -1.1520367860794067, |
|
"logits/rejected": -0.5986729860305786, |
|
"logps/chosen": -532.8388061523438, |
|
"logps/rejected": -558.7303466796875, |
|
"loss": 0.4003, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.8816320896148682, |
|
"rewards/margins": 1.7765041589736938, |
|
"rewards/rejected": -3.6581363677978516, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.3824124033343557e-08, |
|
"logits/chosen": -0.8991321325302124, |
|
"logits/rejected": -0.6385317444801331, |
|
"logps/chosen": -567.1549072265625, |
|
"logps/rejected": -579.142578125, |
|
"loss": 0.3994, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.1505260467529297, |
|
"rewards/margins": 1.6301181316375732, |
|
"rewards/rejected": -3.780644178390503, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9026200719291904e-08, |
|
"logits/chosen": -1.001379370689392, |
|
"logits/rejected": -0.6102081537246704, |
|
"logps/chosen": -508.70147705078125, |
|
"logps/rejected": -536.8555297851562, |
|
"loss": 0.4286, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.9450082778930664, |
|
"rewards/margins": 1.5907418727874756, |
|
"rewards/rejected": -3.535750150680542, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.4574171274456433e-08, |
|
"logits/chosen": -1.0912601947784424, |
|
"logits/rejected": -0.6700750589370728, |
|
"logps/chosen": -519.89208984375, |
|
"logps/rejected": -525.0479736328125, |
|
"loss": 0.3678, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.884690523147583, |
|
"rewards/margins": 1.6182489395141602, |
|
"rewards/rejected": -3.502938747406006, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.047500453267881e-08, |
|
"logits/chosen": -1.0198689699172974, |
|
"logits/rejected": -0.5865429043769836, |
|
"logps/chosen": -526.2073974609375, |
|
"logps/rejected": -554.8323974609375, |
|
"loss": 0.3887, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.9122520685195923, |
|
"rewards/margins": 1.7002776861190796, |
|
"rewards/rejected": -3.612529754638672, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.673511698609292e-08, |
|
"logits/chosen": -0.9797528982162476, |
|
"logits/rejected": -0.5832753777503967, |
|
"logps/chosen": -553.0879516601562, |
|
"logps/rejected": -561.693603515625, |
|
"loss": 0.3901, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.9004099369049072, |
|
"rewards/margins": 1.7125294208526611, |
|
"rewards/rejected": -3.6129393577575684, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3360362741285769e-08, |
|
"logits/chosen": -1.0027343034744263, |
|
"logits/rejected": -0.7030217051506042, |
|
"logps/chosen": -501.4288024902344, |
|
"logps/rejected": -526.0103759765625, |
|
"loss": 0.3887, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.869350790977478, |
|
"rewards/margins": 1.5730822086334229, |
|
"rewards/rejected": -3.4424331188201904, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_logits/chosen": -0.9597108364105225, |
|
"eval_logits/rejected": -0.7242004871368408, |
|
"eval_logps/chosen": -487.77545166015625, |
|
"eval_logps/rejected": -609.3139038085938, |
|
"eval_loss": 0.4919503927230835, |
|
"eval_rewards/accuracies": 0.7734375, |
|
"eval_rewards/chosen": -2.3073582649230957, |
|
"eval_rewards/margins": 1.21224844455719, |
|
"eval_rewards/rejected": -3.519606590270996, |
|
"eval_runtime": 53.0285, |
|
"eval_samples_per_second": 37.716, |
|
"eval_steps_per_second": 0.603, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0356024355769433e-08, |
|
"logits/chosen": -1.0092878341674805, |
|
"logits/rejected": -0.7896069884300232, |
|
"logps/chosen": -532.9703369140625, |
|
"logps/rejected": -526.88330078125, |
|
"loss": 0.3767, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.830877661705017, |
|
"rewards/margins": 1.5391663312911987, |
|
"rewards/rejected": -3.370044231414795, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.726804569108597e-09, |
|
"logits/chosen": -1.1117920875549316, |
|
"logits/rejected": -0.6487603187561035, |
|
"logps/chosen": -553.5621337890625, |
|
"logps/rejected": -571.2651977539062, |
|
"loss": 0.4191, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.9834985733032227, |
|
"rewards/margins": 1.6336091756820679, |
|
"rewards/rejected": -3.61710786819458, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.476818941645561e-09, |
|
"logits/chosen": -1.1343705654144287, |
|
"logits/rejected": -0.5430102348327637, |
|
"logps/chosen": -569.5715942382812, |
|
"logps/rejected": -541.0032958984375, |
|
"loss": 0.3755, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.8141847848892212, |
|
"rewards/margins": 1.6208727359771729, |
|
"rewards/rejected": -3.4350574016571045, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.609589412347347e-09, |
|
"logits/chosen": -1.0426546335220337, |
|
"logits/rejected": -0.6130795478820801, |
|
"logps/chosen": -517.9793701171875, |
|
"logps/rejected": -554.4876098632812, |
|
"loss": 0.3741, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7485253810882568, |
|
"rewards/margins": 1.8532413244247437, |
|
"rewards/rejected": -3.601766586303711, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1280387858572667e-09, |
|
"logits/chosen": -0.9971386194229126, |
|
"logits/rejected": -0.6877419352531433, |
|
"logps/chosen": -496.14208984375, |
|
"logps/rejected": -510.2310485839844, |
|
"loss": 0.3879, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.8203623294830322, |
|
"rewards/margins": 1.555535078048706, |
|
"rewards/rejected": -3.3758976459503174, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.03448615738172e-09, |
|
"logits/chosen": -1.004620909690857, |
|
"logits/rejected": -0.6449930667877197, |
|
"logps/chosen": -515.4451904296875, |
|
"logps/rejected": -543.5704956054688, |
|
"loss": 0.3946, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7996273040771484, |
|
"rewards/margins": 1.7972816228866577, |
|
"rewards/rejected": -3.5969085693359375, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.3064328257259575e-10, |
|
"logits/chosen": -1.0538240671157837, |
|
"logits/rejected": -0.67207270860672, |
|
"logps/chosen": -510.56414794921875, |
|
"logps/rejected": -538.0349731445312, |
|
"loss": 0.3821, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.884305715560913, |
|
"rewards/margins": 1.7009254693984985, |
|
"rewards/rejected": -3.585231065750122, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.7611898088715216e-11, |
|
"logits/chosen": -1.063408613204956, |
|
"logits/rejected": -0.8093023300170898, |
|
"logps/chosen": -536.09716796875, |
|
"logps/rejected": -549.3472290039062, |
|
"loss": 0.4077, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7760584354400635, |
|
"rewards/margins": 1.5979098081588745, |
|
"rewards/rejected": -3.3739686012268066, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 883, |
|
"total_flos": 0.0, |
|
"train_loss": 0.43981607611020046, |
|
"train_runtime": 8273.4147, |
|
"train_samples_per_second": 13.662, |
|
"train_steps_per_second": 0.107 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 883, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|