|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994340690435767, |
|
"eval_steps": 100, |
|
"global_step": 883, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.617977528089887e-09, |
|
"logits/chosen": -2.8261122703552246, |
|
"logits/rejected": -2.782524824142456, |
|
"logps/chosen": -386.01312255859375, |
|
"logps/rejected": -174.26467895507812, |
|
"loss": 0.2845, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.617977528089887e-08, |
|
"logits/chosen": -2.770503044128418, |
|
"logits/rejected": -2.7067270278930664, |
|
"logps/chosen": -337.03875732421875, |
|
"logps/rejected": -169.81399536132812, |
|
"loss": 0.2812, |
|
"rewards/accuracies": 0.3541666567325592, |
|
"rewards/chosen": -0.0004154888156335801, |
|
"rewards/margins": -0.0005714126164093614, |
|
"rewards/rejected": 0.00015592378622386605, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1235955056179774e-07, |
|
"logits/chosen": -2.804004192352295, |
|
"logits/rejected": -2.800266742706299, |
|
"logps/chosen": -306.04486083984375, |
|
"logps/rejected": -189.47840881347656, |
|
"loss": 0.2839, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.001905800774693489, |
|
"rewards/margins": 0.003199492348358035, |
|
"rewards/rejected": -0.0012936916900798678, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6853932584269663e-07, |
|
"logits/chosen": -2.7842857837677, |
|
"logits/rejected": -2.7480905055999756, |
|
"logps/chosen": -340.1336669921875, |
|
"logps/rejected": -176.36483764648438, |
|
"loss": 0.2736, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.013033352792263031, |
|
"rewards/margins": 0.02086206153035164, |
|
"rewards/rejected": -0.007828707806766033, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.2471910112359549e-07, |
|
"logits/chosen": -2.7320685386657715, |
|
"logits/rejected": -2.694530963897705, |
|
"logps/chosen": -318.46612548828125, |
|
"logps/rejected": -170.09690856933594, |
|
"loss": 0.2779, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.011926891282200813, |
|
"rewards/margins": 0.05010765790939331, |
|
"rewards/rejected": -0.038180768489837646, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8089887640449437e-07, |
|
"logits/chosen": -2.638516902923584, |
|
"logits/rejected": -2.6276156902313232, |
|
"logps/chosen": -326.9003601074219, |
|
"logps/rejected": -187.60606384277344, |
|
"loss": 0.2637, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.04087567329406738, |
|
"rewards/margins": 0.14330647885799408, |
|
"rewards/rejected": -0.1024308055639267, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3707865168539325e-07, |
|
"logits/chosen": -2.584765672683716, |
|
"logits/rejected": -2.537776231765747, |
|
"logps/chosen": -350.67724609375, |
|
"logps/rejected": -225.81283569335938, |
|
"loss": 0.2493, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.013082382269203663, |
|
"rewards/margins": 0.19787843525409698, |
|
"rewards/rejected": -0.21096083521842957, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.9325842696629214e-07, |
|
"logits/chosen": -2.5709831714630127, |
|
"logits/rejected": -2.5257468223571777, |
|
"logps/chosen": -342.55291748046875, |
|
"logps/rejected": -237.45059204101562, |
|
"loss": 0.2242, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.19872836768627167, |
|
"rewards/margins": 0.2264036238193512, |
|
"rewards/rejected": -0.42513203620910645, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4943820224719097e-07, |
|
"logits/chosen": -2.46580171585083, |
|
"logits/rejected": -2.4357619285583496, |
|
"logps/chosen": -338.7400817871094, |
|
"logps/rejected": -255.5446319580078, |
|
"loss": 0.187, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.2514013946056366, |
|
"rewards/margins": 0.39069053530693054, |
|
"rewards/rejected": -0.6420919895172119, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999980431020109e-07, |
|
"logits/chosen": -2.492363452911377, |
|
"logits/rejected": -2.4689993858337402, |
|
"logps/chosen": -373.74151611328125, |
|
"logps/rejected": -278.97564697265625, |
|
"loss": 0.1471, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2644743025302887, |
|
"rewards/margins": 0.5433769822120667, |
|
"rewards/rejected": -0.8078513145446777, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997632524101301e-07, |
|
"logits/chosen": -2.5200695991516113, |
|
"logits/rejected": -2.493776798248291, |
|
"logps/chosen": -365.40191650390625, |
|
"logps/rejected": -278.2363586425781, |
|
"loss": 0.1485, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.31133976578712463, |
|
"rewards/margins": 0.6167228817939758, |
|
"rewards/rejected": -0.9280625581741333, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_logits/chosen": -2.513258934020996, |
|
"eval_logits/rejected": -2.4997854232788086, |
|
"eval_logps/chosen": -313.2470703125, |
|
"eval_logps/rejected": -334.7263488769531, |
|
"eval_loss": 0.18025632202625275, |
|
"eval_rewards/accuracies": 0.640625, |
|
"eval_rewards/chosen": -0.5620743632316589, |
|
"eval_rewards/margins": 0.2116563767194748, |
|
"eval_rewards/rejected": -0.7737306952476501, |
|
"eval_runtime": 53.274, |
|
"eval_samples_per_second": 37.542, |
|
"eval_steps_per_second": 0.601, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.991375032514749e-07, |
|
"logits/chosen": -2.4822776317596436, |
|
"logits/rejected": -2.445827007293701, |
|
"logps/chosen": -388.3408508300781, |
|
"logps/rejected": -346.55419921875, |
|
"loss": 0.1265, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.5192545652389526, |
|
"rewards/margins": 0.6625990867614746, |
|
"rewards/rejected": -1.1818536520004272, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.98121775121344e-07, |
|
"logits/chosen": -2.4391703605651855, |
|
"logits/rejected": -2.4168076515197754, |
|
"logps/chosen": -354.26580810546875, |
|
"logps/rejected": -301.9068298339844, |
|
"loss": 0.1549, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4692833423614502, |
|
"rewards/margins": 0.6351093053817749, |
|
"rewards/rejected": -1.104392647743225, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.96717657955441e-07, |
|
"logits/chosen": -2.456784963607788, |
|
"logits/rejected": -2.402589797973633, |
|
"logps/chosen": -380.1900939941406, |
|
"logps/rejected": -345.0179443359375, |
|
"loss": 0.1076, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7707170248031616, |
|
"rewards/margins": 0.8249391317367554, |
|
"rewards/rejected": -1.595656156539917, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.949273496411216e-07, |
|
"logits/chosen": -2.3544960021972656, |
|
"logits/rejected": -2.34092378616333, |
|
"logps/chosen": -446.9285583496094, |
|
"logps/rejected": -376.6002197265625, |
|
"loss": 0.0663, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.0127520561218262, |
|
"rewards/margins": 1.086138367652893, |
|
"rewards/rejected": -2.098890542984009, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.927536525770046e-07, |
|
"logits/chosen": -2.376620054244995, |
|
"logits/rejected": -2.3480007648468018, |
|
"logps/chosen": -434.1917419433594, |
|
"logps/rejected": -401.83587646484375, |
|
"loss": 0.067, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.034598708152771, |
|
"rewards/margins": 0.8842358589172363, |
|
"rewards/rejected": -1.9188346862792969, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.901999692863326e-07, |
|
"logits/chosen": -2.378688335418701, |
|
"logits/rejected": -2.3023369312286377, |
|
"logps/chosen": -454.9664611816406, |
|
"logps/rejected": -411.72027587890625, |
|
"loss": 0.0667, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2886327505111694, |
|
"rewards/margins": 0.9617815017700195, |
|
"rewards/rejected": -2.2504146099090576, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.872702970909464e-07, |
|
"logits/chosen": -2.327148914337158, |
|
"logits/rejected": -2.255551338195801, |
|
"logps/chosen": -474.45538330078125, |
|
"logps/rejected": -428.8330078125, |
|
"loss": 0.0538, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.456198811531067, |
|
"rewards/margins": 0.9602320790290833, |
|
"rewards/rejected": -2.416430950164795, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.839692218542131e-07, |
|
"logits/chosen": -2.3194737434387207, |
|
"logits/rejected": -2.289604902267456, |
|
"logps/chosen": -398.30853271484375, |
|
"logps/rejected": -385.2601013183594, |
|
"loss": 0.0682, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.9385713338851929, |
|
"rewards/margins": 1.0706655979156494, |
|
"rewards/rejected": -2.009237051010132, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.803019108026997e-07, |
|
"logits/chosen": -2.3338797092437744, |
|
"logits/rejected": -2.2716145515441895, |
|
"logps/chosen": -423.6573181152344, |
|
"logps/rejected": -402.0962219238281, |
|
"loss": 0.0702, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.050208568572998, |
|
"rewards/margins": 0.8981093168258667, |
|
"rewards/rejected": -1.9483178853988647, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7627410443782887e-07, |
|
"logits/chosen": -2.3091745376586914, |
|
"logits/rejected": -2.2324705123901367, |
|
"logps/chosen": -481.0274353027344, |
|
"logps/rejected": -428.9169921875, |
|
"loss": 0.0592, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.2154655456542969, |
|
"rewards/margins": 1.0429514646530151, |
|
"rewards/rejected": -2.2584171295166016, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_logits/chosen": -2.2729034423828125, |
|
"eval_logits/rejected": -2.2396340370178223, |
|
"eval_logps/chosen": -431.05743408203125, |
|
"eval_logps/rejected": -490.1517639160156, |
|
"eval_loss": 0.06622401624917984, |
|
"eval_rewards/accuracies": 0.6796875, |
|
"eval_rewards/chosen": -1.7401777505874634, |
|
"eval_rewards/margins": 0.5878072381019592, |
|
"eval_rewards/rejected": -2.3279850482940674, |
|
"eval_runtime": 53.2522, |
|
"eval_samples_per_second": 37.557, |
|
"eval_steps_per_second": 0.601, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7189210755018034e-07, |
|
"logits/chosen": -2.2162270545959473, |
|
"logits/rejected": -2.109973430633545, |
|
"logps/chosen": -463.5928649902344, |
|
"logps/rejected": -448.2275390625, |
|
"loss": 0.0504, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.341931939125061, |
|
"rewards/margins": 1.1762769222259521, |
|
"rewards/rejected": -2.5182089805603027, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.671627793504988e-07, |
|
"logits/chosen": -2.222045421600342, |
|
"logits/rejected": -2.127629280090332, |
|
"logps/chosen": -493.30133056640625, |
|
"logps/rejected": -467.193115234375, |
|
"loss": 0.0508, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.4817438125610352, |
|
"rewards/margins": 1.220350980758667, |
|
"rewards/rejected": -2.702094554901123, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6209352273286095e-07, |
|
"logits/chosen": -2.233363628387451, |
|
"logits/rejected": -2.184459686279297, |
|
"logps/chosen": -456.435302734375, |
|
"logps/rejected": -444.67449951171875, |
|
"loss": 0.0543, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.346243143081665, |
|
"rewards/margins": 1.1588122844696045, |
|
"rewards/rejected": -2.5050556659698486, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.56692272686805e-07, |
|
"logits/chosen": -2.227086067199707, |
|
"logits/rejected": -2.1657800674438477, |
|
"logps/chosen": -463.6180114746094, |
|
"logps/rejected": -463.9268493652344, |
|
"loss": 0.0517, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.6306241750717163, |
|
"rewards/margins": 1.0510774850845337, |
|
"rewards/rejected": -2.68170166015625, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5096748387656326e-07, |
|
"logits/chosen": -2.2646913528442383, |
|
"logits/rejected": -2.1847498416900635, |
|
"logps/chosen": -526.8988647460938, |
|
"logps/rejected": -455.5740661621094, |
|
"loss": 0.049, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.5727968215942383, |
|
"rewards/margins": 1.1092311143875122, |
|
"rewards/rejected": -2.68202805519104, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4492811740683877e-07, |
|
"logits/chosen": -2.272037982940674, |
|
"logits/rejected": -2.1871819496154785, |
|
"logps/chosen": -510.32061767578125, |
|
"logps/rejected": -452.8402404785156, |
|
"loss": 0.0492, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5913618803024292, |
|
"rewards/margins": 1.0165178775787354, |
|
"rewards/rejected": -2.607879400253296, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3858362679584354e-07, |
|
"logits/chosen": -2.17218017578125, |
|
"logits/rejected": -2.114896774291992, |
|
"logps/chosen": -487.0856018066406, |
|
"logps/rejected": -454.93115234375, |
|
"loss": 0.0458, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.7943446636199951, |
|
"rewards/margins": 0.9125874638557434, |
|
"rewards/rejected": -2.706932544708252, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3194394317755245e-07, |
|
"logits/chosen": -2.1766180992126465, |
|
"logits/rejected": -2.0868289470672607, |
|
"logps/chosen": -574.8035888671875, |
|
"logps/rejected": -496.2310485839844, |
|
"loss": 0.0411, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.0265350341796875, |
|
"rewards/margins": 1.1947269439697266, |
|
"rewards/rejected": -3.221261501312256, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2501945975633914e-07, |
|
"logits/chosen": -2.130429744720459, |
|
"logits/rejected": -2.0812618732452393, |
|
"logps/chosen": -565.4262084960938, |
|
"logps/rejected": -521.2449951171875, |
|
"loss": 0.0359, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.1656954288482666, |
|
"rewards/margins": 1.03809654712677, |
|
"rewards/rejected": -3.203791856765747, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1782101553832405e-07, |
|
"logits/chosen": -2.1119463443756104, |
|
"logits/rejected": -2.0517754554748535, |
|
"logps/chosen": -525.1202392578125, |
|
"logps/rejected": -516.9113159179688, |
|
"loss": 0.0394, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.1311917304992676, |
|
"rewards/margins": 1.1441915035247803, |
|
"rewards/rejected": -3.2753829956054688, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_logits/chosen": -2.138880968093872, |
|
"eval_logits/rejected": -2.1100966930389404, |
|
"eval_logps/chosen": -494.104736328125, |
|
"eval_logps/rejected": -555.0247802734375, |
|
"eval_loss": 0.04939676821231842, |
|
"eval_rewards/accuracies": 0.6953125, |
|
"eval_rewards/chosen": -2.37065052986145, |
|
"eval_rewards/margins": 0.606063961982727, |
|
"eval_rewards/rejected": -2.976714611053467, |
|
"eval_runtime": 53.3026, |
|
"eval_samples_per_second": 37.522, |
|
"eval_steps_per_second": 0.6, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.103598783649029e-07, |
|
"logits/chosen": -2.1108057498931885, |
|
"logits/rejected": -2.0741026401519775, |
|
"logps/chosen": -521.8655395507812, |
|
"logps/rejected": -494.017578125, |
|
"loss": 0.0399, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8981420993804932, |
|
"rewards/margins": 0.9213889837265015, |
|
"rewards/rejected": -2.819530963897705, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.026477272750119e-07, |
|
"logits/chosen": -2.195026159286499, |
|
"logits/rejected": -2.128283977508545, |
|
"logps/chosen": -519.8826904296875, |
|
"logps/rejected": -489.1722717285156, |
|
"loss": 0.0407, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.8402801752090454, |
|
"rewards/margins": 1.171037197113037, |
|
"rewards/rejected": -3.011317491531372, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9469663422373864e-07, |
|
"logits/chosen": -2.1483047008514404, |
|
"logits/rejected": -2.095182418823242, |
|
"logps/chosen": -515.9031372070312, |
|
"logps/rejected": -511.58416748046875, |
|
"loss": 0.0362, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.987370252609253, |
|
"rewards/margins": 1.1986653804779053, |
|
"rewards/rejected": -3.186035633087158, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.865190451858954e-07, |
|
"logits/chosen": -2.1236190795898438, |
|
"logits/rejected": -2.038846492767334, |
|
"logps/chosen": -567.5887451171875, |
|
"logps/rejected": -531.2391357421875, |
|
"loss": 0.0378, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.1050643920898438, |
|
"rewards/margins": 1.3257628679275513, |
|
"rewards/rejected": -3.4308273792266846, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.781277606741327e-07, |
|
"logits/chosen": -2.073742628097534, |
|
"logits/rejected": -2.0120556354522705, |
|
"logps/chosen": -546.9391479492188, |
|
"logps/rejected": -522.168701171875, |
|
"loss": 0.0363, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.975027084350586, |
|
"rewards/margins": 1.3364970684051514, |
|
"rewards/rejected": -3.311523914337158, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6953591570208996e-07, |
|
"logits/chosen": -2.0751125812530518, |
|
"logits/rejected": -1.9913240671157837, |
|
"logps/chosen": -532.4324951171875, |
|
"logps/rejected": -570.5128173828125, |
|
"loss": 0.0339, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.149209976196289, |
|
"rewards/margins": 1.5435600280761719, |
|
"rewards/rejected": -3.692770004272461, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.607569592239452e-07, |
|
"logits/chosen": -2.0640273094177246, |
|
"logits/rejected": -1.9882128238677979, |
|
"logps/chosen": -538.2806396484375, |
|
"logps/rejected": -503.5306701660156, |
|
"loss": 0.0419, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.8893718719482422, |
|
"rewards/margins": 1.269718050956726, |
|
"rewards/rejected": -3.159090042114258, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.518046330825494e-07, |
|
"logits/chosen": -2.1525139808654785, |
|
"logits/rejected": -2.09255051612854, |
|
"logps/chosen": -541.6168823242188, |
|
"logps/rejected": -525.7049560546875, |
|
"loss": 0.0488, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.759751319885254, |
|
"rewards/margins": 1.318419098854065, |
|
"rewards/rejected": -3.0781702995300293, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4269295049909713e-07, |
|
"logits/chosen": -2.111579179763794, |
|
"logits/rejected": -2.008594512939453, |
|
"logps/chosen": -519.3077392578125, |
|
"logps/rejected": -495.54736328125, |
|
"loss": 0.0479, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.9454580545425415, |
|
"rewards/margins": 1.1256051063537598, |
|
"rewards/rejected": -3.0710630416870117, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3343617413800453e-07, |
|
"logits/chosen": -2.116332530975342, |
|
"logits/rejected": -2.0108766555786133, |
|
"logps/chosen": -560.86083984375, |
|
"logps/rejected": -513.68408203125, |
|
"loss": 0.0401, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.8742320537567139, |
|
"rewards/margins": 1.4719246625900269, |
|
"rewards/rejected": -3.346156597137451, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_logits/chosen": -2.0799078941345215, |
|
"eval_logits/rejected": -2.042891502380371, |
|
"eval_logps/chosen": -499.7916259765625, |
|
"eval_logps/rejected": -568.111572265625, |
|
"eval_loss": 0.052323117852211, |
|
"eval_rewards/accuracies": 0.703125, |
|
"eval_rewards/chosen": -2.4275197982788086, |
|
"eval_rewards/margins": 0.6800626516342163, |
|
"eval_rewards/rejected": -3.1075825691223145, |
|
"eval_runtime": 53.2431, |
|
"eval_samples_per_second": 37.564, |
|
"eval_steps_per_second": 0.601, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2404879378132893e-07, |
|
"logits/chosen": -2.1349825859069824, |
|
"logits/rejected": -2.0403599739074707, |
|
"logps/chosen": -573.2166137695312, |
|
"logps/rejected": -546.21484375, |
|
"loss": 0.0393, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.9816631078720093, |
|
"rewards/margins": 1.3866467475891113, |
|
"rewards/rejected": -3.3683102130889893, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1454550364767894e-07, |
|
"logits/chosen": -2.0241355895996094, |
|
"logits/rejected": -1.955384612083435, |
|
"logps/chosen": -587.6964721679688, |
|
"logps/rejected": -564.29833984375, |
|
"loss": 0.0336, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.4298739433288574, |
|
"rewards/margins": 1.2927566766738892, |
|
"rewards/rejected": -3.722630262374878, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.049411793911154e-07, |
|
"logits/chosen": -2.0459389686584473, |
|
"logits/rejected": -1.9516912698745728, |
|
"logps/chosen": -587.4751586914062, |
|
"logps/rejected": -571.4771728515625, |
|
"loss": 0.0377, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.2811179161071777, |
|
"rewards/margins": 1.313342571258545, |
|
"rewards/rejected": -3.5944607257843018, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9525085481604914e-07, |
|
"logits/chosen": -2.076460361480713, |
|
"logits/rejected": -2.008845090866089, |
|
"logps/chosen": -501.68743896484375, |
|
"logps/rejected": -492.162841796875, |
|
"loss": 0.0406, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8489151000976562, |
|
"rewards/margins": 1.1866779327392578, |
|
"rewards/rejected": -3.035592794418335, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.854896983445833e-07, |
|
"logits/chosen": -2.0890183448791504, |
|
"logits/rejected": -1.99982488155365, |
|
"logps/chosen": -555.748291015625, |
|
"logps/rejected": -527.487060546875, |
|
"loss": 0.0446, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.0640885829925537, |
|
"rewards/margins": 1.2410598993301392, |
|
"rewards/rejected": -3.3051486015319824, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7567298927313654e-07, |
|
"logits/chosen": -2.0581395626068115, |
|
"logits/rejected": -1.9576694965362549, |
|
"logps/chosen": -517.2969970703125, |
|
"logps/rejected": -494.01580810546875, |
|
"loss": 0.0444, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.7231872081756592, |
|
"rewards/margins": 1.3869811296463013, |
|
"rewards/rejected": -3.110168695449829, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.658160938555123e-07, |
|
"logits/chosen": -2.0949597358703613, |
|
"logits/rejected": -1.978247046470642, |
|
"logps/chosen": -530.58642578125, |
|
"logps/rejected": -518.8834838867188, |
|
"loss": 0.0408, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8325612545013428, |
|
"rewards/margins": 1.4025018215179443, |
|
"rewards/rejected": -3.235063076019287, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.559344412498532e-07, |
|
"logits/chosen": -2.0444254875183105, |
|
"logits/rejected": -1.958059310913086, |
|
"logps/chosen": -538.9998779296875, |
|
"logps/rejected": -531.4126586914062, |
|
"loss": 0.0282, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.9186712503433228, |
|
"rewards/margins": 1.5285937786102295, |
|
"rewards/rejected": -3.447265148162842, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.460434993671294e-07, |
|
"logits/chosen": -2.060708522796631, |
|
"logits/rejected": -1.9541358947753906, |
|
"logps/chosen": -524.92431640625, |
|
"logps/rejected": -490.29669189453125, |
|
"loss": 0.0367, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.956221342086792, |
|
"rewards/margins": 1.1774486303329468, |
|
"rewards/rejected": -3.1336700916290283, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.361587506589672e-07, |
|
"logits/chosen": -2.0169599056243896, |
|
"logits/rejected": -1.9417072534561157, |
|
"logps/chosen": -512.1517333984375, |
|
"logps/rejected": -506.2262268066406, |
|
"loss": 0.0335, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.9970229864120483, |
|
"rewards/margins": 1.3175480365753174, |
|
"rewards/rejected": -3.314570665359497, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_logits/chosen": -2.0455641746520996, |
|
"eval_logits/rejected": -2.005706310272217, |
|
"eval_logps/chosen": -497.6726989746094, |
|
"eval_logps/rejected": -580.1129150390625, |
|
"eval_loss": 0.04609997943043709, |
|
"eval_rewards/accuracies": 0.71484375, |
|
"eval_rewards/chosen": -2.4063305854797363, |
|
"eval_rewards/margins": 0.8212659358978271, |
|
"eval_rewards/rejected": -3.2275965213775635, |
|
"eval_runtime": 53.2473, |
|
"eval_samples_per_second": 37.561, |
|
"eval_steps_per_second": 0.601, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2629566788271613e-07, |
|
"logits/chosen": -2.0371432304382324, |
|
"logits/rejected": -1.9557580947875977, |
|
"logps/chosen": -560.0842895507812, |
|
"logps/rejected": -532.668212890625, |
|
"loss": 0.038, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.9359846115112305, |
|
"rewards/margins": 1.3490617275238037, |
|
"rewards/rejected": -3.285046339035034, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1646968988169135e-07, |
|
"logits/chosen": -2.011871814727783, |
|
"logits/rejected": -1.9335308074951172, |
|
"logps/chosen": -507.52337646484375, |
|
"logps/rejected": -516.07958984375, |
|
"loss": 0.0395, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.9623944759368896, |
|
"rewards/margins": 1.2198023796081543, |
|
"rewards/rejected": -3.182196855545044, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0669619741850232e-07, |
|
"logits/chosen": -1.9674116373062134, |
|
"logits/rejected": -1.874136209487915, |
|
"logps/chosen": -560.131591796875, |
|
"logps/rejected": -497.582275390625, |
|
"loss": 0.0333, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.1217665672302246, |
|
"rewards/margins": 1.2892667055130005, |
|
"rewards/rejected": -3.4110336303710938, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9699048909929518e-07, |
|
"logits/chosen": -1.9731553792953491, |
|
"logits/rejected": -1.9132474660873413, |
|
"logps/chosen": -545.8017578125, |
|
"logps/rejected": -487.0414123535156, |
|
"loss": 0.0348, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.1676135063171387, |
|
"rewards/margins": 1.0356405973434448, |
|
"rewards/rejected": -3.203253984451294, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8736775742659732e-07, |
|
"logits/chosen": -1.9223613739013672, |
|
"logits/rejected": -1.8391857147216797, |
|
"logps/chosen": -517.1666870117188, |
|
"logps/rejected": -498.1993713378906, |
|
"loss": 0.0342, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.948054313659668, |
|
"rewards/margins": 1.2562531232833862, |
|
"rewards/rejected": -3.2043070793151855, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7784306501824616e-07, |
|
"logits/chosen": -1.9654014110565186, |
|
"logits/rejected": -1.8999271392822266, |
|
"logps/chosen": -574.9034423828125, |
|
"logps/rejected": -553.96240234375, |
|
"loss": 0.0336, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.0827620029449463, |
|
"rewards/margins": 1.3543148040771484, |
|
"rewards/rejected": -3.4370765686035156, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6843132102963025e-07, |
|
"logits/chosen": -1.9901962280273438, |
|
"logits/rejected": -1.8920910358428955, |
|
"logps/chosen": -571.3399658203125, |
|
"logps/rejected": -549.0781860351562, |
|
"loss": 0.0364, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.1259512901306152, |
|
"rewards/margins": 1.4393842220306396, |
|
"rewards/rejected": -3.565335512161255, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.591472578161458e-07, |
|
"logits/chosen": -2.0036253929138184, |
|
"logits/rejected": -1.9164316654205322, |
|
"logps/chosen": -597.6680908203125, |
|
"logps/rejected": -563.1033935546875, |
|
"loss": 0.0314, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.2230658531188965, |
|
"rewards/margins": 1.4771394729614258, |
|
"rewards/rejected": -3.7002053260803223, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5000540787240274e-07, |
|
"logits/chosen": -1.9651165008544922, |
|
"logits/rejected": -1.9028050899505615, |
|
"logps/chosen": -573.396728515625, |
|
"logps/rejected": -580.0013427734375, |
|
"loss": 0.0282, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.4309842586517334, |
|
"rewards/margins": 1.3270018100738525, |
|
"rewards/rejected": -3.757986068725586, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.410200810842749e-07, |
|
"logits/chosen": -1.9549171924591064, |
|
"logits/rejected": -1.8664356470108032, |
|
"logps/chosen": -546.6082153320312, |
|
"logps/rejected": -557.6231689453125, |
|
"loss": 0.0273, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.497020959854126, |
|
"rewards/margins": 1.3337533473968506, |
|
"rewards/rejected": -3.8307743072509766, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": -1.9557578563690186, |
|
"eval_logits/rejected": -1.9161585569381714, |
|
"eval_logps/chosen": -541.6861572265625, |
|
"eval_logps/rejected": -628.8740844726562, |
|
"eval_loss": 0.04087229445576668, |
|
"eval_rewards/accuracies": 0.70703125, |
|
"eval_rewards/chosen": -2.8464653491973877, |
|
"eval_rewards/margins": 0.8687426447868347, |
|
"eval_rewards/rejected": -3.715208053588867, |
|
"eval_runtime": 53.2271, |
|
"eval_samples_per_second": 37.575, |
|
"eval_steps_per_second": 0.601, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.322053423294041e-07, |
|
"logits/chosen": -1.9264421463012695, |
|
"logits/rejected": -1.8596795797348022, |
|
"logps/chosen": -555.9921875, |
|
"logps/rejected": -528.2036743164062, |
|
"loss": 0.0364, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3642823696136475, |
|
"rewards/margins": 1.1525689363479614, |
|
"rewards/rejected": -3.5168514251708984, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2357498946121905e-07, |
|
"logits/chosen": -2.047217845916748, |
|
"logits/rejected": -1.9547252655029297, |
|
"logps/chosen": -573.0296630859375, |
|
"logps/rejected": -531.27880859375, |
|
"loss": 0.04, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.1570980548858643, |
|
"rewards/margins": 1.236627459526062, |
|
"rewards/rejected": -3.393725633621216, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1514253171093161e-07, |
|
"logits/chosen": -2.0023763179779053, |
|
"logits/rejected": -1.9147300720214844, |
|
"logps/chosen": -558.9900512695312, |
|
"logps/rejected": -542.7515869140625, |
|
"loss": 0.0424, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.013390064239502, |
|
"rewards/margins": 1.3009288311004639, |
|
"rewards/rejected": -3.314318895339966, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0692116854131883e-07, |
|
"logits/chosen": -1.9406598806381226, |
|
"logits/rejected": -1.8927663564682007, |
|
"logps/chosen": -502.59344482421875, |
|
"logps/rejected": -507.50372314453125, |
|
"loss": 0.0437, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.104419231414795, |
|
"rewards/margins": 1.1473711729049683, |
|
"rewards/rejected": -3.251791000366211, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.89237689853889e-08, |
|
"logits/chosen": -1.9217647314071655, |
|
"logits/rejected": -1.840735673904419, |
|
"logps/chosen": -539.6429443359375, |
|
"logps/rejected": -520.7713623046875, |
|
"loss": 0.0348, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.0584945678710938, |
|
"rewards/margins": 1.4084278345108032, |
|
"rewards/rejected": -3.4669222831726074, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.11628515022765e-08, |
|
"logits/chosen": -1.936431884765625, |
|
"logits/rejected": -1.8774102926254272, |
|
"logps/chosen": -507.42413330078125, |
|
"logps/rejected": -489.027099609375, |
|
"loss": 0.0321, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9116840362548828, |
|
"rewards/margins": 1.3113796710968018, |
|
"rewards/rejected": -3.2230639457702637, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.365056438189486e-08, |
|
"logits/chosen": -1.9381834268569946, |
|
"logits/rejected": -1.8834428787231445, |
|
"logps/chosen": -545.0704345703125, |
|
"logps/rejected": -511.5694885253906, |
|
"loss": 0.0361, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.27673602104187, |
|
"rewards/margins": 1.0493533611297607, |
|
"rewards/rejected": -3.326089382171631, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.639866672902101e-08, |
|
"logits/chosen": -1.9358975887298584, |
|
"logits/rejected": -1.8624534606933594, |
|
"logps/chosen": -552.2891845703125, |
|
"logps/rejected": -548.515625, |
|
"loss": 0.0358, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.3348231315612793, |
|
"rewards/margins": 1.1951119899749756, |
|
"rewards/rejected": -3.529935121536255, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.941851005657851e-08, |
|
"logits/chosen": -1.9736747741699219, |
|
"logits/rejected": -1.8825995922088623, |
|
"logps/chosen": -592.0359497070312, |
|
"logps/rejected": -565.7269897460938, |
|
"loss": 0.0392, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.2588744163513184, |
|
"rewards/margins": 1.3530534505844116, |
|
"rewards/rejected": -3.6119277477264404, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.272102051693051e-08, |
|
"logits/chosen": -1.9536447525024414, |
|
"logits/rejected": -1.8848438262939453, |
|
"logps/chosen": -544.1442260742188, |
|
"logps/rejected": -547.0964965820312, |
|
"loss": 0.0377, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0580058097839355, |
|
"rewards/margins": 1.3238760232925415, |
|
"rewards/rejected": -3.3818821907043457, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": -1.967289686203003, |
|
"eval_logits/rejected": -1.9273663759231567, |
|
"eval_logps/chosen": -510.2101745605469, |
|
"eval_logps/rejected": -594.1712036132812, |
|
"eval_loss": 0.0495593398809433, |
|
"eval_rewards/accuracies": 0.72265625, |
|
"eval_rewards/chosen": -2.531705617904663, |
|
"eval_rewards/margins": 0.836473286151886, |
|
"eval_rewards/rejected": -3.3681788444519043, |
|
"eval_runtime": 53.2631, |
|
"eval_samples_per_second": 37.549, |
|
"eval_steps_per_second": 0.601, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.6316681798995844e-08, |
|
"logits/chosen": -1.9590046405792236, |
|
"logits/rejected": -1.8676058053970337, |
|
"logps/chosen": -530.419921875, |
|
"logps/rejected": -494.63720703125, |
|
"loss": 0.0395, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.950580358505249, |
|
"rewards/margins": 1.374487280845642, |
|
"rewards/rejected": -3.3250679969787598, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.0215518717961256e-08, |
|
"logits/chosen": -1.9477427005767822, |
|
"logits/rejected": -1.9216333627700806, |
|
"logps/chosen": -568.46337890625, |
|
"logps/rejected": -551.7440185546875, |
|
"loss": 0.0382, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.0898067951202393, |
|
"rewards/margins": 1.2489385604858398, |
|
"rewards/rejected": -3.3387451171875, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.4427081523275925e-08, |
|
"logits/chosen": -1.9811092615127563, |
|
"logits/rejected": -1.8924198150634766, |
|
"logps/chosen": -560.4685668945312, |
|
"logps/rejected": -545.5777587890625, |
|
"loss": 0.0384, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.166611433029175, |
|
"rewards/margins": 1.3478820323944092, |
|
"rewards/rejected": -3.514493465423584, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.896043094949061e-08, |
|
"logits/chosen": -1.952548623085022, |
|
"logits/rejected": -1.9003283977508545, |
|
"logps/chosen": -536.4952392578125, |
|
"logps/rejected": -554.130859375, |
|
"loss": 0.0368, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.2593836784362793, |
|
"rewards/margins": 1.1702790260314941, |
|
"rewards/rejected": -3.4296627044677734, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.3824124033343557e-08, |
|
"logits/chosen": -1.978271245956421, |
|
"logits/rejected": -1.8869798183441162, |
|
"logps/chosen": -595.8272094726562, |
|
"logps/rejected": -596.80908203125, |
|
"loss": 0.0356, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2419838905334473, |
|
"rewards/margins": 1.5834529399871826, |
|
"rewards/rejected": -3.825437068939209, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9026200719291904e-08, |
|
"logits/chosen": -1.9425359964370728, |
|
"logits/rejected": -1.8644979000091553, |
|
"logps/chosen": -541.2769775390625, |
|
"logps/rejected": -546.4044799804688, |
|
"loss": 0.0394, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.3050169944763184, |
|
"rewards/margins": 1.2533900737762451, |
|
"rewards/rejected": -3.5584073066711426, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.4574171274456433e-08, |
|
"logits/chosen": -1.9859609603881836, |
|
"logits/rejected": -1.8982467651367188, |
|
"logps/chosen": -581.1209716796875, |
|
"logps/rejected": -553.73291015625, |
|
"loss": 0.036, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.9945024251937866, |
|
"rewards/margins": 1.5215613842010498, |
|
"rewards/rejected": -3.516064167022705, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.047500453267881e-08, |
|
"logits/chosen": -1.9515501260757446, |
|
"logits/rejected": -1.857346534729004, |
|
"logps/chosen": -565.3553466796875, |
|
"logps/rejected": -536.8668823242188, |
|
"loss": 0.0422, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.2416954040527344, |
|
"rewards/margins": 1.2734299898147583, |
|
"rewards/rejected": -3.515125274658203, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.673511698609292e-08, |
|
"logits/chosen": -1.9258711338043213, |
|
"logits/rejected": -1.8735402822494507, |
|
"logps/chosen": -532.5035400390625, |
|
"logps/rejected": -557.9683837890625, |
|
"loss": 0.0354, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.2362167835235596, |
|
"rewards/margins": 1.4313932657241821, |
|
"rewards/rejected": -3.6676101684570312, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3360362741285769e-08, |
|
"logits/chosen": -1.9208987951278687, |
|
"logits/rejected": -1.8273556232452393, |
|
"logps/chosen": -566.8057861328125, |
|
"logps/rejected": -583.4982299804688, |
|
"loss": 0.0352, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.093482732772827, |
|
"rewards/margins": 1.81829035282135, |
|
"rewards/rejected": -3.911773681640625, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_logits/chosen": -1.9501041173934937, |
|
"eval_logits/rejected": -1.9090631008148193, |
|
"eval_logps/chosen": -521.0439453125, |
|
"eval_logps/rejected": -606.3505249023438, |
|
"eval_loss": 0.04646110162138939, |
|
"eval_rewards/accuracies": 0.72265625, |
|
"eval_rewards/chosen": -2.640043258666992, |
|
"eval_rewards/margins": 0.8499288558959961, |
|
"eval_rewards/rejected": -3.4899723529815674, |
|
"eval_runtime": 53.2044, |
|
"eval_samples_per_second": 37.591, |
|
"eval_steps_per_second": 0.601, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0356024355769433e-08, |
|
"logits/chosen": -1.950596570968628, |
|
"logits/rejected": -1.882962942123413, |
|
"logps/chosen": -582.3919067382812, |
|
"logps/rejected": -592.8079223632812, |
|
"loss": 0.0361, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.399203062057495, |
|
"rewards/margins": 1.3655624389648438, |
|
"rewards/rejected": -3.7647652626037598, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.726804569108597e-09, |
|
"logits/chosen": -1.9748871326446533, |
|
"logits/rejected": -1.888811707496643, |
|
"logps/chosen": -553.1395263671875, |
|
"logps/rejected": -559.5152587890625, |
|
"loss": 0.0332, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.228854179382324, |
|
"rewards/margins": 1.4253013134002686, |
|
"rewards/rejected": -3.654155731201172, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.476818941645561e-09, |
|
"logits/chosen": -1.944598913192749, |
|
"logits/rejected": -1.8884025812149048, |
|
"logps/chosen": -540.9061279296875, |
|
"logps/rejected": -574.0789794921875, |
|
"loss": 0.036, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.298133134841919, |
|
"rewards/margins": 1.5589947700500488, |
|
"rewards/rejected": -3.8571276664733887, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.609589412347347e-09, |
|
"logits/chosen": -1.995321273803711, |
|
"logits/rejected": -1.909641981124878, |
|
"logps/chosen": -570.28369140625, |
|
"logps/rejected": -541.8267211914062, |
|
"loss": 0.0347, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.329395055770874, |
|
"rewards/margins": 1.3247489929199219, |
|
"rewards/rejected": -3.654143810272217, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1280387858572667e-09, |
|
"logits/chosen": -1.9664127826690674, |
|
"logits/rejected": -1.8978990316390991, |
|
"logps/chosen": -567.4363403320312, |
|
"logps/rejected": -551.3905639648438, |
|
"loss": 0.0347, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.342649459838867, |
|
"rewards/margins": 1.2894551753997803, |
|
"rewards/rejected": -3.6321043968200684, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.03448615738172e-09, |
|
"logits/chosen": -1.91958749294281, |
|
"logits/rejected": -1.8549039363861084, |
|
"logps/chosen": -635.5400390625, |
|
"logps/rejected": -613.4205322265625, |
|
"loss": 0.0354, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.2879905700683594, |
|
"rewards/margins": 1.6663434505462646, |
|
"rewards/rejected": -3.954333782196045, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.3064328257259575e-10, |
|
"logits/chosen": -1.9804208278656006, |
|
"logits/rejected": -1.891332983970642, |
|
"logps/chosen": -612.157958984375, |
|
"logps/rejected": -556.913818359375, |
|
"loss": 0.0366, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.3392751216888428, |
|
"rewards/margins": 1.2889056205749512, |
|
"rewards/rejected": -3.628180980682373, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.7611898088715216e-11, |
|
"logits/chosen": -1.952614426612854, |
|
"logits/rejected": -1.846652626991272, |
|
"logps/chosen": -577.5618896484375, |
|
"logps/rejected": -596.773193359375, |
|
"loss": 0.0347, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.3046746253967285, |
|
"rewards/margins": 1.4788968563079834, |
|
"rewards/rejected": -3.783571720123291, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 883, |
|
"total_flos": 0.0, |
|
"train_loss": 0.06580274857555349, |
|
"train_runtime": 7965.4661, |
|
"train_samples_per_second": 14.19, |
|
"train_steps_per_second": 0.111 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 883, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|