|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9988623435722411, |
|
"eval_steps": 10000000, |
|
"global_step": 439, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 120.07926705665244, |
|
"learning_rate": 2.2727272727272727e-09, |
|
"logits/chosen": -1.6768856048583984, |
|
"logits/rejected": -1.7259055376052856, |
|
"logps/chosen": -394.9654541015625, |
|
"logps/rejected": -320.0859069824219, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 160.8036403422587, |
|
"learning_rate": 2.2727272727272725e-08, |
|
"logits/chosen": -1.703018069267273, |
|
"logits/rejected": -1.6685585975646973, |
|
"logps/chosen": -429.5360412597656, |
|
"logps/rejected": -403.7555236816406, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0004270696663297713, |
|
"rewards/margins": -0.0031983989756554365, |
|
"rewards/rejected": 0.00277132960036397, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 241.66358094434165, |
|
"learning_rate": 4.545454545454545e-08, |
|
"logits/chosen": -1.780827283859253, |
|
"logits/rejected": -1.7355620861053467, |
|
"logps/chosen": -442.1951599121094, |
|
"logps/rejected": -401.2707214355469, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.002479883376508951, |
|
"rewards/margins": 0.002258532214909792, |
|
"rewards/rejected": -0.004738415591418743, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 160.69914583171965, |
|
"learning_rate": 6.818181818181817e-08, |
|
"logits/chosen": -1.7514013051986694, |
|
"logits/rejected": -1.6820430755615234, |
|
"logps/chosen": -439.64373779296875, |
|
"logps/rejected": -401.2854919433594, |
|
"loss": 0.6738, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0074474625289440155, |
|
"rewards/margins": 0.054817844182252884, |
|
"rewards/rejected": -0.04737037047743797, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 144.27361385062915, |
|
"learning_rate": 9.09090909090909e-08, |
|
"logits/chosen": -1.7552951574325562, |
|
"logits/rejected": -1.6888301372528076, |
|
"logps/chosen": -428.416015625, |
|
"logps/rejected": -383.068603515625, |
|
"loss": 0.6579, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.1923234909772873, |
|
"rewards/margins": 0.1899619996547699, |
|
"rewards/rejected": 0.0023614875972270966, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 115.83588190821067, |
|
"learning_rate": 9.994307990108962e-08, |
|
"logits/chosen": -1.7498763799667358, |
|
"logits/rejected": -1.6867773532867432, |
|
"logps/chosen": -437.7354431152344, |
|
"logps/rejected": -383.8248596191406, |
|
"loss": 0.6152, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.5490495562553406, |
|
"rewards/margins": 0.33619847893714905, |
|
"rewards/rejected": 0.2128511667251587, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 111.21906155896082, |
|
"learning_rate": 9.959570405988094e-08, |
|
"logits/chosen": -1.8246532678604126, |
|
"logits/rejected": -1.7464463710784912, |
|
"logps/chosen": -379.9985046386719, |
|
"logps/rejected": -352.2860412597656, |
|
"loss": 0.614, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.7562192678451538, |
|
"rewards/margins": 0.2702825665473938, |
|
"rewards/rejected": 0.4859367311000824, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 114.01926694263507, |
|
"learning_rate": 9.893476820924666e-08, |
|
"logits/chosen": -1.9211199283599854, |
|
"logits/rejected": -1.8387491703033447, |
|
"logps/chosen": -412.0660095214844, |
|
"logps/rejected": -374.1966247558594, |
|
"loss": 0.5841, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 1.0015352964401245, |
|
"rewards/margins": 0.3862503468990326, |
|
"rewards/rejected": 0.6152850389480591, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 143.95597429946028, |
|
"learning_rate": 9.796445099843647e-08, |
|
"logits/chosen": -1.9127334356307983, |
|
"logits/rejected": -1.8274517059326172, |
|
"logps/chosen": -424.8004455566406, |
|
"logps/rejected": -390.2203063964844, |
|
"loss": 0.6144, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.9376031160354614, |
|
"rewards/margins": 0.4474514126777649, |
|
"rewards/rejected": 0.4901517331600189, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 126.42198014014429, |
|
"learning_rate": 9.669088708527066e-08, |
|
"logits/chosen": -1.8573029041290283, |
|
"logits/rejected": -1.7926852703094482, |
|
"logps/chosen": -435.7537536621094, |
|
"logps/rejected": -401.1055908203125, |
|
"loss": 0.5739, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.8144777417182922, |
|
"rewards/margins": 0.4378415048122406, |
|
"rewards/rejected": 0.376636266708374, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 132.16216232500156, |
|
"learning_rate": 9.512212835085849e-08, |
|
"logits/chosen": -1.898048758506775, |
|
"logits/rejected": -1.813433051109314, |
|
"logps/chosen": -413.4141540527344, |
|
"logps/rejected": -398.70526123046875, |
|
"loss": 0.5603, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.6169204115867615, |
|
"rewards/margins": 0.5242881178855896, |
|
"rewards/rejected": 0.09263229370117188, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 112.13374239362014, |
|
"learning_rate": 9.326809299301306e-08, |
|
"logits/chosen": -1.89919114112854, |
|
"logits/rejected": -1.7970205545425415, |
|
"logps/chosen": -454.9283142089844, |
|
"logps/rejected": -405.9847717285156, |
|
"loss": 0.5581, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.4878689646720886, |
|
"rewards/margins": 0.6205599308013916, |
|
"rewards/rejected": -0.1326909363269806, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 113.80543806203703, |
|
"learning_rate": 9.114050282021158e-08, |
|
"logits/chosen": -1.8961639404296875, |
|
"logits/rejected": -1.836488127708435, |
|
"logps/chosen": -453.51898193359375, |
|
"logps/rejected": -419.9806213378906, |
|
"loss": 0.5372, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.4265638291835785, |
|
"rewards/margins": 0.6399323344230652, |
|
"rewards/rejected": -0.21336853504180908, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 122.38937390797612, |
|
"learning_rate": 8.875280914254802e-08, |
|
"logits/chosen": -1.9070549011230469, |
|
"logits/rejected": -1.8174508810043335, |
|
"logps/chosen": -408.0248107910156, |
|
"logps/rejected": -365.46185302734375, |
|
"loss": 0.5379, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.505743145942688, |
|
"rewards/margins": 0.6783953309059143, |
|
"rewards/rejected": -0.17265217006206512, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 104.81942476289146, |
|
"learning_rate": 8.612010772821971e-08, |
|
"logits/chosen": -1.9492871761322021, |
|
"logits/rejected": -1.903029203414917, |
|
"logps/chosen": -463.1678771972656, |
|
"logps/rejected": -408.36138916015625, |
|
"loss": 0.5415, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.7421985268592834, |
|
"rewards/margins": 0.7196205854415894, |
|
"rewards/rejected": 0.02257799357175827, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 130.65997070751789, |
|
"learning_rate": 8.325904336322055e-08, |
|
"logits/chosen": -1.9297358989715576, |
|
"logits/rejected": -1.872240424156189, |
|
"logps/chosen": -408.3719482421875, |
|
"logps/rejected": -374.2057189941406, |
|
"loss": 0.5461, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.6860557198524475, |
|
"rewards/margins": 0.6311514973640442, |
|
"rewards/rejected": 0.05490417033433914, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 100.26257755554322, |
|
"learning_rate": 8.01877046176447e-08, |
|
"logits/chosen": -1.8793761730194092, |
|
"logits/rejected": -1.8072710037231445, |
|
"logps/chosen": -399.7383728027344, |
|
"logps/rejected": -373.10791015625, |
|
"loss": 0.5278, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.4368124008178711, |
|
"rewards/margins": 0.6604553461074829, |
|
"rewards/rejected": -0.22364301979541779, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 89.3633959368459, |
|
"learning_rate": 7.692550948392249e-08, |
|
"logits/chosen": -1.9449889659881592, |
|
"logits/rejected": -1.8793401718139648, |
|
"logps/chosen": -430.8158264160156, |
|
"logps/rejected": -383.00640869140625, |
|
"loss": 0.5275, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.5035308599472046, |
|
"rewards/margins": 0.5975342988967896, |
|
"rewards/rejected": -0.09400341659784317, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 103.70599270965509, |
|
"learning_rate": 7.349308261002021e-08, |
|
"logits/chosen": -1.9056031703948975, |
|
"logits/rejected": -1.8477048873901367, |
|
"logps/chosen": -437.502197265625, |
|
"logps/rejected": -403.87762451171875, |
|
"loss": 0.526, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.6943656206130981, |
|
"rewards/margins": 0.6242468357086182, |
|
"rewards/rejected": 0.07011876255273819, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 84.20282962041543, |
|
"learning_rate": 6.991212490377531e-08, |
|
"logits/chosen": -1.9574388265609741, |
|
"logits/rejected": -1.906002402305603, |
|
"logps/chosen": -464.6742248535156, |
|
"logps/rejected": -417.5992126464844, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.6871397495269775, |
|
"rewards/margins": 0.8371666669845581, |
|
"rewards/rejected": -0.15002694725990295, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 166.8394060310626, |
|
"learning_rate": 6.620527633276978e-08, |
|
"logits/chosen": -1.8844044208526611, |
|
"logits/rejected": -1.8095699548721313, |
|
"logps/chosen": -425.5460510253906, |
|
"logps/rejected": -407.5855712890625, |
|
"loss": 0.507, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.5698906779289246, |
|
"rewards/margins": 0.8980382680892944, |
|
"rewards/rejected": -0.3281475901603699, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 100.6332916667586, |
|
"learning_rate": 6.239597278716581e-08, |
|
"logits/chosen": -1.9531447887420654, |
|
"logits/rejected": -1.8929874897003174, |
|
"logps/chosen": -414.9901428222656, |
|
"logps/rejected": -375.222412109375, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.5068634748458862, |
|
"rewards/margins": 0.9239821434020996, |
|
"rewards/rejected": -0.41711869835853577, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 109.57190452096494, |
|
"learning_rate": 5.8508297910462456e-08, |
|
"logits/chosen": -1.8995733261108398, |
|
"logits/rejected": -1.818933129310608, |
|
"logps/chosen": -414.49176025390625, |
|
"logps/rejected": -402.1959228515625, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.3541712164878845, |
|
"rewards/margins": 0.9701933860778809, |
|
"rewards/rejected": -0.6160220503807068, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 221.2346866193578, |
|
"learning_rate": 5.456683083494731e-08, |
|
"logits/chosen": -1.892313003540039, |
|
"logits/rejected": -1.8525936603546143, |
|
"logps/chosen": -472.8267517089844, |
|
"logps/rejected": -455.94305419921875, |
|
"loss": 0.5217, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.2913890480995178, |
|
"rewards/margins": 0.6712461709976196, |
|
"rewards/rejected": -0.3798571825027466, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 199.8394238496795, |
|
"learning_rate": 5.059649078450834e-08, |
|
"logits/chosen": -1.8888638019561768, |
|
"logits/rejected": -1.8429927825927734, |
|
"logps/chosen": -432.7430725097656, |
|
"logps/rejected": -422.738037109375, |
|
"loss": 0.4908, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.274366557598114, |
|
"rewards/margins": 0.8101264834403992, |
|
"rewards/rejected": -0.5357599854469299, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 80.39074621948711, |
|
"learning_rate": 4.6622379527277186e-08, |
|
"logits/chosen": -1.8966060876846313, |
|
"logits/rejected": -1.8454921245574951, |
|
"logps/chosen": -394.818115234375, |
|
"logps/rejected": -373.1236877441406, |
|
"loss": 0.4952, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.21977496147155762, |
|
"rewards/margins": 0.7499412298202515, |
|
"rewards/rejected": -0.5301662683486938, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 161.76520518881122, |
|
"learning_rate": 4.26696226741691e-08, |
|
"logits/chosen": -1.9112564325332642, |
|
"logits/rejected": -1.839714765548706, |
|
"logps/chosen": -430.2705078125, |
|
"logps/rejected": -403.42633056640625, |
|
"loss": 0.5027, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.09559208154678345, |
|
"rewards/margins": 0.799443244934082, |
|
"rewards/rejected": -0.7038511037826538, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 92.5112436349587, |
|
"learning_rate": 3.876321082668098e-08, |
|
"logits/chosen": -1.9705326557159424, |
|
"logits/rejected": -1.896384596824646, |
|
"logps/chosen": -455.70849609375, |
|
"logps/rejected": -424.35528564453125, |
|
"loss": 0.4925, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.3973539471626282, |
|
"rewards/margins": 0.953347384929657, |
|
"rewards/rejected": -0.5559934377670288, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 178.08098991416173, |
|
"learning_rate": 3.492784157826244e-08, |
|
"logits/chosen": -1.8988901376724243, |
|
"logits/rejected": -1.8011884689331055, |
|
"logps/chosen": -438.60076904296875, |
|
"logps/rejected": -388.46978759765625, |
|
"loss": 0.5004, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.43949180841445923, |
|
"rewards/margins": 1.0134050846099854, |
|
"rewards/rejected": -0.5739132165908813, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 118.63444466442957, |
|
"learning_rate": 3.118776336817812e-08, |
|
"logits/chosen": -1.9487316608428955, |
|
"logits/rejected": -1.8821332454681396, |
|
"logps/chosen": -431.9124450683594, |
|
"logps/rejected": -391.89208984375, |
|
"loss": 0.4867, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.3583192229270935, |
|
"rewards/margins": 0.9170917272567749, |
|
"rewards/rejected": -0.5587725639343262, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 105.93330787956464, |
|
"learning_rate": 2.7566622175067443e-08, |
|
"logits/chosen": -1.9368865489959717, |
|
"logits/rejected": -1.8702398538589478, |
|
"logps/chosen": -427.23101806640625, |
|
"logps/rejected": -406.6597595214844, |
|
"loss": 0.5099, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.41136330366134644, |
|
"rewards/margins": 0.9053149223327637, |
|
"rewards/rejected": -0.4939516484737396, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 94.82101284169875, |
|
"learning_rate": 2.408731201945432e-08, |
|
"logits/chosen": -1.9327596426010132, |
|
"logits/rejected": -1.8846778869628906, |
|
"logps/chosen": -422.32061767578125, |
|
"logps/rejected": -419.91131591796875, |
|
"loss": 0.4923, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.4359460473060608, |
|
"rewards/margins": 0.7473770380020142, |
|
"rewards/rejected": -0.31143102049827576, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 197.8417598175193, |
|
"learning_rate": 2.0771830220378112e-08, |
|
"logits/chosen": -1.8852752447128296, |
|
"logits/rejected": -1.8286488056182861, |
|
"logps/chosen": -450.9109802246094, |
|
"logps/rejected": -427.3114318847656, |
|
"loss": 0.4812, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.44545871019363403, |
|
"rewards/margins": 0.8026180267333984, |
|
"rewards/rejected": -0.357159286737442, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 115.40932567329016, |
|
"learning_rate": 1.7641138321260257e-08, |
|
"logits/chosen": -1.9217668771743774, |
|
"logits/rejected": -1.8482532501220703, |
|
"logps/chosen": -424.6617736816406, |
|
"logps/rejected": -386.41943359375, |
|
"loss": 0.4888, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 0.49162572622299194, |
|
"rewards/margins": 1.0169591903686523, |
|
"rewards/rejected": -0.5253334045410156, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 116.14707152688237, |
|
"learning_rate": 1.4715029564277793e-08, |
|
"logits/chosen": -1.995141625404358, |
|
"logits/rejected": -1.9429680109024048, |
|
"logps/chosen": -437.26324462890625, |
|
"logps/rejected": -408.69403076171875, |
|
"loss": 0.4984, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.6876565217971802, |
|
"rewards/margins": 0.9710708856582642, |
|
"rewards/rejected": -0.283414363861084, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 96.35236055831872, |
|
"learning_rate": 1.2012003751113343e-08, |
|
"logits/chosen": -1.9784055948257446, |
|
"logits/rejected": -1.913835883140564, |
|
"logps/chosen": -427.19921875, |
|
"logps/rejected": -406.6058044433594, |
|
"loss": 0.4675, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.41034063696861267, |
|
"rewards/margins": 0.9592168927192688, |
|
"rewards/rejected": -0.5488761067390442, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 119.40607594359629, |
|
"learning_rate": 9.549150281252633e-09, |
|
"logits/chosen": -1.9397361278533936, |
|
"logits/rejected": -1.884749174118042, |
|
"logps/chosen": -451.00531005859375, |
|
"logps/rejected": -423.86553955078125, |
|
"loss": 0.481, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.5500633120536804, |
|
"rewards/margins": 0.9735897779464722, |
|
"rewards/rejected": -0.4235265851020813, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 110.76475497042553, |
|
"learning_rate": 7.3420401072985306e-09, |
|
"logits/chosen": -1.9737659692764282, |
|
"logits/rejected": -1.9217618703842163, |
|
"logps/chosen": -427.6600646972656, |
|
"logps/rejected": -412.3758850097656, |
|
"loss": 0.4918, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.44847536087036133, |
|
"rewards/margins": 0.9299023747444153, |
|
"rewards/rejected": -0.4814269542694092, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 95.99625394215789, |
|
"learning_rate": 5.404627290395369e-09, |
|
"logits/chosen": -1.9365133047103882, |
|
"logits/rejected": -1.8697948455810547, |
|
"logps/chosen": -429.69219970703125, |
|
"logps/rejected": -406.6961975097656, |
|
"loss": 0.4841, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.6347268223762512, |
|
"rewards/margins": 0.9203673601150513, |
|
"rewards/rejected": -0.2856404185295105, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 157.20747053789427, |
|
"learning_rate": 3.74916077816162e-09, |
|
"logits/chosen": -1.945406198501587, |
|
"logits/rejected": -1.8826377391815186, |
|
"logps/chosen": -412.59619140625, |
|
"logps/rejected": -386.8226623535156, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.42942380905151367, |
|
"rewards/margins": 0.8297268152236938, |
|
"rewards/rejected": -0.4003029763698578, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 105.21807814382213, |
|
"learning_rate": 2.386106962899165e-09, |
|
"logits/chosen": -1.871506690979004, |
|
"logits/rejected": -1.7978681325912476, |
|
"logps/chosen": -419.5323791503906, |
|
"logps/rejected": -388.26239013671875, |
|
"loss": 0.4821, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.3208548426628113, |
|
"rewards/margins": 0.8715551495552063, |
|
"rewards/rejected": -0.5507001876831055, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 114.13246004934948, |
|
"learning_rate": 1.3240835096913706e-09, |
|
"logits/chosen": -1.9058313369750977, |
|
"logits/rejected": -1.8005132675170898, |
|
"logps/chosen": -423.19305419921875, |
|
"logps/rejected": -383.4271240234375, |
|
"loss": 0.4992, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.6571682095527649, |
|
"rewards/margins": 0.9992189407348633, |
|
"rewards/rejected": -0.3420506417751312, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 88.16218181131795, |
|
"learning_rate": 5.698048727497462e-10, |
|
"logits/chosen": -1.929990530014038, |
|
"logits/rejected": -1.8586419820785522, |
|
"logps/chosen": -402.3602294921875, |
|
"logps/rejected": -381.64605712890625, |
|
"loss": 0.4911, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.5359801650047302, |
|
"rewards/margins": 1.0312530994415283, |
|
"rewards/rejected": -0.4952728748321533, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 104.24331856340095, |
|
"learning_rate": 1.2803984447259387e-10, |
|
"logits/chosen": -1.9403343200683594, |
|
"logits/rejected": -1.881209135055542, |
|
"logps/chosen": -453.416259765625, |
|
"logps/rejected": -408.7674865722656, |
|
"loss": 0.4731, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.4977526068687439, |
|
"rewards/margins": 1.0032169818878174, |
|
"rewards/rejected": -0.5054643750190735, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 439, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5309282744935542, |
|
"train_runtime": 6847.998, |
|
"train_samples_per_second": 8.212, |
|
"train_steps_per_second": 0.064 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 439, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|