{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9987908101571947,
  "eval_steps": 10000000,
  "global_step": 413,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 6401.270603874373,
      "learning_rate": 9.523809523809522e-09,
      "logits/chosen": -2.7005977630615234,
      "logits/rejected": -2.6288318634033203,
      "logps/chosen": -1.1158788204193115,
      "logps/rejected": -1.1333446502685547,
      "loss": 0.7544,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.02,
      "grad_norm": 9369.590990783972,
      "learning_rate": 9.523809523809523e-08,
      "logits/chosen": -2.76228666305542,
      "logits/rejected": -2.6970374584198,
      "logps/chosen": -0.837486743927002,
      "logps/rejected": -0.8182350993156433,
      "loss": 0.9695,
      "rewards/accuracies": 0.4305555522441864,
      "rewards/chosen": 0.06597563624382019,
      "rewards/margins": 0.437710702419281,
      "rewards/rejected": -0.3717350959777832,
      "step": 10
    },
    {
      "epoch": 0.05,
      "grad_norm": 5966.657402243146,
      "learning_rate": 1.9047619047619045e-07,
      "logits/chosen": -2.6901049613952637,
      "logits/rejected": -2.6502909660339355,
      "logps/chosen": -0.9933319091796875,
      "logps/rejected": -1.0394352674484253,
      "loss": 1.0318,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.794396698474884,
      "rewards/margins": 0.7471516728401184,
      "rewards/rejected": -1.5415483713150024,
      "step": 20
    },
    {
      "epoch": 0.07,
      "grad_norm": 8820.198504372876,
      "learning_rate": 2.857142857142857e-07,
      "logits/chosen": -2.7333264350891113,
      "logits/rejected": -2.6793360710144043,
      "logps/chosen": -0.9710652232170105,
      "logps/rejected": -0.9799602627754211,
      "loss": 1.3198,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -2.275942325592041,
      "rewards/margins": 0.9020620584487915,
      "rewards/rejected": -3.178004264831543,
      "step": 30
    },
    {
      "epoch": 0.1,
      "grad_norm": 8453.783513094899,
      "learning_rate": 3.809523809523809e-07,
      "logits/chosen": -2.6771621704101562,
      "logits/rejected": -2.6321842670440674,
      "logps/chosen": -0.989823043346405,
      "logps/rejected": -0.9216930270195007,
      "loss": 2.0555,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -0.6586966514587402,
      "rewards/margins": 5.100310325622559,
      "rewards/rejected": -5.759006500244141,
      "step": 40
    },
    {
      "epoch": 0.12,
      "grad_norm": 4192.139232222726,
      "learning_rate": 3.995412608484087e-07,
      "logits/chosen": -2.743403911590576,
      "logits/rejected": -2.6878693103790283,
      "logps/chosen": -0.9671042561531067,
      "logps/rejected": -0.917597770690918,
      "loss": 2.6495,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -0.16885781288146973,
      "rewards/margins": 5.186079978942871,
      "rewards/rejected": -5.35493803024292,
      "step": 50
    },
    {
      "epoch": 0.15,
      "grad_norm": 5643.860863524967,
      "learning_rate": 3.976812391485896e-07,
      "logits/chosen": -2.7438769340515137,
      "logits/rejected": -2.676765203475952,
      "logps/chosen": -0.911353588104248,
      "logps/rejected": -0.9122518301010132,
      "loss": 3.8047,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": 2.4976494312286377,
      "rewards/margins": 5.426072120666504,
      "rewards/rejected": -2.928422212600708,
      "step": 60
    },
    {
      "epoch": 0.17,
      "grad_norm": 4497.230754903385,
      "learning_rate": 3.9440458281608213e-07,
      "logits/chosen": -2.740940570831299,
      "logits/rejected": -2.7162723541259766,
      "logps/chosen": -0.9154363870620728,
      "logps/rejected": -0.868497371673584,
      "loss": 3.6432,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": 5.681364059448242,
      "rewards/margins": 8.000432968139648,
      "rewards/rejected": -2.319068431854248,
      "step": 70
    },
    {
      "epoch": 0.19,
      "grad_norm": 9843.974275847575,
      "learning_rate": 3.897347732134074e-07,
      "logits/chosen": -2.679215908050537,
      "logits/rejected": -2.625516891479492,
      "logps/chosen": -0.9146322011947632,
      "logps/rejected": -1.0181081295013428,
      "loss": 5.767,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -10.08639907836914,
      "rewards/margins": 6.582289695739746,
      "rewards/rejected": -16.668689727783203,
      "step": 80
    },
    {
      "epoch": 0.22,
      "grad_norm": 4773.013380320505,
      "learning_rate": 3.8370527539794614e-07,
      "logits/chosen": -2.6771388053894043,
      "logits/rejected": -2.6291418075561523,
      "logps/chosen": -1.003847360610962,
      "logps/rejected": -1.0297266244888306,
      "loss": 4.6354,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 4.863407611846924,
      "rewards/margins": 9.78220272064209,
      "rewards/rejected": -4.918795585632324,
      "step": 90
    },
    {
      "epoch": 0.24,
      "grad_norm": 3074.8663144850243,
      "learning_rate": 3.763592983027255e-07,
      "logits/chosen": -2.705735683441162,
      "logits/rejected": -2.6605448722839355,
      "logps/chosen": -0.9163268804550171,
      "logps/rejected": -0.9396775960922241,
      "loss": 5.8585,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -12.477940559387207,
      "rewards/margins": 7.702305793762207,
      "rewards/rejected": -20.180248260498047,
      "step": 100
    },
    {
      "epoch": 0.27,
      "grad_norm": 4099.610429119441,
      "learning_rate": 3.6774948509008527e-07,
      "logits/chosen": -2.714970111846924,
      "logits/rejected": -2.6705470085144043,
      "logps/chosen": -0.9598251581192017,
      "logps/rejected": -0.9319995641708374,
      "loss": 5.1529,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": 9.19798755645752,
      "rewards/margins": 10.779365539550781,
      "rewards/rejected": -1.5813770294189453,
      "step": 110
    },
    {
      "epoch": 0.29,
      "grad_norm": 4599.711217449366,
      "learning_rate": 3.579375358972288e-07,
      "logits/chosen": -2.678779125213623,
      "logits/rejected": -2.6315762996673584,
      "logps/chosen": -0.9081487655639648,
      "logps/rejected": -1.0060938596725464,
      "loss": 4.0915,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -13.663342475891113,
      "rewards/margins": 9.755656242370605,
      "rewards/rejected": -23.418994903564453,
      "step": 120
    },
    {
      "epoch": 0.31,
      "grad_norm": 4010.334966061441,
      "learning_rate": 3.4699376567716156e-07,
      "logits/chosen": -2.7230353355407715,
      "logits/rejected": -2.684389591217041,
      "logps/chosen": -0.8652521967887878,
      "logps/rejected": -0.8799147605895996,
      "loss": 4.4027,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 2.19469952583313,
      "rewards/margins": 15.263641357421875,
      "rewards/rejected": -13.068939208984375,
      "step": 130
    },
    {
      "epoch": 0.34,
      "grad_norm": 5239.11146834966,
      "learning_rate": 3.349966003036421e-07,
      "logits/chosen": -2.689558506011963,
      "logits/rejected": -2.649766445159912,
      "logps/chosen": -0.9352903366088867,
      "logps/rejected": -0.9416161775588989,
      "loss": 4.7953,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -4.734063148498535,
      "rewards/margins": 8.841203689575195,
      "rewards/rejected": -13.575268745422363,
      "step": 140
    },
    {
      "epoch": 0.36,
      "grad_norm": 5394.35498681908,
      "learning_rate": 3.220320145511884e-07,
      "logits/chosen": -2.7070841789245605,
      "logits/rejected": -2.647737979888916,
      "logps/chosen": -0.9441506266593933,
      "logps/rejected": -0.9885166883468628,
      "loss": 4.2219,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": 5.9402689933776855,
      "rewards/margins": 12.97706413269043,
      "rewards/rejected": -7.036795139312744,
      "step": 150
    },
    {
      "epoch": 0.39,
      "grad_norm": 5022.189692479379,
      "learning_rate": 3.0819291597771795e-07,
      "logits/chosen": -2.7051825523376465,
      "logits/rejected": -2.667494297027588,
      "logps/chosen": -0.911395251750946,
      "logps/rejected": -0.939487099647522,
      "loss": 4.7963,
      "rewards/accuracies": 0.856249988079071,
      "rewards/chosen": -0.6114660501480103,
      "rewards/margins": 9.443866729736328,
      "rewards/rejected": -10.055331230163574,
      "step": 160
    },
    {
      "epoch": 0.41,
      "grad_norm": 5428.944545727042,
      "learning_rate": 2.9357847912507786e-07,
      "logits/chosen": -2.6787288188934326,
      "logits/rejected": -2.609421968460083,
      "logps/chosen": -0.8976411819458008,
      "logps/rejected": -0.8857674598693848,
      "loss": 4.6262,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -2.82297945022583,
      "rewards/margins": 10.640687942504883,
      "rewards/rejected": -13.463666915893555,
      "step": 170
    },
    {
      "epoch": 0.44,
      "grad_norm": 7317.882582449178,
      "learning_rate": 2.7829343480875617e-07,
      "logits/chosen": -2.6716930866241455,
      "logits/rejected": -2.6018152236938477,
      "logps/chosen": -0.9342878460884094,
      "logps/rejected": -0.9536906480789185,
      "loss": 4.5209,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": 4.173262596130371,
      "rewards/margins": 8.933877944946289,
      "rewards/rejected": -4.760615348815918,
      "step": 180
    },
    {
      "epoch": 0.46,
      "grad_norm": 5046.2946182405685,
      "learning_rate": 2.624473195899052e-07,
      "logits/chosen": -2.737992763519287,
      "logits/rejected": -2.7089955806732178,
      "logps/chosen": -0.9629039764404297,
      "logps/rejected": -1.039236307144165,
      "loss": 4.5521,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -3.3569388389587402,
      "rewards/margins": 13.995905876159668,
      "rewards/rejected": -17.352848052978516,
      "step": 190
    },
    {
      "epoch": 0.48,
      "grad_norm": 4268.8163809344915,
      "learning_rate": 2.4615369080815547e-07,
      "logits/chosen": -2.6982626914978027,
      "logits/rejected": -2.6629488468170166,
      "logps/chosen": -0.8523995280265808,
      "logps/rejected": -0.9246847033500671,
      "loss": 3.8184,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": 2.3979854583740234,
      "rewards/margins": 4.996596336364746,
      "rewards/rejected": -2.5986106395721436,
      "step": 200
    },
    {
      "epoch": 0.51,
      "grad_norm": 4211.242306423206,
      "learning_rate": 2.2952931280049625e-07,
      "logits/chosen": -2.7346115112304688,
      "logits/rejected": -2.6734609603881836,
      "logps/chosen": -1.0063531398773193,
      "logps/rejected": -0.9570119976997375,
      "loss": 4.9954,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 7.86081600189209,
      "rewards/margins": 13.075413703918457,
      "rewards/rejected": -5.214597225189209,
      "step": 210
    },
    {
      "epoch": 0.53,
      "grad_norm": 4906.2448320907815,
      "learning_rate": 2.1269332013798747e-07,
      "logits/chosen": -2.7431142330169678,
      "logits/rejected": -2.7241249084472656,
      "logps/chosen": -0.8835189938545227,
      "logps/rejected": -0.8670462369918823,
      "loss": 4.3795,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": 4.582579135894775,
      "rewards/margins": 8.150335311889648,
      "rewards/rejected": -3.567755937576294,
      "step": 220
    },
    {
      "epoch": 0.56,
      "grad_norm": 2995.5119741253625,
      "learning_rate": 1.9576636387676436e-07,
      "logits/chosen": -2.690732955932617,
      "logits/rejected": -2.653067111968994,
      "logps/chosen": -0.960831344127655,
      "logps/rejected": -0.9556485414505005,
      "loss": 4.0487,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -8.449748039245605,
      "rewards/margins": 10.095115661621094,
      "rewards/rejected": -18.544864654541016,
      "step": 230
    },
    {
      "epoch": 0.58,
      "grad_norm": 3504.414329050279,
      "learning_rate": 1.7886974694151976e-07,
      "logits/chosen": -2.7119805812835693,
      "logits/rejected": -2.6879172325134277,
      "logps/chosen": -0.990290641784668,
      "logps/rejected": -0.9934972524642944,
      "loss": 4.3644,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -0.5965616106987,
      "rewards/margins": 9.211602210998535,
      "rewards/rejected": -9.808164596557617,
      "step": 240
    },
    {
      "epoch": 0.6,
      "grad_norm": 3400.5848210057216,
      "learning_rate": 1.6212455483752895e-07,
      "logits/chosen": -2.756906270980835,
      "logits/rejected": -2.6796135902404785,
      "logps/chosen": -0.8838168978691101,
      "logps/rejected": -0.9137406349182129,
      "loss": 4.5034,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": 6.5281982421875,
      "rewards/margins": 8.64702033996582,
      "rewards/rejected": -2.1188230514526367,
      "step": 250
    },
    {
      "epoch": 0.63,
      "grad_norm": 6194.117841583386,
      "learning_rate": 1.4565078792075733e-07,
      "logits/chosen": -2.7132773399353027,
      "logits/rejected": -2.6494650840759277,
      "logps/chosen": -1.002362847328186,
      "logps/rejected": -0.9982520341873169,
      "loss": 4.8134,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": 3.0224878787994385,
      "rewards/margins": 16.206506729125977,
      "rewards/rejected": -13.1840181350708,
      "step": 260
    },
    {
      "epoch": 0.65,
      "grad_norm": 4565.495892627232,
      "learning_rate": 1.295665014444281e-07,
      "logits/chosen": -2.7381529808044434,
      "logits/rejected": -2.6608738899230957,
      "logps/chosen": -0.9501218795776367,
      "logps/rejected": -0.9476363062858582,
      "loss": 5.3754,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -0.570526123046875,
      "rewards/margins": 12.367398262023926,
      "rewards/rejected": -12.9379243850708,
      "step": 270
    },
    {
      "epoch": 0.68,
      "grad_norm": 5337.153187944306,
      "learning_rate": 1.1398695954469597e-07,
      "logits/chosen": -2.6872425079345703,
      "logits/rejected": -2.630267381668091,
      "logps/chosen": -0.9056104421615601,
      "logps/rejected": -0.8939152956008911,
      "loss": 4.1053,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": 4.294297218322754,
      "rewards/margins": 7.472552299499512,
      "rewards/rejected": -3.1782548427581787,
      "step": 280
    },
    {
      "epoch": 0.7,
      "grad_norm": 3582.07962645892,
      "learning_rate": 9.902380922818425e-08,
      "logits/chosen": -2.7334370613098145,
      "logits/rejected": -2.6919913291931152,
      "logps/chosen": -0.9840775728225708,
      "logps/rejected": -0.9756690263748169,
      "loss": 3.2759,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 8.966680526733398,
      "rewards/margins": 11.496904373168945,
      "rewards/rejected": -2.5302233695983887,
      "step": 290
    },
    {
      "epoch": 0.73,
      "grad_norm": 4767.591882910886,
      "learning_rate": 8.478428028080398e-08,
      "logits/chosen": -2.7305169105529785,
      "logits/rejected": -2.6773815155029297,
      "logps/chosen": -0.8988749384880066,
      "logps/rejected": -0.9437707662582397,
      "loss": 4.3175,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": 0.8898951411247253,
      "rewards/margins": 8.447718620300293,
      "rewards/rejected": -7.55782413482666,
      "step": 300
    },
    {
      "epoch": 0.75,
      "grad_norm": 4819.380329592898,
      "learning_rate": 7.137041683151202e-08,
      "logits/chosen": -2.7228643894195557,
      "logits/rejected": -2.6581058502197266,
      "logps/chosen": -1.0781683921813965,
      "logps/rejected": -1.028840184211731,
      "loss": 2.9744,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -2.4226202964782715,
      "rewards/margins": 13.473236083984375,
      "rewards/rejected": -15.895855903625488,
      "step": 310
    },
    {
      "epoch": 0.77,
      "grad_norm": 7840.551721640683,
      "learning_rate": 5.8878346077822135e-08,
      "logits/chosen": -2.7280871868133545,
      "logits/rejected": -2.649958848953247,
      "logps/chosen": -0.9020591974258423,
      "logps/rejected": -0.9361578822135925,
      "loss": 2.7082,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": 1.223115086555481,
      "rewards/margins": 10.582406997680664,
      "rewards/rejected": -9.35929012298584,
      "step": 320
    },
    {
      "epoch": 0.8,
      "grad_norm": 4662.77535052248,
      "learning_rate": 4.73975894135696e-08,
      "logits/chosen": -2.6770853996276855,
      "logits/rejected": -2.6099040508270264,
      "logps/chosen": -0.9263202548027039,
      "logps/rejected": -0.9608638882637024,
      "loss": 3.1985,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": 0.2365754395723343,
      "rewards/margins": 13.195585250854492,
      "rewards/rejected": -12.959010124206543,
      "step": 330
    },
    {
      "epoch": 0.82,
      "grad_norm": 4550.588002339864,
      "learning_rate": 3.701042089556483e-08,
      "logits/chosen": -2.756493330001831,
      "logits/rejected": -2.687851667404175,
      "logps/chosen": -0.8901381492614746,
      "logps/rejected": -0.9301478266716003,
      "loss": 3.841,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": 0.11963929980993271,
      "rewards/margins": 7.3289618492126465,
      "rewards/rejected": -7.209322929382324,
      "step": 340
    },
    {
      "epoch": 0.85,
      "grad_norm": 5464.471487236709,
      "learning_rate": 2.779127764652889e-08,
      "logits/chosen": -2.689107656478882,
      "logits/rejected": -2.6330015659332275,
      "logps/chosen": -0.9756801724433899,
      "logps/rejected": -0.9646003842353821,
      "loss": 3.6421,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -1.1974527835845947,
      "rewards/margins": 9.013090133666992,
      "rewards/rejected": -10.210542678833008,
      "step": 350
    },
    {
      "epoch": 0.87,
      "grad_norm": 5949.708940984834,
      "learning_rate": 1.9806226419516193e-08,
      "logits/chosen": -2.704460620880127,
      "logits/rejected": -2.656071186065674,
      "logps/chosen": -0.9623576402664185,
      "logps/rejected": -1.0082406997680664,
      "loss": 3.5231,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 2.5273587703704834,
      "rewards/margins": 11.88086223602295,
      "rewards/rejected": -9.35350227355957,
      "step": 360
    },
    {
      "epoch": 0.89,
      "grad_norm": 4320.933402478669,
      "learning_rate": 1.3112490146559552e-08,
      "logits/chosen": -2.7451281547546387,
      "logits/rejected": -2.686728000640869,
      "logps/chosen": -0.8951610326766968,
      "logps/rejected": -0.89850914478302,
      "loss": 3.0053,
      "rewards/accuracies": 0.856249988079071,
      "rewards/chosen": 0.6753175258636475,
      "rewards/margins": 12.29626750946045,
      "rewards/rejected": -11.620949745178223,
      "step": 370
    },
    {
      "epoch": 0.92,
      "grad_norm": 2514.940389992379,
      "learning_rate": 7.758037864413247e-09,
      "logits/chosen": -2.7158432006835938,
      "logits/rejected": -2.6906635761260986,
      "logps/chosen": -0.9033122062683105,
      "logps/rejected": -0.9709407091140747,
      "loss": 2.8751,
      "rewards/accuracies": 0.856249988079071,
      "rewards/chosen": 0.9628832936286926,
      "rewards/margins": 10.83133316040039,
      "rewards/rejected": -9.868449211120605,
      "step": 380
    },
    {
      "epoch": 0.94,
      "grad_norm": 3504.225752431698,
      "learning_rate": 3.78124095609087e-09,
      "logits/chosen": -2.6947999000549316,
      "logits/rejected": -2.6553878784179688,
      "logps/chosen": -0.9263744354248047,
      "logps/rejected": -0.9935058355331421,
      "loss": 3.019,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": 1.4072116613388062,
      "rewards/margins": 10.741894721984863,
      "rewards/rejected": -9.334683418273926,
      "step": 390
    },
    {
      "epoch": 0.97,
      "grad_norm": 4987.634749508018,
      "learning_rate": 1.2105981716597603e-09,
      "logits/chosen": -2.7300946712493896,
      "logits/rejected": -2.6389007568359375,
      "logps/chosen": -0.9686774015426636,
      "logps/rejected": -0.9328421354293823,
      "loss": 3.7864,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": 0.6465551853179932,
      "rewards/margins": 11.809611320495605,
      "rewards/rejected": -11.163057327270508,
      "step": 400
    },
    {
      "epoch": 0.99,
      "grad_norm": 5473.226219590305,
      "learning_rate": 6.453139886395398e-11,
      "logits/chosen": -2.7284317016601562,
      "logits/rejected": -2.6886637210845947,
      "logps/chosen": -0.9334842562675476,
      "logps/rejected": -0.9600637555122375,
      "loss": 3.6391,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -2.157397747039795,
      "rewards/margins": 13.2835054397583,
      "rewards/rejected": -15.440902709960938,
      "step": 410
    },
    {
      "epoch": 1.0,
      "step": 413,
      "total_flos": 0.0,
      "train_loss": 3.8421780889894426,
      "train_runtime": 6381.4933,
      "train_samples_per_second": 8.293,
      "train_steps_per_second": 0.065
    }
  ],
  "logging_steps": 10,
  "max_steps": 413,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}