{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 100,
  "global_step": 3821,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 0.9140625,
      "learning_rate": 1.3054830287206268e-08,
      "logits/chosen": -3.0349411964416504,
      "logits/rejected": -2.9776864051818848,
      "logps/chosen": -456.54913330078125,
      "logps/rejected": -495.31854248046875,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8984375,
      "learning_rate": 1.3054830287206266e-07,
      "logits/chosen": -2.916372776031494,
      "logits/rejected": -2.8596677780151367,
      "logps/chosen": -410.91595458984375,
      "logps/rejected": -357.0899353027344,
      "loss": 0.6933,
      "rewards/accuracies": 0.4513888955116272,
      "rewards/chosen": -0.00023399748897645622,
      "rewards/margins": -0.0003936050634365529,
      "rewards/rejected": 0.0001596075453562662,
      "step": 10
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.03125,
      "learning_rate": 2.610966057441253e-07,
      "logits/chosen": -2.9189207553863525,
      "logits/rejected": -2.8314878940582275,
      "logps/chosen": -452.43377685546875,
      "logps/rejected": -340.9873046875,
      "loss": 0.6934,
      "rewards/accuracies": 0.45625001192092896,
      "rewards/chosen": -0.0003274443151894957,
      "rewards/margins": -0.0004972027963958681,
      "rewards/rejected": 0.00016975855396594852,
      "step": 20
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9921875,
      "learning_rate": 3.9164490861618804e-07,
      "logits/chosen": -2.922006607055664,
      "logits/rejected": -2.8864428997039795,
      "logps/chosen": -397.01446533203125,
      "logps/rejected": -380.24017333984375,
      "loss": 0.6932,
      "rewards/accuracies": 0.48124998807907104,
      "rewards/chosen": 0.00024825072614476085,
      "rewards/margins": -0.00015650910791009665,
      "rewards/rejected": 0.00040475986315868795,
      "step": 30
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1171875,
      "learning_rate": 5.221932114882506e-07,
      "logits/chosen": -2.840169906616211,
      "logits/rejected": -2.8260841369628906,
      "logps/chosen": -335.4841613769531,
      "logps/rejected": -324.1778259277344,
      "loss": 0.6928,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.0013349488144740462,
      "rewards/margins": 0.0006949803791940212,
      "rewards/rejected": 0.000639968435280025,
      "step": 40
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.875,
      "learning_rate": 6.527415143603135e-07,
      "logits/chosen": -2.9089934825897217,
      "logits/rejected": -2.8778884410858154,
      "logps/chosen": -403.4610900878906,
      "logps/rejected": -344.0279846191406,
      "loss": 0.693,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": 0.001873858505859971,
      "rewards/margins": 0.00040313409408554435,
      "rewards/rejected": 0.0014707243535667658,
      "step": 50
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.83203125,
      "learning_rate": 7.832898172323761e-07,
      "logits/chosen": -2.842329502105713,
      "logits/rejected": -2.808168888092041,
      "logps/chosen": -382.2471923828125,
      "logps/rejected": -331.0003662109375,
      "loss": 0.693,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": 0.0033431951887905598,
      "rewards/margins": 0.00034264856367371976,
      "rewards/rejected": 0.0030005467124283314,
      "step": 60
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4765625,
      "learning_rate": 9.138381201044387e-07,
      "logits/chosen": -2.939213991165161,
      "logits/rejected": -2.8729026317596436,
      "logps/chosen": -406.10760498046875,
      "logps/rejected": -353.65576171875,
      "loss": 0.6935,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.003925986122339964,
      "rewards/margins": -0.0006284945411607623,
      "rewards/rejected": 0.004554481245577335,
      "step": 70
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4453125,
      "learning_rate": 1.0443864229765013e-06,
      "logits/chosen": -2.8512609004974365,
      "logits/rejected": -2.8039116859436035,
      "logps/chosen": -399.90130615234375,
      "logps/rejected": -359.86932373046875,
      "loss": 0.6924,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": 0.005817199591547251,
      "rewards/margins": 0.0015620887279510498,
      "rewards/rejected": 0.004255110863596201,
      "step": 80
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.97265625,
      "learning_rate": 1.1749347258485642e-06,
      "logits/chosen": -2.8811564445495605,
      "logits/rejected": -2.8479971885681152,
      "logps/chosen": -381.4345703125,
      "logps/rejected": -349.1830139160156,
      "loss": 0.692,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.009745048359036446,
      "rewards/margins": 0.0022289410699158907,
      "rewards/rejected": 0.007516107521951199,
      "step": 90
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.90234375,
      "learning_rate": 1.305483028720627e-06,
      "logits/chosen": -2.8783085346221924,
      "logits/rejected": -2.8482470512390137,
      "logps/chosen": -379.60736083984375,
      "logps/rejected": -338.56134033203125,
      "loss": 0.6921,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": 0.01279132068157196,
      "rewards/margins": 0.002047107554972172,
      "rewards/rejected": 0.010744214989244938,
      "step": 100
    },
    {
      "epoch": 0.03,
      "eval_logits/chosen": -2.836320638656616,
      "eval_logits/rejected": -2.784111261367798,
      "eval_logps/chosen": -394.6286315917969,
      "eval_logps/rejected": -350.2682800292969,
      "eval_loss": 0.6922685503959656,
      "eval_rewards/accuracies": 0.5644999742507935,
      "eval_rewards/chosen": 0.015961581841111183,
      "eval_rewards/margins": 0.0018089638324454427,
      "eval_rewards/rejected": 0.014152619056403637,
      "eval_runtime": 347.8719,
      "eval_samples_per_second": 5.749,
      "eval_steps_per_second": 0.719,
      "step": 100
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.8359375,
      "learning_rate": 1.4360313315926894e-06,
      "logits/chosen": -2.9356420040130615,
      "logits/rejected": -2.863084554672241,
      "logps/chosen": -409.4505310058594,
      "logps/rejected": -340.94085693359375,
      "loss": 0.6926,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.015902375802397728,
      "rewards/margins": 0.0012320507084950805,
      "rewards/rejected": 0.014670324511826038,
      "step": 110
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9140625,
      "learning_rate": 1.5665796344647521e-06,
      "logits/chosen": -2.9147744178771973,
      "logits/rejected": -2.8579554557800293,
      "logps/chosen": -428.4613342285156,
      "logps/rejected": -384.51239013671875,
      "loss": 0.6912,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": 0.019384171813726425,
      "rewards/margins": 0.004034861922264099,
      "rewards/rejected": 0.0153493108227849,
      "step": 120
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0390625,
      "learning_rate": 1.6971279373368146e-06,
      "logits/chosen": -2.9081578254699707,
      "logits/rejected": -2.849456787109375,
      "logps/chosen": -383.9152526855469,
      "logps/rejected": -361.4400329589844,
      "loss": 0.6923,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": 0.014263955876231194,
      "rewards/margins": 0.0018312319880351424,
      "rewards/rejected": 0.012432724237442017,
      "step": 130
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.984375,
      "learning_rate": 1.8276762402088774e-06,
      "logits/chosen": -2.9242119789123535,
      "logits/rejected": -2.800161600112915,
      "logps/chosen": -417.3861389160156,
      "logps/rejected": -330.3959045410156,
      "loss": 0.6914,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": 0.016321910545229912,
      "rewards/margins": 0.0035601272247731686,
      "rewards/rejected": 0.01276178378611803,
      "step": 140
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6875,
      "learning_rate": 1.9582245430809403e-06,
      "logits/chosen": -2.943554639816284,
      "logits/rejected": -2.8792669773101807,
      "logps/chosen": -413.8592224121094,
      "logps/rejected": -343.1801452636719,
      "loss": 0.6911,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.018247485160827637,
      "rewards/margins": 0.0042217145673930645,
      "rewards/rejected": 0.014025771990418434,
      "step": 150
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.96484375,
      "learning_rate": 2.0887728459530026e-06,
      "logits/chosen": -2.8945369720458984,
      "logits/rejected": -2.8573861122131348,
      "logps/chosen": -378.51092529296875,
      "logps/rejected": -363.04132080078125,
      "loss": 0.6911,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": 0.019316475838422775,
      "rewards/margins": 0.004308086819946766,
      "rewards/rejected": 0.01500838715583086,
      "step": 160
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.203125,
      "learning_rate": 2.2193211488250653e-06,
      "logits/chosen": -2.8808963298797607,
      "logits/rejected": -2.8329708576202393,
      "logps/chosen": -337.3949279785156,
      "logps/rejected": -312.98773193359375,
      "loss": 0.6898,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.022478580474853516,
      "rewards/margins": 0.006769159343093634,
      "rewards/rejected": 0.015709420666098595,
      "step": 170
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.75,
      "learning_rate": 2.3498694516971284e-06,
      "logits/chosen": -2.8937134742736816,
      "logits/rejected": -2.837139129638672,
      "logps/chosen": -379.23760986328125,
      "logps/rejected": -340.9472351074219,
      "loss": 0.6901,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": 0.02863329090178013,
      "rewards/margins": 0.006192624568939209,
      "rewards/rejected": 0.02244066260755062,
      "step": 180
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.140625,
      "learning_rate": 2.4804177545691907e-06,
      "logits/chosen": -2.944746494293213,
      "logits/rejected": -2.892685651779175,
      "logps/chosen": -402.75653076171875,
      "logps/rejected": -342.20172119140625,
      "loss": 0.6916,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": 0.03445928543806076,
      "rewards/margins": 0.003415898187085986,
      "rewards/rejected": 0.031043391674757004,
      "step": 190
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.546875,
      "learning_rate": 2.610966057441254e-06,
      "logits/chosen": -2.8978257179260254,
      "logits/rejected": -2.837639570236206,
      "logps/chosen": -374.3539733886719,
      "logps/rejected": -313.32257080078125,
      "loss": 0.6894,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": 0.040689874440431595,
      "rewards/margins": 0.008053514175117016,
      "rewards/rejected": 0.032636359333992004,
      "step": 200
    },
    {
      "epoch": 0.05,
      "eval_logits/chosen": -2.8333005905151367,
      "eval_logits/rejected": -2.781090021133423,
      "eval_logps/chosen": -391.8948669433594,
      "eval_logps/rejected": -348.1495361328125,
      "eval_loss": 0.6894406080245972,
      "eval_rewards/accuracies": 0.5920000076293945,
      "eval_rewards/chosen": 0.04329930990934372,
      "eval_rewards/margins": 0.007959411479532719,
      "eval_rewards/rejected": 0.03533989191055298,
      "eval_runtime": 347.8469,
      "eval_samples_per_second": 5.75,
      "eval_steps_per_second": 0.719,
      "step": 200
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.89453125,
      "learning_rate": 2.741514360313316e-06,
      "logits/chosen": -2.9700427055358887,
      "logits/rejected": -2.8941903114318848,
      "logps/chosen": -390.8605041503906,
      "logps/rejected": -332.96624755859375,
      "loss": 0.6898,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": 0.04209558665752411,
      "rewards/margins": 0.0070165605284273624,
      "rewards/rejected": 0.035079024732112885,
      "step": 210
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.859375,
      "learning_rate": 2.872062663185379e-06,
      "logits/chosen": -2.862724542617798,
      "logits/rejected": -2.8397350311279297,
      "logps/chosen": -380.3348083496094,
      "logps/rejected": -332.48809814453125,
      "loss": 0.6896,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.04277118295431137,
      "rewards/margins": 0.007822849787771702,
      "rewards/rejected": 0.034948334097862244,
      "step": 220
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9765625,
      "learning_rate": 3.0026109660574416e-06,
      "logits/chosen": -2.980745792388916,
      "logits/rejected": -2.915937900543213,
      "logps/chosen": -444.210205078125,
      "logps/rejected": -392.0636291503906,
      "loss": 0.6886,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": 0.04879799112677574,
      "rewards/margins": 0.009785487316548824,
      "rewards/rejected": 0.039012499153614044,
      "step": 230
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9453125,
      "learning_rate": 3.1331592689295043e-06,
      "logits/chosen": -2.897602081298828,
      "logits/rejected": -2.86655592918396,
      "logps/chosen": -422.80157470703125,
      "logps/rejected": -391.47808837890625,
      "loss": 0.6864,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": 0.05084707587957382,
      "rewards/margins": 0.014665389433503151,
      "rewards/rejected": 0.03618168458342552,
      "step": 240
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.8671875,
      "learning_rate": 3.263707571801567e-06,
      "logits/chosen": -2.885866165161133,
      "logits/rejected": -2.8721957206726074,
      "logps/chosen": -390.76702880859375,
      "logps/rejected": -342.5933837890625,
      "loss": 0.6892,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.05445907637476921,
      "rewards/margins": 0.009773282334208488,
      "rewards/rejected": 0.044685788452625275,
      "step": 250
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0,
      "learning_rate": 3.3942558746736293e-06,
      "logits/chosen": -2.9248204231262207,
      "logits/rejected": -2.853421449661255,
      "logps/chosen": -404.9168701171875,
      "logps/rejected": -348.4195861816406,
      "loss": 0.686,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": 0.0569482147693634,
      "rewards/margins": 0.0154123455286026,
      "rewards/rejected": 0.041535865515470505,
      "step": 260
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.68359375,
      "learning_rate": 3.524804177545692e-06,
      "logits/chosen": -2.888120174407959,
      "logits/rejected": -2.8404757976531982,
      "logps/chosen": -384.75872802734375,
      "logps/rejected": -330.0838928222656,
      "loss": 0.6859,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": 0.05628432705998421,
      "rewards/margins": 0.016307855024933815,
      "rewards/rejected": 0.039976466447114944,
      "step": 270
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.875,
      "learning_rate": 3.6553524804177547e-06,
      "logits/chosen": -2.906266212463379,
      "logits/rejected": -2.832149028778076,
      "logps/chosen": -387.818115234375,
      "logps/rejected": -337.62017822265625,
      "loss": 0.6871,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.060541294515132904,
      "rewards/margins": 0.013487743213772774,
      "rewards/rejected": 0.04705354943871498,
      "step": 280
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.94921875,
      "learning_rate": 3.7859007832898174e-06,
      "logits/chosen": -2.888960361480713,
      "logits/rejected": -2.847351551055908,
      "logps/chosen": -411.20904541015625,
      "logps/rejected": -362.61474609375,
      "loss": 0.6837,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.07661953568458557,
      "rewards/margins": 0.022713415324687958,
      "rewards/rejected": 0.053906120359897614,
      "step": 290
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.92578125,
      "learning_rate": 3.9164490861618806e-06,
      "logits/chosen": -2.868332862854004,
      "logits/rejected": -2.7791736125946045,
      "logps/chosen": -369.7691345214844,
      "logps/rejected": -329.0905456542969,
      "loss": 0.6815,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.07754194736480713,
      "rewards/margins": 0.024631675332784653,
      "rewards/rejected": 0.05291026830673218,
      "step": 300
    },
    {
      "epoch": 0.08,
      "eval_logits/chosen": -2.8349289894104004,
      "eval_logits/rejected": -2.783784866333008,
      "eval_logps/chosen": -388.1691589355469,
      "eval_logps/rejected": -345.5898132324219,
      "eval_loss": 0.6844429969787598,
      "eval_rewards/accuracies": 0.6025000214576721,
      "eval_rewards/chosen": 0.08055612444877625,
      "eval_rewards/margins": 0.019618848338723183,
      "eval_rewards/rejected": 0.06093727424740791,
      "eval_runtime": 347.9802,
      "eval_samples_per_second": 5.747,
      "eval_steps_per_second": 0.718,
      "step": 300
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.125,
      "learning_rate": 4.046997389033943e-06,
      "logits/chosen": -2.9738924503326416,
      "logits/rejected": -2.891376495361328,
      "logps/chosen": -419.47601318359375,
      "logps/rejected": -330.5395202636719,
      "loss": 0.6852,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.08353392034769058,
      "rewards/margins": 0.01769311912357807,
      "rewards/rejected": 0.06584079563617706,
      "step": 310
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.83984375,
      "learning_rate": 4.177545691906005e-06,
      "logits/chosen": -2.9381539821624756,
      "logits/rejected": -2.868596076965332,
      "logps/chosen": -382.90325927734375,
      "logps/rejected": -343.45361328125,
      "loss": 0.6849,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": 0.07833738625049591,
      "rewards/margins": 0.018442410975694656,
      "rewards/rejected": 0.059894971549510956,
      "step": 320
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.87109375,
      "learning_rate": 4.308093994778068e-06,
      "logits/chosen": -2.8898563385009766,
      "logits/rejected": -2.863262176513672,
      "logps/chosen": -379.0876159667969,
      "logps/rejected": -343.98748779296875,
      "loss": 0.6769,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": 0.11513115465641022,
      "rewards/margins": 0.03892900422215462,
      "rewards/rejected": 0.0762021541595459,
      "step": 330
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.8984375,
      "learning_rate": 4.4386422976501306e-06,
      "logits/chosen": -2.924078941345215,
      "logits/rejected": -2.884129762649536,
      "logps/chosen": -412.20428466796875,
      "logps/rejected": -369.37615966796875,
      "loss": 0.6837,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": 0.10442419350147247,
      "rewards/margins": 0.021783817559480667,
      "rewards/rejected": 0.0826403871178627,
      "step": 340
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.921875,
      "learning_rate": 4.569190600522193e-06,
      "logits/chosen": -2.8822202682495117,
      "logits/rejected": -2.832418441772461,
      "logps/chosen": -419.82122802734375,
      "logps/rejected": -383.4366455078125,
      "loss": 0.6807,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": 0.1065782904624939,
      "rewards/margins": 0.02887110412120819,
      "rewards/rejected": 0.0777071863412857,
      "step": 350
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.6328125,
      "learning_rate": 4.699738903394257e-06,
      "logits/chosen": -2.9021358489990234,
      "logits/rejected": -2.8665103912353516,
      "logps/chosen": -353.05645751953125,
      "logps/rejected": -322.67803955078125,
      "loss": 0.6916,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.07063518464565277,
      "rewards/margins": 0.0060135251842439175,
      "rewards/rejected": 0.06462165713310242,
      "step": 360
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.109375,
      "learning_rate": 4.8302872062663196e-06,
      "logits/chosen": -2.914665699005127,
      "logits/rejected": -2.8427698612213135,
      "logps/chosen": -405.73809814453125,
      "logps/rejected": -331.1866149902344,
      "loss": 0.6722,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": 0.09655407071113586,
      "rewards/margins": 0.05310269445180893,
      "rewards/rejected": 0.043451376259326935,
      "step": 370
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.94140625,
      "learning_rate": 4.9608355091383814e-06,
      "logits/chosen": -2.900359869003296,
      "logits/rejected": -2.81068754196167,
      "logps/chosen": -427.6316833496094,
      "logps/rejected": -363.28839111328125,
      "loss": 0.6717,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": 0.10045112669467926,
      "rewards/margins": 0.053518980741500854,
      "rewards/rejected": 0.046932149678468704,
      "step": 380
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3984375,
      "learning_rate": 4.9999488562447675e-06,
      "logits/chosen": -2.8918914794921875,
      "logits/rejected": -2.845247745513916,
      "logps/chosen": -412.29071044921875,
      "logps/rejected": -371.7367248535156,
      "loss": 0.6796,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": 0.07834922522306442,
      "rewards/margins": 0.031100135296583176,
      "rewards/rejected": 0.04724908620119095,
      "step": 390
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.796875,
      "learning_rate": 4.999698361256577e-06,
      "logits/chosen": -2.9121220111846924,
      "logits/rejected": -2.850386619567871,
      "logps/chosen": -381.02825927734375,
      "logps/rejected": -322.06927490234375,
      "loss": 0.6869,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": 0.08469127863645554,
      "rewards/margins": 0.020099209621548653,
      "rewards/rejected": 0.06459207087755203,
      "step": 400
    },
    {
      "epoch": 0.1,
      "eval_logits/chosen": -2.842348098754883,
      "eval_logits/rejected": -2.7931265830993652,
      "eval_logps/chosen": -390.1521911621094,
      "eval_logps/rejected": -348.9979248046875,
      "eval_loss": 0.6788274645805359,
      "eval_rewards/accuracies": 0.612500011920929,
      "eval_rewards/chosen": 0.0607261136174202,
      "eval_rewards/margins": 0.0338701568543911,
      "eval_rewards/rejected": 0.026855960488319397,
      "eval_runtime": 347.7786,
      "eval_samples_per_second": 5.751,
      "eval_steps_per_second": 0.719,
      "step": 400
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1328125,
      "learning_rate": 4.999239142174581e-06,
      "logits/chosen": -2.8766117095947266,
      "logits/rejected": -2.8555123805999756,
      "logps/chosen": -361.97406005859375,
      "logps/rejected": -350.51287841796875,
      "loss": 0.6959,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": 0.05293396860361099,
      "rewards/margins": 0.0049603343941271305,
      "rewards/rejected": 0.0479736328125,
      "step": 410
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0859375,
      "learning_rate": 4.99857123734344e-06,
      "logits/chosen": -2.8863377571105957,
      "logits/rejected": -2.8117470741271973,
      "logps/chosen": -351.9708251953125,
      "logps/rejected": -313.9596252441406,
      "loss": 0.6861,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.033623430877923965,
      "rewards/margins": 0.01676887646317482,
      "rewards/rejected": 0.016854556277394295,
      "step": 420
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0703125,
      "learning_rate": 4.997694702533016e-06,
      "logits/chosen": -2.8675060272216797,
      "logits/rejected": -2.8136143684387207,
      "logps/chosen": -408.8647155761719,
      "logps/rejected": -369.70257568359375,
      "loss": 0.686,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.041368208825588226,
      "rewards/margins": 0.02153196558356285,
      "rewards/rejected": 0.019836245104670525,
      "step": 430
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.78515625,
      "learning_rate": 4.996609610933713e-06,
      "logits/chosen": -2.9551219940185547,
      "logits/rejected": -2.9198532104492188,
      "logps/chosen": -399.39410400390625,
      "logps/rejected": -351.37982177734375,
      "loss": 0.6821,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": 0.03481561690568924,
      "rewards/margins": 0.02588939666748047,
      "rewards/rejected": 0.00892622210085392,
      "step": 440
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.85546875,
      "learning_rate": 4.995316053150366e-06,
      "logits/chosen": -2.879997968673706,
      "logits/rejected": -2.857247829437256,
      "logps/chosen": -394.7357482910156,
      "logps/rejected": -354.9325256347656,
      "loss": 0.6736,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": 0.04896073415875435,
      "rewards/margins": 0.04451023414731026,
      "rewards/rejected": 0.004450496751815081,
      "step": 450
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1796875,
      "learning_rate": 4.9938141371946815e-06,
      "logits/chosen": -2.89152455329895,
      "logits/rejected": -2.8554794788360596,
      "logps/chosen": -394.02325439453125,
      "logps/rejected": -363.3119201660156,
      "loss": 0.6753,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.05705699324607849,
      "rewards/margins": 0.047898683696985245,
      "rewards/rejected": 0.009158318862318993,
      "step": 460
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7109375,
      "learning_rate": 4.992103988476206e-06,
      "logits/chosen": -2.8964645862579346,
      "logits/rejected": -2.844935417175293,
      "logps/chosen": -369.896484375,
      "logps/rejected": -337.79278564453125,
      "loss": 0.6741,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": 0.05254621058702469,
      "rewards/margins": 0.04723441228270531,
      "rewards/rejected": 0.005311795976012945,
      "step": 470
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.94921875,
      "learning_rate": 4.990185749791866e-06,
      "logits/chosen": -2.928675413131714,
      "logits/rejected": -2.873516798019409,
      "logps/chosen": -383.92852783203125,
      "logps/rejected": -358.11566162109375,
      "loss": 0.6722,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.039998866617679596,
      "rewards/margins": 0.04847729206085205,
      "rewards/rejected": -0.008478422649204731,
      "step": 480
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.078125,
      "learning_rate": 4.9880595813140395e-06,
      "logits/chosen": -2.9350996017456055,
      "logits/rejected": -2.8882603645324707,
      "logps/chosen": -420.064697265625,
      "logps/rejected": -359.68072509765625,
      "loss": 0.6711,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": 0.030888009816408157,
      "rewards/margins": 0.04867198318243027,
      "rewards/rejected": -0.01778397336602211,
      "step": 490
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.03125,
      "learning_rate": 4.985725660577184e-06,
      "logits/chosen": -2.916215658187866,
      "logits/rejected": -2.8541104793548584,
      "logps/chosen": -411.7395935058594,
      "logps/rejected": -343.6163330078125,
      "loss": 0.6744,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.018574411049485207,
      "rewards/margins": 0.047007013112306595,
      "rewards/rejected": -0.028432602062821388,
      "step": 500
    },
    {
      "epoch": 0.13,
      "eval_logits/chosen": -2.8370718955993652,
      "eval_logits/rejected": -2.7888970375061035,
      "eval_logps/chosen": -393.79833984375,
      "eval_logps/rejected": -354.1763610839844,
      "eval_loss": 0.6723790168762207,
      "eval_rewards/accuracies": 0.6209999918937683,
      "eval_rewards/chosen": 0.02426437847316265,
      "eval_rewards/margins": 0.04919267073273659,
      "eval_rewards/rejected": -0.024928290396928787,
      "eval_runtime": 347.9,
      "eval_samples_per_second": 5.749,
      "eval_steps_per_second": 0.719,
      "step": 500
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1171875,
      "learning_rate": 4.983184182463009e-06,
      "logits/chosen": -2.90047025680542,
      "logits/rejected": -2.8580737113952637,
      "logps/chosen": -407.69781494140625,
      "logps/rejected": -361.30267333984375,
      "loss": 0.6753,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.039911769330501556,
      "rewards/margins": 0.046458516269922256,
      "rewards/rejected": -0.0065467446111142635,
      "step": 510
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2734375,
      "learning_rate": 4.980435359184203e-06,
      "logits/chosen": -2.914069414138794,
      "logits/rejected": -2.8952417373657227,
      "logps/chosen": -400.2842712402344,
      "logps/rejected": -370.97039794921875,
      "loss": 0.6663,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.015612982213497162,
      "rewards/margins": 0.06524328887462616,
      "rewards/rejected": -0.0496302992105484,
      "step": 520
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0859375,
      "learning_rate": 4.9774794202667236e-06,
      "logits/chosen": -2.8638806343078613,
      "logits/rejected": -2.8652124404907227,
      "logps/chosen": -400.7289123535156,
      "logps/rejected": -399.452880859375,
      "loss": 0.6814,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.001105122035369277,
      "rewards/margins": 0.03718414530158043,
      "rewards/rejected": -0.03828927129507065,
      "step": 530
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.125,
      "learning_rate": 4.974316612530615e-06,
      "logits/chosen": -2.869264841079712,
      "logits/rejected": -2.8013787269592285,
      "logps/chosen": -414.38800048828125,
      "logps/rejected": -343.013671875,
      "loss": 0.6495,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -0.005649521015584469,
      "rewards/margins": 0.10599911212921143,
      "rewards/rejected": -0.11164864152669907,
      "step": 540
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.015625,
      "learning_rate": 4.970947200069416e-06,
      "logits/chosen": -2.867119073867798,
      "logits/rejected": -2.8448452949523926,
      "logps/chosen": -406.0426940917969,
      "logps/rejected": -376.4588623046875,
      "loss": 0.666,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": 0.004353336989879608,
      "rewards/margins": 0.07087867707014084,
      "rewards/rejected": -0.06652534753084183,
      "step": 550
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.078125,
      "learning_rate": 4.967371464228096e-06,
      "logits/chosen": -2.9276793003082275,
      "logits/rejected": -2.8994839191436768,
      "logps/chosen": -393.2682189941406,
      "logps/rejected": -381.3654479980469,
      "loss": 0.6694,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -0.01845262572169304,
      "rewards/margins": 0.057842254638671875,
      "rewards/rejected": -0.07629488408565521,
      "step": 560
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1328125,
      "learning_rate": 4.963589703579569e-06,
      "logits/chosen": -2.9889473915100098,
      "logits/rejected": -2.9415230751037598,
      "logps/chosen": -441.64227294921875,
      "logps/rejected": -392.90789794921875,
      "loss": 0.6794,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": -0.01707800105214119,
      "rewards/margins": 0.04300212487578392,
      "rewards/rejected": -0.06008012965321541,
      "step": 570
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2421875,
      "learning_rate": 4.9596022338997615e-06,
      "logits/chosen": -2.9623188972473145,
      "logits/rejected": -2.8731496334075928,
      "logps/chosen": -446.22662353515625,
      "logps/rejected": -381.77752685546875,
      "loss": 0.656,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -0.03769092634320259,
      "rewards/margins": 0.09427281469106674,
      "rewards/rejected": -0.13196374475955963,
      "step": 580
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.203125,
      "learning_rate": 4.955409388141243e-06,
      "logits/chosen": -2.857860565185547,
      "logits/rejected": -2.8171629905700684,
      "logps/chosen": -391.1763916015625,
      "logps/rejected": -357.4033203125,
      "loss": 0.663,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -0.09089629352092743,
      "rewards/margins": 0.07482485473155975,
      "rewards/rejected": -0.165721133351326,
      "step": 590
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.46875,
      "learning_rate": 4.951011516405429e-06,
      "logits/chosen": -2.895017147064209,
      "logits/rejected": -2.8843705654144287,
      "logps/chosen": -395.64947509765625,
      "logps/rejected": -366.892822265625,
      "loss": 0.6679,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.04552796110510826,
      "rewards/margins": 0.06777463853359222,
      "rewards/rejected": -0.11330260336399078,
      "step": 600
    },
    {
      "epoch": 0.16,
      "eval_logits/chosen": -2.817946672439575,
      "eval_logits/rejected": -2.7708613872528076,
      "eval_logps/chosen": -401.88262939453125,
      "eval_logps/rejected": -365.14019775390625,
      "eval_loss": 0.6624515652656555,
      "eval_rewards/accuracies": 0.6265000104904175,
      "eval_rewards/chosen": -0.05657815560698509,
      "eval_rewards/margins": 0.07798823714256287,
      "eval_rewards/rejected": -0.13456639647483826,
      "eval_runtime": 348.0683,
      "eval_samples_per_second": 5.746,
      "eval_steps_per_second": 0.718,
      "step": 600
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.375,
      "learning_rate": 4.946408985913344e-06,
      "logits/chosen": -2.8852925300598145,
      "logits/rejected": -2.841823101043701,
      "logps/chosen": -383.7059020996094,
      "logps/rejected": -347.17669677734375,
      "loss": 0.6691,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.05533725023269653,
      "rewards/margins": 0.0660722628235817,
      "rewards/rejected": -0.12140952050685883,
      "step": 610
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8515625,
      "learning_rate": 4.941602180974958e-06,
      "logits/chosen": -2.8929924964904785,
      "logits/rejected": -2.801697254180908,
      "logps/chosen": -444.3966369628906,
      "logps/rejected": -354.19598388671875,
      "loss": 0.6443,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -0.15692010521888733,
|
"rewards/margins": 0.11467301845550537, |
|
"rewards/rejected": -0.2715931236743927, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.8671875, |
|
"learning_rate": 4.936591502957101e-06, |
|
"logits/chosen": -2.893341541290283, |
|
"logits/rejected": -2.8479011058807373, |
|
"logps/chosen": -392.2441711425781, |
|
"logps/rejected": -361.94268798828125, |
|
"loss": 0.6421, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.15643954277038574, |
|
"rewards/margins": 0.12407402694225311, |
|
"rewards/rejected": -0.28051358461380005, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.90625, |
|
"learning_rate": 4.931377370249946e-06, |
|
"logits/chosen": -2.8857052326202393, |
|
"logits/rejected": -2.811861515045166, |
|
"logps/chosen": -418.484619140625, |
|
"logps/rejected": -368.8498840332031, |
|
"loss": 0.6506, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.21659335494041443, |
|
"rewards/margins": 0.10539694130420685, |
|
"rewards/rejected": -0.3219902813434601, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.65625, |
|
"learning_rate": 4.925960218232073e-06, |
|
"logits/chosen": -2.8835432529449463, |
|
"logits/rejected": -2.842060089111328, |
|
"logps/chosen": -399.4352722167969, |
|
"logps/rejected": -387.4463806152344, |
|
"loss": 0.6506, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.17046542465686798, |
|
"rewards/margins": 0.11528462171554565, |
|
"rewards/rejected": -0.28575003147125244, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.625, |
|
"learning_rate": 4.920340499234116e-06, |
|
"logits/chosen": -2.8580405712127686, |
|
"logits/rejected": -2.7751071453094482, |
|
"logps/chosen": -410.6004943847656, |
|
"logps/rejected": -364.0141296386719, |
|
"loss": 0.6449, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.15695621073246002, |
|
"rewards/margins": 0.1257764846086502, |
|
"rewards/rejected": -0.28273266553878784, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.09375, |
|
"learning_rate": 4.914518682500995e-06, |
|
"logits/chosen": -2.93772292137146, |
|
"logits/rejected": -2.882884979248047, |
|
"logps/chosen": -430.3829650878906, |
|
"logps/rejected": -389.68328857421875, |
|
"loss": 0.6462, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.19247478246688843, |
|
"rewards/margins": 0.12777897715568542, |
|
"rewards/rejected": -0.32025375962257385, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.6015625, |
|
"learning_rate": 4.9084952541527315e-06, |
|
"logits/chosen": -2.8791463375091553, |
|
"logits/rejected": -2.821669101715088, |
|
"logps/chosen": -436.94708251953125, |
|
"logps/rejected": -379.05267333984375, |
|
"loss": 0.6408, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.30955275893211365, |
|
"rewards/margins": 0.13841886818408966, |
|
"rewards/rejected": -0.4479715824127197, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 2.4375, |
|
"learning_rate": 4.902270717143858e-06, |
|
"logits/chosen": -2.887767791748047, |
|
"logits/rejected": -2.8599493503570557, |
|
"logps/chosen": -401.97064208984375, |
|
"logps/rejected": -403.107421875, |
|
"loss": 0.661, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.3961626887321472, |
|
"rewards/margins": 0.1000264510512352, |
|
"rewards/rejected": -0.4961891770362854, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.78125, |
|
"learning_rate": 4.895845591221427e-06, |
|
"logits/chosen": -2.8757412433624268, |
|
"logits/rejected": -2.86216139793396, |
|
"logps/chosen": -415.5210876464844, |
|
"logps/rejected": -404.1314697265625, |
|
"loss": 0.637, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.2919083535671234, |
|
"rewards/margins": 0.1432960480451584, |
|
"rewards/rejected": -0.435204416513443, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_logits/chosen": -2.8051064014434814, |
|
"eval_logits/rejected": -2.7595512866973877, |
|
"eval_logps/chosen": -421.9038391113281, |
|
"eval_logps/rejected": -388.2210998535156, |
|
"eval_loss": 0.6554521918296814, |
|
"eval_rewards/accuracies": 0.6290000081062317, |
|
"eval_rewards/chosen": -0.256790429353714, |
|
"eval_rewards/margins": 0.10858490318059921, |
|
"eval_rewards/rejected": -0.3653753399848938, |
|
"eval_runtime": 348.1124, |
|
"eval_samples_per_second": 5.745, |
|
"eval_steps_per_second": 0.718, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.625, |
|
"learning_rate": 4.8892204128816e-06, |
|
"logits/chosen": -2.8942840099334717, |
|
"logits/rejected": -2.857219934463501, |
|
"logps/chosen": -418.62322998046875, |
|
"logps/rejected": -394.4143981933594, |
|
"loss": 0.6587, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.1779194325208664, |
|
"rewards/margins": 0.0996006429195404, |
|
"rewards/rejected": -0.2775201201438904, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.6328125, |
|
"learning_rate": 4.882395735324864e-06, |
|
"logits/chosen": -2.8712573051452637, |
|
"logits/rejected": -2.8021373748779297, |
|
"logps/chosen": -414.42999267578125, |
|
"logps/rejected": -377.20977783203125, |
|
"loss": 0.6622, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.1302914321422577, |
|
"rewards/margins": 0.09119856357574463, |
|
"rewards/rejected": -0.22148998081684113, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.21875, |
|
"learning_rate": 4.87537212840983e-06, |
|
"logits/chosen": -2.872545003890991, |
|
"logits/rejected": -2.815669059753418, |
|
"logps/chosen": -410.472412109375, |
|
"logps/rejected": -358.6063232421875, |
|
"loss": 0.6462, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.07536014169454575, |
|
"rewards/margins": 0.11603609472513199, |
|
"rewards/rejected": -0.19139623641967773, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.96875, |
|
"learning_rate": 4.8681501786056545e-06, |
|
"logits/chosen": -2.844766139984131, |
|
"logits/rejected": -2.786101818084717, |
|
"logps/chosen": -370.5934143066406, |
|
"logps/rejected": -329.69647216796875, |
|
"loss": 0.6229, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.21907174587249756, |
|
"rewards/margins": 0.19244422018527985, |
|
"rewards/rejected": -0.41151589155197144, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.5, |
|
"learning_rate": 4.860730488943068e-06, |
|
"logits/chosen": -2.8021976947784424, |
|
"logits/rejected": -2.775529384613037, |
|
"logps/chosen": -399.1451110839844, |
|
"logps/rejected": -384.2425842285156, |
|
"loss": 0.653, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.397163063287735, |
|
"rewards/margins": 0.11699549108743668, |
|
"rewards/rejected": -0.5141586065292358, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.109375, |
|
"learning_rate": 4.853113678964022e-06, |
|
"logits/chosen": -2.816556453704834, |
|
"logits/rejected": -2.7907676696777344, |
|
"logps/chosen": -448.0435485839844, |
|
"logps/rejected": -432.1190490722656, |
|
"loss": 0.6524, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4579244554042816, |
|
"rewards/margins": 0.1237117201089859, |
|
"rewards/rejected": -0.5816361904144287, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.71875, |
|
"learning_rate": 4.845300384669958e-06, |
|
"logits/chosen": -2.839500665664673, |
|
"logits/rejected": -2.799598455429077, |
|
"logps/chosen": -418.523193359375, |
|
"logps/rejected": -380.72637939453125, |
|
"loss": 0.6546, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3512226939201355, |
|
"rewards/margins": 0.12142340838909149, |
|
"rewards/rejected": -0.4726460874080658, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.9296875, |
|
"learning_rate": 4.837291258468701e-06, |
|
"logits/chosen": -2.8736701011657715, |
|
"logits/rejected": -2.815397262573242, |
|
"logps/chosen": -448.5498046875, |
|
"logps/rejected": -413.5223693847656, |
|
"loss": 0.6451, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.3000739514827728, |
|
"rewards/margins": 0.14568910002708435, |
|
"rewards/rejected": -0.4457630515098572, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.453125, |
|
"learning_rate": 4.829086969119984e-06, |
|
"logits/chosen": -2.8422582149505615, |
|
"logits/rejected": -2.8511698246002197, |
|
"logps/chosen": -416.14312744140625, |
|
"logps/rejected": -407.68487548828125, |
|
"loss": 0.6656, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.430819571018219, |
|
"rewards/margins": 0.0904776006937027, |
|
"rewards/rejected": -0.5212971568107605, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.109375, |
|
"learning_rate": 4.820688201679605e-06, |
|
"logits/chosen": -2.9085071086883545, |
|
"logits/rejected": -2.8099634647369385, |
|
"logps/chosen": -441.54296875, |
|
"logps/rejected": -363.7981262207031, |
|
"loss": 0.6166, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.40520063042640686, |
|
"rewards/margins": 0.19839458167552948, |
|
"rewards/rejected": -0.6035951972007751, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -2.7961480617523193, |
|
"eval_logits/rejected": -2.7522692680358887, |
|
"eval_logps/chosen": -435.57562255859375, |
|
"eval_logps/rejected": -403.91156005859375, |
|
"eval_loss": 0.6488239765167236, |
|
"eval_rewards/accuracies": 0.6320000290870667, |
|
"eval_rewards/chosen": -0.39350807666778564, |
|
"eval_rewards/margins": 0.12877221405506134, |
|
"eval_rewards/rejected": -0.5222803354263306, |
|
"eval_runtime": 347.7457, |
|
"eval_samples_per_second": 5.751, |
|
"eval_steps_per_second": 0.719, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.7421875, |
|
"learning_rate": 4.8120956574422315e-06, |
|
"logits/chosen": -2.8743693828582764, |
|
"logits/rejected": -2.890942096710205, |
|
"logps/chosen": -443.807373046875, |
|
"logps/rejected": -438.0557556152344, |
|
"loss": 0.6621, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.4286819100379944, |
|
"rewards/margins": 0.10433633625507355, |
|
"rewards/rejected": -0.5330182313919067, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.421875, |
|
"learning_rate": 4.803310053882831e-06, |
|
"logits/chosen": -2.844068765640259, |
|
"logits/rejected": -2.8668532371520996, |
|
"logps/chosen": -392.5022277832031, |
|
"logps/rejected": -408.68621826171875, |
|
"loss": 0.6549, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.4813667833805084, |
|
"rewards/margins": 0.12066853046417236, |
|
"rewards/rejected": -0.6020352244377136, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.375, |
|
"learning_rate": 4.794332124596775e-06, |
|
"logits/chosen": -2.8786587715148926, |
|
"logits/rejected": -2.8575031757354736, |
|
"logps/chosen": -460.0040588378906, |
|
"logps/rejected": -449.45977783203125, |
|
"loss": 0.6536, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.4967479705810547, |
|
"rewards/margins": 0.12318827956914902, |
|
"rewards/rejected": -0.6199362874031067, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.671875, |
|
"learning_rate": 4.785162619238575e-06, |
|
"logits/chosen": -2.8527016639709473, |
|
"logits/rejected": -2.792952060699463, |
|
"logps/chosen": -423.39801025390625, |
|
"logps/rejected": -387.58355712890625, |
|
"loss": 0.654, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.43838852643966675, |
|
"rewards/margins": 0.12122461944818497, |
|
"rewards/rejected": -0.5596131086349487, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.09375, |
|
"learning_rate": 4.775802303459288e-06, |
|
"logits/chosen": -2.8446707725524902, |
|
"logits/rejected": -2.8177928924560547, |
|
"logps/chosen": -430.2727966308594, |
|
"logps/rejected": -414.4742736816406, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.44889992475509644, |
|
"rewards/margins": 0.10808303207159042, |
|
"rewards/rejected": -0.5569829940795898, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.734375, |
|
"learning_rate": 4.766251958842589e-06, |
|
"logits/chosen": -2.8193821907043457, |
|
"logits/rejected": -2.782836675643921, |
|
"logps/chosen": -438.9239196777344, |
|
"logps/rejected": -410.18048095703125, |
|
"loss": 0.6483, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.36728745698928833, |
|
"rewards/margins": 0.1387586146593094, |
|
"rewards/rejected": -0.5060460567474365, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.046875, |
|
"learning_rate": 4.7565123828395066e-06, |
|
"logits/chosen": -2.7876265048980713, |
|
"logits/rejected": -2.7442731857299805, |
|
"logps/chosen": -412.59539794921875, |
|
"logps/rejected": -410.63653564453125, |
|
"loss": 0.655, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.2634727656841278, |
|
"rewards/margins": 0.13101360201835632, |
|
"rewards/rejected": -0.39448636770248413, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.8828125, |
|
"learning_rate": 4.746584388701831e-06, |
|
"logits/chosen": -2.830331802368164, |
|
"logits/rejected": -2.815990924835205, |
|
"logps/chosen": -431.3417053222656, |
|
"logps/rejected": -406.23236083984375, |
|
"loss": 0.6457, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.308138370513916, |
|
"rewards/margins": 0.13623470067977905, |
|
"rewards/rejected": -0.44437307119369507, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.765625, |
|
"learning_rate": 4.736468805414218e-06, |
|
"logits/chosen": -2.8040425777435303, |
|
"logits/rejected": -2.8051490783691406, |
|
"logps/chosen": -414.428466796875, |
|
"logps/rejected": -427.8641052246094, |
|
"loss": 0.6559, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.3916015923023224, |
|
"rewards/margins": 0.15443366765975952, |
|
"rewards/rejected": -0.5460351705551147, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 2.1875, |
|
"learning_rate": 4.7261664776249595e-06, |
|
"logits/chosen": -2.7597362995147705, |
|
"logits/rejected": -2.7200279235839844, |
|
"logps/chosen": -403.2427673339844, |
|
"logps/rejected": -386.6829528808594, |
|
"loss": 0.6335, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.49025583267211914, |
|
"rewards/margins": 0.1762937605381012, |
|
"rewards/rejected": -0.666549563407898, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_logits/chosen": -2.7764461040496826, |
|
"eval_logits/rejected": -2.7324697971343994, |
|
"eval_logps/chosen": -441.37982177734375, |
|
"eval_logps/rejected": -412.10833740234375, |
|
"eval_loss": 0.6458185911178589, |
|
"eval_rewards/accuracies": 0.6380000114440918, |
|
"eval_rewards/chosen": -0.4515506625175476, |
|
"eval_rewards/margins": 0.1526976376771927, |
|
"eval_rewards/rejected": -0.6042482256889343, |
|
"eval_runtime": 347.8684, |
|
"eval_samples_per_second": 5.749, |
|
"eval_steps_per_second": 0.719, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.421875, |
|
"learning_rate": 4.715678265575463e-06, |
|
"logits/chosen": -2.8672194480895996, |
|
"logits/rejected": -2.7797975540161133, |
|
"logps/chosen": -462.484375, |
|
"logps/rejected": -388.0072326660156, |
|
"loss": 0.6518, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3822649419307709, |
|
"rewards/margins": 0.13505886495113373, |
|
"rewards/rejected": -0.5173237919807434, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 2.0, |
|
"learning_rate": 4.705005045028415e-06, |
|
"logits/chosen": -2.7902486324310303, |
|
"logits/rejected": -2.733579158782959, |
|
"logps/chosen": -440.94390869140625, |
|
"logps/rejected": -411.362060546875, |
|
"loss": 0.6272, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4113929867744446, |
|
"rewards/margins": 0.18324372172355652, |
|
"rewards/rejected": -0.5946367383003235, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 2.765625, |
|
"learning_rate": 4.694147707194659e-06, |
|
"logits/chosen": -2.855236768722534, |
|
"logits/rejected": -2.8222384452819824, |
|
"logps/chosen": -445.28094482421875, |
|
"logps/rejected": -420.7135314941406, |
|
"loss": 0.6181, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.42793193459510803, |
|
"rewards/margins": 0.2128107249736786, |
|
"rewards/rejected": -0.6407425999641418, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 11.5, |
|
"learning_rate": 4.683107158658782e-06, |
|
"logits/chosen": -2.787015676498413, |
|
"logits/rejected": -2.7473626136779785, |
|
"logps/chosen": -461.5128479003906, |
|
"logps/rejected": -439.97930908203125, |
|
"loss": 0.6182, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.39246460795402527, |
|
"rewards/margins": 0.2291673719882965, |
|
"rewards/rejected": -0.6216319799423218, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.25, |
|
"learning_rate": 4.671884321303407e-06, |
|
"logits/chosen": -2.8133838176727295, |
|
"logits/rejected": -2.7668540477752686, |
|
"logps/chosen": -431.5006408691406, |
|
"logps/rejected": -404.2099609375, |
|
"loss": 0.6353, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.518032431602478, |
|
"rewards/margins": 0.17736072838306427, |
|
"rewards/rejected": -0.6953932046890259, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.4375, |
|
"learning_rate": 4.660480132232224e-06, |
|
"logits/chosen": -2.8185486793518066, |
|
"logits/rejected": -2.793464183807373, |
|
"logps/chosen": -465.65814208984375, |
|
"logps/rejected": -436.23931884765625, |
|
"loss": 0.6434, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6132515668869019, |
|
"rewards/margins": 0.16154471039772034, |
|
"rewards/rejected": -0.7747962474822998, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 4.3125, |
|
"learning_rate": 4.6488955436917414e-06, |
|
"logits/chosen": -2.827209234237671, |
|
"logits/rejected": -2.7537784576416016, |
|
"logps/chosen": -473.32196044921875, |
|
"logps/rejected": -427.2911071777344, |
|
"loss": 0.5966, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6051194071769714, |
|
"rewards/margins": 0.27736273407936096, |
|
"rewards/rejected": -0.8824821710586548, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 4.34375, |
|
"learning_rate": 4.6371315229917644e-06, |
|
"logits/chosen": -2.8257224559783936, |
|
"logits/rejected": -2.7971818447113037, |
|
"logps/chosen": -481.39697265625, |
|
"logps/rejected": -462.51416015625, |
|
"loss": 0.6356, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.6484049558639526, |
|
"rewards/margins": 0.18180248141288757, |
|
"rewards/rejected": -0.8302074670791626, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 2.171875, |
|
"learning_rate": 4.625189052424638e-06, |
|
"logits/chosen": -2.8246514797210693, |
|
"logits/rejected": -2.7782371044158936, |
|
"logps/chosen": -443.04925537109375, |
|
"logps/rejected": -426.6200256347656, |
|
"loss": 0.6097, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7901213765144348, |
|
"rewards/margins": 0.251308411359787, |
|
"rewards/rejected": -1.0414297580718994, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 2.1875, |
|
"learning_rate": 4.613069129183218e-06, |
|
"logits/chosen": -2.8568129539489746, |
|
"logits/rejected": -2.7981626987457275, |
|
"logps/chosen": -530.5858154296875, |
|
"logps/rejected": -487.3711853027344, |
|
"loss": 0.6286, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7988755106925964, |
|
"rewards/margins": 0.19343645870685577, |
|
"rewards/rejected": -0.9923120737075806, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.753098964691162, |
|
"eval_logits/rejected": -2.7123119831085205, |
|
"eval_logps/chosen": -483.1429443359375, |
|
"eval_logps/rejected": -456.1025695800781, |
|
"eval_loss": 0.6405959725379944, |
|
"eval_rewards/accuracies": 0.625, |
|
"eval_rewards/chosen": -0.869181215763092, |
|
"eval_rewards/margins": 0.17500866949558258, |
|
"eval_rewards/rejected": -1.0441899299621582, |
|
"eval_runtime": 347.7912, |
|
"eval_samples_per_second": 5.751, |
|
"eval_steps_per_second": 0.719, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 4.3125, |
|
"learning_rate": 4.600772765277607e-06, |
|
"logits/chosen": -2.7716658115386963, |
|
"logits/rejected": -2.7464160919189453, |
|
"logps/chosen": -449.4707946777344, |
|
"logps/rejected": -440.67181396484375, |
|
"loss": 0.6483, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.828794002532959, |
|
"rewards/margins": 0.16667340695858002, |
|
"rewards/rejected": -0.995467483997345, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.15625, |
|
"learning_rate": 4.588300987450652e-06, |
|
"logits/chosen": -2.82257342338562, |
|
"logits/rejected": -2.7864184379577637, |
|
"logps/chosen": -431.7757263183594, |
|
"logps/rejected": -390.1167907714844, |
|
"loss": 0.6416, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.5249059200286865, |
|
"rewards/margins": 0.17124707996845245, |
|
"rewards/rejected": -0.6961530447006226, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.265625, |
|
"learning_rate": 4.5756548370922136e-06, |
|
"logits/chosen": -2.77396821975708, |
|
"logits/rejected": -2.745842456817627, |
|
"logps/chosen": -399.4544982910156, |
|
"logps/rejected": -385.6885070800781, |
|
"loss": 0.6561, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.313784658908844, |
|
"rewards/margins": 0.1287391483783722, |
|
"rewards/rejected": -0.44252386689186096, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.46875, |
|
"learning_rate": 4.562835370152206e-06, |
|
"logits/chosen": -2.8060081005096436, |
|
"logits/rejected": -2.755823850631714, |
|
"logps/chosen": -465.9139709472656, |
|
"logps/rejected": -425.3163146972656, |
|
"loss": 0.6176, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.2053132951259613, |
|
"rewards/margins": 0.21412332355976105, |
|
"rewards/rejected": -0.41943663358688354, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.734375, |
|
"learning_rate": 4.54984365705243e-06, |
|
"logits/chosen": -2.8254618644714355, |
|
"logits/rejected": -2.7887418270111084, |
|
"logps/chosen": -431.9395446777344, |
|
"logps/rejected": -413.49468994140625, |
|
"loss": 0.6406, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3133324682712555, |
|
"rewards/margins": 0.14582258462905884, |
|
"rewards/rejected": -0.45915499329566956, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.3125, |
|
"learning_rate": 4.536680782597191e-06, |
|
"logits/chosen": -2.744650363922119, |
|
"logits/rejected": -2.7300617694854736, |
|
"logps/chosen": -397.44451904296875, |
|
"logps/rejected": -379.4835205078125, |
|
"loss": 0.6429, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3276776671409607, |
|
"rewards/margins": 0.16546614468097687, |
|
"rewards/rejected": -0.49314385652542114, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.703125, |
|
"learning_rate": 4.523347845882718e-06, |
|
"logits/chosen": -2.803278684616089, |
|
"logits/rejected": -2.7433362007141113, |
|
"logps/chosen": -464.9649963378906, |
|
"logps/rejected": -400.2354431152344, |
|
"loss": 0.6388, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.34644556045532227, |
|
"rewards/margins": 0.17325380444526672, |
|
"rewards/rejected": -0.5196993947029114, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.875, |
|
"learning_rate": 4.50984596020539e-06, |
|
"logits/chosen": -2.730520248413086, |
|
"logits/rejected": -2.7134251594543457, |
|
"logps/chosen": -448.7145080566406, |
|
"logps/rejected": -413.89276123046875, |
|
"loss": 0.6438, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.39723989367485046, |
|
"rewards/margins": 0.15517649054527283, |
|
"rewards/rejected": -0.5524164438247681, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.59375, |
|
"learning_rate": 4.4961762529687745e-06, |
|
"logits/chosen": -2.788811206817627, |
|
"logits/rejected": -2.7599310874938965, |
|
"logps/chosen": -435.73614501953125, |
|
"logps/rejected": -406.4289245605469, |
|
"loss": 0.6569, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4202515482902527, |
|
"rewards/margins": 0.11943890154361725, |
|
"rewards/rejected": -0.5396904945373535, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.453125, |
|
"learning_rate": 4.482339865589492e-06, |
|
"logits/chosen": -2.809835195541382, |
|
"logits/rejected": -2.7414846420288086, |
|
"logps/chosen": -434.068115234375, |
|
"logps/rejected": -373.0021667480469, |
|
"loss": 0.669, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.4107758104801178, |
|
"rewards/margins": 0.09588425606489182, |
|
"rewards/rejected": -0.5066600441932678, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/chosen": -2.7354044914245605, |
|
"eval_logits/rejected": -2.6946027278900146, |
|
"eval_logps/chosen": -430.67889404296875, |
|
"eval_logps/rejected": -401.52215576171875, |
|
"eval_loss": 0.6406324505805969, |
|
"eval_rewards/accuracies": 0.6365000009536743, |
|
"eval_rewards/chosen": -0.3445412218570709, |
|
"eval_rewards/margins": 0.1538446694612503, |
|
"eval_rewards/rejected": -0.49838587641716003, |
|
"eval_runtime": 347.8119, |
|
"eval_samples_per_second": 5.75, |
|
"eval_steps_per_second": 0.719, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.3125, |
|
"learning_rate": 4.468337953401909e-06, |
|
"logits/chosen": -2.8207192420959473, |
|
"logits/rejected": -2.8083157539367676, |
|
"logps/chosen": -434.90960693359375, |
|
"logps/rejected": -418.67315673828125, |
|
"loss": 0.6428, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.29851752519607544, |
|
"rewards/margins": 0.14117324352264404, |
|
"rewards/rejected": -0.4396907389163971, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.0625, |
|
"learning_rate": 4.45417168556166e-06, |
|
"logits/chosen": -2.775874614715576, |
|
"logits/rejected": -2.747631788253784, |
|
"logps/chosen": -398.10650634765625, |
|
"logps/rejected": -393.90191650390625, |
|
"loss": 0.6401, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.26665258407592773, |
|
"rewards/margins": 0.15709388256072998, |
|
"rewards/rejected": -0.4237464964389801, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.140625, |
|
"learning_rate": 4.439842244948036e-06, |
|
"logits/chosen": -2.780519723892212, |
|
"logits/rejected": -2.726839065551758, |
|
"logps/chosen": -421.2362365722656, |
|
"logps/rejected": -409.1251525878906, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.3165889382362366, |
|
"rewards/margins": 0.10841169208288193, |
|
"rewards/rejected": -0.4250006675720215, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.484375, |
|
"learning_rate": 4.425350828065204e-06, |
|
"logits/chosen": -2.8075780868530273, |
|
"logits/rejected": -2.7242207527160645, |
|
"logps/chosen": -451.6830139160156, |
|
"logps/rejected": -388.5299987792969, |
|
"loss": 0.6071, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.22056348621845245, |
|
"rewards/margins": 0.22768864035606384, |
|
"rewards/rejected": -0.4482521116733551, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.203125, |
|
"learning_rate": 4.410698644942303e-06, |
|
"logits/chosen": -2.8118491172790527, |
|
"logits/rejected": -2.7709052562713623, |
|
"logps/chosen": -445.9686584472656, |
|
"logps/rejected": -409.93218994140625, |
|
"loss": 0.6404, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.3341691493988037, |
|
"rewards/margins": 0.16431960463523865, |
|
"rewards/rejected": -0.49848875403404236, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.203125, |
|
"learning_rate": 4.395886919032406e-06, |
|
"logits/chosen": -2.73759126663208, |
|
"logits/rejected": -2.6830625534057617, |
|
"logps/chosen": -443.0125427246094, |
|
"logps/rejected": -411.4647521972656, |
|
"loss": 0.6215, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.43005380034446716, |
|
"rewards/margins": 0.20315060019493103, |
|
"rewards/rejected": -0.6332044005393982, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.9921875, |
|
"learning_rate": 4.380916887110366e-06, |
|
"logits/chosen": -2.8012502193450928, |
|
"logits/rejected": -2.733692169189453, |
|
"logps/chosen": -441.42498779296875, |
|
"logps/rejected": -387.70147705078125, |
|
"loss": 0.6355, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5223753452301025, |
|
"rewards/margins": 0.18315255641937256, |
|
"rewards/rejected": -0.7055279016494751, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.9375, |
|
"learning_rate": 4.365789799169539e-06, |
|
"logits/chosen": -2.682478427886963, |
|
"logits/rejected": -2.7226719856262207, |
|
"logps/chosen": -434.474365234375, |
|
"logps/rejected": -434.96771240234375, |
|
"loss": 0.6547, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.4838402271270752, |
|
"rewards/margins": 0.1491493284702301, |
|
"rewards/rejected": -0.6329895257949829, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.265625, |
|
"learning_rate": 4.350506918317416e-06, |
|
"logits/chosen": -2.769261121749878, |
|
"logits/rejected": -2.7192516326904297, |
|
"logps/chosen": -421.7001953125, |
|
"logps/rejected": -409.52642822265625, |
|
"loss": 0.6475, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.4103321135044098, |
|
"rewards/margins": 0.1710616797208786, |
|
"rewards/rejected": -0.5813937783241272, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 3.328125, |
|
"learning_rate": 4.335069520670149e-06, |
|
"logits/chosen": -2.7317872047424316, |
|
"logits/rejected": -2.693282127380371, |
|
"logps/chosen": -409.9453125, |
|
"logps/rejected": -401.80670166015625, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.508698582649231, |
|
"rewards/margins": 0.11917855590581894, |
|
"rewards/rejected": -0.6278771162033081, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -2.7076668739318848, |
|
"eval_logits/rejected": -2.6700782775878906, |
|
"eval_logps/chosen": -442.416259765625, |
|
"eval_logps/rejected": -415.98406982421875, |
|
"eval_loss": 0.6357947587966919, |
|
"eval_rewards/accuracies": 0.6424999833106995, |
|
"eval_rewards/chosen": -0.4619145095348358, |
|
"eval_rewards/margins": 0.18109098076820374, |
|
"eval_rewards/rejected": -0.6430054903030396, |
|
"eval_runtime": 348.1294, |
|
"eval_samples_per_second": 5.745, |
|
"eval_steps_per_second": 0.718, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.796875, |
|
"learning_rate": 4.319478895246e-06, |
|
"logits/chosen": -2.737431526184082, |
|
"logits/rejected": -2.6825287342071533, |
|
"logps/chosen": -423.69134521484375, |
|
"logps/rejected": -388.00445556640625, |
|
"loss": 0.631, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.4585542678833008, |
|
"rewards/margins": 0.19313645362854004, |
|
"rewards/rejected": -0.6516907811164856, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.296875, |
|
"learning_rate": 4.303736343857704e-06, |
|
"logits/chosen": -2.806246519088745, |
|
"logits/rejected": -2.7676374912261963, |
|
"logps/chosen": -441.5145568847656, |
|
"logps/rejected": -449.9791564941406, |
|
"loss": 0.6441, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4756022095680237, |
|
"rewards/margins": 0.1646694839000702, |
|
"rewards/rejected": -0.6402716636657715, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.015625, |
|
"learning_rate": 4.287843181003772e-06, |
|
"logits/chosen": -2.7820241451263428, |
|
"logits/rejected": -2.729149341583252, |
|
"logps/chosen": -496.11297607421875, |
|
"logps/rejected": -433.9457092285156, |
|
"loss": 0.6333, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5403844118118286, |
|
"rewards/margins": 0.1901749074459076, |
|
"rewards/rejected": -0.7305592894554138, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.15625, |
|
"learning_rate": 4.27180073375873e-06, |
|
"logits/chosen": -2.7426555156707764, |
|
"logits/rejected": -2.711142063140869, |
|
"logps/chosen": -488.27862548828125, |
|
"logps/rejected": -438.2783203125, |
|
"loss": 0.6226, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5886915922164917, |
|
"rewards/margins": 0.22011339664459229, |
|
"rewards/rejected": -0.8088048696517944, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.0, |
|
"learning_rate": 4.255610341662304e-06, |
|
"logits/chosen": -2.769096851348877, |
|
"logits/rejected": -2.7023282051086426, |
|
"logps/chosen": -451.603759765625, |
|
"logps/rejected": -415.9313049316406, |
|
"loss": 0.6332, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6167994141578674, |
|
"rewards/margins": 0.1871766597032547, |
|
"rewards/rejected": -0.8039760589599609, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.046875, |
|
"learning_rate": 4.2392733566075764e-06, |
|
"logits/chosen": -2.7416205406188965, |
|
"logits/rejected": -2.7040672302246094, |
|
"logps/chosen": -444.20526123046875, |
|
"logps/rejected": -424.52972412109375, |
|
"loss": 0.65, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.5938581228256226, |
|
"rewards/margins": 0.1559075564146042, |
|
"rewards/rejected": -0.7497657537460327, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.875, |
|
"learning_rate": 4.2227911427280975e-06, |
|
"logits/chosen": -2.7380728721618652, |
|
"logits/rejected": -2.6828763484954834, |
|
"logps/chosen": -430.66961669921875, |
|
"logps/rejected": -395.87994384765625, |
|
"loss": 0.6311, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.5714975595474243, |
|
"rewards/margins": 0.20155255496501923, |
|
"rewards/rejected": -0.7730501294136047, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.90625, |
|
"learning_rate": 4.206165076283983e-06, |
|
"logits/chosen": -2.7856247425079346, |
|
"logits/rejected": -2.7396748065948486, |
|
"logps/chosen": -438.529296875, |
|
"logps/rejected": -418.1429138183594, |
|
"loss": 0.6182, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6402245759963989, |
|
"rewards/margins": 0.21725162863731384, |
|
"rewards/rejected": -0.8574762344360352, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.984375, |
|
"learning_rate": 4.189396545546995e-06, |
|
"logits/chosen": -2.7590296268463135, |
|
"logits/rejected": -2.7307534217834473, |
|
"logps/chosen": -455.66064453125, |
|
"logps/rejected": -426.7330017089844, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.744498074054718, |
|
"rewards/margins": 0.12376417219638824, |
|
"rewards/rejected": -0.8682621717453003, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.125, |
|
"learning_rate": 4.172486950684627e-06, |
|
"logits/chosen": -2.773496150970459, |
|
"logits/rejected": -2.764591693878174, |
|
"logps/chosen": -448.2276306152344, |
|
"logps/rejected": -449.07598876953125, |
|
"loss": 0.605, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6141419410705566, |
|
"rewards/margins": 0.2553822696208954, |
|
"rewards/rejected": -0.8695241808891296, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_logits/chosen": -2.7122175693511963, |
|
"eval_logits/rejected": -2.6764349937438965, |
|
"eval_logps/chosen": -465.1626892089844, |
|
"eval_logps/rejected": -440.71441650390625, |
|
"eval_loss": 0.6297281384468079, |
|
"eval_rewards/accuracies": 0.6434999704360962, |
|
"eval_rewards/chosen": -0.6893790364265442, |
|
"eval_rewards/margins": 0.20092952251434326, |
|
"eval_rewards/rejected": -0.8903085589408875, |
|
"eval_runtime": 348.1512, |
|
"eval_samples_per_second": 5.745, |
|
"eval_steps_per_second": 0.718, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.4375, |
|
"learning_rate": 4.155437703643182e-06, |
|
"logits/chosen": -2.8083739280700684, |
|
"logits/rejected": -2.752487897872925, |
|
"logps/chosen": -442.2110900878906, |
|
"logps/rejected": -403.32525634765625, |
|
"loss": 0.6226, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6493631601333618, |
|
"rewards/margins": 0.21884135901927948, |
|
"rewards/rejected": -0.8682045936584473, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 3.09375, |
|
"learning_rate": 4.138250228029882e-06, |
|
"logits/chosen": -2.7438082695007324, |
|
"logits/rejected": -2.7184457778930664, |
|
"logps/chosen": -465.0062561035156, |
|
"logps/rejected": -466.47674560546875, |
|
"loss": 0.6516, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7264341711997986, |
|
"rewards/margins": 0.1592479646205902, |
|
"rewards/rejected": -0.8856821060180664, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.5, |
|
"learning_rate": 4.120925958993994e-06, |
|
"logits/chosen": -2.721998453140259, |
|
"logits/rejected": -2.7064220905303955, |
|
"logps/chosen": -416.6083984375, |
|
"logps/rejected": -412.7229919433594, |
|
"loss": 0.6416, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.6872702836990356, |
|
"rewards/margins": 0.16901233792304993, |
|
"rewards/rejected": -0.8562827110290527, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 3.84375, |
|
"learning_rate": 4.103466343106999e-06, |
|
"logits/chosen": -2.7732884883880615, |
|
"logits/rejected": -2.7553248405456543, |
|
"logps/chosen": -469.11907958984375, |
|
"logps/rejected": -438.9508361816406, |
|
"loss": 0.6312, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6161693930625916, |
|
"rewards/margins": 0.18880559504032135, |
|
"rewards/rejected": -0.8049749135971069, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 4.085872838241797e-06, |
|
"logits/chosen": -2.713508129119873, |
|
"logits/rejected": -2.668172836303711, |
|
"logps/chosen": -456.80804443359375, |
|
"logps/rejected": -435.1529846191406, |
|
"loss": 0.6418, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.6335210204124451, |
|
"rewards/margins": 0.1759893149137497, |
|
"rewards/rejected": -0.8095104098320007, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 2.734375, |
|
"learning_rate": 4.06814691345098e-06, |
|
"logits/chosen": -2.730811595916748, |
|
"logits/rejected": -2.6753551959991455, |
|
"logps/chosen": -447.3460388183594, |
|
"logps/rejected": -414.4151306152344, |
|
"loss": 0.6275, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6381969451904297, |
|
"rewards/margins": 0.1885562241077423, |
|
"rewards/rejected": -0.8267530202865601, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 2.75, |
|
"learning_rate": 4.050290048844171e-06, |
|
"logits/chosen": -2.7473931312561035, |
|
"logits/rejected": -2.7274961471557617, |
|
"logps/chosen": -464.76397705078125, |
|
"logps/rejected": -457.59674072265625, |
|
"loss": 0.6418, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.6140081882476807, |
|
"rewards/margins": 0.1607227772474289, |
|
"rewards/rejected": -0.7747309803962708, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.21875, |
|
"learning_rate": 4.032303735464422e-06, |
|
"logits/chosen": -2.832428455352783, |
|
"logits/rejected": -2.760331153869629, |
|
"logps/chosen": -478.40185546875, |
|
"logps/rejected": -440.13043212890625, |
|
"loss": 0.628, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.6477630734443665, |
|
"rewards/margins": 0.22839903831481934, |
|
"rewards/rejected": -0.876162052154541, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 2.953125, |
|
"learning_rate": 4.014189475163727e-06, |
|
"logits/chosen": -2.720529079437256, |
|
"logits/rejected": -2.6880316734313965, |
|
"logps/chosen": -449.77996826171875, |
|
"logps/rejected": -435.7823791503906, |
|
"loss": 0.6226, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6553342938423157, |
|
"rewards/margins": 0.21635405719280243, |
|
"rewards/rejected": -0.8716884851455688, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 3.995948780477605e-06, |
|
"logits/chosen": -2.7746074199676514, |
|
"logits/rejected": -2.7223048210144043, |
|
"logps/chosen": -469.13836669921875, |
|
"logps/rejected": -439.2986755371094, |
|
"loss": 0.6361, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.6836882829666138, |
|
"rewards/margins": 0.19334295392036438, |
|
"rewards/rejected": -0.8770312070846558, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": -2.7091026306152344, |
|
"eval_logits/rejected": -2.6711199283599854, |
|
"eval_logps/chosen": -467.6648254394531, |
|
"eval_logps/rejected": -444.7496337890625, |
|
"eval_loss": 0.6266594529151917, |
|
"eval_rewards/accuracies": 0.6504999995231628, |
|
"eval_rewards/chosen": -0.7144004702568054, |
|
"eval_rewards/margins": 0.2162601202726364, |
|
"eval_rewards/rejected": -0.930660605430603, |
|
"eval_runtime": 347.968, |
|
"eval_samples_per_second": 5.748, |
|
"eval_steps_per_second": 0.718, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 2.375, |
|
"learning_rate": 3.977583174498816e-06, |
|
"logits/chosen": -2.7707862854003906, |
|
"logits/rejected": -2.7450509071350098, |
|
"logps/chosen": -476.1197204589844, |
|
"logps/rejected": -445.98419189453125, |
|
"loss": 0.6258, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7250741720199585, |
|
"rewards/margins": 0.21456794440746307, |
|
"rewards/rejected": -0.9396421313285828, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 2.40625, |
|
"learning_rate": 3.959094190750172e-06, |
|
"logits/chosen": -2.780161142349243, |
|
"logits/rejected": -2.7392024993896484, |
|
"logps/chosen": -497.08758544921875, |
|
"logps/rejected": -469.8622131347656, |
|
"loss": 0.6158, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7258157730102539, |
|
"rewards/margins": 0.2515636384487152, |
|
"rewards/rejected": -0.9773795008659363, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 2.78125, |
|
"learning_rate": 3.9404833730564975e-06, |
|
"logits/chosen": -2.685615062713623, |
|
"logits/rejected": -2.663395404815674, |
|
"logps/chosen": -459.1412048339844, |
|
"logps/rejected": -449.1641540527344, |
|
"loss": 0.627, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7857618927955627, |
|
"rewards/margins": 0.22839538753032684, |
|
"rewards/rejected": -1.0141572952270508, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.484375, |
|
"learning_rate": 3.921752275415712e-06, |
|
"logits/chosen": -2.772688388824463, |
|
"logits/rejected": -2.7538001537323, |
|
"logps/chosen": -472.8526916503906, |
|
"logps/rejected": -457.0860900878906, |
|
"loss": 0.6102, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9769464731216431, |
|
"rewards/margins": 0.26345548033714294, |
|
"rewards/rejected": -1.2404019832611084, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.6875, |
|
"learning_rate": 3.902902461869079e-06, |
|
"logits/chosen": -2.7669315338134766, |
|
"logits/rejected": -2.722926378250122, |
|
"logps/chosen": -466.6268615722656, |
|
"logps/rejected": -452.5586853027344, |
|
"loss": 0.6191, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0403883457183838, |
|
"rewards/margins": 0.2461782991886139, |
|
"rewards/rejected": -1.2865667343139648, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 4.625, |
|
"learning_rate": 3.883935506370605e-06, |
|
"logits/chosen": -2.698967456817627, |
|
"logits/rejected": -2.680738925933838, |
|
"logps/chosen": -471.3136291503906, |
|
"logps/rejected": -444.6326599121094, |
|
"loss": 0.6268, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9365707635879517, |
|
"rewards/margins": 0.22086432576179504, |
|
"rewards/rejected": -1.1574350595474243, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.25, |
|
"learning_rate": 3.864852992655617e-06, |
|
"logits/chosen": -2.724776268005371, |
|
"logits/rejected": -2.6953415870666504, |
|
"logps/chosen": -462.2669982910156, |
|
"logps/rejected": -459.33428955078125, |
|
"loss": 0.591, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8260868787765503, |
|
"rewards/margins": 0.2951758801937103, |
|
"rewards/rejected": -1.121262788772583, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 3.845656514108516e-06, |
|
"logits/chosen": -2.7450900077819824, |
|
"logits/rejected": -2.7022852897644043, |
|
"logps/chosen": -477.535888671875, |
|
"logps/rejected": -422.33587646484375, |
|
"loss": 0.6484, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.9595173597335815, |
|
"rewards/margins": 0.19346167147159576, |
|
"rewards/rejected": -1.1529791355133057, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.734375, |
|
"learning_rate": 3.826347673629738e-06, |
|
"logits/chosen": -2.7165746688842773, |
|
"logits/rejected": -2.6513075828552246, |
|
"logps/chosen": -472.983642578125, |
|
"logps/rejected": -456.36956787109375, |
|
"loss": 0.6028, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9271873235702515, |
|
"rewards/margins": 0.28870025277137756, |
|
"rewards/rejected": -1.2158875465393066, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 4.21875, |
|
"learning_rate": 3.8069280835019062e-06, |
|
"logits/chosen": -2.6848392486572266, |
|
"logits/rejected": -2.6425843238830566, |
|
"logps/chosen": -507.7676696777344, |
|
"logps/rejected": -480.82501220703125, |
|
"loss": 0.6085, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9639270901679993, |
|
"rewards/margins": 0.28983956575393677, |
|
"rewards/rejected": -1.253766655921936, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_logits/chosen": -2.67969012260437, |
|
"eval_logits/rejected": -2.643465995788574, |
|
"eval_logps/chosen": -501.5469055175781, |
|
"eval_logps/rejected": -482.52557373046875, |
|
"eval_loss": 0.6213365197181702, |
|
"eval_rewards/accuracies": 0.6489999890327454, |
|
"eval_rewards/chosen": -1.053221344947815, |
|
"eval_rewards/margins": 0.25519895553588867, |
|
"eval_rewards/rejected": -1.308420181274414, |
|
"eval_runtime": 348.1901, |
|
"eval_samples_per_second": 5.744, |
|
"eval_steps_per_second": 0.718, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.921875, |
|
"learning_rate": 3.7873993652552077e-06, |
|
"logits/chosen": -2.711010456085205, |
|
"logits/rejected": -2.683377981185913, |
|
"logps/chosen": -460.9742736816406, |
|
"logps/rejected": -446.6480407714844, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0935702323913574, |
|
"rewards/margins": 0.11124851554632187, |
|
"rewards/rejected": -1.2048187255859375, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.953125, |
|
"learning_rate": 3.7677631495319953e-06, |
|
"logits/chosen": -2.748260021209717, |
|
"logits/rejected": -2.724116802215576, |
|
"logps/chosen": -488.9134826660156, |
|
"logps/rejected": -478.4491271972656, |
|
"loss": 0.6052, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9258454442024231, |
|
"rewards/margins": 0.2710776627063751, |
|
"rewards/rejected": -1.196923017501831, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.984375, |
|
"learning_rate": 3.748021075950633e-06, |
|
"logits/chosen": -2.722660779953003, |
|
"logits/rejected": -2.7004356384277344, |
|
"logps/chosen": -515.2189331054688, |
|
"logps/rejected": -489.676025390625, |
|
"loss": 0.6558, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9935762286186218, |
|
"rewards/margins": 0.16242071986198425, |
|
"rewards/rejected": -1.1559970378875732, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.515625, |
|
"learning_rate": 3.7281747929685824e-06, |
|
"logits/chosen": -2.693944215774536, |
|
"logits/rejected": -2.662226438522339, |
|
"logps/chosen": -472.21392822265625, |
|
"logps/rejected": -458.48651123046875, |
|
"loss": 0.6344, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1790839433670044, |
|
"rewards/margins": 0.1845519244670868, |
|
"rewards/rejected": -1.363635778427124, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.75, |
|
"learning_rate": 3.7082259577447604e-06, |
|
"logits/chosen": -2.7512943744659424, |
|
"logits/rejected": -2.7197911739349365, |
|
"logps/chosen": -525.9802856445312, |
|
"logps/rejected": -505.2586975097656, |
|
"loss": 0.6247, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.1584722995758057, |
|
"rewards/margins": 0.232163667678833, |
|
"rewards/rejected": -1.3906362056732178, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.859375, |
|
"learning_rate": 3.6881762360011688e-06, |
|
"logits/chosen": -2.755866050720215, |
|
"logits/rejected": -2.70156192779541, |
|
"logps/chosen": -549.6534423828125, |
|
"logps/rejected": -496.219482421875, |
|
"loss": 0.626, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2471288442611694, |
|
"rewards/margins": 0.21645119786262512, |
|
"rewards/rejected": -1.4635800123214722, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 3.4375, |
|
"learning_rate": 3.668027301883802e-06, |
|
"logits/chosen": -2.751842498779297, |
|
"logits/rejected": -2.7206082344055176, |
|
"logps/chosen": -509.11871337890625, |
|
"logps/rejected": -488.1609802246094, |
|
"loss": 0.6118, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2405083179473877, |
|
"rewards/margins": 0.24299952387809753, |
|
"rewards/rejected": -1.483507752418518, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.78125, |
|
"learning_rate": 3.64778083782286e-06, |
|
"logits/chosen": -2.7066681385040283, |
|
"logits/rejected": -2.7152516841888428, |
|
"logps/chosen": -505.77618408203125, |
|
"logps/rejected": -542.4833374023438, |
|
"loss": 0.602, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.2058336734771729, |
|
"rewards/margins": 0.2781684398651123, |
|
"rewards/rejected": -1.4840023517608643, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 3.627438534392268e-06, |
|
"logits/chosen": -2.750699520111084, |
|
"logits/rejected": -2.7490906715393066, |
|
"logps/chosen": -485.8789978027344, |
|
"logps/rejected": -506.06243896484375, |
|
"loss": 0.6448, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.244022250175476, |
|
"rewards/margins": 0.20155465602874756, |
|
"rewards/rejected": -1.4455769062042236, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.78125, |
|
"learning_rate": 3.607002090168506e-06, |
|
"logits/chosen": -2.6556179523468018, |
|
"logits/rejected": -2.649691343307495, |
|
"logps/chosen": -509.494873046875, |
|
"logps/rejected": -483.21875, |
|
"loss": 0.6317, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0604232549667358, |
|
"rewards/margins": 0.21748527884483337, |
|
"rewards/rejected": -1.2779085636138916, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -2.6505815982818604, |
|
"eval_logits/rejected": -2.617206573486328, |
|
"eval_logps/chosen": -508.6858215332031, |
|
"eval_logps/rejected": -489.9322814941406, |
|
"eval_loss": 0.619657576084137, |
|
"eval_rewards/accuracies": 0.6489999890327454, |
|
"eval_rewards/chosen": -1.1246099472045898, |
|
"eval_rewards/margins": 0.2578776180744171, |
|
"eval_rewards/rejected": -1.3824876546859741, |
|
"eval_runtime": 347.7803, |
|
"eval_samples_per_second": 5.751, |
|
"eval_steps_per_second": 0.719, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 7.125, |
|
"learning_rate": 3.586473211588787e-06, |
|
"logits/chosen": -2.710347890853882, |
|
"logits/rejected": -2.695310354232788, |
|
"logps/chosen": -473.04541015625, |
|
"logps/rejected": -494.8163146972656, |
|
"loss": 0.5936, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0344406366348267, |
|
"rewards/margins": 0.3075736463069916, |
|
"rewards/rejected": -1.342014193534851, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 4.5, |
|
"learning_rate": 3.5658536128085623e-06, |
|
"logits/chosen": -2.7335174083709717, |
|
"logits/rejected": -2.673346996307373, |
|
"logps/chosen": -509.60595703125, |
|
"logps/rejected": -487.2217712402344, |
|
"loss": 0.614, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1273787021636963, |
|
"rewards/margins": 0.2849898934364319, |
|
"rewards/rejected": -1.4123685359954834, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 4.90625, |
|
"learning_rate": 3.545145015558399e-06, |
|
"logits/chosen": -2.588106155395508, |
|
"logits/rejected": -2.6156704425811768, |
|
"logps/chosen": -476.9378967285156, |
|
"logps/rejected": -474.3050842285156, |
|
"loss": 0.6187, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.2659618854522705, |
|
"rewards/margins": 0.28414902091026306, |
|
"rewards/rejected": -1.5501108169555664, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 3.09375, |
|
"learning_rate": 3.5243491490002056e-06, |
|
"logits/chosen": -2.6407034397125244, |
|
"logits/rejected": -2.6249217987060547, |
|
"logps/chosen": -523.3114013671875, |
|
"logps/rejected": -515.78564453125, |
|
"loss": 0.6534, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.3326655626296997, |
|
"rewards/margins": 0.2217075377702713, |
|
"rewards/rejected": -1.554373025894165, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 3.0, |
|
"learning_rate": 3.503467749582857e-06, |
|
"logits/chosen": -2.678744077682495, |
|
"logits/rejected": -2.6236231327056885, |
|
"logps/chosen": -500.12322998046875, |
|
"logps/rejected": -453.09375, |
|
"loss": 0.6798, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1778029203414917, |
|
"rewards/margins": 0.14696446061134338, |
|
"rewards/rejected": -1.3247674703598022, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.515625, |
|
"learning_rate": 3.4825025608971947e-06, |
|
"logits/chosen": -2.6644175052642822, |
|
"logits/rejected": -2.6675162315368652, |
|
"logps/chosen": -451.5030822753906, |
|
"logps/rejected": -466.20867919921875, |
|
"loss": 0.6371, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.1360504627227783, |
|
"rewards/margins": 0.20215868949890137, |
|
"rewards/rejected": -1.3382090330123901, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 3.421875, |
|
"learning_rate": 3.4614553335304407e-06, |
|
"logits/chosen": -2.714818239212036, |
|
"logits/rejected": -2.6459782123565674, |
|
"logps/chosen": -528.015380859375, |
|
"logps/rejected": -481.2875061035156, |
|
"loss": 0.6269, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1526925563812256, |
|
"rewards/margins": 0.24837598204612732, |
|
"rewards/rejected": -1.4010684490203857, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.8125, |
|
"learning_rate": 3.4403278249200222e-06, |
|
"logits/chosen": -2.7072434425354004, |
|
"logits/rejected": -2.6440227031707764, |
|
"logps/chosen": -518.8106689453125, |
|
"logps/rejected": -480.94091796875, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.9318562746047974, |
|
"rewards/margins": 0.3667041063308716, |
|
"rewards/rejected": -1.298560380935669, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 3.0625, |
|
"learning_rate": 3.4191217992068293e-06, |
|
"logits/chosen": -2.739563465118408, |
|
"logits/rejected": -2.681410312652588, |
|
"logps/chosen": -504.88580322265625, |
|
"logps/rejected": -463.4295959472656, |
|
"loss": 0.5969, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.9455466270446777, |
|
"rewards/margins": 0.3077097535133362, |
|
"rewards/rejected": -1.2532564401626587, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 4.53125, |
|
"learning_rate": 3.3978390270879056e-06, |
|
"logits/chosen": -2.6793107986450195, |
|
"logits/rejected": -2.6711511611938477, |
|
"logps/chosen": -440.7421875, |
|
"logps/rejected": -438.96209716796875, |
|
"loss": 0.6702, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.124517798423767, |
|
"rewards/margins": 0.14334309101104736, |
|
"rewards/rejected": -1.267861008644104, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_logits/chosen": -2.6761555671691895, |
|
"eval_logits/rejected": -2.6407337188720703, |
|
"eval_logps/chosen": -496.5814514160156, |
|
"eval_logps/rejected": -478.1268310546875, |
|
"eval_loss": 0.6182043552398682, |
|
"eval_rewards/accuracies": 0.652999997138977, |
|
"eval_rewards/chosen": -1.0035661458969116, |
|
"eval_rewards/margins": 0.26086705923080444, |
|
"eval_rewards/rejected": -1.2644333839416504, |
|
"eval_runtime": 347.9034, |
|
"eval_samples_per_second": 5.749, |
|
"eval_steps_per_second": 0.719, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 4.0, |
|
"learning_rate": 3.3764812856685995e-06, |
|
"logits/chosen": -2.7096104621887207, |
|
"logits/rejected": -2.717935562133789, |
|
"logps/chosen": -451.46923828125, |
|
"logps/rejected": -466.99798583984375, |
|
"loss": 0.6448, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9781481027603149, |
|
"rewards/margins": 0.2028496265411377, |
|
"rewards/rejected": -1.180997610092163, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 3.953125, |
|
"learning_rate": 3.3550503583141726e-06, |
|
"logits/chosen": -2.759023427963257, |
|
"logits/rejected": -2.7278056144714355, |
|
"logps/chosen": -501.0248107910156, |
|
"logps/rejected": -491.77374267578125, |
|
"loss": 0.5937, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9347749948501587, |
|
"rewards/margins": 0.3128504455089569, |
|
"rewards/rejected": -1.2476253509521484, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 3.40625, |
|
"learning_rate": 3.3335480345008907e-06, |
|
"logits/chosen": -2.640856981277466, |
|
"logits/rejected": -2.624553918838501, |
|
"logps/chosen": -483.8340759277344, |
|
"logps/rejected": -468.92327880859375, |
|
"loss": 0.637, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9758474230766296, |
|
"rewards/margins": 0.2623240053653717, |
|
"rewards/rejected": -1.2381714582443237, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 3.015625, |
|
"learning_rate": 3.3119761096666055e-06, |
|
"logits/chosen": -2.6590754985809326, |
|
"logits/rejected": -2.620539426803589, |
|
"logps/chosen": -497.45587158203125, |
|
"logps/rejected": -465.5917053222656, |
|
"loss": 0.6199, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.884781002998352, |
|
"rewards/margins": 0.24650093913078308, |
|
"rewards/rejected": -1.1312817335128784, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 3.625, |
|
"learning_rate": 3.290336385060832e-06, |
|
"logits/chosen": -2.7382700443267822, |
|
"logits/rejected": -2.6750712394714355, |
|
"logps/chosen": -496.0645446777344, |
|
"logps/rejected": -476.931884765625, |
|
"loss": 0.6089, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1136571168899536, |
|
"rewards/margins": 0.2823924124240875, |
|
"rewards/rejected": -1.3960494995117188, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 3.359375, |
|
"learning_rate": 3.268630667594348e-06, |
|
"logits/chosen": -2.6461880207061768, |
|
"logits/rejected": -2.6458840370178223, |
|
"logps/chosen": -484.7660217285156, |
|
"logps/rejected": -470.3802795410156, |
|
"loss": 0.6207, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.033327341079712, |
|
"rewards/margins": 0.2565310001373291, |
|
"rewards/rejected": -1.2898584604263306, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 3.21875, |
|
"learning_rate": 3.2468607696883147e-06, |
|
"logits/chosen": -2.664067268371582, |
|
"logits/rejected": -2.641017436981201, |
|
"logps/chosen": -490.5843200683594, |
|
"logps/rejected": -511.4412536621094, |
|
"loss": 0.5835, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0362211465835571, |
|
"rewards/margins": 0.35183295607566833, |
|
"rewards/rejected": -1.3880541324615479, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.46875, |
|
"learning_rate": 3.225028509122944e-06, |
|
"logits/chosen": -2.7072222232818604, |
|
"logits/rejected": -2.6669273376464844, |
|
"logps/chosen": -479.81292724609375, |
|
"logps/rejected": -471.8095703125, |
|
"loss": 0.6306, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.1294889450073242, |
|
"rewards/margins": 0.23517270386219025, |
|
"rewards/rejected": -1.3646615743637085, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 5.15625, |
|
"learning_rate": 3.2031357088857083e-06, |
|
"logits/chosen": -2.6887664794921875, |
|
"logits/rejected": -2.672578811645508, |
|
"logps/chosen": -530.8712158203125, |
|
"logps/rejected": -537.0011596679688, |
|
"loss": 0.6336, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.256080985069275, |
|
"rewards/margins": 0.2619550824165344, |
|
"rewards/rejected": -1.518036127090454, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 5.1875, |
|
"learning_rate": 3.181184197019127e-06, |
|
"logits/chosen": -2.5891640186309814, |
|
"logits/rejected": -2.568915843963623, |
|
"logps/chosen": -489.94512939453125, |
|
"logps/rejected": -538.85986328125, |
|
"loss": 0.5658, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.305600881576538, |
|
"rewards/margins": 0.43258899450302124, |
|
"rewards/rejected": -1.738189935684204, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_logits/chosen": -2.618206739425659, |
|
"eval_logits/rejected": -2.5865581035614014, |
|
"eval_logps/chosen": -531.0144653320312, |
|
"eval_logps/rejected": -515.1605834960938, |
|
"eval_loss": 0.6218886375427246, |
|
"eval_rewards/accuracies": 0.6445000171661377, |
|
"eval_rewards/chosen": -1.3478968143463135, |
|
"eval_rewards/margins": 0.28687387704849243, |
|
"eval_rewards/rejected": -1.6347707509994507, |
|
"eval_runtime": 347.7653, |
|
"eval_samples_per_second": 5.751, |
|
"eval_steps_per_second": 0.719, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.3125, |
|
"learning_rate": 3.159175806468126e-06, |
|
"logits/chosen": -2.5783843994140625, |
|
"logits/rejected": -2.530768871307373, |
|
"logps/chosen": -519.810546875, |
|
"logps/rejected": -486.0419921875, |
|
"loss": 0.6143, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.3787872791290283, |
|
"rewards/margins": 0.29414287209510803, |
|
"rewards/rejected": -1.672930121421814, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 11.125, |
|
"learning_rate": 3.1371123749269804e-06, |
|
"logits/chosen": -2.6292014122009277, |
|
"logits/rejected": -2.6139864921569824, |
|
"logps/chosen": -565.1087646484375, |
|
"logps/rejected": -546.5718994140625, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.386059284210205, |
|
"rewards/margins": 0.1806623637676239, |
|
"rewards/rejected": -1.5667215585708618, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.515625, |
|
"learning_rate": 3.114995744685877e-06, |
|
"logits/chosen": -2.6707286834716797, |
|
"logits/rejected": -2.669381618499756, |
|
"logps/chosen": -514.181640625, |
|
"logps/rejected": -503.86663818359375, |
|
"loss": 0.6624, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.390178918838501, |
|
"rewards/margins": 0.18112266063690186, |
|
"rewards/rejected": -1.5713016986846924, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 4.3125, |
|
"learning_rate": 3.0928277624770743e-06, |
|
"logits/chosen": -2.7397656440734863, |
|
"logits/rejected": -2.692636013031006, |
|
"logps/chosen": -547.605224609375, |
|
"logps/rejected": -530.4255981445312, |
|
"loss": 0.5952, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2703100442886353, |
|
"rewards/margins": 0.33880940079689026, |
|
"rewards/rejected": -1.6091196537017822, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.703125, |
|
"learning_rate": 3.070610279320708e-06, |
|
"logits/chosen": -2.7308197021484375, |
|
"logits/rejected": -2.691286087036133, |
|
"logps/chosen": -530.3106689453125, |
|
"logps/rejected": -512.4490356445312, |
|
"loss": 0.5837, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.1016675233840942, |
|
"rewards/margins": 0.3237997591495514, |
|
"rewards/rejected": -1.4254672527313232, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.71875, |
|
"learning_rate": 3.0483451503702264e-06, |
|
"logits/chosen": -2.739076614379883, |
|
"logits/rejected": -2.7019972801208496, |
|
"logps/chosen": -540.4013671875, |
|
"logps/rejected": -528.4253540039062, |
|
"loss": 0.6168, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.127809762954712, |
|
"rewards/margins": 0.2989148199558258, |
|
"rewards/rejected": -1.4267246723175049, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 4.03125, |
|
"learning_rate": 3.0260342347574916e-06, |
|
"logits/chosen": -2.6942806243896484, |
|
"logits/rejected": -2.652289628982544, |
|
"logps/chosen": -524.1945190429688, |
|
"logps/rejected": -504.88848876953125, |
|
"loss": 0.5845, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0817714929580688, |
|
"rewards/margins": 0.3258208930492401, |
|
"rewards/rejected": -1.4075922966003418, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 3.78125, |
|
"learning_rate": 3.0036793954375358e-06, |
|
"logits/chosen": -2.7198615074157715, |
|
"logits/rejected": -2.676637649536133, |
|
"logps/chosen": -517.9676513671875, |
|
"logps/rejected": -481.9654846191406, |
|
"loss": 0.5941, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1094682216644287, |
|
"rewards/margins": 0.3417368233203888, |
|
"rewards/rejected": -1.4512050151824951, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 5.0625, |
|
"learning_rate": 2.981282499033009e-06, |
|
"logits/chosen": -2.70827054977417, |
|
"logits/rejected": -2.6766083240509033, |
|
"logps/chosen": -514.5819091796875, |
|
"logps/rejected": -497.3114318847656, |
|
"loss": 0.6435, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.134453535079956, |
|
"rewards/margins": 0.22488650679588318, |
|
"rewards/rejected": -1.3593400716781616, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.21875, |
|
"learning_rate": 2.9588454156783163e-06, |
|
"logits/chosen": -2.6851532459259033, |
|
"logits/rejected": -2.6381797790527344, |
|
"logps/chosen": -517.7092895507812, |
|
"logps/rejected": -501.8164978027344, |
|
"loss": 0.6039, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.9668029546737671, |
|
"rewards/margins": 0.30051741003990173, |
|
"rewards/rejected": -1.2673202753067017, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_logits/chosen": -2.674211025238037, |
|
"eval_logits/rejected": -2.6375982761383057, |
|
"eval_logps/chosen": -486.3656311035156, |
|
"eval_logps/rejected": -468.8457946777344, |
|
"eval_loss": 0.6153793931007385, |
|
"eval_rewards/accuracies": 0.6629999876022339, |
|
"eval_rewards/chosen": -0.9014082551002502, |
|
"eval_rewards/margins": 0.2702144682407379, |
|
"eval_rewards/rejected": -1.1716225147247314, |
|
"eval_runtime": 347.781, |
|
"eval_samples_per_second": 5.751, |
|
"eval_steps_per_second": 0.719, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.515625, |
|
"learning_rate": 2.9363700188634597e-06, |
|
"logits/chosen": -2.7260212898254395, |
|
"logits/rejected": -2.7005722522735596, |
|
"logps/chosen": -478.63385009765625, |
|
"logps/rejected": -446.9085998535156, |
|
"loss": 0.6221, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8971524238586426, |
|
"rewards/margins": 0.24045062065124512, |
|
"rewards/rejected": -1.1376030445098877, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.828125, |
|
"learning_rate": 2.9138581852776053e-06, |
|
"logits/chosen": -2.7050108909606934, |
|
"logits/rejected": -2.6705167293548584, |
|
"logps/chosen": -476.31854248046875, |
|
"logps/rejected": -468.4576110839844, |
|
"loss": 0.5804, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7747586369514465, |
|
"rewards/margins": 0.34002387523651123, |
|
"rewards/rejected": -1.1147825717926025, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 3.0, |
|
"learning_rate": 2.8913117946523805e-06, |
|
"logits/chosen": -2.6942856311798096, |
|
"logits/rejected": -2.6358091831207275, |
|
"logps/chosen": -478.71759033203125, |
|
"logps/rejected": -443.03668212890625, |
|
"loss": 0.5989, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8447883725166321, |
|
"rewards/margins": 0.32821527123451233, |
|
"rewards/rejected": -1.1730036735534668, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.609375, |
|
"learning_rate": 2.8687327296049126e-06, |
|
"logits/chosen": -2.701122760772705, |
|
"logits/rejected": -2.682640790939331, |
|
"logps/chosen": -484.0836486816406, |
|
"logps/rejected": -488.54425048828125, |
|
"loss": 0.611, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.8852831125259399, |
|
"rewards/margins": 0.296100914478302, |
|
"rewards/rejected": -1.1813839673995972, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 3.171875, |
|
"learning_rate": 2.8461228754806376e-06, |
|
"logits/chosen": -2.728684902191162, |
|
"logits/rejected": -2.6673216819763184, |
|
"logps/chosen": -502.921875, |
|
"logps/rejected": -477.1438903808594, |
|
"loss": 0.6062, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9080885052680969, |
|
"rewards/margins": 0.27347394824028015, |
|
"rewards/rejected": -1.1815625429153442, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.265625, |
|
"learning_rate": 2.823484120195865e-06, |
|
"logits/chosen": -2.741576671600342, |
|
"logits/rejected": -2.6904985904693604, |
|
"logps/chosen": -522.7763671875, |
|
"logps/rejected": -488.14080810546875, |
|
"loss": 0.5742, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9640465974807739, |
|
"rewards/margins": 0.36099857091903687, |
|
"rewards/rejected": -1.325045108795166, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 4.5625, |
|
"learning_rate": 2.8008183540801486e-06, |
|
"logits/chosen": -2.70404052734375, |
|
"logits/rejected": -2.65217924118042, |
|
"logps/chosen": -512.2633666992188, |
|
"logps/rejected": -466.35870361328125, |
|
"loss": 0.6164, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0254846811294556, |
|
"rewards/margins": 0.28917989134788513, |
|
"rewards/rejected": -1.3146644830703735, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 2.7781274697184353e-06, |
|
"logits/chosen": -2.6516964435577393, |
|
"logits/rejected": -2.666243076324463, |
|
"logps/chosen": -464.35076904296875, |
|
"logps/rejected": -491.3067932128906, |
|
"loss": 0.628, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0909736156463623, |
|
"rewards/margins": 0.2263956516981125, |
|
"rewards/rejected": -1.3173692226409912, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.640625, |
|
"learning_rate": 2.7554133617930397e-06, |
|
"logits/chosen": -2.6758196353912354, |
|
"logits/rejected": -2.6307640075683594, |
|
"logps/chosen": -480.7010803222656, |
|
"logps/rejected": -462.5948791503906, |
|
"loss": 0.6001, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9626979827880859, |
|
"rewards/margins": 0.29944872856140137, |
|
"rewards/rejected": -1.2621467113494873, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.78125, |
|
"learning_rate": 2.7326779269254363e-06, |
|
"logits/chosen": -2.732978105545044, |
|
"logits/rejected": -2.6916584968566895, |
|
"logps/chosen": -537.5288696289062, |
|
"logps/rejected": -481.4242248535156, |
|
"loss": 0.6173, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.0523701906204224, |
|
"rewards/margins": 0.30254489183425903, |
|
"rewards/rejected": -1.354914903640747, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.6580026149749756, |
|
"eval_logits/rejected": -2.6232478618621826, |
|
"eval_logps/chosen": -511.5793151855469, |
|
"eval_logps/rejected": -496.3810119628906, |
|
"eval_loss": 0.6120737791061401, |
|
"eval_rewards/accuracies": 0.6575000286102295, |
|
"eval_rewards/chosen": -1.1535453796386719, |
|
"eval_rewards/margins": 0.29342907667160034, |
|
"eval_rewards/rejected": -1.4469746351242065, |
|
"eval_runtime": 347.7542, |
|
"eval_samples_per_second": 5.751, |
|
"eval_steps_per_second": 0.719, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 3.171875, |
|
"learning_rate": 2.7099230635178954e-06, |
|
"logits/chosen": -2.6824750900268555, |
|
"logits/rejected": -2.6871109008789062, |
|
"logps/chosen": -506.978759765625, |
|
"logps/rejected": -510.89569091796875, |
|
"loss": 0.6114, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.1684012413024902, |
|
"rewards/margins": 0.284812867641449, |
|
"rewards/rejected": -1.4532140493392944, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 4.03125, |
|
"learning_rate": 2.6871506715949608e-06, |
|
"logits/chosen": -2.732447624206543, |
|
"logits/rejected": -2.692403554916382, |
|
"logps/chosen": -490.4368591308594, |
|
"logps/rejected": -467.0401306152344, |
|
"loss": 0.6204, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0760316848754883, |
|
"rewards/margins": 0.24762101471424103, |
|
"rewards/rejected": -1.3236526250839233, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 3.65625, |
|
"learning_rate": 2.6643626526448063e-06, |
|
"logits/chosen": -2.7549805641174316, |
|
"logits/rejected": -2.7140159606933594, |
|
"logps/chosen": -533.2722778320312, |
|
"logps/rejected": -497.967041015625, |
|
"loss": 0.5817, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9551935195922852, |
|
"rewards/margins": 0.3695451319217682, |
|
"rewards/rejected": -1.324738621711731, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.703125, |
|
"learning_rate": 2.6415609094604562e-06, |
|
"logits/chosen": -2.7105820178985596, |
|
"logits/rejected": -2.7078709602355957, |
|
"logps/chosen": -511.4810485839844, |
|
"logps/rejected": -498.16705322265625, |
|
"loss": 0.6262, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.0229012966156006, |
|
"rewards/margins": 0.2677188515663147, |
|
"rewards/rejected": -1.290619969367981, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.984375, |
|
"learning_rate": 2.618747345980904e-06, |
|
"logits/chosen": -2.7516863346099854, |
|
"logits/rejected": -2.6846871376037598, |
|
"logps/chosen": -489.28924560546875, |
|
"logps/rejected": -436.2731018066406, |
|
"loss": 0.6068, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1246410608291626, |
|
"rewards/margins": 0.29066139459609985, |
|
"rewards/rejected": -1.4153025150299072, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.8125, |
|
"learning_rate": 2.595923867132136e-06, |
|
"logits/chosen": -2.744058132171631, |
|
"logits/rejected": -2.71818208694458, |
|
"logps/chosen": -528.1295166015625, |
|
"logps/rejected": -513.4764404296875, |
|
"loss": 0.604, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.1548454761505127, |
|
"rewards/margins": 0.3176589012145996, |
|
"rewards/rejected": -1.4725043773651123, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.640625, |
|
"learning_rate": 2.5730923786680672e-06, |
|
"logits/chosen": -2.7094826698303223, |
|
"logits/rejected": -2.7104077339172363, |
|
"logps/chosen": -496.2884826660156, |
|
"logps/rejected": -523.0441284179688, |
|
"loss": 0.629, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2200782299041748, |
|
"rewards/margins": 0.2470276653766632, |
|
"rewards/rejected": -1.4671061038970947, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.90625, |
|
"learning_rate": 2.5502547870114137e-06, |
|
"logits/chosen": -2.7130637168884277, |
|
"logits/rejected": -2.6693716049194336, |
|
"logps/chosen": -506.1182556152344, |
|
"logps/rejected": -476.56866455078125, |
|
"loss": 0.6481, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2350577116012573, |
|
"rewards/margins": 0.21940436959266663, |
|
"rewards/rejected": -1.4544621706008911, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.328125, |
|
"learning_rate": 2.527412999094507e-06, |
|
"logits/chosen": -2.6736958026885986, |
|
"logits/rejected": -2.632709264755249, |
|
"logps/chosen": -545.9395751953125, |
|
"logps/rejected": -542.23583984375, |
|
"loss": 0.6028, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.1226942539215088, |
|
"rewards/margins": 0.33349329233169556, |
|
"rewards/rejected": -1.4561874866485596, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 5.34375, |
|
"learning_rate": 2.504568922200064e-06, |
|
"logits/chosen": -2.6629996299743652, |
|
"logits/rejected": -2.6413462162017822, |
|
"logps/chosen": -490.80059814453125, |
|
"logps/rejected": -472.08319091796875, |
|
"loss": 0.62, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.1775743961334229, |
|
"rewards/margins": 0.25760284066200256, |
|
"rewards/rejected": -1.4351773262023926, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_logits/chosen": -2.662881851196289, |
|
"eval_logits/rejected": -2.627786159515381, |
|
"eval_logps/chosen": -512.2247314453125, |
|
"eval_logps/rejected": -496.9116516113281, |
|
"eval_loss": 0.6116329431533813, |
|
"eval_rewards/accuracies": 0.6650000214576721, |
|
"eval_rewards/chosen": -1.1599992513656616, |
|
"eval_rewards/margins": 0.29228222370147705, |
|
"eval_rewards/rejected": -1.4522814750671387, |
|
"eval_runtime": 347.7217, |
|
"eval_samples_per_second": 5.752, |
|
"eval_steps_per_second": 0.719, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.40625, |
|
"learning_rate": 2.4817244638019333e-06, |
|
"logits/chosen": -2.703998565673828, |
|
"logits/rejected": -2.6619462966918945, |
|
"logps/chosen": -514.3597412109375, |
|
"logps/rejected": -477.54974365234375, |
|
"loss": 0.6243, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.1271086931228638, |
|
"rewards/margins": 0.2521311044692993, |
|
"rewards/rejected": -1.379239797592163, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.125, |
|
"learning_rate": 2.4588815314058155e-06, |
|
"logits/chosen": -2.6903960704803467, |
|
"logits/rejected": -2.680983304977417, |
|
"logps/chosen": -468.71826171875, |
|
"logps/rejected": -445.9656677246094, |
|
"loss": 0.6159, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.9441890716552734, |
|
"rewards/margins": 0.2759680449962616, |
|
"rewards/rejected": -1.2201570272445679, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.34375, |
|
"learning_rate": 2.4360420323899922e-06, |
|
"logits/chosen": -2.6948788166046143, |
|
"logits/rejected": -2.668149471282959, |
|
"logps/chosen": -500.23248291015625, |
|
"logps/rejected": -478.42437744140625, |
|
"loss": 0.6166, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.892300009727478, |
|
"rewards/margins": 0.28213489055633545, |
|
"rewards/rejected": -1.174435019493103, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.109375, |
|
"learning_rate": 2.4132078738460585e-06, |
|
"logits/chosen": -2.7459912300109863, |
|
"logits/rejected": -2.701768398284912, |
|
"logps/chosen": -485.0846252441406, |
|
"logps/rejected": -448.44140625, |
|
"loss": 0.5966, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9321788549423218, |
|
"rewards/margins": 0.30308184027671814, |
|
"rewards/rejected": -1.2352608442306519, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.40625, |
|
"learning_rate": 2.3903809624196826e-06, |
|
"logits/chosen": -2.7058706283569336, |
|
"logits/rejected": -2.6738340854644775, |
|
"logps/chosen": -463.795654296875, |
|
"logps/rejected": -433.58721923828125, |
|
"loss": 0.6387, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.0220860242843628, |
|
"rewards/margins": 0.22584767639636993, |
|
"rewards/rejected": -1.2479338645935059, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 5.0, |
|
"learning_rate": 2.3675632041513978e-06, |
|
"logits/chosen": -2.757347583770752, |
|
"logits/rejected": -2.6829416751861572, |
|
"logps/chosen": -521.1004638671875, |
|
"logps/rejected": -448.514404296875, |
|
"loss": 0.5971, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9433916211128235, |
|
"rewards/margins": 0.3374863266944885, |
|
"rewards/rejected": -1.2808778285980225, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 3.21875, |
|
"learning_rate": 2.3447565043174533e-06, |
|
"logits/chosen": -2.7074646949768066, |
|
"logits/rejected": -2.6540534496307373, |
|
"logps/chosen": -489.44866943359375, |
|
"logps/rejected": -449.8168029785156, |
|
"loss": 0.6124, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.0346986055374146, |
|
"rewards/margins": 0.2758663594722748, |
|
"rewards/rejected": -1.3105649948120117, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 4.5625, |
|
"learning_rate": 2.321962767270724e-06, |
|
"logits/chosen": -2.69421124458313, |
|
"logits/rejected": -2.6716065406799316, |
|
"logps/chosen": -481.97833251953125, |
|
"logps/rejected": -441.35443115234375, |
|
"loss": 0.614, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0301321744918823, |
|
"rewards/margins": 0.2506099343299866, |
|
"rewards/rejected": -1.2807420492172241, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 3.171875, |
|
"learning_rate": 2.299183896281692e-06, |
|
"logits/chosen": -2.66270112991333, |
|
"logits/rejected": -2.640023946762085, |
|
"logps/chosen": -484.82354736328125, |
|
"logps/rejected": -490.209716796875, |
|
"loss": 0.642, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.0577830076217651, |
|
"rewards/margins": 0.22141680121421814, |
|
"rewards/rejected": -1.2791998386383057, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 4.375, |
|
"learning_rate": 2.2764217933795297e-06, |
|
"logits/chosen": -2.7031655311584473, |
|
"logits/rejected": -2.675705909729004, |
|
"logps/chosen": -489.45269775390625, |
|
"logps/rejected": -480.9483337402344, |
|
"loss": 0.5957, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9436511993408203, |
|
"rewards/margins": 0.3099278509616852, |
|
"rewards/rejected": -1.2535789012908936, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": -2.6674294471740723, |
|
"eval_logits/rejected": -2.631662130355835, |
|
"eval_logps/chosen": -492.14892578125, |
|
"eval_logps/rejected": -475.9957580566406, |
|
"eval_loss": 0.6131682991981506, |
|
"eval_rewards/accuracies": 0.6654999852180481, |
|
"eval_rewards/chosen": -0.959242045879364, |
|
"eval_rewards/margins": 0.28388017416000366, |
|
"eval_rewards/rejected": -1.2431222200393677, |
|
"eval_runtime": 347.6695, |
|
"eval_samples_per_second": 5.753, |
|
"eval_steps_per_second": 0.719, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 3.890625, |
|
"learning_rate": 2.2536783591932786e-06, |
|
"logits/chosen": -2.764758586883545, |
|
"logits/rejected": -2.708240032196045, |
|
"logps/chosen": -508.2550354003906, |
|
"logps/rejected": -496.92022705078125, |
|
"loss": 0.6149, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.9579612612724304, |
|
"rewards/margins": 0.28434932231903076, |
|
"rewards/rejected": -1.242310643196106, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 2.230955492793149e-06, |
|
"logits/chosen": -2.628192901611328, |
|
"logits/rejected": -2.6235828399658203, |
|
"logps/chosen": -510.7039489746094, |
|
"logps/rejected": -509.13592529296875, |
|
"loss": 0.6373, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.038236141204834, |
|
"rewards/margins": 0.2541654407978058, |
|
"rewards/rejected": -1.2924015522003174, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 3.65625, |
|
"learning_rate": 2.208255091531947e-06, |
|
"logits/chosen": -2.65356707572937, |
|
"logits/rejected": -2.639221668243408, |
|
"logps/chosen": -514.1550903320312, |
|
"logps/rejected": -486.2848205566406, |
|
"loss": 0.6169, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0424216985702515, |
|
"rewards/margins": 0.28543582558631897, |
|
"rewards/rejected": -1.3278576135635376, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.421875, |
|
"learning_rate": 2.1855790508866435e-06, |
|
"logits/chosen": -2.676987409591675, |
|
"logits/rejected": -2.6617724895477295, |
|
"logps/chosen": -531.5819091796875, |
|
"logps/rejected": -520.0554809570312, |
|
"loss": 0.6348, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.0140860080718994, |
|
"rewards/margins": 0.2539765238761902, |
|
"rewards/rejected": -1.2680623531341553, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 2.71875, |
|
"learning_rate": 2.162929264300107e-06, |
|
"logits/chosen": -2.7012898921966553, |
|
"logits/rejected": -2.684084415435791, |
|
"logps/chosen": -495.6434631347656, |
|
"logps/rejected": -480.5547790527344, |
|
"loss": 0.58, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9610779881477356, |
|
"rewards/margins": 0.3606341779232025, |
|
"rewards/rejected": -1.3217121362686157, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 4.25, |
|
"learning_rate": 2.1403076230230006e-06, |
|
"logits/chosen": -2.6837801933288574, |
|
"logits/rejected": -2.6432583332061768, |
|
"logps/chosen": -504.1858825683594, |
|
"logps/rejected": -483.0741271972656, |
|
"loss": 0.6411, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.0147348642349243, |
|
"rewards/margins": 0.23686587810516357, |
|
"rewards/rejected": -1.2516006231307983, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.78125, |
|
"learning_rate": 2.11771601595586e-06, |
|
"logits/chosen": -2.679379940032959, |
|
"logits/rejected": -2.6332995891571045, |
|
"logps/chosen": -515.0089721679688, |
|
"logps/rejected": -473.6858825683594, |
|
"loss": 0.6223, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.9973223805427551, |
|
"rewards/margins": 0.3110753893852234, |
|
"rewards/rejected": -1.3083977699279785, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 2.953125, |
|
"learning_rate": 2.0951563294913737e-06, |
|
"logits/chosen": -2.688920497894287, |
|
"logits/rejected": -2.629225492477417, |
|
"logps/chosen": -494.71026611328125, |
|
"logps/rejected": -466.7489318847656, |
|
"loss": 0.575, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9701055288314819, |
|
"rewards/margins": 0.36299929022789, |
|
"rewards/rejected": -1.3331048488616943, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 3.0, |
|
"learning_rate": 2.0726304473568693e-06, |
|
"logits/chosen": -2.679488182067871, |
|
"logits/rejected": -2.660877227783203, |
|
"logps/chosen": -480.1832580566406, |
|
"logps/rejected": -459.4652404785156, |
|
"loss": 0.5981, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9950377345085144, |
|
"rewards/margins": 0.2864169776439667, |
|
"rewards/rejected": -1.2814548015594482, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 3.828125, |
|
"learning_rate": 2.050140250457023e-06, |
|
"logits/chosen": -2.7407097816467285, |
|
"logits/rejected": -2.6703708171844482, |
|
"logps/chosen": -514.2717895507812, |
|
"logps/rejected": -493.68157958984375, |
|
"loss": 0.6093, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.067603588104248, |
|
"rewards/margins": 0.3232964277267456, |
|
"rewards/rejected": -1.390899896621704, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_logits/chosen": -2.661860704421997, |
|
"eval_logits/rejected": -2.6282894611358643, |
|
"eval_logps/chosen": -505.5737609863281, |
|
"eval_logps/rejected": -489.7906494140625, |
|
"eval_loss": 0.6137638092041016, |
|
"eval_rewards/accuracies": 0.6625000238418579, |
|
"eval_rewards/chosen": -1.0934895277023315, |
|
"eval_rewards/margins": 0.2875814139842987, |
|
"eval_rewards/rejected": -1.3810709714889526, |
|
"eval_runtime": 348.0995, |
|
"eval_samples_per_second": 5.745, |
|
"eval_steps_per_second": 0.718, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 3.4375, |
|
"learning_rate": 2.0276876167168042e-06, |
|
"logits/chosen": -2.611936092376709, |
|
"logits/rejected": -2.5979480743408203, |
|
"logps/chosen": -461.7267150878906, |
|
"logps/rejected": -432.57012939453125, |
|
"loss": 0.6371, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.189932107925415, |
|
"rewards/margins": 0.21743163466453552, |
|
"rewards/rejected": -1.4073638916015625, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 4.75, |
|
"learning_rate": 2.0052744209246682e-06, |
|
"logits/chosen": -2.705918788909912, |
|
"logits/rejected": -2.672658920288086, |
|
"logps/chosen": -493.9043884277344, |
|
"logps/rejected": -467.73699951171875, |
|
"loss": 0.629, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0233014822006226, |
|
"rewards/margins": 0.26420658826828003, |
|
"rewards/rejected": -1.2875080108642578, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 4.8125, |
|
"learning_rate": 1.9829025345760127e-06, |
|
"logits/chosen": -2.700793743133545, |
|
"logits/rejected": -2.6994223594665527, |
|
"logps/chosen": -514.3643798828125, |
|
"logps/rejected": -513.8361206054688, |
|
"loss": 0.6417, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.9839721918106079, |
|
"rewards/margins": 0.20725660026073456, |
|
"rewards/rejected": -1.1912287473678589, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 4.53125, |
|
"learning_rate": 1.9605738257169115e-06, |
|
"logits/chosen": -2.6701126098632812, |
|
"logits/rejected": -2.639960289001465, |
|
"logps/chosen": -459.0147399902344, |
|
"logps/rejected": -447.525634765625, |
|
"loss": 0.6503, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.0298200845718384, |
|
"rewards/margins": 0.19326387345790863, |
|
"rewards/rejected": -1.2230839729309082, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 4.34375, |
|
"learning_rate": 1.9382901587881275e-06, |
|
"logits/chosen": -2.7172811031341553, |
|
"logits/rejected": -2.704822063446045, |
|
"logps/chosen": -486.2060546875, |
|
"logps/rejected": -455.1549377441406, |
|
"loss": 0.6126, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.9620019197463989, |
|
"rewards/margins": 0.29844018816947937, |
|
"rewards/rejected": -1.2604421377182007, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 2.84375, |
|
"learning_rate": 1.916053394469437e-06, |
|
"logits/chosen": -2.7060694694519043, |
|
"logits/rejected": -2.650001049041748, |
|
"logps/chosen": -501.8260803222656, |
|
"logps/rejected": -487.3619079589844, |
|
"loss": 0.586, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9851962327957153, |
|
"rewards/margins": 0.33760061860084534, |
|
"rewards/rejected": -1.3227968215942383, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 3.125, |
|
"learning_rate": 1.8938653895242604e-06, |
|
"logits/chosen": -2.707420825958252, |
|
"logits/rejected": -2.6578211784362793, |
|
"logps/chosen": -503.98114013671875, |
|
"logps/rejected": -489.624755859375, |
|
"loss": 0.5859, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.100565791130066, |
|
"rewards/margins": 0.3875434398651123, |
|
"rewards/rejected": -1.4881092309951782, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 1.8717279966446267e-06, |
|
"logits/chosen": -2.6093525886535645, |
|
"logits/rejected": -2.603379726409912, |
|
"logps/chosen": -481.91998291015625, |
|
"logps/rejected": -481.68890380859375, |
|
"loss": 0.6485, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.1177568435668945, |
|
"rewards/margins": 0.2344493865966797, |
|
"rewards/rejected": -1.3522062301635742, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 3.234375, |
|
"learning_rate": 1.8496430642964698e-06, |
|
"logits/chosen": -2.685995101928711, |
|
"logits/rejected": -2.6452722549438477, |
|
"logps/chosen": -505.78961181640625, |
|
"logps/rejected": -489.805419921875, |
|
"loss": 0.6215, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0744038820266724, |
|
"rewards/margins": 0.29028916358947754, |
|
"rewards/rejected": -1.3646929264068604, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 3.828125, |
|
"learning_rate": 1.827612436565286e-06, |
|
"logits/chosen": -2.692858934402466, |
|
"logits/rejected": -2.651174306869507, |
|
"logps/chosen": -500.21099853515625, |
|
"logps/rejected": -488.8199157714844, |
|
"loss": 0.6009, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.0547789335250854, |
|
"rewards/margins": 0.330585777759552, |
|
"rewards/rejected": -1.3853647708892822, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -2.6432461738586426, |
|
"eval_logits/rejected": -2.608781337738037, |
|
"eval_logps/chosen": -501.4175109863281, |
|
"eval_logps/rejected": -486.4694519042969, |
|
"eval_loss": 0.6107898950576782, |
|
"eval_rewards/accuracies": 0.6610000133514404, |
|
"eval_rewards/chosen": -1.051926612854004, |
|
"eval_rewards/margins": 0.2959325611591339, |
|
"eval_rewards/rejected": -1.3478593826293945, |
|
"eval_runtime": 347.9473, |
|
"eval_samples_per_second": 5.748, |
|
"eval_steps_per_second": 0.718, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 4.21875, |
|
"learning_rate": 1.8056379530021492e-06, |
|
"logits/chosen": -2.7207000255584717, |
|
"logits/rejected": -2.6985878944396973, |
|
"logps/chosen": -473.2940979003906, |
|
"logps/rejected": -458.9827575683594, |
|
"loss": 0.6073, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0358033180236816, |
|
"rewards/margins": 0.3086177110671997, |
|
"rewards/rejected": -1.344421148300171, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 5.0, |
|
"learning_rate": 1.7837214484701154e-06, |
|
"logits/chosen": -2.704127311706543, |
|
"logits/rejected": -2.6785435676574707, |
|
"logps/chosen": -496.7994689941406, |
|
"logps/rejected": -474.5835876464844, |
|
"loss": 0.5987, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.0449730157852173, |
|
"rewards/margins": 0.3369174897670746, |
|
"rewards/rejected": -1.3818905353546143, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 5.1875, |
|
"learning_rate": 1.7618647529910043e-06, |
|
"logits/chosen": -2.6866023540496826, |
|
"logits/rejected": -2.659691095352173, |
|
"logps/chosen": -503.9330139160156, |
|
"logps/rejected": -494.0077209472656, |
|
"loss": 0.6131, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.1332998275756836, |
|
"rewards/margins": 0.2737593650817871, |
|
"rewards/rejected": -1.4070589542388916, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 3.390625, |
|
"learning_rate": 1.7400696915925996e-06, |
|
"logits/chosen": -2.696007251739502, |
|
"logits/rejected": -2.6478171348571777, |
|
"logps/chosen": -518.2098999023438, |
|
"logps/rejected": -465.0953063964844, |
|
"loss": 0.6265, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.170287013053894, |
|
"rewards/margins": 0.31294775009155273, |
|
"rewards/rejected": -1.4832347631454468, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 4.625, |
|
"learning_rate": 1.718338084156254e-06, |
|
"logits/chosen": -2.61708927154541, |
|
"logits/rejected": -2.5760369300842285, |
|
"logps/chosen": -529.7711181640625, |
|
"logps/rejected": -493.506591796875, |
|
"loss": 0.6125, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.097019076347351, |
|
"rewards/margins": 0.28592607378959656, |
|
"rewards/rejected": -1.3829452991485596, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 2.765625, |
|
"learning_rate": 1.6966717452649372e-06, |
|
"logits/chosen": -2.737034320831299, |
|
"logits/rejected": -2.706390857696533, |
|
"logps/chosen": -520.4344482421875, |
|
"logps/rejected": -464.23492431640625, |
|
"loss": 0.5894, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.011051058769226, |
|
"rewards/margins": 0.3496933877468109, |
|
"rewards/rejected": -1.360744595527649, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 3.25, |
|
"learning_rate": 1.6750724840517103e-06, |
|
"logits/chosen": -2.6961193084716797, |
|
"logits/rejected": -2.677546501159668, |
|
"logps/chosen": -491.8929748535156, |
|
"logps/rejected": -502.51959228515625, |
|
"loss": 0.6301, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.006574273109436, |
|
"rewards/margins": 0.23815563321113586, |
|
"rewards/rejected": -1.244729995727539, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 4.75, |
|
"learning_rate": 1.6535421040486686e-06, |
|
"logits/chosen": -2.604997158050537, |
|
"logits/rejected": -2.5811421871185303, |
|
"logps/chosen": -477.0950622558594, |
|
"logps/rejected": -455.02410888671875, |
|
"loss": 0.5967, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0661613941192627, |
|
"rewards/margins": 0.30554550886154175, |
|
"rewards/rejected": -1.3717070817947388, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 3.234375, |
|
"learning_rate": 1.6320824030363458e-06, |
|
"logits/chosen": -2.6581666469573975, |
|
"logits/rejected": -2.642603874206543, |
|
"logps/chosen": -471.28521728515625, |
|
"logps/rejected": -457.45794677734375, |
|
"loss": 0.6281, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.046363115310669, |
|
"rewards/margins": 0.2672274708747864, |
|
"rewards/rejected": -1.3135906457901, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 4.125, |
|
"learning_rate": 1.6106951728936028e-06, |
|
"logits/chosen": -2.718479633331299, |
|
"logits/rejected": -2.663071870803833, |
|
"logps/chosen": -489.6497497558594, |
|
"logps/rejected": -492.8302307128906, |
|
"loss": 0.5988, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0412070751190186, |
|
"rewards/margins": 0.3211270868778229, |
|
"rewards/rejected": -1.3623343706130981, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_logits/chosen": -2.647676706314087, |
|
"eval_logits/rejected": -2.6143462657928467, |
|
"eval_logps/chosen": -500.49822998046875, |
|
"eval_logps/rejected": -485.87298583984375, |
|
"eval_loss": 0.6108289957046509, |
|
"eval_rewards/accuracies": 0.6589999794960022, |
|
"eval_rewards/chosen": -1.0427342653274536, |
|
"eval_rewards/margins": 0.299160897731781, |
|
"eval_rewards/rejected": -1.3418951034545898, |
|
"eval_runtime": 347.8052, |
|
"eval_samples_per_second": 5.75, |
|
"eval_steps_per_second": 0.719, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 3.59375, |
|
"learning_rate": 1.5893821994479996e-06, |
|
"logits/chosen": -2.711664915084839, |
|
"logits/rejected": -2.6842312812805176, |
|
"logps/chosen": -523.2516479492188, |
|
"logps/rejected": -486.0882873535156, |
|
"loss": 0.6146, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.0051112174987793, |
|
"rewards/margins": 0.2787768244743347, |
|
"rewards/rejected": -1.2838881015777588, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 3.03125, |
|
"learning_rate": 1.5681452623266868e-06, |
|
"logits/chosen": -2.6525261402130127, |
|
"logits/rejected": -2.5945048332214355, |
|
"logps/chosen": -518.8192138671875, |
|
"logps/rejected": -473.05615234375, |
|
"loss": 0.551, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9474281072616577, |
|
"rewards/margins": 0.451382577419281, |
|
"rewards/rejected": -1.398810625076294, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 5.03125, |
|
"learning_rate": 1.5469861348078014e-06, |
|
"logits/chosen": -2.678679943084717, |
|
"logits/rejected": -2.631782054901123, |
|
"logps/chosen": -477.3982849121094, |
|
"logps/rejected": -477.5099182128906, |
|
"loss": 0.6084, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0620421171188354, |
|
"rewards/margins": 0.28218984603881836, |
|
"rewards/rejected": -1.3442319631576538, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 3.265625, |
|
"learning_rate": 1.5259065836724035e-06, |
|
"logits/chosen": -2.6359200477600098, |
|
"logits/rejected": -2.611725330352783, |
|
"logps/chosen": -475.1856994628906, |
|
"logps/rejected": -478.42254638671875, |
|
"loss": 0.6265, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.0813992023468018, |
|
"rewards/margins": 0.27633678913116455, |
|
"rewards/rejected": -1.3577358722686768, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 5.1875, |
|
"learning_rate": 1.5049083690569456e-06, |
|
"logits/chosen": -2.6500914096832275, |
|
"logits/rejected": -2.6187453269958496, |
|
"logps/chosen": -469.1063537597656, |
|
"logps/rejected": -479.3811950683594, |
|
"loss": 0.6122, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0489628314971924, |
|
"rewards/margins": 0.3200768828392029, |
|
"rewards/rejected": -1.36903977394104, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 4.46875, |
|
"learning_rate": 1.4839932443063057e-06, |
|
"logits/chosen": -2.6609740257263184, |
|
"logits/rejected": -2.613036632537842, |
|
"logps/chosen": -528.0738525390625, |
|
"logps/rejected": -479.12847900390625, |
|
"loss": 0.5824, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0216680765151978, |
|
"rewards/margins": 0.3447554111480713, |
|
"rewards/rejected": -1.3664233684539795, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 3.875, |
|
"learning_rate": 1.4631629558273803e-06, |
|
"logits/chosen": -2.6330864429473877, |
|
"logits/rejected": -2.6078662872314453, |
|
"logps/chosen": -480.5138244628906, |
|
"logps/rejected": -466.89813232421875, |
|
"loss": 0.6461, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.1947067975997925, |
|
"rewards/margins": 0.2124728262424469, |
|
"rewards/rejected": -1.4071797132492065, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 3.546875, |
|
"learning_rate": 1.4424192429432657e-06, |
|
"logits/chosen": -2.6624321937561035, |
|
"logits/rejected": -2.634147882461548, |
|
"logps/chosen": -490.93731689453125, |
|
"logps/rejected": -508.0350646972656, |
|
"loss": 0.6106, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.0125614404678345, |
|
"rewards/margins": 0.31729286909103394, |
|
"rewards/rejected": -1.3298542499542236, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 4.25, |
|
"learning_rate": 1.421763837748016e-06, |
|
"logits/chosen": -2.6459295749664307, |
|
"logits/rejected": -2.632132053375244, |
|
"logps/chosen": -476.1249084472656, |
|
"logps/rejected": -480.6978454589844, |
|
"loss": 0.601, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0420411825180054, |
|
"rewards/margins": 0.32264775037765503, |
|
"rewards/rejected": -1.3646891117095947, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 4.1875, |
|
"learning_rate": 1.401198464962021e-06, |
|
"logits/chosen": -2.6568338871002197, |
|
"logits/rejected": -2.613276481628418, |
|
"logps/chosen": -504.14825439453125, |
|
"logps/rejected": -478.09832763671875, |
|
"loss": 0.606, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0711807012557983, |
|
"rewards/margins": 0.307957261800766, |
|
"rewards/rejected": -1.3791382312774658, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": -2.6304023265838623, |
|
"eval_logits/rejected": -2.5973665714263916, |
|
"eval_logps/chosen": -498.1077575683594, |
|
"eval_logps/rejected": -483.6012878417969, |
|
"eval_loss": 0.611174464225769, |
|
"eval_rewards/accuracies": 0.6545000076293945, |
|
"eval_rewards/chosen": -1.0188294649124146, |
|
"eval_rewards/margins": 0.30034834146499634, |
|
"eval_rewards/rejected": -1.3191777467727661, |
|
"eval_runtime": 347.7844, |
|
"eval_samples_per_second": 5.751, |
|
"eval_steps_per_second": 0.719, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 2.921875, |
|
"learning_rate": 1.3807248417879896e-06, |
|
"logits/chosen": -2.691493034362793, |
|
"logits/rejected": -2.6732335090637207, |
|
"logps/chosen": -509.51202392578125, |
|
"logps/rejected": -488.71826171875, |
|
"loss": 0.5973, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9540036916732788, |
|
"rewards/margins": 0.33757534623146057, |
|
"rewards/rejected": -1.2915791273117065, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 4.8125, |
|
"learning_rate": 1.3603446777675665e-06, |
|
"logits/chosen": -2.6027607917785645, |
|
"logits/rejected": -2.576066255569458, |
|
"logps/chosen": -490.2715759277344, |
|
"logps/rejected": -477.48992919921875, |
|
"loss": 0.6103, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.0267969369888306, |
|
"rewards/margins": 0.3205047845840454, |
|
"rewards/rejected": -1.347301721572876, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 3.15625, |
|
"learning_rate": 1.3400596746385817e-06, |
|
"logits/chosen": -2.6897635459899902, |
|
"logits/rejected": -2.6353797912597656, |
|
"logps/chosen": -514.4585571289062, |
|
"logps/rejected": -483.48565673828125, |
|
"loss": 0.6332, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.041078805923462, |
|
"rewards/margins": 0.2664690613746643, |
|
"rewards/rejected": -1.3075478076934814, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 3.8125, |
|
"learning_rate": 1.3198715261929587e-06, |
|
"logits/chosen": -2.7139453887939453, |
|
"logits/rejected": -2.6701016426086426, |
|
"logps/chosen": -471.3026428222656, |
|
"logps/rejected": -466.50689697265625, |
|
"loss": 0.5969, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.0769729614257812, |
|
"rewards/margins": 0.31179046630859375, |
|
"rewards/rejected": -1.3887633085250854, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 1.2997819181352823e-06, |
|
"logits/chosen": -2.697122573852539, |
|
"logits/rejected": -2.6447901725769043, |
|
"logps/chosen": -536.4827270507812, |
|
"logps/rejected": -519.1363525390625, |
|
"loss": 0.6039, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.9090393781661987, |
|
"rewards/margins": 0.34489864110946655, |
|
"rewards/rejected": -1.25393807888031, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 6.28125, |
|
"learning_rate": 1.2797925279420454e-06, |
|
"logits/chosen": -2.68558931350708, |
|
"logits/rejected": -2.6322624683380127, |
|
"logps/chosen": -508.85711669921875, |
|
"logps/rejected": -504.25762939453125, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9868243932723999, |
|
"rewards/margins": 0.371315598487854, |
|
"rewards/rejected": -1.358140230178833, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 4.5625, |
|
"learning_rate": 1.2599050247215764e-06, |
|
"logits/chosen": -2.6563174724578857, |
|
"logits/rejected": -2.6325182914733887, |
|
"logps/chosen": -482.51422119140625, |
|
"logps/rejected": -471.16632080078125, |
|
"loss": 0.5774, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.011075496673584, |
|
"rewards/margins": 0.36485710740089417, |
|
"rewards/rejected": -1.3759326934814453, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 3.8125, |
|
"learning_rate": 1.2401210690746705e-06, |
|
"logits/chosen": -2.649975299835205, |
|
"logits/rejected": -2.6116244792938232, |
|
"logps/chosen": -506.97186279296875, |
|
"logps/rejected": -472.00714111328125, |
|
"loss": 0.6322, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.0412980318069458, |
|
"rewards/margins": 0.2263387143611908, |
|
"rewards/rejected": -1.2676366567611694, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 1.2204423129559306e-06, |
|
"logits/chosen": -2.704833984375, |
|
"logits/rejected": -2.6896462440490723, |
|
"logps/chosen": -495.80877685546875, |
|
"logps/rejected": -516.6968994140625, |
|
"loss": 0.5828, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.0221621990203857, |
|
"rewards/margins": 0.38270777463912964, |
|
"rewards/rejected": -1.404869794845581, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 5.09375, |
|
"learning_rate": 1.20087039953583e-06, |
|
"logits/chosen": -2.675306558609009, |
|
"logits/rejected": -2.650527000427246, |
|
"logps/chosen": -492.5806579589844, |
|
"logps/rejected": -478.58258056640625, |
|
"loss": 0.6118, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0126721858978271, |
|
"rewards/margins": 0.3190918564796448, |
|
"rewards/rejected": -1.3317642211914062, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_logits/chosen": -2.627389669418335, |
|
"eval_logits/rejected": -2.594527244567871, |
|
"eval_logps/chosen": -504.3044738769531, |
|
"eval_logps/rejected": -490.2562255859375, |
|
"eval_loss": 0.6105741262435913, |
|
"eval_rewards/accuracies": 0.659500002861023, |
|
"eval_rewards/chosen": -1.0807968378067017, |
|
"eval_rewards/margins": 0.3049302399158478, |
|
"eval_rewards/rejected": -1.385727047920227, |
|
"eval_runtime": 347.7409, |
|
"eval_samples_per_second": 5.751, |
|
"eval_steps_per_second": 0.719, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 3.90625, |
|
"learning_rate": 1.181406963063507e-06, |
|
"logits/chosen": -2.6303482055664062, |
|
"logits/rejected": -2.6323742866516113, |
|
"logps/chosen": -503.49786376953125, |
|
"logps/rejected": -513.8046875, |
|
"loss": 0.6299, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.0185651779174805, |
|
"rewards/margins": 0.29084575176239014, |
|
"rewards/rejected": -1.3094110488891602, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 7.09375, |
|
"learning_rate": 1.1620536287303052e-06, |
|
"logits/chosen": -2.663734197616577, |
|
"logits/rejected": -2.6387317180633545, |
|
"logps/chosen": -552.3123779296875, |
|
"logps/rejected": -516.0528564453125, |
|
"loss": 0.6352, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.1312639713287354, |
|
"rewards/margins": 0.2474222630262375, |
|
"rewards/rejected": -1.3786863088607788, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 3.53125, |
|
"learning_rate": 1.1428120125340717e-06, |
|
"logits/chosen": -2.66949725151062, |
|
"logits/rejected": -2.630190849304199, |
|
"logps/chosen": -495.2144470214844, |
|
"logps/rejected": -458.618408203125, |
|
"loss": 0.5847, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1124293804168701, |
|
"rewards/margins": 0.3431801199913025, |
|
"rewards/rejected": -1.4556094408035278, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 4.75, |
|
"learning_rate": 1.123683721144223e-06, |
|
"logits/chosen": -2.6718780994415283, |
|
"logits/rejected": -2.6413564682006836, |
|
"logps/chosen": -537.8629150390625, |
|
"logps/rejected": -509.77056884765625, |
|
"loss": 0.631, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1508649587631226, |
|
"rewards/margins": 0.2626705765724182, |
|
"rewards/rejected": -1.4135355949401855, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 4.40625, |
|
"learning_rate": 1.1046703517675848e-06, |
|
"logits/chosen": -2.6759867668151855, |
|
"logits/rejected": -2.66206955909729, |
|
"logps/chosen": -496.76458740234375, |
|
"logps/rejected": -523.6871948242188, |
|
"loss": 0.6281, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.131340503692627, |
|
"rewards/margins": 0.26272568106651306, |
|
"rewards/rejected": -1.3940664529800415, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 5.03125, |
|
"learning_rate": 1.085773492015028e-06, |
|
"logits/chosen": -2.6411399841308594, |
|
"logits/rejected": -2.598507881164551, |
|
"logps/chosen": -491.46722412109375, |
|
"logps/rejected": -468.1087951660156, |
|
"loss": 0.5897, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.1277272701263428, |
|
"rewards/margins": 0.36656227707862854, |
|
"rewards/rejected": -1.494289517402649, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 4.96875, |
|
"learning_rate": 1.0669947197689034e-06, |
|
"logits/chosen": -2.6684410572052, |
|
"logits/rejected": -2.619168996810913, |
|
"logps/chosen": -516.2086181640625, |
|
"logps/rejected": -494.1477966308594, |
|
"loss": 0.608, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.11739182472229, |
|
"rewards/margins": 0.31762081384658813, |
|
"rewards/rejected": -1.4350125789642334, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 3.765625, |
|
"learning_rate": 1.048335603051291e-06, |
|
"logits/chosen": -2.632700204849243, |
|
"logits/rejected": -2.6073365211486816, |
|
"logps/chosen": -529.2457885742188, |
|
"logps/rejected": -526.523681640625, |
|
"loss": 0.5543, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0624479055404663, |
|
"rewards/margins": 0.49154725670814514, |
|
"rewards/rejected": -1.5539953708648682, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 4.46875, |
|
"learning_rate": 1.0297976998930665e-06, |
|
"logits/chosen": -2.6516880989074707, |
|
"logits/rejected": -2.6374242305755615, |
|
"logps/chosen": -508.30126953125, |
|
"logps/rejected": -489.03680419921875, |
|
"loss": 0.5965, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.1453887224197388, |
|
"rewards/margins": 0.35155534744262695, |
|
"rewards/rejected": -1.4969440698623657, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 4.0, |
|
"learning_rate": 1.0113825582038078e-06, |
|
"logits/chosen": -2.6636240482330322, |
|
"logits/rejected": -2.643951654434204, |
|
"logps/chosen": -511.94073486328125, |
|
"logps/rejected": -502.1498107910156, |
|
"loss": 0.6134, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1842275857925415, |
|
"rewards/margins": 0.28109192848205566, |
|
"rewards/rejected": -1.4653196334838867, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": -2.6303441524505615, |
|
"eval_logits/rejected": -2.5978312492370605, |
|
"eval_logps/chosen": -511.7178955078125, |
|
"eval_logps/rejected": -498.03662109375, |
|
"eval_loss": 0.6096385717391968, |
|
"eval_rewards/accuracies": 0.6585000157356262, |
|
"eval_rewards/chosen": -1.1549309492111206, |
|
"eval_rewards/margins": 0.3085997700691223, |
|
"eval_rewards/rejected": -1.4635308980941772, |
|
"eval_runtime": 347.7207, |
|
"eval_samples_per_second": 5.752, |
|
"eval_steps_per_second": 0.719, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 3.15625, |
|
"learning_rate": 9.930917156425477e-07, |
|
"logits/chosen": -2.677860736846924, |
|
"logits/rejected": -2.6445260047912598, |
|
"logps/chosen": -506.71746826171875, |
|
"logps/rejected": -513.6080932617188, |
|
"loss": 0.6076, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.1613363027572632, |
|
"rewards/margins": 0.32913991808891296, |
|
"rewards/rejected": -1.490476369857788, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 3.796875, |
|
"learning_rate": 9.749266994893756e-07, |
|
"logits/chosen": -2.6382384300231934, |
|
"logits/rejected": -2.5947837829589844, |
|
"logps/chosen": -486.73236083984375, |
|
"logps/rejected": -469.90087890625, |
|
"loss": 0.6641, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.2020397186279297, |
|
"rewards/margins": 0.1584251970052719, |
|
"rewards/rejected": -1.3604648113250732, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 4.3125, |
|
"learning_rate": 9.56889026517913e-07, |
|
"logits/chosen": -2.6399216651916504, |
|
"logits/rejected": -2.635921001434326, |
|
"logps/chosen": -508.64031982421875, |
|
"logps/rejected": -471.86297607421875, |
|
"loss": 0.6411, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1394246816635132, |
|
"rewards/margins": 0.22725781798362732, |
|
"rewards/rejected": -1.3666824102401733, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 5.15625, |
|
"learning_rate": 9.389802028686617e-07, |
|
"logits/chosen": -2.6898536682128906, |
|
"logits/rejected": -2.655017137527466, |
|
"logps/chosen": -504.71441650390625, |
|
"logps/rejected": -477.46258544921875, |
|
"loss": 0.6408, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.1481832265853882, |
|
"rewards/margins": 0.22253009676933289, |
|
"rewards/rejected": -1.370713472366333, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 4.125, |
|
"learning_rate": 9.212017239232427e-07, |
|
"logits/chosen": -2.66972017288208, |
|
"logits/rejected": -2.6421947479248047, |
|
"logps/chosen": -510.1853942871094, |
|
"logps/rejected": -497.7322692871094, |
|
"loss": 0.5764, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0694119930267334, |
|
"rewards/margins": 0.37935250997543335, |
|
"rewards/rejected": -1.448764681816101, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 3.171875, |
|
"learning_rate": 9.03555074179533e-07, |
|
"logits/chosen": -2.651383638381958, |
|
"logits/rejected": -2.6579291820526123, |
|
"logps/chosen": -488.3523864746094, |
|
"logps/rejected": -507.35272216796875, |
|
"loss": 0.5909, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9926462173461914, |
|
"rewards/margins": 0.34531423449516296, |
|
"rewards/rejected": -1.3379603624343872, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 4.25, |
|
"learning_rate": 8.860417271277067e-07, |
|
"logits/chosen": -2.722695827484131, |
|
"logits/rejected": -2.719184398651123, |
|
"logps/chosen": -504.15966796875, |
|
"logps/rejected": -503.24078369140625, |
|
"loss": 0.6191, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.9827505350112915, |
|
"rewards/margins": 0.25871509313583374, |
|
"rewards/rejected": -1.2414658069610596, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 6.25, |
|
"learning_rate": 8.686631451272029e-07, |
|
"logits/chosen": -2.6997554302215576, |
|
"logits/rejected": -2.6661736965179443, |
|
"logps/chosen": -492.560791015625, |
|
"logps/rejected": -473.57403564453125, |
|
"loss": 0.6363, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.149613618850708, |
|
"rewards/margins": 0.23957280814647675, |
|
"rewards/rejected": -1.3891866207122803, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 4.40625, |
|
"learning_rate": 8.514207792846168e-07, |
|
"logits/chosen": -2.7065916061401367, |
|
"logits/rejected": -2.6797971725463867, |
|
"logps/chosen": -494.64764404296875, |
|
"logps/rejected": -473.16558837890625, |
|
"loss": 0.6074, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1044108867645264, |
|
"rewards/margins": 0.31540459394454956, |
|
"rewards/rejected": -1.4198153018951416, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 3.828125, |
|
"learning_rate": 8.343160693325356e-07, |
|
"logits/chosen": -2.6506381034851074, |
|
"logits/rejected": -2.6390433311462402, |
|
"logps/chosen": -493.5978088378906, |
|
"logps/rejected": -498.072509765625, |
|
"loss": 0.6159, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.0802371501922607, |
|
"rewards/margins": 0.29849973320961, |
|
"rewards/rejected": -1.378736972808838, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_logits/chosen": -2.649965763092041, |
|
"eval_logits/rejected": -2.6174795627593994, |
|
"eval_logps/chosen": -501.72564697265625, |
|
"eval_logps/rejected": -486.77386474609375, |
|
"eval_loss": 0.6097070574760437, |
|
"eval_rewards/accuracies": 0.6585000157356262, |
|
"eval_rewards/chosen": -1.0550086498260498, |
|
"eval_rewards/margins": 0.2958948612213135, |
|
"eval_rewards/rejected": -1.3509035110473633, |
|
"eval_runtime": 348.4416, |
|
"eval_samples_per_second": 5.74, |
|
"eval_steps_per_second": 0.717, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 4.0, |
|
"learning_rate": 8.173504435093174e-07, |
|
"logits/chosen": -2.640998363494873, |
|
"logits/rejected": -2.595895290374756, |
|
"logps/chosen": -470.3514709472656, |
|
"logps/rejected": -444.5486755371094, |
|
"loss": 0.5996, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.065406084060669, |
|
"rewards/margins": 0.3272332549095154, |
|
"rewards/rejected": -1.3926395177841187, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 3.96875, |
|
"learning_rate": 8.00525318439836e-07, |
|
"logits/chosen": -2.6588757038116455, |
|
"logits/rejected": -2.636864423751831, |
|
"logps/chosen": -520.3292846679688, |
|
"logps/rejected": -518.36328125, |
|
"loss": 0.6253, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0216096639633179, |
|
"rewards/margins": 0.25970107316970825, |
|
"rewards/rejected": -1.281310796737671, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 3.484375, |
|
"learning_rate": 7.838420990171927e-07, |
|
"logits/chosen": -2.7186899185180664, |
|
"logits/rejected": -2.671178102493286, |
|
"logps/chosen": -507.38653564453125, |
|
"logps/rejected": -495.093994140625, |
|
"loss": 0.6065, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.029103398323059, |
|
"rewards/margins": 0.2967410683631897, |
|
"rewards/rejected": -1.325844407081604, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 3.5625, |
|
"learning_rate": 7.673021782854084e-07, |
|
"logits/chosen": -2.6119322776794434, |
|
"logits/rejected": -2.5850017070770264, |
|
"logps/chosen": -498.72705078125, |
|
"logps/rejected": -450.4840393066406, |
|
"loss": 0.6209, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.052886724472046, |
|
"rewards/margins": 0.27135077118873596, |
|
"rewards/rejected": -1.32423734664917, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 4.03125, |
|
"learning_rate": 7.509069373231039e-07, |
|
"logits/chosen": -2.6534907817840576, |
|
"logits/rejected": -2.6120922565460205, |
|
"logps/chosen": -490.2318420410156, |
|
"logps/rejected": -474.75811767578125, |
|
"loss": 0.5999, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.0758155584335327, |
|
"rewards/margins": 0.31523874402046204, |
|
"rewards/rejected": -1.391054391860962, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 3.75, |
|
"learning_rate": 7.346577451281822e-07, |
|
"logits/chosen": -2.648266315460205, |
|
"logits/rejected": -2.6395068168640137, |
|
"logps/chosen": -505.58984375, |
|
"logps/rejected": -491.96356201171875, |
|
"loss": 0.595, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0662881135940552, |
|
"rewards/margins": 0.33815911412239075, |
|
"rewards/rejected": -1.404447317123413, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 5.03125, |
|
"learning_rate": 7.185559585035138e-07, |
|
"logits/chosen": -2.666665554046631, |
|
"logits/rejected": -2.6183128356933594, |
|
"logps/chosen": -520.7285766601562, |
|
"logps/rejected": -519.2037963867188, |
|
"loss": 0.5844, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.031095266342163, |
|
"rewards/margins": 0.3654022812843323, |
|
"rewards/rejected": -1.3964974880218506, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 3.90625, |
|
"learning_rate": 7.026029219436504e-07, |
|
"logits/chosen": -2.6764984130859375, |
|
"logits/rejected": -2.6465909481048584, |
|
"logps/chosen": -487.6134338378906, |
|
"logps/rejected": -479.92742919921875, |
|
"loss": 0.6011, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.068189263343811, |
|
"rewards/margins": 0.2995051443576813, |
|
"rewards/rejected": -1.36769437789917, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 6.867999675225523e-07, |
|
"logits/chosen": -2.706010580062866, |
|
"logits/rejected": -2.6768994331359863, |
|
"logps/chosen": -467.7933044433594, |
|
"logps/rejected": -459.462890625, |
|
"loss": 0.6033, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.1356656551361084, |
|
"rewards/margins": 0.3141850531101227, |
|
"rewards/rejected": -1.4498507976531982, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 6.711484147823663e-07, |
|
"logits/chosen": -2.645399570465088, |
|
"logits/rejected": -2.6416878700256348, |
|
"logps/chosen": -460.01348876953125, |
|
"logps/rejected": -490.2184143066406, |
|
"loss": 0.5815, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0389206409454346, |
|
"rewards/margins": 0.35546866059303284, |
|
"rewards/rejected": -1.3943893909454346, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": -2.6419765949249268, |
|
"eval_logits/rejected": -2.608949661254883, |
|
"eval_logps/chosen": -506.4727478027344, |
|
"eval_logps/rejected": -492.1650085449219, |
|
"eval_loss": 0.6090958714485168, |
|
"eval_rewards/accuracies": 0.6570000052452087, |
|
"eval_rewards/chosen": -1.1024789810180664, |
|
"eval_rewards/margins": 0.3023359179496765, |
|
"eval_rewards/rejected": -1.4048149585723877, |
|
"eval_runtime": 347.5981, |
|
"eval_samples_per_second": 5.754, |
|
"eval_steps_per_second": 0.719, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 3.15625, |
|
"learning_rate": 6.556495706232413e-07, |
|
"logits/chosen": -2.6427597999572754, |
|
"logits/rejected": -2.6378586292266846, |
|
"logps/chosen": -508.255615234375, |
|
"logps/rejected": -498.01300048828125, |
|
"loss": 0.6168, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.076911211013794, |
|
"rewards/margins": 0.3045389652252197, |
|
"rewards/rejected": -1.3814500570297241, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 3.8125, |
|
"learning_rate": 6.403047291942057e-07, |
|
"logits/chosen": -2.635432720184326, |
|
"logits/rejected": -2.580549716949463, |
|
"logps/chosen": -463.28411865234375, |
|
"logps/rejected": -442.45684814453125, |
|
"loss": 0.5944, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.119048833847046, |
|
"rewards/margins": 0.3251452147960663, |
|
"rewards/rejected": -1.444193959236145, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 4.40625, |
|
"learning_rate": 6.251151717851023e-07, |
|
"logits/chosen": -2.6761887073516846, |
|
"logits/rejected": -2.6448588371276855, |
|
"logps/chosen": -470.81353759765625, |
|
"logps/rejected": -462.959228515625, |
|
"loss": 0.6316, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.1331965923309326, |
|
"rewards/margins": 0.2631588578224182, |
|
"rewards/rejected": -1.3963555097579956, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 3.4375, |
|
"learning_rate": 6.100821667196041e-07, |
|
"logits/chosen": -2.747220754623413, |
|
"logits/rejected": -2.6560819149017334, |
|
"logps/chosen": -522.0474853515625, |
|
"logps/rejected": -451.25836181640625, |
|
"loss": 0.5939, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.0959001779556274, |
|
"rewards/margins": 0.3455473482608795, |
|
"rewards/rejected": -1.441447377204895, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 3.40625, |
|
"learning_rate": 5.952069692493062e-07, |
|
"logits/chosen": -2.619112730026245, |
|
"logits/rejected": -2.6229631900787354, |
|
"logps/chosen": -463.21539306640625, |
|
"logps/rejected": -479.87957763671875, |
|
"loss": 0.5902, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0741875171661377, |
|
"rewards/margins": 0.34632328152656555, |
|
"rewards/rejected": -1.4205108880996704, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 4.5625, |
|
"learning_rate": 5.80490821448918e-07, |
|
"logits/chosen": -2.6366074085235596, |
|
"logits/rejected": -2.636824131011963, |
|
"logps/chosen": -507.3749084472656, |
|
"logps/rejected": -560.8580322265625, |
|
"loss": 0.5986, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.0962883234024048, |
|
"rewards/margins": 0.34130847454071045, |
|
"rewards/rejected": -1.4375969171524048, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 4.84375, |
|
"learning_rate": 5.659349521125459e-07, |
|
"logits/chosen": -2.7327983379364014, |
|
"logits/rejected": -2.7102444171905518, |
|
"logps/chosen": -523.8292846679688, |
|
"logps/rejected": -506.67742919921875, |
|
"loss": 0.6271, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0699433088302612, |
|
"rewards/margins": 0.2572742998600006, |
|
"rewards/rejected": -1.3272178173065186, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 3.625, |
|
"learning_rate": 5.5154057665109e-07, |
|
"logits/chosen": -2.7022974491119385, |
|
"logits/rejected": -2.6688647270202637, |
|
"logps/chosen": -505.4044494628906, |
|
"logps/rejected": -490.70867919921875, |
|
"loss": 0.5871, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.1016364097595215, |
|
"rewards/margins": 0.34242209792137146, |
|
"rewards/rejected": -1.4440586566925049, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 2.828125, |
|
"learning_rate": 5.373088969907586e-07, |
|
"logits/chosen": -2.707888126373291, |
|
"logits/rejected": -2.647016763687134, |
|
"logps/chosen": -517.645751953125, |
|
"logps/rejected": -478.8772888183594, |
|
"loss": 0.5976, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.051059603691101, |
|
"rewards/margins": 0.3384644389152527, |
|
"rewards/rejected": -1.389524221420288, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 3.234375, |
|
"learning_rate": 5.23241101472709e-07, |
|
"logits/chosen": -2.671968936920166, |
|
"logits/rejected": -2.647467851638794, |
|
"logps/chosen": -513.36669921875, |
|
"logps/rejected": -501.7236328125, |
|
"loss": 0.5885, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9971181154251099, |
|
"rewards/margins": 0.36711040139198303, |
|
"rewards/rejected": -1.3642284870147705, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_logits/chosen": -2.633734703063965, |
|
"eval_logits/rejected": -2.600095272064209, |
|
"eval_logps/chosen": -505.9959716796875, |
|
"eval_logps/rejected": -491.744384765625, |
|
"eval_loss": 0.6088695526123047, |
|
"eval_rewards/accuracies": 0.659500002861023, |
|
"eval_rewards/chosen": -1.0977121591567993, |
|
"eval_rewards/margins": 0.3028964698314667, |
|
"eval_rewards/rejected": -1.4006085395812988, |
|
"eval_runtime": 347.6519, |
|
"eval_samples_per_second": 5.753, |
|
"eval_steps_per_second": 0.719, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 4.0625, |
|
"learning_rate": 5.09338364753818e-07, |
|
"logits/chosen": -2.7147414684295654, |
|
"logits/rejected": -2.6615347862243652, |
|
"logps/chosen": -522.6924438476562, |
|
"logps/rejected": -512.82373046875, |
|
"loss": 0.6231, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9803631901741028, |
|
"rewards/margins": 0.28811120986938477, |
|
"rewards/rejected": -1.2684743404388428, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 3.828125, |
|
"learning_rate": 4.956018477086005e-07, |
|
"logits/chosen": -2.671032190322876, |
|
"logits/rejected": -2.633485794067383, |
|
"logps/chosen": -516.436767578125, |
|
"logps/rejected": -491.43963623046875, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.0946416854858398, |
|
"rewards/margins": 0.33089718222618103, |
|
"rewards/rejected": -1.4255390167236328, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 3.234375, |
|
"learning_rate": 4.820326973322764e-07, |
|
"logits/chosen": -2.63783860206604, |
|
"logits/rejected": -2.608571767807007, |
|
"logps/chosen": -481.3115234375, |
|
"logps/rejected": -481.80499267578125, |
|
"loss": 0.619, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.1024813652038574, |
|
"rewards/margins": 0.2848733067512512, |
|
"rewards/rejected": -1.3873546123504639, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 3.75, |
|
"learning_rate": 4.686320466449981e-07, |
|
"logits/chosen": -2.6718032360076904, |
|
"logits/rejected": -2.6056950092315674, |
|
"logps/chosen": -484.45489501953125, |
|
"logps/rejected": -480.09619140625, |
|
"loss": 0.6237, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.1476237773895264, |
|
"rewards/margins": 0.28761929273605347, |
|
"rewards/rejected": -1.4352428913116455, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 3.53125, |
|
"learning_rate": 4.554010145972418e-07, |
|
"logits/chosen": -2.72918438911438, |
|
"logits/rejected": -2.6762828826904297, |
|
"logps/chosen": -508.759521484375, |
|
"logps/rejected": -497.8362731933594, |
|
"loss": 0.6242, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.1187124252319336, |
|
"rewards/margins": 0.27222010493278503, |
|
"rewards/rejected": -1.3909324407577515, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 3.4375, |
|
"learning_rate": 4.4234070597637455e-07, |
|
"logits/chosen": -2.6751275062561035, |
|
"logits/rejected": -2.640679121017456, |
|
"logps/chosen": -520.294921875, |
|
"logps/rejected": -521.7930908203125, |
|
"loss": 0.5949, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.0745341777801514, |
|
"rewards/margins": 0.33133482933044434, |
|
"rewards/rejected": -1.4058691263198853, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 4.0, |
|
"learning_rate": 4.2945221131440783e-07, |
|
"logits/chosen": -2.6148898601531982, |
|
"logits/rejected": -2.5540575981140137, |
|
"logps/chosen": -503.8189392089844, |
|
"logps/rejected": -468.48468017578125, |
|
"loss": 0.6091, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0721933841705322, |
|
"rewards/margins": 0.2973105311393738, |
|
"rewards/rejected": -1.3695039749145508, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 3.234375, |
|
"learning_rate": 4.167366067969381e-07, |
|
"logits/chosen": -2.6700875759124756, |
|
"logits/rejected": -2.6666862964630127, |
|
"logps/chosen": -451.3338928222656, |
|
"logps/rejected": -494.00537109375, |
|
"loss": 0.6031, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0849757194519043, |
|
"rewards/margins": 0.3115970492362976, |
|
"rewards/rejected": -1.3965727090835571, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 3.65625, |
|
"learning_rate": 4.041949541732826e-07, |
|
"logits/chosen": -2.6685242652893066, |
|
"logits/rejected": -2.658634662628174, |
|
"logps/chosen": -498.36871337890625, |
|
"logps/rejected": -510.15118408203125, |
|
"loss": 0.584, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.0659822225570679, |
|
"rewards/margins": 0.3512209951877594, |
|
"rewards/rejected": -1.4172031879425049, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 2.203125, |
|
"learning_rate": 3.9182830066782614e-07, |
|
"logits/chosen": -2.637341022491455, |
|
"logits/rejected": -2.636918544769287, |
|
"logps/chosen": -501.9124450683594, |
|
"logps/rejected": -526.0079345703125, |
|
"loss": 0.6074, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0901362895965576, |
|
"rewards/margins": 0.30762726068496704, |
|
"rewards/rejected": -1.3977636098861694, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -2.6388015747070312, |
|
"eval_logits/rejected": -2.605605363845825, |
|
"eval_logps/chosen": -506.04547119140625, |
|
"eval_logps/rejected": -491.9724426269531, |
|
"eval_loss": 0.6086438894271851, |
|
"eval_rewards/accuracies": 0.6604999899864197, |
|
"eval_rewards/chosen": -1.0982069969177246, |
|
"eval_rewards/margins": 0.3046818971633911, |
|
"eval_rewards/rejected": -1.4028888940811157, |
|
"eval_runtime": 347.8304, |
|
"eval_samples_per_second": 5.75, |
|
"eval_steps_per_second": 0.719, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 3.28125, |
|
"learning_rate": 3.796376788925771e-07, |
|
"logits/chosen": -2.6419506072998047, |
|
"logits/rejected": -2.6541552543640137, |
|
"logps/chosen": -491.3660583496094, |
|
"logps/rejected": -470.6940002441406, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.115142583847046, |
|
"rewards/margins": 0.18211853504180908, |
|
"rewards/rejected": -1.2972612380981445, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 3.90625, |
|
"learning_rate": 3.676241067609465e-07, |
|
"logits/chosen": -2.672940731048584, |
|
"logits/rejected": -2.6646289825439453, |
|
"logps/chosen": -534.7271728515625, |
|
"logps/rejected": -501.14385986328125, |
|
"loss": 0.6169, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0392810106277466, |
|
"rewards/margins": 0.2989768981933594, |
|
"rewards/rejected": -1.3382577896118164, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 4.46875, |
|
"learning_rate": 3.5578858740274976e-07, |
|
"logits/chosen": -2.627161979675293, |
|
"logits/rejected": -2.624307155609131, |
|
"logps/chosen": -498.4679260253906, |
|
"logps/rejected": -492.9081115722656, |
|
"loss": 0.6572, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.0917952060699463, |
|
"rewards/margins": 0.18084125220775604, |
|
"rewards/rejected": -1.2726365327835083, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 5.125, |
|
"learning_rate": 3.44132109080447e-07, |
|
"logits/chosen": -2.7235939502716064, |
|
"logits/rejected": -2.68570613861084, |
|
"logps/chosen": -511.3997497558594, |
|
"logps/rejected": -466.37591552734375, |
|
"loss": 0.6249, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0969817638397217, |
|
"rewards/margins": 0.2552695572376251, |
|
"rewards/rejected": -1.3522512912750244, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 3.234375, |
|
"learning_rate": 3.3265564510662344e-07, |
|
"logits/chosen": -2.7106406688690186, |
|
"logits/rejected": -2.665052890777588, |
|
"logps/chosen": -538.0487060546875, |
|
"logps/rejected": -513.3031005859375, |
|
"loss": 0.5986, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.014005422592163, |
|
"rewards/margins": 0.33192679286003113, |
|
"rewards/rejected": -1.3459322452545166, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 3.6875, |
|
"learning_rate": 3.213601537627195e-07, |
|
"logits/chosen": -2.6548619270324707, |
|
"logits/rejected": -2.6132593154907227, |
|
"logps/chosen": -503.3099060058594, |
|
"logps/rejected": -498.5037536621094, |
|
"loss": 0.599, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1849998235702515, |
|
"rewards/margins": 0.2957174777984619, |
|
"rewards/rejected": -1.480717420578003, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 3.421875, |
|
"learning_rate": 3.1024657821901063e-07, |
|
"logits/chosen": -2.6821701526641846, |
|
"logits/rejected": -2.667227268218994, |
|
"logps/chosen": -484.52783203125, |
|
"logps/rejected": -481.2435607910156, |
|
"loss": 0.6298, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0785605907440186, |
|
"rewards/margins": 0.2731327414512634, |
|
"rewards/rejected": -1.3516933917999268, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 3.125, |
|
"learning_rate": 2.9931584645585654e-07, |
|
"logits/chosen": -2.6534295082092285, |
|
"logits/rejected": -2.655452013015747, |
|
"logps/chosen": -510.244384765625, |
|
"logps/rejected": -527.35546875, |
|
"loss": 0.639, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.032504677772522, |
|
"rewards/margins": 0.2230747640132904, |
|
"rewards/rejected": -1.2555794715881348, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 3.078125, |
|
"learning_rate": 2.885688711862136e-07, |
|
"logits/chosen": -2.6963908672332764, |
|
"logits/rejected": -2.7045657634735107, |
|
"logps/chosen": -497.63653564453125, |
|
"logps/rejected": -511.78851318359375, |
|
"loss": 0.6386, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.0954744815826416, |
|
"rewards/margins": 0.259405255317688, |
|
"rewards/rejected": -1.3548799753189087, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 3.25, |
|
"learning_rate": 2.7800654977942486e-07, |
|
"logits/chosen": -2.6455070972442627, |
|
"logits/rejected": -2.602128505706787, |
|
"logps/chosen": -493.291259765625, |
|
"logps/rejected": -495.4798889160156, |
|
"loss": 0.5981, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0171977281570435, |
|
"rewards/margins": 0.32562780380249023, |
|
"rewards/rejected": -1.3428254127502441, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_logits/chosen": -2.64424991607666, |
|
"eval_logits/rejected": -2.611661911010742, |
|
"eval_logps/chosen": -504.757080078125, |
|
"eval_logps/rejected": -490.4914855957031, |
|
"eval_loss": 0.6086958050727844, |
|
"eval_rewards/accuracies": 0.6610000133514404, |
|
"eval_rewards/chosen": -1.0853232145309448, |
|
"eval_rewards/margins": 0.30275672674179077, |
|
"eval_rewards/rejected": -1.3880800008773804, |
|
"eval_runtime": 347.7972, |
|
"eval_samples_per_second": 5.75, |
|
"eval_steps_per_second": 0.719, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 6.28125, |
|
"learning_rate": 2.6762976418628797e-07, |
|
"logits/chosen": -2.68962025642395, |
|
"logits/rejected": -2.662564754486084, |
|
"logps/chosen": -457.9961853027344, |
|
"logps/rejected": -422.3614807128906, |
|
"loss": 0.5896, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0693327188491821, |
|
"rewards/margins": 0.3496701717376709, |
|
"rewards/rejected": -1.4190027713775635, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 3.46875, |
|
"learning_rate": 2.5743938086541354e-07, |
|
"logits/chosen": -2.6620020866394043, |
|
"logits/rejected": -2.629638433456421, |
|
"logps/chosen": -499.93896484375, |
|
"logps/rejected": -487.2354431152344, |
|
"loss": 0.6226, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.130860686302185, |
|
"rewards/margins": 0.27780821919441223, |
|
"rewards/rejected": -1.408668875694275, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 3.984375, |
|
"learning_rate": 2.4743625071087574e-07, |
|
"logits/chosen": -2.7372817993164062, |
|
"logits/rejected": -2.6844122409820557, |
|
"logps/chosen": -516.2169189453125, |
|
"logps/rejected": -501.70721435546875, |
|
"loss": 0.5967, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.0415713787078857, |
|
"rewards/margins": 0.37831583619117737, |
|
"rewards/rejected": -1.4198873043060303, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 4.125, |
|
"learning_rate": 2.3762120898116498e-07, |
|
"logits/chosen": -2.6761159896850586, |
|
"logits/rejected": -2.651787281036377, |
|
"logps/chosen": -508.19891357421875, |
|
"logps/rejected": -515.2844848632812, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.1085197925567627, |
|
"rewards/margins": 0.3225208520889282, |
|
"rewards/rejected": -1.4310405254364014, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 3.390625, |
|
"learning_rate": 2.2799507522944048e-07, |
|
"logits/chosen": -2.6180145740509033, |
|
"logits/rejected": -2.5854008197784424, |
|
"logps/chosen": -513.4478759765625, |
|
"logps/rejected": -516.1890869140625, |
|
"loss": 0.5651, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.0392944812774658, |
|
"rewards/margins": 0.3910349905490875, |
|
"rewards/rejected": -1.4303295612335205, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 3.6875, |
|
"learning_rate": 2.1855865323510056e-07, |
|
"logits/chosen": -2.6438541412353516, |
|
"logits/rejected": -2.5801243782043457, |
|
"logps/chosen": -512.1270751953125, |
|
"logps/rejected": -525.7010498046875, |
|
"loss": 0.5627, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0036083459854126, |
|
"rewards/margins": 0.4596267640590668, |
|
"rewards/rejected": -1.4632351398468018, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 3.90625, |
|
"learning_rate": 2.0931273093666575e-07, |
|
"logits/chosen": -2.637359380722046, |
|
"logits/rejected": -2.609163761138916, |
|
"logps/chosen": -479.6756896972656, |
|
"logps/rejected": -470.44158935546875, |
|
"loss": 0.5943, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1648880243301392, |
|
"rewards/margins": 0.31521743535995483, |
|
"rewards/rejected": -1.4801056385040283, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 4.25, |
|
"learning_rate": 2.002580803659873e-07, |
|
"logits/chosen": -2.655611276626587, |
|
"logits/rejected": -2.6167244911193848, |
|
"logps/chosen": -495.48779296875, |
|
"logps/rejected": -477.42877197265625, |
|
"loss": 0.6252, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.13040292263031, |
|
"rewards/margins": 0.2747969925403595, |
|
"rewards/rejected": -1.4052000045776367, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 3.328125, |
|
"learning_rate": 1.913954575837826e-07, |
|
"logits/chosen": -2.6717348098754883, |
|
"logits/rejected": -2.594569683074951, |
|
"logps/chosen": -509.77593994140625, |
|
"logps/rejected": -469.570068359375, |
|
"loss": 0.573, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0561670064926147, |
|
"rewards/margins": 0.38789016008377075, |
|
"rewards/rejected": -1.4440572261810303, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 3.8125, |
|
"learning_rate": 1.827256026165028e-07, |
|
"logits/chosen": -2.6890311241149902, |
|
"logits/rejected": -2.6426968574523926, |
|
"logps/chosen": -555.2242431640625, |
|
"logps/rejected": -503.68292236328125, |
|
"loss": 0.5944, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9910923838615417, |
|
"rewards/margins": 0.3401171565055847, |
|
"rewards/rejected": -1.331209421157837, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": -2.6360013484954834, |
|
"eval_logits/rejected": -2.602590560913086, |
|
"eval_logps/chosen": -505.1947021484375, |
|
"eval_logps/rejected": -490.9886779785156, |
|
"eval_loss": 0.6087493300437927, |
|
"eval_rewards/accuracies": 0.6579999923706055, |
|
"eval_rewards/chosen": -1.0896990299224854, |
|
"eval_rewards/margins": 0.3033522665500641, |
|
"eval_rewards/rejected": -1.393051266670227, |
|
"eval_runtime": 347.6769, |
|
"eval_samples_per_second": 5.752, |
|
"eval_steps_per_second": 0.719, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 4.46875, |
|
"learning_rate": 1.7424923939454274e-07, |
|
"logits/chosen": -2.651811122894287, |
|
"logits/rejected": -2.603327512741089, |
|
"logps/chosen": -529.9959106445312, |
|
"logps/rejected": -487.5215759277344, |
|
"loss": 0.6188, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1013660430908203, |
|
"rewards/margins": 0.261643648147583, |
|
"rewards/rejected": -1.3630096912384033, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 4.0, |
|
"learning_rate": 1.6596707569179304e-07, |
|
"logits/chosen": -2.723508596420288, |
|
"logits/rejected": -2.6757960319519043, |
|
"logps/chosen": -535.6593627929688, |
|
"logps/rejected": -493.79840087890625, |
|
"loss": 0.6145, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.080249309539795, |
|
"rewards/margins": 0.29932349920272827, |
|
"rewards/rejected": -1.379572868347168, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 3.53125, |
|
"learning_rate": 1.578798030665385e-07, |
|
"logits/chosen": -2.687269687652588, |
|
"logits/rejected": -2.629744529724121, |
|
"logps/chosen": -520.5648193359375, |
|
"logps/rejected": -503.39532470703125, |
|
"loss": 0.6145, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0786540508270264, |
|
"rewards/margins": 0.2842092216014862, |
|
"rewards/rejected": -1.362863302230835, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 3.8125, |
|
"learning_rate": 1.499880968037165e-07, |
|
"logits/chosen": -2.7056689262390137, |
|
"logits/rejected": -2.6712379455566406, |
|
"logps/chosen": -483.80413818359375, |
|
"logps/rejected": -458.0314025878906, |
|
"loss": 0.5931, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0653079748153687, |
|
"rewards/margins": 0.31929486989974976, |
|
"rewards/rejected": -1.3846029043197632, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 3.9375, |
|
"learning_rate": 1.4229261585852805e-07, |
|
"logits/chosen": -2.697467803955078, |
|
"logits/rejected": -2.6769003868103027, |
|
"logps/chosen": -504.36572265625, |
|
"logps/rejected": -491.1905212402344, |
|
"loss": 0.5925, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.0281347036361694, |
|
"rewards/margins": 0.3227378726005554, |
|
"rewards/rejected": -1.3508726358413696, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 3.4375, |
|
"learning_rate": 1.3479400280141886e-07, |
|
"logits/chosen": -2.667304039001465, |
|
"logits/rejected": -2.652622938156128, |
|
"logps/chosen": -477.559326171875, |
|
"logps/rejected": -501.595703125, |
|
"loss": 0.5803, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0702049732208252, |
|
"rewards/margins": 0.3717937469482422, |
|
"rewards/rejected": -1.4419987201690674, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 3.828125, |
|
"learning_rate": 1.2749288376442044e-07, |
|
"logits/chosen": -2.677260398864746, |
|
"logits/rejected": -2.619901180267334, |
|
"logps/chosen": -533.4788818359375, |
|
"logps/rejected": -490.34552001953125, |
|
"loss": 0.5897, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.0250308513641357, |
|
"rewards/margins": 0.38970544934272766, |
|
"rewards/rejected": -1.4147361516952515, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 3.640625, |
|
"learning_rate": 1.203898683888713e-07, |
|
"logits/chosen": -2.7128968238830566, |
|
"logits/rejected": -2.679882764816284, |
|
"logps/chosen": -489.0921325683594, |
|
"logps/rejected": -481.76470947265625, |
|
"loss": 0.6548, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.1597046852111816, |
|
"rewards/margins": 0.2149733006954193, |
|
"rewards/rejected": -1.3746780157089233, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 2.953125, |
|
"learning_rate": 1.1348554977451132e-07, |
|
"logits/chosen": -2.703012466430664, |
|
"logits/rejected": -2.672036647796631, |
|
"logps/chosen": -522.70849609375, |
|
"logps/rejected": -502.2613830566406, |
|
"loss": 0.5897, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.0457698106765747, |
|
"rewards/margins": 0.35509949922561646, |
|
"rewards/rejected": -1.4008692502975464, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 1.0678050442995802e-07, |
|
"logits/chosen": -2.6770823001861572, |
|
"logits/rejected": -2.624206066131592, |
|
"logps/chosen": -506.50311279296875, |
|
"logps/rejected": -476.0753479003906, |
|
"loss": 0.5979, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.052063226699829, |
|
"rewards/margins": 0.32971978187561035, |
|
"rewards/rejected": -1.381783127784729, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_logits/chosen": -2.6460154056549072, |
|
"eval_logits/rejected": -2.613610029220581, |
|
"eval_logps/chosen": -505.44384765625, |
|
"eval_logps/rejected": -491.3070068359375, |
|
"eval_loss": 0.6085324883460999, |
|
"eval_rewards/accuracies": 0.659500002861023, |
|
"eval_rewards/chosen": -1.0921905040740967, |
|
"eval_rewards/margins": 0.30404436588287354, |
|
"eval_rewards/rejected": -1.3962348699569702, |
|
"eval_runtime": 347.657, |
|
"eval_samples_per_second": 5.753, |
|
"eval_steps_per_second": 0.719, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 3.765625, |
|
"learning_rate": 1.0027529222456755e-07, |
|
"logits/chosen": -2.6578614711761475, |
|
"logits/rejected": -2.6162824630737305, |
|
"logps/chosen": -483.9580993652344, |
|
"logps/rejected": -489.7320251464844, |
|
"loss": 0.5651, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.0210082530975342, |
|
"rewards/margins": 0.37885022163391113, |
|
"rewards/rejected": -1.3998584747314453, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 3.59375, |
|
"learning_rate": 9.397045634168766e-08, |
|
"logits/chosen": -2.7051877975463867, |
|
"logits/rejected": -2.6907284259796143, |
|
"logps/chosen": -506.522216796875, |
|
"logps/rejected": -518.1729736328125, |
|
"loss": 0.586, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9934719204902649, |
|
"rewards/margins": 0.3790653347969055, |
|
"rewards/rejected": -1.3725372552871704, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 4.03125, |
|
"learning_rate": 8.78665232332998e-08, |
|
"logits/chosen": -2.62373948097229, |
|
"logits/rejected": -2.6049044132232666, |
|
"logps/chosen": -467.76312255859375, |
|
"logps/rejected": -479.63446044921875, |
|
"loss": 0.6085, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.143489122390747, |
|
"rewards/margins": 0.27841717004776, |
|
"rewards/rejected": -1.4219063520431519, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 4.625, |
|
"learning_rate": 8.196400257606208e-08, |
|
"logits/chosen": -2.706756114959717, |
|
"logits/rejected": -2.6707985401153564, |
|
"logps/chosen": -539.0672607421875, |
|
"logps/rejected": -535.8557739257812, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.0309526920318604, |
|
"rewards/margins": 0.33775442838668823, |
|
"rewards/rejected": -1.3687069416046143, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 3.59375, |
|
"learning_rate": 7.626338722875076e-08, |
|
"logits/chosen": -2.6710562705993652, |
|
"logits/rejected": -2.6908836364746094, |
|
"logps/chosen": -492.14251708984375, |
|
"logps/rejected": -494.04534912109375, |
|
"loss": 0.6436, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0950779914855957, |
|
"rewards/margins": 0.22633162140846252, |
|
"rewards/rejected": -1.3214095830917358, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 3.453125, |
|
"learning_rate": 7.076515319110688e-08, |
|
"logits/chosen": -2.6932473182678223, |
|
"logits/rejected": -2.676130771636963, |
|
"logps/chosen": -497.710693359375, |
|
"logps/rejected": -469.8008728027344, |
|
"loss": 0.5853, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.0803611278533936, |
|
"rewards/margins": 0.3779391050338745, |
|
"rewards/rejected": -1.458300232887268, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 5.1875, |
|
"learning_rate": 6.54697595640899e-08, |
|
"logits/chosen": -2.695744752883911, |
|
"logits/rejected": -2.657261610031128, |
|
"logps/chosen": -534.2222900390625, |
|
"logps/rejected": -514.4498291015625, |
|
"loss": 0.6, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.9726902842521667, |
|
"rewards/margins": 0.33556845784187317, |
|
"rewards/rejected": -1.3082587718963623, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 3.3125, |
|
"learning_rate": 6.037764851154426e-08, |
|
"logits/chosen": -2.645259380340576, |
|
"logits/rejected": -2.626682758331299, |
|
"logps/chosen": -505.6795959472656, |
|
"logps/rejected": -522.542724609375, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0380734205245972, |
|
"rewards/margins": 0.3490239083766937, |
|
"rewards/rejected": -1.3870973587036133, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 4.3125, |
|
"learning_rate": 5.548924522327748e-08, |
|
"logits/chosen": -2.6519410610198975, |
|
"logits/rejected": -2.6350717544555664, |
|
"logps/chosen": -507.71734619140625, |
|
"logps/rejected": -494.5531311035156, |
|
"loss": 0.6209, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1069073677062988, |
|
"rewards/margins": 0.278178870677948, |
|
"rewards/rejected": -1.3850862979888916, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 4.375, |
|
"learning_rate": 5.0804957879556915e-08, |
|
"logits/chosen": -2.623077869415283, |
|
"logits/rejected": -2.615199565887451, |
|
"logps/chosen": -456.0958557128906, |
|
"logps/rejected": -476.017578125, |
|
"loss": 0.6154, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.1336462497711182, |
|
"rewards/margins": 0.26231056451797485, |
|
"rewards/rejected": -1.3959566354751587, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": -2.639690399169922, |
|
"eval_logits/rejected": -2.6065900325775146, |
|
"eval_logps/chosen": -505.278076171875, |
|
"eval_logps/rejected": -491.1413269042969, |
|
"eval_loss": 0.6085542440414429, |
|
"eval_rewards/accuracies": 0.659500002861023, |
|
"eval_rewards/chosen": -1.0905324220657349, |
|
"eval_rewards/margins": 0.30404558777809143, |
|
"eval_rewards/rejected": -1.394577980041504, |
|
"eval_runtime": 347.9164, |
|
"eval_samples_per_second": 5.749, |
|
"eval_steps_per_second": 0.719, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 3.140625, |
|
"learning_rate": 4.632517761702815e-08, |
|
"logits/chosen": -2.6383109092712402, |
|
"logits/rejected": -2.6063365936279297, |
|
"logps/chosen": -466.67974853515625, |
|
"logps/rejected": -459.43292236328125, |
|
"loss": 0.6194, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.0989251136779785, |
|
"rewards/margins": 0.2716377377510071, |
|
"rewards/rejected": -1.3705627918243408, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 4.53125, |
|
"learning_rate": 4.205027849605359e-08, |
|
"logits/chosen": -2.677117347717285, |
|
"logits/rejected": -2.641345500946045, |
|
"logps/chosen": -486.0668029785156, |
|
"logps/rejected": -461.42364501953125, |
|
"loss": 0.6305, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.1190522909164429, |
|
"rewards/margins": 0.26842787861824036, |
|
"rewards/rejected": -1.3874801397323608, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 3.671875, |
|
"learning_rate": 3.798061746947995e-08, |
|
"logits/chosen": -2.729708433151245, |
|
"logits/rejected": -2.699693202972412, |
|
"logps/chosen": -496.13580322265625, |
|
"logps/rejected": -466.28607177734375, |
|
"loss": 0.6133, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.098503589630127, |
|
"rewards/margins": 0.29642829298973083, |
|
"rewards/rejected": -1.3949320316314697, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 3.984375, |
|
"learning_rate": 3.411653435283158e-08, |
|
"logits/chosen": -2.66868257522583, |
|
"logits/rejected": -2.611743450164795, |
|
"logps/chosen": -512.9805908203125, |
|
"logps/rejected": -457.05517578125, |
|
"loss": 0.6083, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.0667917728424072, |
|
"rewards/margins": 0.3067484498023987, |
|
"rewards/rejected": -1.3735402822494507, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 3.171875, |
|
"learning_rate": 3.04583517959367e-08, |
|
"logits/chosen": -2.7076990604400635, |
|
"logits/rejected": -2.6731173992156982, |
|
"logps/chosen": -483.63604736328125, |
|
"logps/rejected": -467.524658203125, |
|
"loss": 0.5848, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0306214094161987, |
|
"rewards/margins": 0.3499363660812378, |
|
"rewards/rejected": -1.3805577754974365, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 3.890625, |
|
"learning_rate": 2.7006375255985984e-08, |
|
"logits/chosen": -2.664285182952881, |
|
"logits/rejected": -2.6678249835968018, |
|
"logps/chosen": -514.0250244140625, |
|
"logps/rejected": -520.1316528320312, |
|
"loss": 0.6323, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.1207656860351562, |
|
"rewards/margins": 0.2250840663909912, |
|
"rewards/rejected": -1.3458497524261475, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 4.4375, |
|
"learning_rate": 2.3760892972027328e-08, |
|
"logits/chosen": -2.7375500202178955, |
|
"logits/rejected": -2.689077377319336, |
|
"logps/chosen": -519.54150390625, |
|
"logps/rejected": -497.95574951171875, |
|
"loss": 0.6163, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.1362625360488892, |
|
"rewards/margins": 0.3190504312515259, |
|
"rewards/rejected": -1.455312967300415, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 6.71875, |
|
"learning_rate": 2.072217594089765e-08, |
|
"logits/chosen": -2.6551547050476074, |
|
"logits/rejected": -2.6728549003601074, |
|
"logps/chosen": -494.8778381347656, |
|
"logps/rejected": -511.70086669921875, |
|
"loss": 0.5927, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0726226568222046, |
|
"rewards/margins": 0.3504863679409027, |
|
"rewards/rejected": -1.4231090545654297, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 3.5, |
|
"learning_rate": 1.789047789459375e-08, |
|
"logits/chosen": -2.6753056049346924, |
|
"logits/rejected": -2.628095865249634, |
|
"logps/chosen": -563.8702392578125, |
|
"logps/rejected": -515.8682861328125, |
|
"loss": 0.5758, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9584245681762695, |
|
"rewards/margins": 0.3854634165763855, |
|
"rewards/rejected": -1.3438880443572998, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 5.125, |
|
"learning_rate": 1.5266035279088708e-08, |
|
"logits/chosen": -2.586505174636841, |
|
"logits/rejected": -2.5706770420074463, |
|
"logps/chosen": -538.0152587890625, |
|
"logps/rejected": -521.6544189453125, |
|
"loss": 0.6053, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0640016794204712, |
|
"rewards/margins": 0.30589136481285095, |
|
"rewards/rejected": -1.369892954826355, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_logits/chosen": -2.6422648429870605, |
|
"eval_logits/rejected": -2.609360694885254, |
|
"eval_logps/chosen": -505.29425048828125, |
|
"eval_logps/rejected": -491.1404724121094, |
|
"eval_loss": 0.6086028218269348, |
|
"eval_rewards/accuracies": 0.6549999713897705, |
|
"eval_rewards/chosen": -1.0906946659088135, |
|
"eval_rewards/margins": 0.30387499928474426, |
|
"eval_rewards/rejected": -1.394569754600525, |
|
"eval_runtime": 347.9396, |
|
"eval_samples_per_second": 5.748, |
|
"eval_steps_per_second": 0.719, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 5.1875, |
|
"learning_rate": 1.2849067234584623e-08, |
|
"logits/chosen": -2.6033217906951904, |
|
"logits/rejected": -2.595217227935791, |
|
"logps/chosen": -475.32647705078125, |
|
"logps/rejected": -481.47039794921875, |
|
"loss": 0.6223, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.0801260471343994, |
|
"rewards/margins": 0.30023181438446045, |
|
"rewards/rejected": -1.3803579807281494, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 3.59375, |
|
"learning_rate": 1.0639775577218625e-08, |
|
"logits/chosen": -2.6388094425201416, |
|
"logits/rejected": -2.5794878005981445, |
|
"logps/chosen": -490.6612854003906, |
|
"logps/rejected": -458.67999267578125, |
|
"loss": 0.593, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1297202110290527, |
|
"rewards/margins": 0.3556092083454132, |
|
"rewards/rejected": -1.4853293895721436, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 3.65625, |
|
"learning_rate": 8.638344782207486e-09, |
|
"logits/chosen": -2.6429569721221924, |
|
"logits/rejected": -2.6291909217834473, |
|
"logps/chosen": -483.68841552734375, |
|
"logps/rejected": -469.73638916015625, |
|
"loss": 0.6087, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.080833077430725, |
|
"rewards/margins": 0.29357820749282837, |
|
"rewards/rejected": -1.3744113445281982, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 3.015625, |
|
"learning_rate": 6.84494196844715e-09, |
|
"logits/chosen": -2.6733694076538086, |
|
"logits/rejected": -2.645113468170166, |
|
"logps/chosen": -516.4934692382812, |
|
"logps/rejected": -511.3959045410156, |
|
"loss": 0.5696, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0565123558044434, |
|
"rewards/margins": 0.41344934701919556, |
|
"rewards/rejected": -1.4699615240097046, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 2.765625, |
|
"learning_rate": 5.259716884556121e-09, |
|
"logits/chosen": -2.6986021995544434, |
|
"logits/rejected": -2.662108898162842, |
|
"logps/chosen": -497.4439392089844, |
|
"logps/rejected": -491.6142578125, |
|
"loss": 0.5763, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0045363903045654, |
|
"rewards/margins": 0.381572961807251, |
|
"rewards/rejected": -1.3861093521118164, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 3.234375, |
|
"learning_rate": 3.882801896372967e-09, |
|
"logits/chosen": -2.6970138549804688, |
|
"logits/rejected": -2.6791810989379883, |
|
"logps/chosen": -501.97576904296875, |
|
"logps/rejected": -475.7244567871094, |
|
"loss": 0.6286, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.0755218267440796, |
|
"rewards/margins": 0.2614571452140808, |
|
"rewards/rejected": -1.3369790315628052, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 3.453125, |
|
"learning_rate": 2.7143119759026614e-09, |
|
"logits/chosen": -2.6607155799865723, |
|
"logits/rejected": -2.617769241333008, |
|
"logps/chosen": -522.4849853515625, |
|
"logps/rejected": -511.22186279296875, |
|
"loss": 0.5748, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0117288827896118, |
|
"rewards/margins": 0.3699984848499298, |
|
"rewards/rejected": -1.3817272186279297, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 4.21875, |
|
"learning_rate": 1.754344691717591e-09, |
|
"logits/chosen": -2.6623787879943848, |
|
"logits/rejected": -2.649315357208252, |
|
"logps/chosen": -489.23443603515625, |
|
"logps/rejected": -522.0306396484375, |
|
"loss": 0.6375, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.0721361637115479, |
|
"rewards/margins": 0.22307145595550537, |
|
"rewards/rejected": -1.2952076196670532, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 3.84375, |
|
"learning_rate": 1.0029802008096335e-09, |
|
"logits/chosen": -2.676607608795166, |
|
"logits/rejected": -2.614905595779419, |
|
"logps/chosen": -527.0256958007812, |
|
"logps/rejected": -511.72943115234375, |
|
"loss": 0.5925, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.1001697778701782, |
|
"rewards/margins": 0.33944058418273926, |
|
"rewards/rejected": -1.4396103620529175, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 3.40625, |
|
"learning_rate": 4.602812418974534e-10, |
|
"logits/chosen": -2.7049756050109863, |
|
"logits/rejected": -2.668239116668701, |
|
"logps/chosen": -520.1328125, |
|
"logps/rejected": -504.3516540527344, |
|
"loss": 0.602, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.012479305267334, |
|
"rewards/margins": 0.34507402777671814, |
|
"rewards/rejected": -1.357553243637085, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": -2.642503023147583, |
|
"eval_logits/rejected": -2.609644889831543, |
|
"eval_logps/chosen": -504.9806823730469, |
|
"eval_logps/rejected": -490.8211364746094, |
|
"eval_loss": 0.6085299253463745, |
|
"eval_rewards/accuracies": 0.6579999923706055, |
|
"eval_rewards/chosen": -1.0875595808029175, |
|
"eval_rewards/margins": 0.3038162589073181, |
|
"eval_rewards/rejected": -1.3913757801055908, |
|
"eval_runtime": 347.9755, |
|
"eval_samples_per_second": 5.748, |
|
"eval_steps_per_second": 0.718, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 1.2629313018819312e-10, |
|
"logits/chosen": -2.6654155254364014, |
|
"logits/rejected": -2.625561475753784, |
|
"logps/chosen": -482.1392517089844, |
|
"logps/rejected": -486.5087890625, |
|
"loss": 0.5962, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0852601528167725, |
|
"rewards/margins": 0.3787585496902466, |
|
"rewards/rejected": -1.464018702507019, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.78125, |
|
"learning_rate": 1.0437535929996855e-12, |
|
"logits/chosen": -2.6726207733154297, |
|
"logits/rejected": -2.636209487915039, |
|
"logps/chosen": -527.1629638671875, |
|
"logps/rejected": -491.42041015625, |
|
"loss": 0.5974, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.100174069404602, |
|
"rewards/margins": 0.3280791640281677, |
|
"rewards/rejected": -1.428253173828125, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3821, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6288731582999011, |
|
"train_runtime": 37165.2285, |
|
"train_samples_per_second": 1.645, |
|
"train_steps_per_second": 0.103 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3821, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|