k-r-l's picture
Training in progress, step 49, checkpoint
dbd62f8 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9990442816183498,
"eval_steps": 500,
"global_step": 49,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02038865880853775,
"grad_norm": 5.981678485870361,
"learning_rate": 1e-05,
"log_odds_chosen": -0.09542512148618698,
"log_odds_ratio": -0.7891254425048828,
"logits/chosen": -25.510120391845703,
"logits/rejected": -25.670942306518555,
"logps/chosen": -7.750019073486328,
"logps/rejected": -7.654723167419434,
"loss": 8.3254,
"nll_loss": 8.246482849121094,
"rewards/accuracies": 0.484375,
"rewards/chosen": -0.7750019431114197,
"rewards/margins": -0.009529606439173222,
"rewards/rejected": -0.7654722929000854,
"step": 1
},
{
"epoch": 0.0407773176170755,
"grad_norm": 5.836461067199707,
"learning_rate": 2e-05,
"log_odds_chosen": -0.12119242548942566,
"log_odds_ratio": -0.7900717854499817,
"logits/chosen": -25.202072143554688,
"logits/rejected": -25.530351638793945,
"logps/chosen": -7.662315368652344,
"logps/rejected": -7.541194915771484,
"loss": 8.2533,
"nll_loss": 8.17427921295166,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.7662315964698792,
"rewards/margins": -0.012112049385905266,
"rewards/rejected": -0.7541195750236511,
"step": 2
},
{
"epoch": 0.06116597642561325,
"grad_norm": 5.690365314483643,
"learning_rate": 3e-05,
"log_odds_chosen": -0.10295607149600983,
"log_odds_ratio": -0.7882184982299805,
"logits/chosen": -25.582046508789062,
"logits/rejected": -25.795223236083984,
"logps/chosen": -7.466374397277832,
"logps/rejected": -7.363470554351807,
"loss": 8.0091,
"nll_loss": 7.930233001708984,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.7466374635696411,
"rewards/margins": -0.01029033400118351,
"rewards/rejected": -0.7363470792770386,
"step": 3
},
{
"epoch": 0.081554635234151,
"grad_norm": 5.972021102905273,
"learning_rate": 4e-05,
"log_odds_chosen": -0.13920506834983826,
"log_odds_ratio": -0.8114263415336609,
"logits/chosen": -25.581079483032227,
"logits/rejected": -25.690841674804688,
"logps/chosen": -7.703052520751953,
"logps/rejected": -7.563896656036377,
"loss": 8.2429,
"nll_loss": 8.16179084777832,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.7703053951263428,
"rewards/margins": -0.013915632851421833,
"rewards/rejected": -0.7563896775245667,
"step": 4
},
{
"epoch": 0.10194329404268876,
"grad_norm": 6.138757705688477,
"learning_rate": 5e-05,
"log_odds_chosen": -0.12647418677806854,
"log_odds_ratio": -0.7903496026992798,
"logits/chosen": -25.797008514404297,
"logits/rejected": -25.96974754333496,
"logps/chosen": -7.411267280578613,
"logps/rejected": -7.284878730773926,
"loss": 7.957,
"nll_loss": 7.877945899963379,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.7411267161369324,
"rewards/margins": -0.012638838961720467,
"rewards/rejected": -0.7284878492355347,
"step": 5
},
{
"epoch": 0.1223319528512265,
"grad_norm": 6.701693058013916,
"learning_rate": 6e-05,
"log_odds_chosen": -0.30561840534210205,
"log_odds_ratio": -0.9098848700523376,
"logits/chosen": -25.611379623413086,
"logits/rejected": -25.745819091796875,
"logps/chosen": -7.532090187072754,
"logps/rejected": -7.226659297943115,
"loss": 8.0191,
"nll_loss": 7.928078651428223,
"rewards/accuracies": 0.359375,
"rewards/chosen": -0.753209114074707,
"rewards/margins": -0.03054318204522133,
"rewards/rejected": -0.7226659059524536,
"step": 6
},
{
"epoch": 0.14272061165976427,
"grad_norm": 7.153010845184326,
"learning_rate": 7e-05,
"log_odds_chosen": -0.19920583069324493,
"log_odds_ratio": -0.8425557613372803,
"logits/chosen": -25.43368148803711,
"logits/rejected": -25.85980987548828,
"logps/chosen": -7.394861698150635,
"logps/rejected": -7.195916652679443,
"loss": 7.8565,
"nll_loss": 7.7722601890563965,
"rewards/accuracies": 0.359375,
"rewards/chosen": -0.7394862174987793,
"rewards/margins": -0.019894439727067947,
"rewards/rejected": -0.7195916771888733,
"step": 7
},
{
"epoch": 0.163109270468302,
"grad_norm": 7.066710948944092,
"learning_rate": 8e-05,
"log_odds_chosen": 0.07723305374383926,
"log_odds_ratio": -0.698059618473053,
"logits/chosen": -25.201927185058594,
"logits/rejected": -25.380168914794922,
"logps/chosen": -6.719008922576904,
"logps/rejected": -6.796014785766602,
"loss": 7.2375,
"nll_loss": 7.167738914489746,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.6719008684158325,
"rewards/margins": 0.007700655609369278,
"rewards/rejected": -0.6796015501022339,
"step": 8
},
{
"epoch": 0.18349792927683975,
"grad_norm": 6.926291465759277,
"learning_rate": 9e-05,
"log_odds_chosen": 0.0007690861821174622,
"log_odds_ratio": -0.716381311416626,
"logits/chosen": -25.599267959594727,
"logits/rejected": -25.906770706176758,
"logps/chosen": -6.2457404136657715,
"logps/rejected": -6.246469974517822,
"loss": 6.7189,
"nll_loss": 6.647217273712158,
"rewards/accuracies": 0.515625,
"rewards/chosen": -0.6245740652084351,
"rewards/margins": 7.29067251086235e-05,
"rewards/rejected": -0.6246469616889954,
"step": 9
},
{
"epoch": 0.20388658808537752,
"grad_norm": 7.008027076721191,
"learning_rate": 0.0001,
"log_odds_chosen": 0.06532639265060425,
"log_odds_ratio": -0.7066172361373901,
"logits/chosen": -25.353395462036133,
"logits/rejected": -25.549720764160156,
"logps/chosen": -5.948342323303223,
"logps/rejected": -6.013580322265625,
"loss": 6.4356,
"nll_loss": 6.3649492263793945,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.5948342680931091,
"rewards/margins": 0.006523814518004656,
"rewards/rejected": -0.6013580560684204,
"step": 10
},
{
"epoch": 0.22427524689391526,
"grad_norm": 6.876976013183594,
"learning_rate": 9.743589743589744e-05,
"log_odds_chosen": -0.027606813237071037,
"log_odds_ratio": -0.7333718538284302,
"logits/chosen": -25.094350814819336,
"logits/rejected": -25.1453914642334,
"logps/chosen": -5.422086715698242,
"logps/rejected": -5.394695281982422,
"loss": 5.9228,
"nll_loss": 5.8494744300842285,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.542208731174469,
"rewards/margins": -0.0027391775511205196,
"rewards/rejected": -0.5394695401191711,
"step": 11
},
{
"epoch": 0.244663905702453,
"grad_norm": 7.610140800476074,
"learning_rate": 9.487179487179487e-05,
"log_odds_chosen": -0.0114058256149292,
"log_odds_ratio": -0.718590259552002,
"logits/chosen": -24.709278106689453,
"logits/rejected": -24.824674606323242,
"logps/chosen": -5.05657958984375,
"logps/rejected": -5.045741081237793,
"loss": 5.5928,
"nll_loss": 5.520919322967529,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.5056579113006592,
"rewards/margins": -0.001083830837160349,
"rewards/rejected": -0.5045741200447083,
"step": 12
},
{
"epoch": 0.26505256451099074,
"grad_norm": 7.7652740478515625,
"learning_rate": 9.230769230769232e-05,
"log_odds_chosen": -0.036822859197854996,
"log_odds_ratio": -0.7302141189575195,
"logits/chosen": -24.395771026611328,
"logits/rejected": -24.55908203125,
"logps/chosen": -4.55651330947876,
"logps/rejected": -4.520573139190674,
"loss": 5.1489,
"nll_loss": 5.0758538246154785,
"rewards/accuracies": 0.453125,
"rewards/chosen": -0.45565131306648254,
"rewards/margins": -0.0035939845256507397,
"rewards/rejected": -0.4520573019981384,
"step": 13
},
{
"epoch": 0.28544122331952854,
"grad_norm": 7.427043437957764,
"learning_rate": 8.974358974358975e-05,
"log_odds_chosen": 0.03310338407754898,
"log_odds_ratio": -0.6999921798706055,
"logits/chosen": -24.008562088012695,
"logits/rejected": -24.253616333007812,
"logps/chosen": -4.199281692504883,
"logps/rejected": -4.2311601638793945,
"loss": 4.7697,
"nll_loss": 4.699740409851074,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.4199281930923462,
"rewards/margins": 0.003187798894941807,
"rewards/rejected": -0.4231160581111908,
"step": 14
},
{
"epoch": 0.3058298821280663,
"grad_norm": 6.008456707000732,
"learning_rate": 8.717948717948718e-05,
"log_odds_chosen": -0.1511610448360443,
"log_odds_ratio": -0.7926070690155029,
"logits/chosen": -23.354618072509766,
"logits/rejected": -23.83741569519043,
"logps/chosen": -4.212711811065674,
"logps/rejected": -4.063477993011475,
"loss": 4.7418,
"nll_loss": 4.6625566482543945,
"rewards/accuracies": 0.390625,
"rewards/chosen": -0.4212712049484253,
"rewards/margins": -0.014923380687832832,
"rewards/rejected": -0.4063478112220764,
"step": 15
},
{
"epoch": 0.326218540936604,
"grad_norm": 8.08737850189209,
"learning_rate": 8.461538461538461e-05,
"log_odds_chosen": 0.04898079112172127,
"log_odds_ratio": -0.6859325766563416,
"logits/chosen": -23.336509704589844,
"logits/rejected": -23.604385375976562,
"logps/chosen": -3.9837567806243896,
"logps/rejected": -4.0314741134643555,
"loss": 4.4964,
"nll_loss": 4.427821159362793,
"rewards/accuracies": 0.546875,
"rewards/chosen": -0.39837566018104553,
"rewards/margins": 0.004771741107106209,
"rewards/rejected": -0.4031473994255066,
"step": 16
},
{
"epoch": 0.34660719974514176,
"grad_norm": 8.519704818725586,
"learning_rate": 8.205128205128205e-05,
"log_odds_chosen": 0.041097067296504974,
"log_odds_ratio": -0.6894928812980652,
"logits/chosen": -23.24013900756836,
"logits/rejected": -23.641630172729492,
"logps/chosen": -3.8253583908081055,
"logps/rejected": -3.863762378692627,
"loss": 4.3403,
"nll_loss": 4.271350383758545,
"rewards/accuracies": 0.609375,
"rewards/chosen": -0.3825358748435974,
"rewards/margins": 0.0038403947837650776,
"rewards/rejected": -0.3863762617111206,
"step": 17
},
{
"epoch": 0.3669958585536795,
"grad_norm": 7.910739898681641,
"learning_rate": 7.948717948717948e-05,
"log_odds_chosen": 0.034136779606342316,
"log_odds_ratio": -0.6866725087165833,
"logits/chosen": -23.235126495361328,
"logits/rejected": -23.519380569458008,
"logps/chosen": -3.6243810653686523,
"logps/rejected": -3.6572864055633545,
"loss": 4.1231,
"nll_loss": 4.0544114112854,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.3624381422996521,
"rewards/margins": 0.0032905228435993195,
"rewards/rejected": -0.3657286763191223,
"step": 18
},
{
"epoch": 0.38738451736221724,
"grad_norm": 7.562887191772461,
"learning_rate": 7.692307692307693e-05,
"log_odds_chosen": 0.12465603649616241,
"log_odds_ratio": -0.6449427008628845,
"logits/chosen": -23.304338455200195,
"logits/rejected": -23.28459358215332,
"logps/chosen": -3.4598004817962646,
"logps/rejected": -3.580230474472046,
"loss": 3.951,
"nll_loss": 3.8865041732788086,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.34598004817962646,
"rewards/margins": 0.012042999267578125,
"rewards/rejected": -0.3580230474472046,
"step": 19
},
{
"epoch": 0.40777317617075504,
"grad_norm": 8.01102352142334,
"learning_rate": 7.435897435897436e-05,
"log_odds_chosen": 0.005397355649620295,
"log_odds_ratio": -0.6990857720375061,
"logits/chosen": -22.913864135742188,
"logits/rejected": -23.254535675048828,
"logps/chosen": -3.5577945709228516,
"logps/rejected": -3.5629467964172363,
"loss": 4.0222,
"nll_loss": 3.95233416557312,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.35577940940856934,
"rewards/margins": 0.0005152309313416481,
"rewards/rejected": -0.3562946915626526,
"step": 20
},
{
"epoch": 0.4281618349792928,
"grad_norm": 7.010775089263916,
"learning_rate": 7.17948717948718e-05,
"log_odds_chosen": 0.04185578599572182,
"log_odds_ratio": -0.6874637603759766,
"logits/chosen": -22.951570510864258,
"logits/rejected": -23.108959197998047,
"logps/chosen": -3.363877296447754,
"logps/rejected": -3.403106927871704,
"loss": 3.8103,
"nll_loss": 3.7415761947631836,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.3363877236843109,
"rewards/margins": 0.00392295653000474,
"rewards/rejected": -0.3403106927871704,
"step": 21
},
{
"epoch": 0.4485504937878305,
"grad_norm": 4.4137654304504395,
"learning_rate": 6.923076923076924e-05,
"log_odds_chosen": 0.05776768922805786,
"log_odds_ratio": -0.674350917339325,
"logits/chosen": -22.662582397460938,
"logits/rejected": -22.98798370361328,
"logps/chosen": -3.312605619430542,
"logps/rejected": -3.368607997894287,
"loss": 3.7255,
"nll_loss": 3.658046007156372,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.3312605917453766,
"rewards/margins": 0.005600241012871265,
"rewards/rejected": -0.3368608355522156,
"step": 22
},
{
"epoch": 0.46893915259636826,
"grad_norm": 7.14854097366333,
"learning_rate": 6.666666666666667e-05,
"log_odds_chosen": 0.09064004570245743,
"log_odds_ratio": -0.6606053113937378,
"logits/chosen": -22.38204002380371,
"logits/rejected": -22.507062911987305,
"logps/chosen": -3.2942514419555664,
"logps/rejected": -3.3807365894317627,
"loss": 3.6835,
"nll_loss": 3.6173930168151855,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.3294251561164856,
"rewards/margins": 0.008648505434393883,
"rewards/rejected": -0.33807364106178284,
"step": 23
},
{
"epoch": 0.489327811404906,
"grad_norm": 8.279687881469727,
"learning_rate": 6.410256410256412e-05,
"log_odds_chosen": 0.029120640829205513,
"log_odds_ratio": -0.6869578957557678,
"logits/chosen": -22.003984451293945,
"logits/rejected": -22.17644500732422,
"logps/chosen": -3.1726255416870117,
"logps/rejected": -3.1994142532348633,
"loss": 3.5689,
"nll_loss": 3.500185012817383,
"rewards/accuracies": 0.546875,
"rewards/chosen": -0.31726256012916565,
"rewards/margins": 0.0026788865216076374,
"rewards/rejected": -0.3199414610862732,
"step": 24
},
{
"epoch": 0.5097164702134438,
"grad_norm": 8.568649291992188,
"learning_rate": 6.153846153846155e-05,
"log_odds_chosen": 0.060226939618587494,
"log_odds_ratio": -0.675925612449646,
"logits/chosen": -21.84587287902832,
"logits/rejected": -22.01560401916504,
"logps/chosen": -3.0773396492004395,
"logps/rejected": -3.133820056915283,
"loss": 3.4766,
"nll_loss": 3.40903902053833,
"rewards/accuracies": 0.546875,
"rewards/chosen": -0.3077339828014374,
"rewards/margins": 0.005648063495755196,
"rewards/rejected": -0.31338202953338623,
"step": 25
},
{
"epoch": 0.5301051290219815,
"grad_norm": 8.693368911743164,
"learning_rate": 5.897435897435898e-05,
"log_odds_chosen": 0.13959810137748718,
"log_odds_ratio": -0.6384263038635254,
"logits/chosen": -21.52666664123535,
"logits/rejected": -21.586284637451172,
"logps/chosen": -2.9605109691619873,
"logps/rejected": -3.0922350883483887,
"loss": 3.3692,
"nll_loss": 3.305342674255371,
"rewards/accuracies": 0.640625,
"rewards/chosen": -0.29605114459991455,
"rewards/margins": 0.013172402046620846,
"rewards/rejected": -0.3092235326766968,
"step": 26
},
{
"epoch": 0.5504937878305193,
"grad_norm": 8.652642250061035,
"learning_rate": 5.6410256410256414e-05,
"log_odds_chosen": 0.13179698586463928,
"log_odds_ratio": -0.6500768661499023,
"logits/chosen": -21.42678451538086,
"logits/rejected": -21.460784912109375,
"logps/chosen": -2.8721394538879395,
"logps/rejected": -2.995119333267212,
"loss": 3.2505,
"nll_loss": 3.1854705810546875,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.2872139513492584,
"rewards/margins": 0.012298012152314186,
"rewards/rejected": -0.29951193928718567,
"step": 27
},
{
"epoch": 0.5708824466390571,
"grad_norm": 9.070357322692871,
"learning_rate": 5.384615384615385e-05,
"log_odds_chosen": 0.06178201735019684,
"log_odds_ratio": -0.672979474067688,
"logits/chosen": -21.22831153869629,
"logits/rejected": -21.3394775390625,
"logps/chosen": -2.8566203117370605,
"logps/rejected": -2.9141340255737305,
"loss": 3.2585,
"nll_loss": 3.191239833831787,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.2856620252132416,
"rewards/margins": 0.00575134065002203,
"rewards/rejected": -0.2914133667945862,
"step": 28
},
{
"epoch": 0.5912711054475948,
"grad_norm": 8.458242416381836,
"learning_rate": 5.128205128205128e-05,
"log_odds_chosen": 0.15792030096054077,
"log_odds_ratio": -0.6294744610786438,
"logits/chosen": -21.1250057220459,
"logits/rejected": -21.499061584472656,
"logps/chosen": -2.759305238723755,
"logps/rejected": -2.9067063331604004,
"loss": 3.0982,
"nll_loss": 3.0353012084960938,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.2759305536746979,
"rewards/margins": 0.014740084297955036,
"rewards/rejected": -0.29067063331604004,
"step": 29
},
{
"epoch": 0.6116597642561326,
"grad_norm": 7.295664310455322,
"learning_rate": 4.871794871794872e-05,
"log_odds_chosen": 0.10987404733896255,
"log_odds_ratio": -0.6565213203430176,
"logits/chosen": -21.159151077270508,
"logits/rejected": -21.322973251342773,
"logps/chosen": -2.6774895191192627,
"logps/rejected": -2.7792630195617676,
"loss": 3.0365,
"nll_loss": 2.9708292484283447,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.26774895191192627,
"rewards/margins": 0.010177338495850563,
"rewards/rejected": -0.2779262959957123,
"step": 30
},
{
"epoch": 0.6320484230646702,
"grad_norm": 5.238936424255371,
"learning_rate": 4.615384615384616e-05,
"log_odds_chosen": 0.12444893270730972,
"log_odds_ratio": -0.6458015441894531,
"logits/chosen": -21.33697509765625,
"logits/rejected": -21.44550895690918,
"logps/chosen": -2.718036651611328,
"logps/rejected": -2.8326799869537354,
"loss": 3.0471,
"nll_loss": 2.9825289249420166,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.27180367708206177,
"rewards/margins": 0.011464308016002178,
"rewards/rejected": -0.2832679748535156,
"step": 31
},
{
"epoch": 0.652437081873208,
"grad_norm": 3.7347183227539062,
"learning_rate": 4.358974358974359e-05,
"log_odds_chosen": 0.14928925037384033,
"log_odds_ratio": -0.632793128490448,
"logits/chosen": -21.26512908935547,
"logits/rejected": -21.627201080322266,
"logps/chosen": -2.61964750289917,
"logps/rejected": -2.7589893341064453,
"loss": 2.9566,
"nll_loss": 2.8932831287384033,
"rewards/accuracies": 0.640625,
"rewards/chosen": -0.2619647979736328,
"rewards/margins": 0.013934147544205189,
"rewards/rejected": -0.27589893341064453,
"step": 32
},
{
"epoch": 0.6728257406817458,
"grad_norm": 4.034797191619873,
"learning_rate": 4.1025641025641023e-05,
"log_odds_chosen": 0.15122568607330322,
"log_odds_ratio": -0.6359158158302307,
"logits/chosen": -21.30415916442871,
"logits/rejected": -21.591684341430664,
"logps/chosen": -2.5857341289520264,
"logps/rejected": -2.725327253341675,
"loss": 2.9044,
"nll_loss": 2.8408076763153076,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.25857341289520264,
"rewards/margins": 0.013959331437945366,
"rewards/rejected": -0.27253273129463196,
"step": 33
},
{
"epoch": 0.6932143994902835,
"grad_norm": 4.155447483062744,
"learning_rate": 3.846153846153846e-05,
"log_odds_chosen": 0.1967214196920395,
"log_odds_ratio": -0.6163268089294434,
"logits/chosen": -21.339792251586914,
"logits/rejected": -21.64710235595703,
"logps/chosen": -2.5361406803131104,
"logps/rejected": -2.716292381286621,
"loss": 2.8424,
"nll_loss": 2.780778408050537,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.25361406803131104,
"rewards/margins": 0.018015172332525253,
"rewards/rejected": -0.2716292142868042,
"step": 34
},
{
"epoch": 0.7136030582988213,
"grad_norm": 4.295944690704346,
"learning_rate": 3.58974358974359e-05,
"log_odds_chosen": 0.15044674277305603,
"log_odds_ratio": -0.6369625926017761,
"logits/chosen": -21.422685623168945,
"logits/rejected": -21.575807571411133,
"logps/chosen": -2.5278573036193848,
"logps/rejected": -2.665768623352051,
"loss": 2.8338,
"nll_loss": 2.770143985748291,
"rewards/accuracies": 0.671875,
"rewards/chosen": -0.25278571248054504,
"rewards/margins": 0.01379114855080843,
"rewards/rejected": -0.266576886177063,
"step": 35
},
{
"epoch": 0.733991717107359,
"grad_norm": 4.2903218269348145,
"learning_rate": 3.3333333333333335e-05,
"log_odds_chosen": 0.1347799301147461,
"log_odds_ratio": -0.6438393592834473,
"logits/chosen": -21.24468994140625,
"logits/rejected": -21.41203498840332,
"logps/chosen": -2.528264284133911,
"logps/rejected": -2.649376392364502,
"loss": 2.7962,
"nll_loss": 2.7317965030670166,
"rewards/accuracies": 0.671875,
"rewards/chosen": -0.25282642245292664,
"rewards/margins": 0.012111193500459194,
"rewards/rejected": -0.2649376094341278,
"step": 36
},
{
"epoch": 0.7543803759158968,
"grad_norm": 4.028990745544434,
"learning_rate": 3.0769230769230774e-05,
"log_odds_chosen": 0.094432532787323,
"log_odds_ratio": -0.660050630569458,
"logits/chosen": -21.22388458251953,
"logits/rejected": -21.39991569519043,
"logps/chosen": -2.404029369354248,
"logps/rejected": -2.4883453845977783,
"loss": 2.7288,
"nll_loss": 2.662759304046631,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.2404029369354248,
"rewards/margins": 0.008431611582636833,
"rewards/rejected": -0.2488345503807068,
"step": 37
},
{
"epoch": 0.7747690347244345,
"grad_norm": 4.111539840698242,
"learning_rate": 2.8205128205128207e-05,
"log_odds_chosen": 0.10993114113807678,
"log_odds_ratio": -0.6521956324577332,
"logits/chosen": -21.075908660888672,
"logits/rejected": -21.3304500579834,
"logps/chosen": -2.415492534637451,
"logps/rejected": -2.51309871673584,
"loss": 2.7113,
"nll_loss": 2.6461009979248047,
"rewards/accuracies": 0.640625,
"rewards/chosen": -0.24154925346374512,
"rewards/margins": 0.00976061075925827,
"rewards/rejected": -0.2513098418712616,
"step": 38
},
{
"epoch": 0.7951576935329723,
"grad_norm": 4.149285316467285,
"learning_rate": 2.564102564102564e-05,
"log_odds_chosen": 0.1494802087545395,
"log_odds_ratio": -0.642086923122406,
"logits/chosen": -21.14957046508789,
"logits/rejected": -21.198829650878906,
"logps/chosen": -2.4494690895080566,
"logps/rejected": -2.578655958175659,
"loss": 2.764,
"nll_loss": 2.6997601985931396,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.2449468970298767,
"rewards/margins": 0.012918684631586075,
"rewards/rejected": -0.2578656077384949,
"step": 39
},
{
"epoch": 0.8155463523415101,
"grad_norm": 3.752061605453491,
"learning_rate": 2.307692307692308e-05,
"log_odds_chosen": 0.1980660855770111,
"log_odds_ratio": -0.6138854026794434,
"logits/chosen": -21.215681076049805,
"logits/rejected": -21.421716690063477,
"logps/chosen": -2.30979323387146,
"logps/rejected": -2.488827705383301,
"loss": 2.6535,
"nll_loss": 2.5920803546905518,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.2309793382883072,
"rewards/margins": 0.017903439700603485,
"rewards/rejected": -0.24888278543949127,
"step": 40
},
{
"epoch": 0.8359350111500478,
"grad_norm": 3.594350814819336,
"learning_rate": 2.0512820512820512e-05,
"log_odds_chosen": 0.16511858999729156,
"log_odds_ratio": -0.6266151666641235,
"logits/chosen": -21.277156829833984,
"logits/rejected": -21.420866012573242,
"logps/chosen": -2.2157204151153564,
"logps/rejected": -2.362075090408325,
"loss": 2.5551,
"nll_loss": 2.4924428462982178,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.22157204151153564,
"rewards/margins": 0.014635485596954823,
"rewards/rejected": -0.2362075299024582,
"step": 41
},
{
"epoch": 0.8563236699585856,
"grad_norm": 3.3525002002716064,
"learning_rate": 1.794871794871795e-05,
"log_odds_chosen": 0.17062872648239136,
"log_odds_ratio": -0.6240280270576477,
"logits/chosen": -21.299617767333984,
"logits/rejected": -21.4893798828125,
"logps/chosen": -2.137052536010742,
"logps/rejected": -2.2875261306762695,
"loss": 2.4734,
"nll_loss": 2.410959005355835,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.21370527148246765,
"rewards/margins": 0.015047376975417137,
"rewards/rejected": -0.22875264286994934,
"step": 42
},
{
"epoch": 0.8767123287671232,
"grad_norm": 3.4169490337371826,
"learning_rate": 1.5384615384615387e-05,
"log_odds_chosen": 0.17450740933418274,
"log_odds_ratio": -0.624984622001648,
"logits/chosen": -21.364498138427734,
"logits/rejected": -21.48747444152832,
"logps/chosen": -2.1473217010498047,
"logps/rejected": -2.2999539375305176,
"loss": 2.4607,
"nll_loss": 2.3982174396514893,
"rewards/accuracies": 0.671875,
"rewards/chosen": -0.21473217010498047,
"rewards/margins": 0.015263203531503677,
"rewards/rejected": -0.22999539971351624,
"step": 43
},
{
"epoch": 0.897100987575661,
"grad_norm": 3.6790101528167725,
"learning_rate": 1.282051282051282e-05,
"log_odds_chosen": 0.1768464744091034,
"log_odds_ratio": -0.6206101775169373,
"logits/chosen": -21.34912109375,
"logits/rejected": -21.56414794921875,
"logps/chosen": -2.0744807720184326,
"logps/rejected": -2.2315096855163574,
"loss": 2.4399,
"nll_loss": 2.3778889179229736,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.2074480801820755,
"rewards/margins": 0.015702884644269943,
"rewards/rejected": -0.22315098345279694,
"step": 44
},
{
"epoch": 0.9174896463841988,
"grad_norm": 4.124813556671143,
"learning_rate": 1.0256410256410256e-05,
"log_odds_chosen": 0.15550947189331055,
"log_odds_ratio": -0.6320576667785645,
"logits/chosen": -21.190162658691406,
"logits/rejected": -21.409671783447266,
"logps/chosen": -2.202808380126953,
"logps/rejected": -2.339813470840454,
"loss": 2.5482,
"nll_loss": 2.484999179840088,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.22028085589408875,
"rewards/margins": 0.01370049174875021,
"rewards/rejected": -0.23398138582706451,
"step": 45
},
{
"epoch": 0.9378783051927365,
"grad_norm": 3.9883928298950195,
"learning_rate": 7.692307692307694e-06,
"log_odds_chosen": 0.14527583122253418,
"log_odds_ratio": -0.6397886872291565,
"logits/chosen": -21.27792739868164,
"logits/rejected": -21.438852310180664,
"logps/chosen": -2.1068501472473145,
"logps/rejected": -2.2336912155151367,
"loss": 2.443,
"nll_loss": 2.3790242671966553,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.21068499982357025,
"rewards/margins": 0.012684130109846592,
"rewards/rejected": -0.22336915135383606,
"step": 46
},
{
"epoch": 0.9582669640012743,
"grad_norm": 4.068699836730957,
"learning_rate": 5.128205128205128e-06,
"log_odds_chosen": 0.1451435387134552,
"log_odds_ratio": -0.6405162215232849,
"logits/chosen": -21.31224250793457,
"logits/rejected": -21.43851089477539,
"logps/chosen": -2.114806652069092,
"logps/rejected": -2.2397756576538086,
"loss": 2.5128,
"nll_loss": 2.4487178325653076,
"rewards/accuracies": 0.640625,
"rewards/chosen": -0.21148064732551575,
"rewards/margins": 0.012496920302510262,
"rewards/rejected": -0.22397758066654205,
"step": 47
},
{
"epoch": 0.978655622809812,
"grad_norm": 4.197949409484863,
"learning_rate": 2.564102564102564e-06,
"log_odds_chosen": 0.1768975853919983,
"log_odds_ratio": -0.6197177171707153,
"logits/chosen": -21.2183780670166,
"logits/rejected": -21.43151092529297,
"logps/chosen": -2.0559120178222656,
"logps/rejected": -2.2111546993255615,
"loss": 2.4158,
"nll_loss": 2.3538498878479004,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.20559120178222656,
"rewards/margins": 0.01552429050207138,
"rewards/rejected": -0.22111549973487854,
"step": 48
},
{
"epoch": 0.9990442816183498,
"grad_norm": 4.174467086791992,
"learning_rate": 0.0,
"log_odds_chosen": 0.19606728851795197,
"log_odds_ratio": -0.6126885414123535,
"logits/chosen": -21.158491134643555,
"logits/rejected": -21.405784606933594,
"logps/chosen": -2.048922538757324,
"logps/rejected": -2.2221457958221436,
"loss": 2.4451,
"nll_loss": 2.3838396072387695,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.20489224791526794,
"rewards/margins": 0.017322326079010963,
"rewards/rejected": -0.22221459448337555,
"step": 49
}
],
"logging_steps": 1,
"max_steps": 49,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 4,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}