|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9980806142034548, |
|
"eval_steps": 10000000, |
|
"global_step": 390, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1135.2510024076782, |
|
"learning_rate": 1.282051282051282e-08, |
|
"logits/chosen": -2.5583817958831787, |
|
"logits/rejected": -2.4487552642822266, |
|
"logps/chosen": -258.1644592285156, |
|
"logps/rejected": -216.25729370117188, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1064.195577422658, |
|
"learning_rate": 1.2820512820512818e-07, |
|
"logits/chosen": -2.606004476547241, |
|
"logits/rejected": -2.553109884262085, |
|
"logps/chosen": -267.5234680175781, |
|
"logps/rejected": -217.6415557861328, |
|
"loss": 0.7054, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": 0.03280753642320633, |
|
"rewards/margins": 0.0353083573281765, |
|
"rewards/rejected": -0.002500815549865365, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 736.2634036624544, |
|
"learning_rate": 2.5641025641025636e-07, |
|
"logits/chosen": -2.630505323410034, |
|
"logits/rejected": -2.5676522254943848, |
|
"logps/chosen": -260.584716796875, |
|
"logps/rejected": -207.07144165039062, |
|
"loss": 0.5213, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.5755742788314819, |
|
"rewards/margins": 0.5894275903701782, |
|
"rewards/rejected": -0.013853324577212334, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1076.3695793406284, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"logits/chosen": -2.6462178230285645, |
|
"logits/rejected": -2.571561336517334, |
|
"logps/chosen": -250.9139862060547, |
|
"logps/rejected": -198.4534912109375, |
|
"loss": 0.3324, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 3.3866074085235596, |
|
"rewards/margins": 3.0545947551727295, |
|
"rewards/rejected": 0.3320125639438629, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 418.3228099023361, |
|
"learning_rate": 4.99989986344963e-07, |
|
"logits/chosen": -2.6392903327941895, |
|
"logits/rejected": -2.5602712631225586, |
|
"logps/chosen": -243.54013061523438, |
|
"logps/rejected": -192.9114227294922, |
|
"loss": 0.3161, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 5.447351455688477, |
|
"rewards/margins": 4.827452182769775, |
|
"rewards/rejected": 0.6198989748954773, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 630.2703390024756, |
|
"learning_rate": 4.987893180827479e-07, |
|
"logits/chosen": -2.651214361190796, |
|
"logits/rejected": -2.57964825630188, |
|
"logps/chosen": -258.42962646484375, |
|
"logps/rejected": -203.57992553710938, |
|
"loss": 0.366, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 7.846033573150635, |
|
"rewards/margins": 6.590806007385254, |
|
"rewards/rejected": 1.255226731300354, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 655.8352889546771, |
|
"learning_rate": 4.955969343539162e-07, |
|
"logits/chosen": -2.60957932472229, |
|
"logits/rejected": -2.5362067222595215, |
|
"logps/chosen": -262.3640441894531, |
|
"logps/rejected": -209.32199096679688, |
|
"loss": 0.3453, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 5.170942306518555, |
|
"rewards/margins": 6.18172550201416, |
|
"rewards/rejected": -1.0107834339141846, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 456.9589116841801, |
|
"learning_rate": 4.90438392204474e-07, |
|
"logits/chosen": -2.5825228691101074, |
|
"logits/rejected": -2.5089833736419678, |
|
"logps/chosen": -291.7918395996094, |
|
"logps/rejected": -227.83432006835938, |
|
"loss": 0.3454, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 4.895013809204102, |
|
"rewards/margins": 7.00995397567749, |
|
"rewards/rejected": -2.1149401664733887, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 816.8720109326792, |
|
"learning_rate": 4.83354989019146e-07, |
|
"logits/chosen": -2.5420753955841064, |
|
"logits/rejected": -2.467258930206299, |
|
"logps/chosen": -259.6270446777344, |
|
"logps/rejected": -204.15179443359375, |
|
"loss": 0.3311, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 6.344871997833252, |
|
"rewards/margins": 7.2052764892578125, |
|
"rewards/rejected": -0.860403835773468, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 922.6738539012168, |
|
"learning_rate": 4.7440343190975353e-07, |
|
"logits/chosen": -2.5713560581207275, |
|
"logits/rejected": -2.513441801071167, |
|
"logps/chosen": -257.0751037597656, |
|
"logps/rejected": -217.1184844970703, |
|
"loss": 0.3343, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 3.73614239692688, |
|
"rewards/margins": 5.834546089172363, |
|
"rewards/rejected": -2.0984034538269043, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 406.82707972381877, |
|
"learning_rate": 4.6365538373900506e-07, |
|
"logits/chosen": -2.6249356269836426, |
|
"logits/rejected": -2.5500850677490234, |
|
"logps/chosen": -236.4239501953125, |
|
"logps/rejected": -200.73150634765625, |
|
"loss": 0.5974, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 4.595959663391113, |
|
"rewards/margins": 6.244544506072998, |
|
"rewards/rejected": -1.648585557937622, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 656.3071663391811, |
|
"learning_rate": 4.5119688941406386e-07, |
|
"logits/chosen": -2.618974208831787, |
|
"logits/rejected": -2.5380780696868896, |
|
"logps/chosen": -257.79248046875, |
|
"logps/rejected": -209.8715362548828, |
|
"loss": 0.4404, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 6.305555820465088, |
|
"rewards/margins": 7.463587760925293, |
|
"rewards/rejected": -1.158031940460205, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 810.7648282749318, |
|
"learning_rate": 4.3712768704277524e-07, |
|
"logits/chosen": -2.5895957946777344, |
|
"logits/rejected": -2.519530773162842, |
|
"logps/chosen": -262.7950134277344, |
|
"logps/rejected": -208.9604949951172, |
|
"loss": 0.438, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 5.333884239196777, |
|
"rewards/margins": 7.409175872802734, |
|
"rewards/rejected": -2.075291156768799, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 513.4959841183485, |
|
"learning_rate": 4.2156040946718343e-07, |
|
"logits/chosen": -2.5553436279296875, |
|
"logits/rejected": -2.487457752227783, |
|
"logps/chosen": -251.7507781982422, |
|
"logps/rejected": -197.44088745117188, |
|
"loss": 0.4027, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 4.441976070404053, |
|
"rewards/margins": 7.408116340637207, |
|
"rewards/rejected": -2.966140031814575, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 650.5511601275197, |
|
"learning_rate": 4.046196825665637e-07, |
|
"logits/chosen": -2.5706536769866943, |
|
"logits/rejected": -2.500262498855591, |
|
"logps/chosen": -270.2043762207031, |
|
"logps/rejected": -217.0515594482422, |
|
"loss": 0.4293, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 3.977551221847534, |
|
"rewards/margins": 6.7731499671936035, |
|
"rewards/rejected": -2.7955987453460693, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 530.5799871161138, |
|
"learning_rate": 3.864411275486261e-07, |
|
"logits/chosen": -2.5574281215667725, |
|
"logits/rejected": -2.488007068634033, |
|
"logps/chosen": -263.3489685058594, |
|
"logps/rejected": -212.54638671875, |
|
"loss": 0.4583, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 5.79421329498291, |
|
"rewards/margins": 7.515044212341309, |
|
"rewards/rejected": -1.720831274986267, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 600.6086946072276, |
|
"learning_rate": 3.671702752161759e-07, |
|
"logits/chosen": -2.563870906829834, |
|
"logits/rejected": -2.493649482727051, |
|
"logps/chosen": -244.5281219482422, |
|
"logps/rejected": -198.3011474609375, |
|
"loss": 0.4465, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 3.527863025665283, |
|
"rewards/margins": 7.751578330993652, |
|
"rewards/rejected": -4.223715782165527, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 753.6856997505446, |
|
"learning_rate": 3.4696140090121375e-07, |
|
"logits/chosen": -2.5673775672912598, |
|
"logits/rejected": -2.500842571258545, |
|
"logps/chosen": -265.5797119140625, |
|
"logps/rejected": -211.0306854248047, |
|
"loss": 0.3547, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 4.089644908905029, |
|
"rewards/margins": 7.812180519104004, |
|
"rewards/rejected": -3.7225348949432373, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 645.3967547220625, |
|
"learning_rate": 3.259762893935617e-07, |
|
"logits/chosen": -2.6238903999328613, |
|
"logits/rejected": -2.534097194671631, |
|
"logps/chosen": -236.9849395751953, |
|
"logps/rejected": -186.6522674560547, |
|
"loss": 0.4499, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 4.038764953613281, |
|
"rewards/margins": 6.760235786437988, |
|
"rewards/rejected": -2.721470594406128, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 707.705744532387, |
|
"learning_rate": 3.0438293975154184e-07, |
|
"logits/chosen": -2.582486867904663, |
|
"logits/rejected": -2.5034093856811523, |
|
"logps/chosen": -261.0556945800781, |
|
"logps/rejected": -205.6962890625, |
|
"loss": 0.3591, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 3.011924982070923, |
|
"rewards/margins": 8.104998588562012, |
|
"rewards/rejected": -5.093073844909668, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 822.8629977119059, |
|
"learning_rate": 2.823542203635138e-07, |
|
"logits/chosen": -2.615396499633789, |
|
"logits/rejected": -2.5223731994628906, |
|
"logps/chosen": -277.3884582519531, |
|
"logps/rejected": -221.803466796875, |
|
"loss": 0.4468, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.5077309608459473, |
|
"rewards/margins": 9.055838584899902, |
|
"rewards/rejected": -6.548108100891113, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 954.7034527431528, |
|
"learning_rate": 2.600664850273538e-07, |
|
"logits/chosen": -2.603569269180298, |
|
"logits/rejected": -2.5283331871032715, |
|
"logps/chosen": -269.19873046875, |
|
"logps/rejected": -213.823974609375, |
|
"loss": 0.6013, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 1.4840681552886963, |
|
"rewards/margins": 7.454611778259277, |
|
"rewards/rejected": -5.97054386138916, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 685.0955562473252, |
|
"learning_rate": 2.3769816112703045e-07, |
|
"logits/chosen": -2.6224589347839355, |
|
"logits/rejected": -2.55679988861084, |
|
"logps/chosen": -257.71661376953125, |
|
"logps/rejected": -214.28329467773438, |
|
"loss": 0.4806, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 3.168187141418457, |
|
"rewards/margins": 6.781345367431641, |
|
"rewards/rejected": -3.6131577491760254, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 568.8894162951807, |
|
"learning_rate": 2.1542832120881677e-07, |
|
"logits/chosen": -2.664320945739746, |
|
"logits/rejected": -2.5764544010162354, |
|
"logps/chosen": -266.98114013671875, |
|
"logps/rejected": -216.44894409179688, |
|
"loss": 0.4149, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 5.137583255767822, |
|
"rewards/margins": 7.965329647064209, |
|
"rewards/rejected": -2.827746629714966, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 923.147651672606, |
|
"learning_rate": 1.934352493925695e-07, |
|
"logits/chosen": -2.6468780040740967, |
|
"logits/rejected": -2.5980067253112793, |
|
"logps/chosen": -262.94610595703125, |
|
"logps/rejected": -220.69448852539062, |
|
"loss": 0.3991, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 3.828115463256836, |
|
"rewards/margins": 9.526643753051758, |
|
"rewards/rejected": -5.69852876663208, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 615.4120078013015, |
|
"learning_rate": 1.7189501409486059e-07, |
|
"logits/chosen": -2.656362533569336, |
|
"logits/rejected": -2.584864616394043, |
|
"logps/chosen": -267.7325439453125, |
|
"logps/rejected": -222.2632293701172, |
|
"loss": 0.4004, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 3.2369320392608643, |
|
"rewards/margins": 7.942319393157959, |
|
"rewards/rejected": -4.705387115478516, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 488.0068782741624, |
|
"learning_rate": 1.5098005849021078e-07, |
|
"logits/chosen": -2.64605450630188, |
|
"logits/rejected": -2.586585283279419, |
|
"logps/chosen": -261.89093017578125, |
|
"logps/rejected": -208.77493286132812, |
|
"loss": 0.3817, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 3.175231456756592, |
|
"rewards/margins": 7.630448818206787, |
|
"rewards/rejected": -4.455216884613037, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 559.4430135222711, |
|
"learning_rate": 1.30857819994673e-07, |
|
"logits/chosen": -2.6208698749542236, |
|
"logits/rejected": -2.5371921062469482, |
|
"logps/chosen": -274.78753662109375, |
|
"logps/rejected": -230.4307861328125, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.437089204788208, |
|
"rewards/margins": 9.265036582946777, |
|
"rewards/rejected": -7.82794713973999, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 432.8210354095987, |
|
"learning_rate": 1.116893898236716e-07, |
|
"logits/chosen": -2.654949426651001, |
|
"logits/rejected": -2.5985524654388428, |
|
"logps/chosen": -270.3836975097656, |
|
"logps/rejected": -219.8002471923828, |
|
"loss": 0.3718, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 2.6581013202667236, |
|
"rewards/margins": 8.142509460449219, |
|
"rewards/rejected": -5.484408378601074, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 482.2442984028295, |
|
"learning_rate": 9.362822335518062e-08, |
|
"logits/chosen": -2.6166903972625732, |
|
"logits/rejected": -2.5696167945861816, |
|
"logps/chosen": -268.19140625, |
|
"logps/rejected": -216.9479522705078, |
|
"loss": 0.3568, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 3.0037200450897217, |
|
"rewards/margins": 7.667593479156494, |
|
"rewards/rejected": -4.663873195648193, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 492.9163861530474, |
|
"learning_rate": 7.681891162260015e-08, |
|
"logits/chosen": -2.636460781097412, |
|
"logits/rejected": -2.580770254135132, |
|
"logps/chosen": -274.6198425292969, |
|
"logps/rejected": -220.8531951904297, |
|
"loss": 0.3983, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 3.163914680480957, |
|
"rewards/margins": 7.829231262207031, |
|
"rewards/rejected": -4.665315628051758, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 437.9917779014462, |
|
"learning_rate": 6.139602377230247e-08, |
|
"logits/chosen": -2.6010611057281494, |
|
"logits/rejected": -2.532543897628784, |
|
"logps/chosen": -278.3953552246094, |
|
"logps/rejected": -215.9014129638672, |
|
"loss": 0.4376, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 3.1028757095336914, |
|
"rewards/margins": 8.152434349060059, |
|
"rewards/rejected": -5.049559593200684, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 649.8222699481745, |
|
"learning_rate": 4.748302975270837e-08, |
|
"logits/chosen": -2.6264309883117676, |
|
"logits/rejected": -2.5793588161468506, |
|
"logps/chosen": -261.37890625, |
|
"logps/rejected": -204.51773071289062, |
|
"loss": 0.405, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 2.8262996673583984, |
|
"rewards/margins": 7.375731468200684, |
|
"rewards/rejected": -4.549432277679443, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 583.9617574483902, |
|
"learning_rate": 3.5191311859445795e-08, |
|
"logits/chosen": -2.6449975967407227, |
|
"logits/rejected": -2.586719512939453, |
|
"logps/chosen": -264.58428955078125, |
|
"logps/rejected": -217.4517364501953, |
|
"loss": 0.3924, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 4.091521263122559, |
|
"rewards/margins": 7.869417667388916, |
|
"rewards/rejected": -3.7778968811035156, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 516.763098966226, |
|
"learning_rate": 2.4619273049795996e-08, |
|
"logits/chosen": -2.631946563720703, |
|
"logits/rejected": -2.5740180015563965, |
|
"logps/chosen": -260.0722961425781, |
|
"logps/rejected": -210.775146484375, |
|
"loss": 0.3558, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 3.791111469268799, |
|
"rewards/margins": 8.514566421508789, |
|
"rewards/rejected": -4.723455905914307, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 434.316228593937, |
|
"learning_rate": 1.5851549164932115e-08, |
|
"logits/chosen": -2.641859531402588, |
|
"logits/rejected": -2.592379093170166, |
|
"logps/chosen": -269.5948181152344, |
|
"logps/rejected": -226.536865234375, |
|
"loss": 0.382, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 3.7248435020446777, |
|
"rewards/margins": 7.7656402587890625, |
|
"rewards/rejected": -4.040797233581543, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 570.6334718025578, |
|
"learning_rate": 8.958331366609423e-09, |
|
"logits/chosen": -2.6432430744171143, |
|
"logits/rejected": -2.574936628341675, |
|
"logps/chosen": -275.0256652832031, |
|
"logps/rejected": -219.6584014892578, |
|
"loss": 0.4253, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 3.3530006408691406, |
|
"rewards/margins": 8.096589088439941, |
|
"rewards/rejected": -4.743588447570801, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 877.4134874498682, |
|
"learning_rate": 3.994804212627461e-09, |
|
"logits/chosen": -2.6024394035339355, |
|
"logits/rejected": -2.5662083625793457, |
|
"logps/chosen": -273.9478454589844, |
|
"logps/rejected": -229.1957550048828, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 4.258389472961426, |
|
"rewards/margins": 7.956662178039551, |
|
"rewards/rejected": -3.698272705078125, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 416.60583937652194, |
|
"learning_rate": 1.0007038696262516e-09, |
|
"logits/chosen": -2.651128053665161, |
|
"logits/rejected": -2.610159397125244, |
|
"logps/chosen": -263.07269287109375, |
|
"logps/rejected": -230.61502075195312, |
|
"loss": 0.3902, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 4.250136375427246, |
|
"rewards/margins": 8.099352836608887, |
|
"rewards/rejected": -3.8492164611816406, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 678.8175373396961, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -2.6594204902648926, |
|
"logits/rejected": -2.5979819297790527, |
|
"logps/chosen": -250.8957977294922, |
|
"logps/rejected": -210.31497192382812, |
|
"loss": 0.4132, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 3.278926134109497, |
|
"rewards/margins": 7.683538913726807, |
|
"rewards/rejected": -4.4046125411987305, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 390, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4220164916454217, |
|
"train_runtime": 5868.9984, |
|
"train_samples_per_second": 8.519, |
|
"train_steps_per_second": 0.066 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 390, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|