|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 500, |
|
"global_step": 312, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.5625e-07, |
|
"logits/chosen": -0.03749094158411026, |
|
"logits/rejected": 0.03204140067100525, |
|
"logps/chosen": -559.2154541015625, |
|
"logps/rejected": -890.8544921875, |
|
"loss": 0.1791, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5625e-06, |
|
"logits/chosen": -0.15300115942955017, |
|
"logits/rejected": -0.12813442945480347, |
|
"logps/chosen": -481.31634521484375, |
|
"logps/rejected": -804.3386840820312, |
|
"loss": 0.2179, |
|
"rewards/accuracies": 0.5347222089767456, |
|
"rewards/chosen": -0.0013270878698676825, |
|
"rewards/margins": 0.0023776644375175238, |
|
"rewards/rejected": -0.0037047527730464935, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": -0.17372946441173553, |
|
"logits/rejected": -0.10258068144321442, |
|
"logps/chosen": -623.6970825195312, |
|
"logps/rejected": -831.9564208984375, |
|
"loss": 0.1794, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.0440058633685112, |
|
"rewards/margins": 0.03512907028198242, |
|
"rewards/rejected": -0.07913494110107422, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.6875000000000004e-06, |
|
"logits/chosen": -0.19729416072368622, |
|
"logits/rejected": -0.2202361822128296, |
|
"logps/chosen": -677.2532958984375, |
|
"logps/rejected": -1107.8955078125, |
|
"loss": 0.1602, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.1397601068019867, |
|
"rewards/margins": 0.1527976095676422, |
|
"rewards/rejected": -0.2925576865673065, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.989935734988098e-06, |
|
"logits/chosen": -0.19801445305347443, |
|
"logits/rejected": -0.20075838267803192, |
|
"logps/chosen": -686.9973754882812, |
|
"logps/rejected": -964.6900634765625, |
|
"loss": 0.1518, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.13784293830394745, |
|
"rewards/margins": 0.11145637929439545, |
|
"rewards/rejected": -0.2492993324995041, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.949188496058089e-06, |
|
"logits/chosen": -0.19628724455833435, |
|
"logits/rejected": -0.19985933601856232, |
|
"logps/chosen": -606.3834228515625, |
|
"logps/rejected": -982.189453125, |
|
"loss": 0.143, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.13489742577075958, |
|
"rewards/margins": 0.14312751591205597, |
|
"rewards/rejected": -0.27802491188049316, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8776412907378845e-06, |
|
"logits/chosen": -0.18777325749397278, |
|
"logits/rejected": -0.2514544427394867, |
|
"logps/chosen": -679.7066650390625, |
|
"logps/rejected": -1182.11083984375, |
|
"loss": 0.1376, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.14588578045368195, |
|
"rewards/margins": 0.16957412660121918, |
|
"rewards/rejected": -0.3154599070549011, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7761938666470405e-06, |
|
"logits/chosen": -0.20718152821063995, |
|
"logits/rejected": -0.18668043613433838, |
|
"logps/chosen": -692.5406494140625, |
|
"logps/rejected": -1079.9908447265625, |
|
"loss": 0.1417, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.1537138670682907, |
|
"rewards/margins": 0.16138955950737, |
|
"rewards/rejected": -0.3151034116744995, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.646121984004666e-06, |
|
"logits/chosen": -0.21971774101257324, |
|
"logits/rejected": -0.24234215915203094, |
|
"logps/chosen": -710.6842041015625, |
|
"logps/rejected": -1136.7525634765625, |
|
"loss": 0.1352, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.19244925677776337, |
|
"rewards/margins": 0.1675754338502884, |
|
"rewards/rejected": -0.36002466082572937, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4890613722044526e-06, |
|
"logits/chosen": -0.23076686263084412, |
|
"logits/rejected": -0.21980659663677216, |
|
"logps/chosen": -706.5121459960938, |
|
"logps/rejected": -1076.0596923828125, |
|
"loss": 0.1278, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.18025998771190643, |
|
"rewards/margins": 0.15869399905204773, |
|
"rewards/rejected": -0.33895397186279297, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3069871595684795e-06, |
|
"logits/chosen": -0.2438955307006836, |
|
"logits/rejected": -0.19910338521003723, |
|
"logps/chosen": -733.8079833984375, |
|
"logps/rejected": -1110.71923828125, |
|
"loss": 0.1256, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.20329563319683075, |
|
"rewards/margins": 0.16255612671375275, |
|
"rewards/rejected": -0.3658517897129059, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.102189034962561e-06, |
|
"logits/chosen": -0.19663023948669434, |
|
"logits/rejected": -0.21354413032531738, |
|
"logps/chosen": -751.34765625, |
|
"logps/rejected": -1080.0113525390625, |
|
"loss": 0.1301, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1990022361278534, |
|
"rewards/margins": 0.15701577067375183, |
|
"rewards/rejected": -0.3560180068016052, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.8772424536302565e-06, |
|
"logits/chosen": -0.1580425500869751, |
|
"logits/rejected": -0.20701774954795837, |
|
"logps/chosen": -649.1971435546875, |
|
"logps/rejected": -1110.748779296875, |
|
"loss": 0.1265, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.17047809064388275, |
|
"rewards/margins": 0.17829902470111847, |
|
"rewards/rejected": -0.34877708554267883, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.634976249348867e-06, |
|
"logits/chosen": -0.1981876790523529, |
|
"logits/rejected": -0.22599034011363983, |
|
"logps/chosen": -736.1990966796875, |
|
"logps/rejected": -1140.4241943359375, |
|
"loss": 0.119, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.1852777749300003, |
|
"rewards/margins": 0.19615033268928528, |
|
"rewards/rejected": -0.3814280927181244, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3784370602033572e-06, |
|
"logits/chosen": -0.22472605109214783, |
|
"logits/rejected": -0.20980677008628845, |
|
"logps/chosen": -727.8230590820312, |
|
"logps/rejected": -1080.5826416015625, |
|
"loss": 0.1191, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.19285574555397034, |
|
"rewards/margins": 0.16788846254348755, |
|
"rewards/rejected": -0.3607442080974579, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1108510153447352e-06, |
|
"logits/chosen": -0.18862155079841614, |
|
"logits/rejected": -0.18806660175323486, |
|
"logps/chosen": -744.17919921875, |
|
"logps/rejected": -1178.953369140625, |
|
"loss": 0.1196, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.196334570646286, |
|
"rewards/margins": 0.20167379081249237, |
|
"rewards/rejected": -0.39800840616226196, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.835583164544139e-06, |
|
"logits/chosen": -0.20716705918312073, |
|
"logits/rejected": -0.23998220264911652, |
|
"logps/chosen": -716.6298217773438, |
|
"logps/rejected": -1128.6080322265625, |
|
"loss": 0.1196, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.20008280873298645, |
|
"rewards/margins": 0.19826427102088928, |
|
"rewards/rejected": -0.39834707975387573, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.556095160739513e-06, |
|
"logits/chosen": -0.18807800114154816, |
|
"logits/rejected": -0.24145498871803284, |
|
"logps/chosen": -771.5186767578125, |
|
"logps/rejected": -1165.31787109375, |
|
"loss": 0.1095, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.24131879210472107, |
|
"rewards/margins": 0.18717139959335327, |
|
"rewards/rejected": -0.42849016189575195, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2759017277414165e-06, |
|
"logits/chosen": -0.22785386443138123, |
|
"logits/rejected": -0.24600060284137726, |
|
"logps/chosen": -828.021484375, |
|
"logps/rejected": -1258.8414306640625, |
|
"loss": 0.1148, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.26432016491889954, |
|
"rewards/margins": 0.2154586762189865, |
|
"rewards/rejected": -0.47977885603904724, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9985264605418185e-06, |
|
"logits/chosen": -0.19840948283672333, |
|
"logits/rejected": -0.22779376804828644, |
|
"logps/chosen": -748.4619140625, |
|
"logps/rejected": -1191.1099853515625, |
|
"loss": 0.1083, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.22014987468719482, |
|
"rewards/margins": 0.20830897986888885, |
|
"rewards/rejected": -0.4284588396549225, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -0.15184776484966278, |
|
"logits/rejected": -0.2133471965789795, |
|
"logps/chosen": -777.3941650390625, |
|
"logps/rejected": -1259.5482177734375, |
|
"loss": 0.1097, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.21874341368675232, |
|
"rewards/margins": 0.20576436817646027, |
|
"rewards/rejected": -0.4245077669620514, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.466103737583699e-06, |
|
"logits/chosen": -0.21760430932044983, |
|
"logits/rejected": -0.21733775734901428, |
|
"logps/chosen": -786.8435668945312, |
|
"logps/rejected": -1184.2471923828125, |
|
"loss": 0.1045, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.22768548130989075, |
|
"rewards/margins": 0.21592466533184052, |
|
"rewards/rejected": -0.44361013174057007, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.217751806485235e-06, |
|
"logits/chosen": -0.15853038430213928, |
|
"logits/rejected": -0.2094181478023529, |
|
"logps/chosen": -753.2785034179688, |
|
"logps/rejected": -1242.8369140625, |
|
"loss": 0.1112, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2066824734210968, |
|
"rewards/margins": 0.21286919713020325, |
|
"rewards/rejected": -0.41955167055130005, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.855248903979505e-07, |
|
"logits/chosen": -0.1381937712430954, |
|
"logits/rejected": -0.23759886622428894, |
|
"logps/chosen": -732.6824951171875, |
|
"logps/rejected": -1177.4642333984375, |
|
"loss": 0.1109, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.19210098683834076, |
|
"rewards/margins": 0.2129323035478592, |
|
"rewards/rejected": -0.40503329038619995, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.723433775328385e-07, |
|
"logits/chosen": -0.18693767488002777, |
|
"logits/rejected": -0.19473309814929962, |
|
"logps/chosen": -755.0777587890625, |
|
"logps/rejected": -1212.91064453125, |
|
"loss": 0.112, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.21374483406543732, |
|
"rewards/margins": 0.2292633354663849, |
|
"rewards/rejected": -0.4430081248283386, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.808881491049723e-07, |
|
"logits/chosen": -0.24827036261558533, |
|
"logits/rejected": -0.20749957859516144, |
|
"logps/chosen": -715.92333984375, |
|
"logps/rejected": -1144.6124267578125, |
|
"loss": 0.1215, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.20713326334953308, |
|
"rewards/margins": 0.18099360167980194, |
|
"rewards/rejected": -0.38812685012817383, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1356686569674344e-07, |
|
"logits/chosen": -0.19055083394050598, |
|
"logits/rejected": -0.2412928342819214, |
|
"logps/chosen": -693.1292724609375, |
|
"logps/rejected": -1136.007080078125, |
|
"loss": 0.1117, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.1922534853219986, |
|
"rewards/margins": 0.17840158939361572, |
|
"rewards/rejected": -0.3706550598144531, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7248368952908055e-07, |
|
"logits/chosen": -0.15525199472904205, |
|
"logits/rejected": -0.16159489750862122, |
|
"logps/chosen": -790.9383544921875, |
|
"logps/rejected": -1267.6949462890625, |
|
"loss": 0.109, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.21919909119606018, |
|
"rewards/margins": 0.19449128210544586, |
|
"rewards/rejected": -0.41369038820266724, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.59412823400657e-07, |
|
"logits/chosen": -0.19185583293437958, |
|
"logits/rejected": -0.1721155345439911, |
|
"logps/chosen": -745.1690673828125, |
|
"logps/rejected": -1151.0634765625, |
|
"loss": 0.1118, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.20249255001544952, |
|
"rewards/margins": 0.19131307303905487, |
|
"rewards/rejected": -0.3938056528568268, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.577619905828281e-08, |
|
"logits/chosen": -0.18523597717285156, |
|
"logits/rejected": -0.20255737006664276, |
|
"logps/chosen": -761.7174072265625, |
|
"logps/rejected": -1124.2659912109375, |
|
"loss": 0.1165, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.20564845204353333, |
|
"rewards/margins": 0.20229394733905792, |
|
"rewards/rejected": -0.40794238448143005, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.262559558016325e-08, |
|
"logits/chosen": -0.24068090319633484, |
|
"logits/rejected": -0.1731335073709488, |
|
"logps/chosen": -718.3553466796875, |
|
"logps/rejected": -1177.1654052734375, |
|
"loss": 0.1115, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.21485304832458496, |
|
"rewards/margins": 0.19611014425754547, |
|
"rewards/rejected": -0.41096314787864685, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.294126437336734e-10, |
|
"logits/chosen": -0.1374652087688446, |
|
"logits/rejected": -0.15347729623317719, |
|
"logps/chosen": -747.0050048828125, |
|
"logps/rejected": -1133.5335693359375, |
|
"loss": 0.1124, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.199618861079216, |
|
"rewards/margins": 0.20002660155296326, |
|
"rewards/rejected": -0.39964547753334045, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 312, |
|
"total_flos": 0.0, |
|
"train_loss": 0.004328498234733557, |
|
"train_runtime": 430.9957, |
|
"train_samples_per_second": 46.404, |
|
"train_steps_per_second": 0.724 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 312, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|