|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 3750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3333333333333334e-08, |
|
"logits/chosen": -1.381319522857666, |
|
"logits/rejected": -0.9757366180419922, |
|
"logps/chosen": -223.25863647460938, |
|
"logps/rejected": -830.5400390625, |
|
"loss": 0.2593, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3333333333333336e-07, |
|
"logits/chosen": -1.736572504043579, |
|
"logits/rejected": -1.0549728870391846, |
|
"logps/chosen": -406.9079284667969, |
|
"logps/rejected": -761.596435546875, |
|
"loss": 0.1822, |
|
"rewards/accuracies": 0.5555555820465088, |
|
"rewards/chosen": 0.00039627417572773993, |
|
"rewards/margins": 0.000484730233438313, |
|
"rewards/rejected": -8.845605771057308e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.666666666666667e-07, |
|
"logits/chosen": -1.6399459838867188, |
|
"logits/rejected": -1.0379071235656738, |
|
"logps/chosen": -483.6226501464844, |
|
"logps/rejected": -819.0009765625, |
|
"loss": 0.1801, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 3.848170308629051e-05, |
|
"rewards/margins": 0.00036858199746347964, |
|
"rewards/rejected": -0.00033010030165314674, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"logits/chosen": -1.7753417491912842, |
|
"logits/rejected": -1.3355859518051147, |
|
"logps/chosen": -443.94390869140625, |
|
"logps/rejected": -788.3363647460938, |
|
"loss": 0.2323, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0017691084649413824, |
|
"rewards/margins": 0.0024432786740362644, |
|
"rewards/rejected": -0.0006741699180565774, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.333333333333335e-07, |
|
"logits/chosen": -1.5635123252868652, |
|
"logits/rejected": -0.9124569892883301, |
|
"logps/chosen": -458.33428955078125, |
|
"logps/rejected": -747.6420288085938, |
|
"loss": 0.2195, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.004033350385725498, |
|
"rewards/margins": 0.006722611375153065, |
|
"rewards/rejected": -0.0026892595924437046, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.666666666666667e-07, |
|
"logits/chosen": -1.631588339805603, |
|
"logits/rejected": -0.8681947588920593, |
|
"logps/chosen": -465.05731201171875, |
|
"logps/rejected": -838.4075927734375, |
|
"loss": 0.2014, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.008273603394627571, |
|
"rewards/margins": 0.015597726218402386, |
|
"rewards/rejected": -0.0073241242207586765, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.000000000000001e-07, |
|
"logits/chosen": -1.4628058671951294, |
|
"logits/rejected": -1.2347371578216553, |
|
"logps/chosen": -343.9599304199219, |
|
"logps/rejected": -739.0056762695312, |
|
"loss": 0.1761, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.0036266068927943707, |
|
"rewards/margins": 0.022512439638376236, |
|
"rewards/rejected": -0.01888582855463028, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.333333333333334e-07, |
|
"logits/chosen": -1.8094412088394165, |
|
"logits/rejected": -0.9877569079399109, |
|
"logps/chosen": -497.0489807128906, |
|
"logps/rejected": -864.1619262695312, |
|
"loss": 0.1809, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.003260440658777952, |
|
"rewards/margins": 0.046338800340890884, |
|
"rewards/rejected": -0.04307835176587105, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.066666666666667e-06, |
|
"logits/chosen": -1.6897491216659546, |
|
"logits/rejected": -1.0848586559295654, |
|
"logps/chosen": -560.68017578125, |
|
"logps/rejected": -1089.6458740234375, |
|
"loss": 0.1443, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.006865059025585651, |
|
"rewards/margins": 0.08565281331539154, |
|
"rewards/rejected": -0.09251787513494492, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"logits/chosen": -1.7690013647079468, |
|
"logits/rejected": -0.9375957250595093, |
|
"logps/chosen": -427.4967346191406, |
|
"logps/rejected": -953.2610473632812, |
|
"loss": 0.1582, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.010322836227715015, |
|
"rewards/margins": 0.07507754862308502, |
|
"rewards/rejected": -0.08540038764476776, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"logits/chosen": -1.4934628009796143, |
|
"logits/rejected": -0.9881563186645508, |
|
"logps/chosen": -397.26727294921875, |
|
"logps/rejected": -905.0123901367188, |
|
"loss": 0.1339, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.01196499913930893, |
|
"rewards/margins": 0.11481380462646484, |
|
"rewards/rejected": -0.12677881121635437, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4666666666666669e-06, |
|
"logits/chosen": -1.559560775756836, |
|
"logits/rejected": -0.9702051877975464, |
|
"logps/chosen": -446.76849365234375, |
|
"logps/rejected": -964.1668090820312, |
|
"loss": 0.1009, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.029861677438020706, |
|
"rewards/margins": 0.14626939594745636, |
|
"rewards/rejected": -0.17613105475902557, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"logits/chosen": -1.7105035781860352, |
|
"logits/rejected": -0.9925721287727356, |
|
"logps/chosen": -542.6316528320312, |
|
"logps/rejected": -977.3997192382812, |
|
"loss": 0.1034, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.06414582580327988, |
|
"rewards/margins": 0.1390438973903656, |
|
"rewards/rejected": -0.2031897008419037, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.7333333333333336e-06, |
|
"logits/chosen": -1.7129449844360352, |
|
"logits/rejected": -0.9808734655380249, |
|
"logps/chosen": -639.7268676757812, |
|
"logps/rejected": -1264.408203125, |
|
"loss": 0.0778, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1564100980758667, |
|
"rewards/margins": 0.22008244693279266, |
|
"rewards/rejected": -0.37649255990982056, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8666666666666669e-06, |
|
"logits/chosen": -1.4957599639892578, |
|
"logits/rejected": -0.9900957345962524, |
|
"logps/chosen": -606.5774536132812, |
|
"logps/rejected": -1158.996826171875, |
|
"loss": 0.1186, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1333167403936386, |
|
"rewards/margins": 0.18605293333530426, |
|
"rewards/rejected": -0.31936967372894287, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -1.7749484777450562, |
|
"logits/rejected": -1.1498210430145264, |
|
"logps/chosen": -588.8472900390625, |
|
"logps/rejected": -1247.8353271484375, |
|
"loss": 0.0621, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.15223875641822815, |
|
"rewards/margins": 0.2504242956638336, |
|
"rewards/rejected": -0.40266305208206177, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.133333333333334e-06, |
|
"logits/chosen": -1.4668447971343994, |
|
"logits/rejected": -0.9629266858100891, |
|
"logps/chosen": -740.5608520507812, |
|
"logps/rejected": -1320.8753662109375, |
|
"loss": 0.074, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.22708892822265625, |
|
"rewards/margins": 0.23111894726753235, |
|
"rewards/rejected": -0.458207905292511, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.266666666666667e-06, |
|
"logits/chosen": -1.5278871059417725, |
|
"logits/rejected": -1.1116211414337158, |
|
"logps/chosen": -571.50390625, |
|
"logps/rejected": -1168.722412109375, |
|
"loss": 0.1131, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1509171426296234, |
|
"rewards/margins": 0.27128082513809204, |
|
"rewards/rejected": -0.4221979081630707, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"logits/chosen": -1.6129930019378662, |
|
"logits/rejected": -1.0707186460494995, |
|
"logps/chosen": -591.6637573242188, |
|
"logps/rejected": -1284.7354736328125, |
|
"loss": 0.0784, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12449514865875244, |
|
"rewards/margins": 0.2544993758201599, |
|
"rewards/rejected": -0.37899452447891235, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5333333333333338e-06, |
|
"logits/chosen": -1.6850178241729736, |
|
"logits/rejected": -1.1943457126617432, |
|
"logps/chosen": -514.7299194335938, |
|
"logps/rejected": -1000.5671997070312, |
|
"loss": 0.1249, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.09646569192409515, |
|
"rewards/margins": 0.17868806421756744, |
|
"rewards/rejected": -0.2751538157463074, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.666666666666667e-06, |
|
"logits/chosen": -1.5830456018447876, |
|
"logits/rejected": -1.097068428993225, |
|
"logps/chosen": -658.3897705078125, |
|
"logps/rejected": -1211.879150390625, |
|
"loss": 0.0931, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16921699047088623, |
|
"rewards/margins": 0.20347478985786438, |
|
"rewards/rejected": -0.3726917505264282, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"logits/chosen": -1.765363097190857, |
|
"logits/rejected": -0.8959721326828003, |
|
"logps/chosen": -716.1063842773438, |
|
"logps/rejected": -1217.0675048828125, |
|
"loss": 0.1018, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21076449751853943, |
|
"rewards/margins": 0.23035843670368195, |
|
"rewards/rejected": -0.4411229193210602, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9333333333333338e-06, |
|
"logits/chosen": -1.4971026182174683, |
|
"logits/rejected": -1.0308849811553955, |
|
"logps/chosen": -635.874267578125, |
|
"logps/rejected": -1254.032470703125, |
|
"loss": 0.0944, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1963960826396942, |
|
"rewards/margins": 0.2612842321395874, |
|
"rewards/rejected": -0.4576803147792816, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.066666666666667e-06, |
|
"logits/chosen": -1.3114674091339111, |
|
"logits/rejected": -1.1226143836975098, |
|
"logps/chosen": -669.163818359375, |
|
"logps/rejected": -1447.716064453125, |
|
"loss": 0.083, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2657012939453125, |
|
"rewards/margins": 0.3253920078277588, |
|
"rewards/rejected": -0.5910933613777161, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"logits/chosen": -1.5960338115692139, |
|
"logits/rejected": -0.8448678255081177, |
|
"logps/chosen": -783.5925903320312, |
|
"logps/rejected": -1367.287841796875, |
|
"loss": 0.0798, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2770916819572449, |
|
"rewards/margins": 0.2540797293186188, |
|
"rewards/rejected": -0.531171441078186, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"logits/chosen": -1.6775119304656982, |
|
"logits/rejected": -1.2753263711929321, |
|
"logps/chosen": -694.5948486328125, |
|
"logps/rejected": -1378.860107421875, |
|
"loss": 0.0801, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2328498661518097, |
|
"rewards/margins": 0.26127415895462036, |
|
"rewards/rejected": -0.49412399530410767, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.4666666666666672e-06, |
|
"logits/chosen": -1.4416381120681763, |
|
"logits/rejected": -0.9755349159240723, |
|
"logps/chosen": -681.4529418945312, |
|
"logps/rejected": -1261.5875244140625, |
|
"loss": 0.1169, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.18144458532333374, |
|
"rewards/margins": 0.2220906764268875, |
|
"rewards/rejected": -0.40353527665138245, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"logits/chosen": -1.776125192642212, |
|
"logits/rejected": -1.1443500518798828, |
|
"logps/chosen": -665.0440673828125, |
|
"logps/rejected": -1173.3468017578125, |
|
"loss": 0.1028, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19433431327342987, |
|
"rewards/margins": 0.21727688610553741, |
|
"rewards/rejected": -0.4116111695766449, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.7333333333333337e-06, |
|
"logits/chosen": -1.6408843994140625, |
|
"logits/rejected": -1.2362545728683472, |
|
"logps/chosen": -652.2579956054688, |
|
"logps/rejected": -1271.698974609375, |
|
"loss": 0.0917, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1795671433210373, |
|
"rewards/margins": 0.24397559463977814, |
|
"rewards/rejected": -0.4235427975654602, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.866666666666667e-06, |
|
"logits/chosen": -1.8528718948364258, |
|
"logits/rejected": -1.1004583835601807, |
|
"logps/chosen": -762.6512451171875, |
|
"logps/rejected": -1343.5460205078125, |
|
"loss": 0.0868, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.226405531167984, |
|
"rewards/margins": 0.2510288953781128, |
|
"rewards/rejected": -0.4774344861507416, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -1.7013801336288452, |
|
"logits/rejected": -1.2125957012176514, |
|
"logps/chosen": -613.29345703125, |
|
"logps/rejected": -1406.8970947265625, |
|
"loss": 0.0761, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.17005790770053864, |
|
"rewards/margins": 0.32780343294143677, |
|
"rewards/rejected": -0.4978613257408142, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.133333333333333e-06, |
|
"logits/chosen": -1.415290117263794, |
|
"logits/rejected": -0.9908515810966492, |
|
"logps/chosen": -712.7332763671875, |
|
"logps/rejected": -1258.039306640625, |
|
"loss": 0.1258, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.25694146752357483, |
|
"rewards/margins": 0.21659043431282043, |
|
"rewards/rejected": -0.4735318720340729, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.266666666666668e-06, |
|
"logits/chosen": -1.5041309595108032, |
|
"logits/rejected": -1.0038108825683594, |
|
"logps/chosen": -661.9385986328125, |
|
"logps/rejected": -1160.186767578125, |
|
"loss": 0.0992, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.21888642013072968, |
|
"rewards/margins": 0.21484248340129852, |
|
"rewards/rejected": -0.4337288737297058, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4e-06, |
|
"logits/chosen": -1.6438214778900146, |
|
"logits/rejected": -1.0989625453948975, |
|
"logps/chosen": -537.155517578125, |
|
"logps/rejected": -1078.1251220703125, |
|
"loss": 0.0751, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11467760801315308, |
|
"rewards/margins": 0.25429314374923706, |
|
"rewards/rejected": -0.36897072196006775, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.533333333333334e-06, |
|
"logits/chosen": -1.7438217401504517, |
|
"logits/rejected": -1.0444936752319336, |
|
"logps/chosen": -644.1298828125, |
|
"logps/rejected": -1348.093505859375, |
|
"loss": 0.0711, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.19465377926826477, |
|
"rewards/margins": 0.32639193534851074, |
|
"rewards/rejected": -0.5210457444190979, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.666666666666667e-06, |
|
"logits/chosen": -1.7205537557601929, |
|
"logits/rejected": -1.1176466941833496, |
|
"logps/chosen": -653.7063598632812, |
|
"logps/rejected": -1317.8218994140625, |
|
"loss": 0.0812, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15995605289936066, |
|
"rewards/margins": 0.2710942327976227, |
|
"rewards/rejected": -0.43105024099349976, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.800000000000001e-06, |
|
"logits/chosen": -1.8885447978973389, |
|
"logits/rejected": -1.4283367395401, |
|
"logps/chosen": -435.2276916503906, |
|
"logps/rejected": -1041.008056640625, |
|
"loss": 0.0918, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.05559268593788147, |
|
"rewards/margins": 0.2262849360704422, |
|
"rewards/rejected": -0.2818776071071625, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.933333333333334e-06, |
|
"logits/chosen": -1.7745654582977295, |
|
"logits/rejected": -1.2009865045547485, |
|
"logps/chosen": -606.5958251953125, |
|
"logps/rejected": -1145.1015625, |
|
"loss": 0.0968, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.11594484001398087, |
|
"rewards/margins": 0.24505428969860077, |
|
"rewards/rejected": -0.36099910736083984, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999972922944898e-06, |
|
"logits/chosen": -1.6557433605194092, |
|
"logits/rejected": -1.1534380912780762, |
|
"logps/chosen": -643.7879028320312, |
|
"logps/rejected": -1236.194091796875, |
|
"loss": 0.091, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16532504558563232, |
|
"rewards/margins": 0.24116845428943634, |
|
"rewards/rejected": -0.40649348497390747, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999756310023261e-06, |
|
"logits/chosen": -1.6974895000457764, |
|
"logits/rejected": -1.2435563802719116, |
|
"logps/chosen": -619.0020751953125, |
|
"logps/rejected": -1253.426513671875, |
|
"loss": 0.0496, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.11514924466609955, |
|
"rewards/margins": 0.26920756697654724, |
|
"rewards/rejected": -0.3843567967414856, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999323102948655e-06, |
|
"logits/chosen": -1.6725631952285767, |
|
"logits/rejected": -0.9952858686447144, |
|
"logps/chosen": -683.99072265625, |
|
"logps/rejected": -1263.6319580078125, |
|
"loss": 0.0985, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.22003349661827087, |
|
"rewards/margins": 0.21788537502288818, |
|
"rewards/rejected": -0.4379189610481262, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998673339256785e-06, |
|
"logits/chosen": -1.6918662786483765, |
|
"logits/rejected": -0.9807602167129517, |
|
"logps/chosen": -646.4641723632812, |
|
"logps/rejected": -1105.5599365234375, |
|
"loss": 0.1276, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13202176988124847, |
|
"rewards/margins": 0.22680577635765076, |
|
"rewards/rejected": -0.35882753133773804, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997807075247147e-06, |
|
"logits/chosen": -1.4633402824401855, |
|
"logits/rejected": -0.8964066505432129, |
|
"logps/chosen": -623.4297485351562, |
|
"logps/rejected": -1171.818359375, |
|
"loss": 0.0798, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17750394344329834, |
|
"rewards/margins": 0.23941746354103088, |
|
"rewards/rejected": -0.4169214367866516, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.996724385978142e-06, |
|
"logits/chosen": -1.7313741445541382, |
|
"logits/rejected": -1.088205099105835, |
|
"logps/chosen": -618.5508422851562, |
|
"logps/rejected": -1335.5269775390625, |
|
"loss": 0.0565, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.15107488632202148, |
|
"rewards/margins": 0.30174392461776733, |
|
"rewards/rejected": -0.4528188109397888, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995425365260585e-06, |
|
"logits/chosen": -1.6465423107147217, |
|
"logits/rejected": -1.1094882488250732, |
|
"logps/chosen": -621.9539794921875, |
|
"logps/rejected": -1221.4598388671875, |
|
"loss": 0.0909, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15349408984184265, |
|
"rewards/margins": 0.25984710454940796, |
|
"rewards/rejected": -0.4133411943912506, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.993910125649561e-06, |
|
"logits/chosen": -1.8044379949569702, |
|
"logits/rejected": -1.1311860084533691, |
|
"logps/chosen": -721.5858764648438, |
|
"logps/rejected": -1256.863037109375, |
|
"loss": 0.0915, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.21946442127227783, |
|
"rewards/margins": 0.22845225036144257, |
|
"rewards/rejected": -0.4479166567325592, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.992178798434684e-06, |
|
"logits/chosen": -1.76088547706604, |
|
"logits/rejected": -1.2385786771774292, |
|
"logps/chosen": -657.9778442382812, |
|
"logps/rejected": -1414.336669921875, |
|
"loss": 0.0575, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.16792455315589905, |
|
"rewards/margins": 0.3237282633781433, |
|
"rewards/rejected": -0.49165281653404236, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990231533628719e-06, |
|
"logits/chosen": -1.5809530019760132, |
|
"logits/rejected": -1.1684823036193848, |
|
"logps/chosen": -623.2067260742188, |
|
"logps/rejected": -1329.0098876953125, |
|
"loss": 0.067, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15883824229240417, |
|
"rewards/margins": 0.3003080189228058, |
|
"rewards/rejected": -0.45914632081985474, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988068499954578e-06, |
|
"logits/chosen": -1.5603920221328735, |
|
"logits/rejected": -1.0103719234466553, |
|
"logps/chosen": -745.253173828125, |
|
"logps/rejected": -1386.312744140625, |
|
"loss": 0.0639, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2308214157819748, |
|
"rewards/margins": 0.3027498126029968, |
|
"rewards/rejected": -0.5335712432861328, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.985689884830711e-06, |
|
"logits/chosen": -1.7204630374908447, |
|
"logits/rejected": -1.0981186628341675, |
|
"logps/chosen": -663.6007080078125, |
|
"logps/rejected": -1271.954833984375, |
|
"loss": 0.0546, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.23924314975738525, |
|
"rewards/margins": 0.3018389344215393, |
|
"rewards/rejected": -0.5410820841789246, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.983095894354858e-06, |
|
"logits/chosen": -1.6816179752349854, |
|
"logits/rejected": -1.2458436489105225, |
|
"logps/chosen": -812.2794189453125, |
|
"logps/rejected": -1452.4508056640625, |
|
"loss": 0.0876, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3606022000312805, |
|
"rewards/margins": 0.2518552541732788, |
|
"rewards/rejected": -0.6124575138092041, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980286753286196e-06, |
|
"logits/chosen": -1.675920844078064, |
|
"logits/rejected": -1.2157505750656128, |
|
"logps/chosen": -622.9227294921875, |
|
"logps/rejected": -1268.478271484375, |
|
"loss": 0.0997, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.23038606345653534, |
|
"rewards/margins": 0.280417799949646, |
|
"rewards/rejected": -0.5108038783073425, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.97726270502586e-06, |
|
"logits/chosen": -1.6523020267486572, |
|
"logits/rejected": -1.1194841861724854, |
|
"logps/chosen": -536.9530029296875, |
|
"logps/rejected": -1284.30517578125, |
|
"loss": 0.0562, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15182599425315857, |
|
"rewards/margins": 0.308131605386734, |
|
"rewards/rejected": -0.4599575400352478, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.974024011595864e-06, |
|
"logits/chosen": -1.5846920013427734, |
|
"logits/rejected": -1.2534643411636353, |
|
"logps/chosen": -688.9484252929688, |
|
"logps/rejected": -1183.958984375, |
|
"loss": 0.1024, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.21223464608192444, |
|
"rewards/margins": 0.19630616903305054, |
|
"rewards/rejected": -0.408540815114975, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970570953616383e-06, |
|
"logits/chosen": -1.7657943964004517, |
|
"logits/rejected": -1.2593281269073486, |
|
"logps/chosen": -591.6409912109375, |
|
"logps/rejected": -1163.0576171875, |
|
"loss": 0.1017, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.17924083769321442, |
|
"rewards/margins": 0.2396513670682907, |
|
"rewards/rejected": -0.4188922345638275, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.966903830281449e-06, |
|
"logits/chosen": -1.7614797353744507, |
|
"logits/rejected": -1.300492286682129, |
|
"logps/chosen": -588.6466064453125, |
|
"logps/rejected": -1244.7601318359375, |
|
"loss": 0.0925, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16072975099086761, |
|
"rewards/margins": 0.29418593645095825, |
|
"rewards/rejected": -0.4549156725406647, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9630229593330226e-06, |
|
"logits/chosen": -1.5666195154190063, |
|
"logits/rejected": -0.924557089805603, |
|
"logps/chosen": -734.8566284179688, |
|
"logps/rejected": -1351.8369140625, |
|
"loss": 0.0874, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.236628919839859, |
|
"rewards/margins": 0.24980910122394562, |
|
"rewards/rejected": -0.48643797636032104, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.958928677033465e-06, |
|
"logits/chosen": -1.5746439695358276, |
|
"logits/rejected": -1.0514501333236694, |
|
"logps/chosen": -673.4780883789062, |
|
"logps/rejected": -1367.333740234375, |
|
"loss": 0.0705, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.23988430202007294, |
|
"rewards/margins": 0.26904696226119995, |
|
"rewards/rejected": -0.5089312791824341, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.954621338136399e-06, |
|
"logits/chosen": -1.5549921989440918, |
|
"logits/rejected": -0.825292706489563, |
|
"logps/chosen": -724.3428955078125, |
|
"logps/rejected": -1314.2396240234375, |
|
"loss": 0.1148, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2795710563659668, |
|
"rewards/margins": 0.24751707911491394, |
|
"rewards/rejected": -0.5270881652832031, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.95010131585597e-06, |
|
"logits/chosen": -1.4858559370040894, |
|
"logits/rejected": -1.164233922958374, |
|
"logps/chosen": -720.483642578125, |
|
"logps/rejected": -1470.8189697265625, |
|
"loss": 0.0644, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.28550735116004944, |
|
"rewards/margins": 0.27395910024642944, |
|
"rewards/rejected": -0.5594664812088013, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9453690018345144e-06, |
|
"logits/chosen": -1.7503721714019775, |
|
"logits/rejected": -1.0189541578292847, |
|
"logps/chosen": -725.6884765625, |
|
"logps/rejected": -1330.4554443359375, |
|
"loss": 0.0731, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.19357889890670776, |
|
"rewards/margins": 0.26617223024368286, |
|
"rewards/rejected": -0.4597511887550354, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.940424806108619e-06, |
|
"logits/chosen": -1.7605764865875244, |
|
"logits/rejected": -0.9754387140274048, |
|
"logps/chosen": -736.2041625976562, |
|
"logps/rejected": -1220.272216796875, |
|
"loss": 0.0966, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16418889164924622, |
|
"rewards/margins": 0.2352021038532257, |
|
"rewards/rejected": -0.3993909955024719, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.935269157073597e-06, |
|
"logits/chosen": -1.6696975231170654, |
|
"logits/rejected": -1.1732470989227295, |
|
"logps/chosen": -546.5994262695312, |
|
"logps/rejected": -1076.2138671875, |
|
"loss": 0.1142, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.17854368686676025, |
|
"rewards/margins": 0.22630243003368378, |
|
"rewards/rejected": -0.40484610199928284, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9299025014463665e-06, |
|
"logits/chosen": -1.6130393743515015, |
|
"logits/rejected": -0.9944950342178345, |
|
"logps/chosen": -607.0363159179688, |
|
"logps/rejected": -1301.4781494140625, |
|
"loss": 0.0704, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.19457684457302094, |
|
"rewards/margins": 0.2787570357322693, |
|
"rewards/rejected": -0.47333383560180664, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.924325304226745e-06, |
|
"logits/chosen": -1.6456743478775024, |
|
"logits/rejected": -1.2997629642486572, |
|
"logps/chosen": -683.3946533203125, |
|
"logps/rejected": -1356.107177734375, |
|
"loss": 0.0818, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2882430851459503, |
|
"rewards/margins": 0.2655082941055298, |
|
"rewards/rejected": -0.5537513494491577, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.91853804865716e-06, |
|
"logits/chosen": -1.406205654144287, |
|
"logits/rejected": -1.0480941534042358, |
|
"logps/chosen": -837.6101684570312, |
|
"logps/rejected": -1426.9271240234375, |
|
"loss": 0.0759, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3547836244106293, |
|
"rewards/margins": 0.24995502829551697, |
|
"rewards/rejected": -0.6047386527061462, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.912541236180779e-06, |
|
"logits/chosen": -1.6352602243423462, |
|
"logits/rejected": -1.0264801979064941, |
|
"logps/chosen": -687.4603271484375, |
|
"logps/rejected": -1298.3892822265625, |
|
"loss": 0.0835, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.25935202836990356, |
|
"rewards/margins": 0.28063350915908813, |
|
"rewards/rejected": -0.5399855375289917, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9063353863980565e-06, |
|
"logits/chosen": -1.652361512184143, |
|
"logits/rejected": -1.3324909210205078, |
|
"logps/chosen": -703.0999755859375, |
|
"logps/rejected": -1310.7120361328125, |
|
"loss": 0.0822, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2579403519630432, |
|
"rewards/margins": 0.26742976903915405, |
|
"rewards/rejected": -0.525370180606842, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.899921037021719e-06, |
|
"logits/chosen": -1.8630950450897217, |
|
"logits/rejected": -1.117851734161377, |
|
"logps/chosen": -572.1090087890625, |
|
"logps/rejected": -1142.517822265625, |
|
"loss": 0.0669, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13034145534038544, |
|
"rewards/margins": 0.2702890932559967, |
|
"rewards/rejected": -0.40063056349754333, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.893298743830168e-06, |
|
"logits/chosen": -1.5307347774505615, |
|
"logits/rejected": -1.1395881175994873, |
|
"logps/chosen": -570.9049072265625, |
|
"logps/rejected": -1325.3486328125, |
|
"loss": 0.0776, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1899944543838501, |
|
"rewards/margins": 0.31518790125846863, |
|
"rewards/rejected": -0.5051823854446411, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.88646908061933e-06, |
|
"logits/chosen": -1.6429624557495117, |
|
"logits/rejected": -0.952468991279602, |
|
"logps/chosen": -504.73321533203125, |
|
"logps/rejected": -1041.1776123046875, |
|
"loss": 0.1034, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.08391048014163971, |
|
"rewards/margins": 0.24313923716545105, |
|
"rewards/rejected": -0.3270496726036072, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.879432639152935e-06, |
|
"logits/chosen": -1.8941549062728882, |
|
"logits/rejected": -1.1734158992767334, |
|
"logps/chosen": -530.647216796875, |
|
"logps/rejected": -1243.784912109375, |
|
"loss": 0.0827, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.03668345510959625, |
|
"rewards/margins": 0.30672526359558105, |
|
"rewards/rejected": -0.3434087336063385, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8721900291112415e-06, |
|
"logits/chosen": -1.6430208683013916, |
|
"logits/rejected": -1.1618727445602417, |
|
"logps/chosen": -664.0516357421875, |
|
"logps/rejected": -1358.1705322265625, |
|
"loss": 0.064, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1712174415588379, |
|
"rewards/margins": 0.27873173356056213, |
|
"rewards/rejected": -0.44994911551475525, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.864741878038218e-06, |
|
"logits/chosen": -1.5911110639572144, |
|
"logits/rejected": -0.9319060444831848, |
|
"logps/chosen": -551.3786010742188, |
|
"logps/rejected": -1062.9332275390625, |
|
"loss": 0.0848, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.13672541081905365, |
|
"rewards/margins": 0.23177051544189453, |
|
"rewards/rejected": -0.368495911359787, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.857088831287158e-06, |
|
"logits/chosen": -1.3883923292160034, |
|
"logits/rejected": -0.8421472311019897, |
|
"logps/chosen": -622.4197387695312, |
|
"logps/rejected": -1147.806884765625, |
|
"loss": 0.0948, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.19367149472236633, |
|
"rewards/margins": 0.24927671253681183, |
|
"rewards/rejected": -0.44294825196266174, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.849231551964771e-06, |
|
"logits/chosen": -1.6589374542236328, |
|
"logits/rejected": -1.0507800579071045, |
|
"logps/chosen": -580.9486083984375, |
|
"logps/rejected": -1298.055908203125, |
|
"loss": 0.078, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1833840161561966, |
|
"rewards/margins": 0.29099351167678833, |
|
"rewards/rejected": -0.4743775427341461, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.841170720873723e-06, |
|
"logits/chosen": -1.9193570613861084, |
|
"logits/rejected": -1.29689359664917, |
|
"logps/chosen": -593.7999267578125, |
|
"logps/rejected": -1074.827880859375, |
|
"loss": 0.1027, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.13971452414989471, |
|
"rewards/margins": 0.2339230477809906, |
|
"rewards/rejected": -0.3736375570297241, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.832907036453647e-06, |
|
"logits/chosen": -1.7203441858291626, |
|
"logits/rejected": -1.0238596200942993, |
|
"logps/chosen": -575.1504516601562, |
|
"logps/rejected": -1223.0185546875, |
|
"loss": 0.0779, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1228102445602417, |
|
"rewards/margins": 0.28532546758651733, |
|
"rewards/rejected": -0.4081357419490814, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.824441214720629e-06, |
|
"logits/chosen": -1.5191177129745483, |
|
"logits/rejected": -1.0029339790344238, |
|
"logps/chosen": -579.9059448242188, |
|
"logps/rejected": -1316.46142578125, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.1901233047246933, |
|
"rewards/margins": 0.3058861494064331, |
|
"rewards/rejected": -0.4960094392299652, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.815773989205165e-06, |
|
"logits/chosen": -1.5836925506591797, |
|
"logits/rejected": -0.9203447103500366, |
|
"logps/chosen": -776.8148803710938, |
|
"logps/rejected": -1455.188720703125, |
|
"loss": 0.0819, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.23546621203422546, |
|
"rewards/margins": 0.2814113199710846, |
|
"rewards/rejected": -0.5168775916099548, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.806906110888606e-06, |
|
"logits/chosen": -1.6255607604980469, |
|
"logits/rejected": -1.1398379802703857, |
|
"logps/chosen": -536.3740234375, |
|
"logps/rejected": -1251.212646484375, |
|
"loss": 0.0639, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11835892498493195, |
|
"rewards/margins": 0.3064490854740143, |
|
"rewards/rejected": -0.4248080849647522, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7978383481380865e-06, |
|
"logits/chosen": -1.5587496757507324, |
|
"logits/rejected": -1.0958257913589478, |
|
"logps/chosen": -577.5291748046875, |
|
"logps/rejected": -1253.976806640625, |
|
"loss": 0.0724, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15895147621631622, |
|
"rewards/margins": 0.29505571722984314, |
|
"rewards/rejected": -0.45400720834732056, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.788571486639948e-06, |
|
"logits/chosen": -1.5678253173828125, |
|
"logits/rejected": -1.0112650394439697, |
|
"logps/chosen": -582.6275634765625, |
|
"logps/rejected": -1258.8802490234375, |
|
"loss": 0.0726, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16134771704673767, |
|
"rewards/margins": 0.3210769593715668, |
|
"rewards/rejected": -0.48242464661598206, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.779106329331665e-06, |
|
"logits/chosen": -1.5958459377288818, |
|
"logits/rejected": -1.0422935485839844, |
|
"logps/chosen": -604.3923950195312, |
|
"logps/rejected": -1268.2470703125, |
|
"loss": 0.0656, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17159458994865417, |
|
"rewards/margins": 0.29530078172683716, |
|
"rewards/rejected": -0.4668954014778137, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.769443696332272e-06, |
|
"logits/chosen": -1.6635258197784424, |
|
"logits/rejected": -1.1757241487503052, |
|
"logps/chosen": -576.9772338867188, |
|
"logps/rejected": -1184.485595703125, |
|
"loss": 0.0987, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10074315965175629, |
|
"rewards/margins": 0.25036129355430603, |
|
"rewards/rejected": -0.3511044681072235, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.759584424871302e-06, |
|
"logits/chosen": -1.4811336994171143, |
|
"logits/rejected": -0.9249873161315918, |
|
"logps/chosen": -593.8721313476562, |
|
"logps/rejected": -1355.8583984375, |
|
"loss": 0.06, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1529252678155899, |
|
"rewards/margins": 0.3431011736392975, |
|
"rewards/rejected": -0.4960264265537262, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.749529369216246e-06, |
|
"logits/chosen": -1.5341050624847412, |
|
"logits/rejected": -0.9583051800727844, |
|
"logps/chosen": -680.0247802734375, |
|
"logps/rejected": -1330.1800537109375, |
|
"loss": 0.0771, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.18310308456420898, |
|
"rewards/margins": 0.31003543734550476, |
|
"rewards/rejected": -0.49313855171203613, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7392794005985324e-06, |
|
"logits/chosen": -1.3542709350585938, |
|
"logits/rejected": -0.9462020993232727, |
|
"logps/chosen": -521.2957763671875, |
|
"logps/rejected": -1280.841552734375, |
|
"loss": 0.0739, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12296704202890396, |
|
"rewards/margins": 0.3202964663505554, |
|
"rewards/rejected": -0.44326353073120117, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7288354071380415e-06, |
|
"logits/chosen": -1.4632813930511475, |
|
"logits/rejected": -1.0232326984405518, |
|
"logps/chosen": -572.9237060546875, |
|
"logps/rejected": -1308.82666015625, |
|
"loss": 0.0532, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.11455889046192169, |
|
"rewards/margins": 0.3093631863594055, |
|
"rewards/rejected": -0.423922061920166, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7181982937661485e-06, |
|
"logits/chosen": -1.8567373752593994, |
|
"logits/rejected": -0.8586881756782532, |
|
"logps/chosen": -683.0592651367188, |
|
"logps/rejected": -1194.775634765625, |
|
"loss": 0.0782, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15534023940563202, |
|
"rewards/margins": 0.24916231632232666, |
|
"rewards/rejected": -0.4045025706291199, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.707368982147318e-06, |
|
"logits/chosen": -1.5189939737319946, |
|
"logits/rejected": -1.0900559425354004, |
|
"logps/chosen": -617.9810791015625, |
|
"logps/rejected": -1228.6695556640625, |
|
"loss": 0.0811, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16101650893688202, |
|
"rewards/margins": 0.2594471573829651, |
|
"rewards/rejected": -0.4204636514186859, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.696348410599244e-06, |
|
"logits/chosen": -1.609279990196228, |
|
"logits/rejected": -0.9329544901847839, |
|
"logps/chosen": -649.111328125, |
|
"logps/rejected": -1244.6878662109375, |
|
"loss": 0.0947, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1598513424396515, |
|
"rewards/margins": 0.2521916627883911, |
|
"rewards/rejected": -0.4120430052280426, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.685137534011549e-06, |
|
"logits/chosen": -1.5942234992980957, |
|
"logits/rejected": -0.9433167576789856, |
|
"logps/chosen": -600.16796875, |
|
"logps/rejected": -1137.0755615234375, |
|
"loss": 0.0973, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1486530601978302, |
|
"rewards/margins": 0.2408868372440338, |
|
"rewards/rejected": -0.389539897441864, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.673737323763048e-06, |
|
"logits/chosen": -1.757784128189087, |
|
"logits/rejected": -0.9788764715194702, |
|
"logps/chosen": -526.8590087890625, |
|
"logps/rejected": -1164.993408203125, |
|
"loss": 0.0518, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.06887698173522949, |
|
"rewards/margins": 0.3250022530555725, |
|
"rewards/rejected": -0.3938792049884796, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.662148767637578e-06, |
|
"logits/chosen": -1.695051908493042, |
|
"logits/rejected": -1.0422875881195068, |
|
"logps/chosen": -673.8726806640625, |
|
"logps/rejected": -1251.0634765625, |
|
"loss": 0.0824, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1298268437385559, |
|
"rewards/margins": 0.2638325095176697, |
|
"rewards/rejected": -0.3936593532562256, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.650372869738415e-06, |
|
"logits/chosen": -1.817731261253357, |
|
"logits/rejected": -1.1714346408843994, |
|
"logps/chosen": -632.3103637695312, |
|
"logps/rejected": -1204.101806640625, |
|
"loss": 0.0757, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.08371297270059586, |
|
"rewards/margins": 0.2807037830352783, |
|
"rewards/rejected": -0.36441677808761597, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.638410650401267e-06, |
|
"logits/chosen": -1.6917314529418945, |
|
"logits/rejected": -1.2975494861602783, |
|
"logps/chosen": -501.3326110839844, |
|
"logps/rejected": -1110.889404296875, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.010560419410467148, |
|
"rewards/margins": 0.2687085270881653, |
|
"rewards/rejected": -0.27926892042160034, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626263146105875e-06, |
|
"logits/chosen": -1.681670904159546, |
|
"logits/rejected": -1.0678811073303223, |
|
"logps/chosen": -548.2903442382812, |
|
"logps/rejected": -1203.9267578125, |
|
"loss": 0.0685, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.08060415089130402, |
|
"rewards/margins": 0.297444224357605, |
|
"rewards/rejected": -0.3780483603477478, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.613931409386196e-06, |
|
"logits/chosen": -1.469982385635376, |
|
"logits/rejected": -1.1716678142547607, |
|
"logps/chosen": -675.4676513671875, |
|
"logps/rejected": -1349.0924072265625, |
|
"loss": 0.087, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.18839700520038605, |
|
"rewards/margins": 0.28368327021598816, |
|
"rewards/rejected": -0.4720802903175354, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.601416508739211e-06, |
|
"logits/chosen": -1.589691162109375, |
|
"logits/rejected": -1.0157541036605835, |
|
"logps/chosen": -608.4013061523438, |
|
"logps/rejected": -1338.4168701171875, |
|
"loss": 0.0375, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.15417249500751495, |
|
"rewards/margins": 0.30883660912513733, |
|
"rewards/rejected": -0.4630090594291687, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.588719528532342e-06, |
|
"logits/chosen": -1.6860065460205078, |
|
"logits/rejected": -1.0934185981750488, |
|
"logps/chosen": -706.9453735351562, |
|
"logps/rejected": -1332.388916015625, |
|
"loss": 0.0972, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2328319251537323, |
|
"rewards/margins": 0.26258260011672974, |
|
"rewards/rejected": -0.49541449546813965, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.575841568909494e-06, |
|
"logits/chosen": -1.433650016784668, |
|
"logits/rejected": -1.1477049589157104, |
|
"logps/chosen": -688.2756958007812, |
|
"logps/rejected": -1245.33447265625, |
|
"loss": 0.0894, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.22389158606529236, |
|
"rewards/margins": 0.24267525970935822, |
|
"rewards/rejected": -0.46656686067581177, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.562783745695738e-06, |
|
"logits/chosen": -1.5687224864959717, |
|
"logits/rejected": -0.853603720664978, |
|
"logps/chosen": -791.3260498046875, |
|
"logps/rejected": -1367.469482421875, |
|
"loss": 0.0939, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.21331918239593506, |
|
"rewards/margins": 0.2616121768951416, |
|
"rewards/rejected": -0.4749313294887543, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.549547190300622e-06, |
|
"logits/chosen": -1.7121455669403076, |
|
"logits/rejected": -0.8818023800849915, |
|
"logps/chosen": -657.982177734375, |
|
"logps/rejected": -1258.4931640625, |
|
"loss": 0.0877, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12182845175266266, |
|
"rewards/margins": 0.31102484464645386, |
|
"rewards/rejected": -0.4328532814979553, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536133049620143e-06, |
|
"logits/chosen": -1.4799646139144897, |
|
"logits/rejected": -1.1651620864868164, |
|
"logps/chosen": -479.6021423339844, |
|
"logps/rejected": -1181.6322021484375, |
|
"loss": 0.0789, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.09868054836988449, |
|
"rewards/margins": 0.26966673135757446, |
|
"rewards/rejected": -0.36834731698036194, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.522542485937369e-06, |
|
"logits/chosen": -1.827845573425293, |
|
"logits/rejected": -1.0856597423553467, |
|
"logps/chosen": -625.0631103515625, |
|
"logps/rejected": -1238.838134765625, |
|
"loss": 0.0703, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1401088982820511, |
|
"rewards/margins": 0.30787450075149536, |
|
"rewards/rejected": -0.44798341393470764, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.508776676821739e-06, |
|
"logits/chosen": -1.5721492767333984, |
|
"logits/rejected": -0.8746267557144165, |
|
"logps/chosen": -652.6207275390625, |
|
"logps/rejected": -1226.645751953125, |
|
"loss": 0.0661, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.17216768860816956, |
|
"rewards/margins": 0.27679505944252014, |
|
"rewards/rejected": -0.4489627778530121, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.494836815027022e-06, |
|
"logits/chosen": -1.6152639389038086, |
|
"logits/rejected": -1.128306269645691, |
|
"logps/chosen": -588.0072021484375, |
|
"logps/rejected": -1203.587890625, |
|
"loss": 0.0879, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.15817685425281525, |
|
"rewards/margins": 0.2696138024330139, |
|
"rewards/rejected": -0.42779064178466797, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4807241083879774e-06, |
|
"logits/chosen": -1.3238633871078491, |
|
"logits/rejected": -0.7138497233390808, |
|
"logps/chosen": -601.8871459960938, |
|
"logps/rejected": -1312.838623046875, |
|
"loss": 0.0552, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.15420369803905487, |
|
"rewards/margins": 0.3349114656448364, |
|
"rewards/rejected": -0.4891151785850525, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.466439779715696e-06, |
|
"logits/chosen": -1.2504911422729492, |
|
"logits/rejected": -0.7397804856300354, |
|
"logps/chosen": -631.8212890625, |
|
"logps/rejected": -1243.5828857421875, |
|
"loss": 0.0868, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.19839642941951752, |
|
"rewards/margins": 0.29437923431396484, |
|
"rewards/rejected": -0.49277567863464355, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.451985066691649e-06, |
|
"logits/chosen": -1.809372901916504, |
|
"logits/rejected": -0.8999295234680176, |
|
"logps/chosen": -633.7000122070312, |
|
"logps/rejected": -1235.184814453125, |
|
"loss": 0.0684, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1565568596124649, |
|
"rewards/margins": 0.30287352204322815, |
|
"rewards/rejected": -0.45943036675453186, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.437361221760449e-06, |
|
"logits/chosen": -1.5753552913665771, |
|
"logits/rejected": -0.8743413090705872, |
|
"logps/chosen": -684.4049072265625, |
|
"logps/rejected": -1305.688232421875, |
|
"loss": 0.0796, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17457641661167145, |
|
"rewards/margins": 0.2981758415699005, |
|
"rewards/rejected": -0.47275224328041077, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.422569512021332e-06, |
|
"logits/chosen": -1.5101526975631714, |
|
"logits/rejected": -0.977883517742157, |
|
"logps/chosen": -588.9329223632812, |
|
"logps/rejected": -1159.4969482421875, |
|
"loss": 0.0763, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1433287113904953, |
|
"rewards/margins": 0.25360313057899475, |
|
"rewards/rejected": -0.39693182706832886, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.407611219118363e-06, |
|
"logits/chosen": -1.4618273973464966, |
|
"logits/rejected": -1.0363489389419556, |
|
"logps/chosen": -448.98089599609375, |
|
"logps/rejected": -1272.61865234375, |
|
"loss": 0.0453, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.09960681945085526, |
|
"rewards/margins": 0.31163084506988525, |
|
"rewards/rejected": -0.4112376570701599, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.3924876391293915e-06, |
|
"logits/chosen": -1.6461549997329712, |
|
"logits/rejected": -0.9912912249565125, |
|
"logps/chosen": -611.0677490234375, |
|
"logps/rejected": -1183.2215576171875, |
|
"loss": 0.0846, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1306958794593811, |
|
"rewards/margins": 0.28848105669021606, |
|
"rewards/rejected": -0.41917696595191956, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.377200082453748e-06, |
|
"logits/chosen": -1.8383325338363647, |
|
"logits/rejected": -0.9174288511276245, |
|
"logps/chosen": -618.29638671875, |
|
"logps/rejected": -1216.172119140625, |
|
"loss": 0.0708, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.11510708183050156, |
|
"rewards/margins": 0.29687556624412537, |
|
"rewards/rejected": -0.41198262572288513, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.361749873698707e-06, |
|
"logits/chosen": -1.3221898078918457, |
|
"logits/rejected": -0.9603347778320312, |
|
"logps/chosen": -517.3427124023438, |
|
"logps/rejected": -1317.27099609375, |
|
"loss": 0.0551, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.09739838540554047, |
|
"rewards/margins": 0.3246908485889435, |
|
"rewards/rejected": -0.42208918929100037, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.346138351564711e-06, |
|
"logits/chosen": -1.7003717422485352, |
|
"logits/rejected": -0.7912822961807251, |
|
"logps/chosen": -598.9830932617188, |
|
"logps/rejected": -1176.973388671875, |
|
"loss": 0.0841, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.18871501088142395, |
|
"rewards/margins": 0.29334282875061035, |
|
"rewards/rejected": -0.4820578098297119, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.330366868729376e-06, |
|
"logits/chosen": -1.4258317947387695, |
|
"logits/rejected": -1.199805498123169, |
|
"logps/chosen": -769.9146728515625, |
|
"logps/rejected": -1417.9521484375, |
|
"loss": 0.0696, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.25572648644447327, |
|
"rewards/margins": 0.25885215401649475, |
|
"rewards/rejected": -0.5145785808563232, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3144367917302964e-06, |
|
"logits/chosen": -1.580244779586792, |
|
"logits/rejected": -0.9348461031913757, |
|
"logps/chosen": -604.8896484375, |
|
"logps/rejected": -1246.4227294921875, |
|
"loss": 0.0574, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.14286699891090393, |
|
"rewards/margins": 0.30815887451171875, |
|
"rewards/rejected": -0.4510258734226227, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2983495008466285e-06, |
|
"logits/chosen": -1.449998140335083, |
|
"logits/rejected": -1.0776797533035278, |
|
"logps/chosen": -581.5048217773438, |
|
"logps/rejected": -1115.7093505859375, |
|
"loss": 0.1337, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0915089100599289, |
|
"rewards/margins": 0.22939009964466095, |
|
"rewards/rejected": -0.32089897990226746, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2821063899795015e-06, |
|
"logits/chosen": -1.1581242084503174, |
|
"logits/rejected": -0.6956531405448914, |
|
"logps/chosen": -486.95257568359375, |
|
"logps/rejected": -1211.6070556640625, |
|
"loss": 0.0813, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.02962355688214302, |
|
"rewards/margins": 0.285904198884964, |
|
"rewards/rejected": -0.3155277669429779, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.265708866531238e-06, |
|
"logits/chosen": -1.6472032070159912, |
|
"logits/rejected": -1.1526950597763062, |
|
"logps/chosen": -458.576171875, |
|
"logps/rejected": -1106.2225341796875, |
|
"loss": 0.0867, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.08721883594989777, |
|
"rewards/margins": 0.26627764105796814, |
|
"rewards/rejected": -0.3534964919090271, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.249158351283414e-06, |
|
"logits/chosen": -1.5008890628814697, |
|
"logits/rejected": -0.9523450136184692, |
|
"logps/chosen": -564.3670654296875, |
|
"logps/rejected": -1191.93798828125, |
|
"loss": 0.0889, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12786266207695007, |
|
"rewards/margins": 0.2664950489997864, |
|
"rewards/rejected": -0.39435768127441406, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.232456278273743e-06, |
|
"logits/chosen": -1.5925250053405762, |
|
"logits/rejected": -0.787007749080658, |
|
"logps/chosen": -634.4956665039062, |
|
"logps/rejected": -1199.644287109375, |
|
"loss": 0.0832, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1421954333782196, |
|
"rewards/margins": 0.27115732431411743, |
|
"rewards/rejected": -0.4133527874946594, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.215604094671835e-06, |
|
"logits/chosen": -1.5216588973999023, |
|
"logits/rejected": -0.8067516088485718, |
|
"logps/chosen": -736.2872314453125, |
|
"logps/rejected": -1336.3583984375, |
|
"loss": 0.0645, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2195323407649994, |
|
"rewards/margins": 0.2807646691799164, |
|
"rewards/rejected": -0.500296950340271, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.198603260653792e-06, |
|
"logits/chosen": -1.6377366781234741, |
|
"logits/rejected": -1.1248198747634888, |
|
"logps/chosen": -589.9413452148438, |
|
"logps/rejected": -1174.857666015625, |
|
"loss": 0.0898, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1545807123184204, |
|
"rewards/margins": 0.25077009201049805, |
|
"rewards/rejected": -0.40535083413124084, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.181455249275701e-06, |
|
"logits/chosen": -1.359490990638733, |
|
"logits/rejected": -0.7116638422012329, |
|
"logps/chosen": -482.0816345214844, |
|
"logps/rejected": -1281.95068359375, |
|
"loss": 0.0926, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.06251558661460876, |
|
"rewards/margins": 0.3599362075328827, |
|
"rewards/rejected": -0.42245182394981384, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1641615463459926e-06, |
|
"logits/chosen": -1.4103469848632812, |
|
"logits/rejected": -0.9721433520317078, |
|
"logps/chosen": -495.11639404296875, |
|
"logps/rejected": -1221.6544189453125, |
|
"loss": 0.0611, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.09844540059566498, |
|
"rewards/margins": 0.32037508487701416, |
|
"rewards/rejected": -0.41882047057151794, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.146723650296701e-06, |
|
"logits/chosen": -1.5504339933395386, |
|
"logits/rejected": -0.8985152244567871, |
|
"logps/chosen": -516.6334228515625, |
|
"logps/rejected": -1175.6920166015625, |
|
"loss": 0.0714, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15037675201892853, |
|
"rewards/margins": 0.27001953125, |
|
"rewards/rejected": -0.42039623856544495, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.129143072053639e-06, |
|
"logits/chosen": -1.5418593883514404, |
|
"logits/rejected": -0.9249173402786255, |
|
"logps/chosen": -479.78521728515625, |
|
"logps/rejected": -1107.382568359375, |
|
"loss": 0.0675, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1212129220366478, |
|
"rewards/margins": 0.27643007040023804, |
|
"rewards/rejected": -0.3976430296897888, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.111421334905468e-06, |
|
"logits/chosen": -1.461808443069458, |
|
"logits/rejected": -0.8558281660079956, |
|
"logps/chosen": -667.7941284179688, |
|
"logps/rejected": -1245.40234375, |
|
"loss": 0.0659, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1607908010482788, |
|
"rewards/margins": 0.2721284031867981, |
|
"rewards/rejected": -0.4329192638397217, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.093559974371725e-06, |
|
"logits/chosen": -1.3010141849517822, |
|
"logits/rejected": -0.9021800756454468, |
|
"logps/chosen": -656.9991455078125, |
|
"logps/rejected": -1318.10205078125, |
|
"loss": 0.0846, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.20327822864055634, |
|
"rewards/margins": 0.2836567759513855, |
|
"rewards/rejected": -0.48693498969078064, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.075560538069767e-06, |
|
"logits/chosen": -1.2061702013015747, |
|
"logits/rejected": -0.8291865587234497, |
|
"logps/chosen": -608.1866455078125, |
|
"logps/rejected": -1327.5355224609375, |
|
"loss": 0.0806, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16998444497585297, |
|
"rewards/margins": 0.32111790776252747, |
|
"rewards/rejected": -0.49110230803489685, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.05742458558068e-06, |
|
"logits/chosen": -1.6475965976715088, |
|
"logits/rejected": -0.8810272216796875, |
|
"logps/chosen": -616.7311401367188, |
|
"logps/rejected": -1327.117431640625, |
|
"loss": 0.062, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1568661332130432, |
|
"rewards/margins": 0.3473976254463196, |
|
"rewards/rejected": -0.5042637586593628, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.039153688314146e-06, |
|
"logits/chosen": -1.3039356470108032, |
|
"logits/rejected": -0.9500153660774231, |
|
"logps/chosen": -611.8599853515625, |
|
"logps/rejected": -1257.874267578125, |
|
"loss": 0.0689, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1369098573923111, |
|
"rewards/margins": 0.29038089513778687, |
|
"rewards/rejected": -0.42729073762893677, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020749429372286e-06, |
|
"logits/chosen": -1.4324769973754883, |
|
"logits/rejected": -0.807245135307312, |
|
"logps/chosen": -625.339111328125, |
|
"logps/rejected": -1257.253173828125, |
|
"loss": 0.089, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.14000260829925537, |
|
"rewards/margins": 0.28776806592941284, |
|
"rewards/rejected": -0.4277706742286682, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.002213403412492e-06, |
|
"logits/chosen": -1.445261001586914, |
|
"logits/rejected": -0.9508928060531616, |
|
"logps/chosen": -594.2431030273438, |
|
"logps/rejected": -1150.235595703125, |
|
"loss": 0.073, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15963387489318848, |
|
"rewards/margins": 0.2395774871110916, |
|
"rewards/rejected": -0.3992113471031189, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.983547216509254e-06, |
|
"logits/chosen": -1.33284592628479, |
|
"logits/rejected": -0.7798209190368652, |
|
"logps/chosen": -605.1170654296875, |
|
"logps/rejected": -1115.391845703125, |
|
"loss": 0.0769, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.17733311653137207, |
|
"rewards/margins": 0.23692724108695984, |
|
"rewards/rejected": -0.4142603278160095, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.964752486015001e-06, |
|
"logits/chosen": -1.353686809539795, |
|
"logits/rejected": -0.9458833932876587, |
|
"logps/chosen": -541.7138671875, |
|
"logps/rejected": -1133.408935546875, |
|
"loss": 0.0832, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1459619700908661, |
|
"rewards/margins": 0.25547298789024353, |
|
"rewards/rejected": -0.4014349579811096, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.945830840419966e-06, |
|
"logits/chosen": -1.3489134311676025, |
|
"logits/rejected": -1.114639163017273, |
|
"logps/chosen": -650.7469482421875, |
|
"logps/rejected": -1327.571044921875, |
|
"loss": 0.1023, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.19746045768260956, |
|
"rewards/margins": 0.3029775023460388, |
|
"rewards/rejected": -0.5004379749298096, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.92678391921108e-06, |
|
"logits/chosen": -1.3552124500274658, |
|
"logits/rejected": -0.8964862823486328, |
|
"logps/chosen": -505.401123046875, |
|
"logps/rejected": -1367.124267578125, |
|
"loss": 0.0499, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.12019245326519012, |
|
"rewards/margins": 0.3561457395553589, |
|
"rewards/rejected": -0.4763382375240326, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.907613372729916e-06, |
|
"logits/chosen": -1.5101354122161865, |
|
"logits/rejected": -1.0879840850830078, |
|
"logps/chosen": -619.8988037109375, |
|
"logps/rejected": -1368.367919921875, |
|
"loss": 0.0501, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.15163154900074005, |
|
"rewards/margins": 0.34400704503059387, |
|
"rewards/rejected": -0.49563854932785034, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.888320862029699e-06, |
|
"logits/chosen": -1.5360214710235596, |
|
"logits/rejected": -0.9855524897575378, |
|
"logps/chosen": -748.1507568359375, |
|
"logps/rejected": -1296.6070556640625, |
|
"loss": 0.0988, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1940506398677826, |
|
"rewards/margins": 0.2233462780714035, |
|
"rewards/rejected": -0.4173968732357025, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.868908058731376e-06, |
|
"logits/chosen": -1.5095126628875732, |
|
"logits/rejected": -0.8970023989677429, |
|
"logps/chosen": -497.6952209472656, |
|
"logps/rejected": -1121.1767578125, |
|
"loss": 0.07, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.10547561943531036, |
|
"rewards/margins": 0.27518388628959656, |
|
"rewards/rejected": -0.3806595206260681, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.849376644878783e-06, |
|
"logits/chosen": -1.4751381874084473, |
|
"logits/rejected": -0.9001661539077759, |
|
"logps/chosen": -588.8636474609375, |
|
"logps/rejected": -1271.7371826171875, |
|
"loss": 0.0558, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1327972114086151, |
|
"rewards/margins": 0.305908739566803, |
|
"rewards/rejected": -0.4387059211730957, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.829728312792895e-06, |
|
"logits/chosen": -1.616092324256897, |
|
"logits/rejected": -1.0537126064300537, |
|
"logps/chosen": -540.6871337890625, |
|
"logps/rejected": -1179.548828125, |
|
"loss": 0.0725, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09595973044633865, |
|
"rewards/margins": 0.3011319935321808, |
|
"rewards/rejected": -0.39709168672561646, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8099647649251984e-06, |
|
"logits/chosen": -1.423906683921814, |
|
"logits/rejected": -0.7600029706954956, |
|
"logps/chosen": -594.6055908203125, |
|
"logps/rejected": -1255.1416015625, |
|
"loss": 0.0918, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12480314821004868, |
|
"rewards/margins": 0.3190528154373169, |
|
"rewards/rejected": -0.44385600090026855, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.790087713710179e-06, |
|
"logits/chosen": -1.5575999021530151, |
|
"logits/rejected": -1.1269023418426514, |
|
"logps/chosen": -627.612548828125, |
|
"logps/rejected": -1399.1263427734375, |
|
"loss": 0.0576, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1461525857448578, |
|
"rewards/margins": 0.3743034303188324, |
|
"rewards/rejected": -0.5204560160636902, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.770098881416945e-06, |
|
"logits/chosen": -1.4309592247009277, |
|
"logits/rejected": -0.8114882707595825, |
|
"logps/chosen": -639.8936767578125, |
|
"logps/rejected": -1324.5836181640625, |
|
"loss": 0.0548, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.15769222378730774, |
|
"rewards/margins": 0.3088419735431671, |
|
"rewards/rejected": -0.46653419733047485, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -1.3537265062332153, |
|
"logits/rejected": -0.9575905799865723, |
|
"logps/chosen": -644.993408203125, |
|
"logps/rejected": -1161.0167236328125, |
|
"loss": 0.0985, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.13782911002635956, |
|
"rewards/margins": 0.23069393634796143, |
|
"rewards/rejected": -0.3685230612754822, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7297928109491765e-06, |
|
"logits/chosen": -1.7066447734832764, |
|
"logits/rejected": -0.8981353044509888, |
|
"logps/chosen": -499.3677673339844, |
|
"logps/rejected": -1228.71533203125, |
|
"loss": 0.0597, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.10399389266967773, |
|
"rewards/margins": 0.3211382031440735, |
|
"rewards/rejected": -0.42513203620910645, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.7094790651387414e-06, |
|
"logits/chosen": -1.5993268489837646, |
|
"logits/rejected": -0.943587601184845, |
|
"logps/chosen": -549.6283569335938, |
|
"logps/rejected": -1147.561279296875, |
|
"loss": 0.0736, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.09537569433450699, |
|
"rewards/margins": 0.2917521297931671, |
|
"rewards/rejected": -0.3871277868747711, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.689060522675689e-06, |
|
"logits/chosen": -1.4540773630142212, |
|
"logits/rejected": -0.9544679522514343, |
|
"logps/chosen": -567.1152954101562, |
|
"logps/rejected": -1310.57080078125, |
|
"loss": 0.067, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13753007352352142, |
|
"rewards/margins": 0.3150814175605774, |
|
"rewards/rejected": -0.45261150598526, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.668538952747236e-06, |
|
"logits/chosen": -1.5060861110687256, |
|
"logits/rejected": -1.0383261442184448, |
|
"logps/chosen": -541.1341552734375, |
|
"logps/rejected": -1345.200927734375, |
|
"loss": 0.0495, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.08431238681077957, |
|
"rewards/margins": 0.34140679240226746, |
|
"rewards/rejected": -0.42571917176246643, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6479161334675294e-06, |
|
"logits/chosen": -1.6825485229492188, |
|
"logits/rejected": -0.8775063753128052, |
|
"logps/chosen": -636.63427734375, |
|
"logps/rejected": -1235.6170654296875, |
|
"loss": 0.0867, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.09943681955337524, |
|
"rewards/margins": 0.2878590226173401, |
|
"rewards/rejected": -0.38729584217071533, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.627193851723577e-06, |
|
"logits/chosen": -1.686977744102478, |
|
"logits/rejected": -1.0781195163726807, |
|
"logps/chosen": -594.205078125, |
|
"logps/rejected": -1142.4056396484375, |
|
"loss": 0.084, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11142469942569733, |
|
"rewards/margins": 0.26075294613838196, |
|
"rewards/rejected": -0.3721776604652405, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6063739030204226e-06, |
|
"logits/chosen": -1.571839451789856, |
|
"logits/rejected": -1.1530930995941162, |
|
"logps/chosen": -549.628173828125, |
|
"logps/rejected": -1172.0718994140625, |
|
"loss": 0.0807, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10474500805139542, |
|
"rewards/margins": 0.269944429397583, |
|
"rewards/rejected": -0.37468940019607544, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5854580913255706e-06, |
|
"logits/chosen": -1.61894953250885, |
|
"logits/rejected": -0.9551402926445007, |
|
"logps/chosen": -607.7701416015625, |
|
"logps/rejected": -1296.424072265625, |
|
"loss": 0.053, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.13554790616035461, |
|
"rewards/margins": 0.3056618571281433, |
|
"rewards/rejected": -0.4412097930908203, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.564448228912682e-06, |
|
"logits/chosen": -1.6686357259750366, |
|
"logits/rejected": -1.0033533573150635, |
|
"logps/chosen": -650.9888916015625, |
|
"logps/rejected": -1215.917724609375, |
|
"loss": 0.0889, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1597142517566681, |
|
"rewards/margins": 0.26373302936553955, |
|
"rewards/rejected": -0.42344728112220764, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.543346136204545e-06, |
|
"logits/chosen": -1.3500854969024658, |
|
"logits/rejected": -0.8943287134170532, |
|
"logps/chosen": -594.9763793945312, |
|
"logps/rejected": -1269.090576171875, |
|
"loss": 0.0873, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.14664295315742493, |
|
"rewards/margins": 0.29260388016700745, |
|
"rewards/rejected": -0.4392468333244324, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.522153641615345e-06, |
|
"logits/chosen": -1.5656368732452393, |
|
"logits/rejected": -0.917197048664093, |
|
"logps/chosen": -623.9747924804688, |
|
"logps/rejected": -1312.198486328125, |
|
"loss": 0.0674, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1256953328847885, |
|
"rewards/margins": 0.3076288104057312, |
|
"rewards/rejected": -0.4333241581916809, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5008725813922383e-06, |
|
"logits/chosen": -1.4567426443099976, |
|
"logits/rejected": -1.0509058237075806, |
|
"logps/chosen": -514.6878051757812, |
|
"logps/rejected": -1177.8046875, |
|
"loss": 0.0862, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1288285255432129, |
|
"rewards/margins": 0.2865757346153259, |
|
"rewards/rejected": -0.4154042601585388, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4795047994562463e-06, |
|
"logits/chosen": -1.6101102828979492, |
|
"logits/rejected": -1.2119176387786865, |
|
"logps/chosen": -549.9823608398438, |
|
"logps/rejected": -1237.9873046875, |
|
"loss": 0.0912, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1458604633808136, |
|
"rewards/margins": 0.2935812771320343, |
|
"rewards/rejected": -0.4394417405128479, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.458052147242494e-06, |
|
"logits/chosen": -1.6634056568145752, |
|
"logits/rejected": -0.9373501539230347, |
|
"logps/chosen": -565.6293334960938, |
|
"logps/rejected": -1192.64208984375, |
|
"loss": 0.065, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15129828453063965, |
|
"rewards/margins": 0.2895079553127289, |
|
"rewards/rejected": -0.44080623984336853, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.436516483539781e-06, |
|
"logits/chosen": -1.4864572286605835, |
|
"logits/rejected": -1.038694143295288, |
|
"logps/chosen": -491.3564453125, |
|
"logps/rejected": -1235.2786865234375, |
|
"loss": 0.0685, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.09666456282138824, |
|
"rewards/margins": 0.3150130808353424, |
|
"rewards/rejected": -0.41167759895324707, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4148996743295305e-06, |
|
"logits/chosen": -1.561033010482788, |
|
"logits/rejected": -0.8384987711906433, |
|
"logps/chosen": -697.5499877929688, |
|
"logps/rejected": -1347.208251953125, |
|
"loss": 0.0675, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.17049038410186768, |
|
"rewards/margins": 0.30814796686172485, |
|
"rewards/rejected": -0.47863835096359253, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3932035926241103e-06, |
|
"logits/chosen": -1.4081476926803589, |
|
"logits/rejected": -1.0466349124908447, |
|
"logps/chosen": -630.5411376953125, |
|
"logps/rejected": -1300.04541015625, |
|
"loss": 0.0695, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12465560436248779, |
|
"rewards/margins": 0.29317888617515564, |
|
"rewards/rejected": -0.41783446073532104, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3714301183045382e-06, |
|
"logits/chosen": -1.6413304805755615, |
|
"logits/rejected": -0.9996837377548218, |
|
"logps/chosen": -584.4578857421875, |
|
"logps/rejected": -1310.6175537109375, |
|
"loss": 0.0728, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16229207813739777, |
|
"rewards/margins": 0.3190585970878601, |
|
"rewards/rejected": -0.4813506603240967, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.349581137957604e-06, |
|
"logits/chosen": -1.5459994077682495, |
|
"logits/rejected": -0.8717397451400757, |
|
"logps/chosen": -689.1311645507812, |
|
"logps/rejected": -1347.6954345703125, |
|
"loss": 0.0896, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18276944756507874, |
|
"rewards/margins": 0.30370309948921204, |
|
"rewards/rejected": -0.48647254705429077, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3276585447123957e-06, |
|
"logits/chosen": -1.5712751150131226, |
|
"logits/rejected": -0.9994010925292969, |
|
"logps/chosen": -587.7183227539062, |
|
"logps/rejected": -1291.80322265625, |
|
"loss": 0.0678, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13140062987804413, |
|
"rewards/margins": 0.2967793345451355, |
|
"rewards/rejected": -0.4281799793243408, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3056642380762783e-06, |
|
"logits/chosen": -1.45646071434021, |
|
"logits/rejected": -0.9559444189071655, |
|
"logps/chosen": -743.5758056640625, |
|
"logps/rejected": -1357.35693359375, |
|
"loss": 0.0537, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1634978950023651, |
|
"rewards/margins": 0.26821058988571167, |
|
"rewards/rejected": -0.4317084848880768, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2836001237702993e-06, |
|
"logits/chosen": -1.2642626762390137, |
|
"logits/rejected": -0.8956004977226257, |
|
"logps/chosen": -672.7369995117188, |
|
"logps/rejected": -1317.1905517578125, |
|
"loss": 0.0843, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.19635051488876343, |
|
"rewards/margins": 0.26600882411003113, |
|
"rewards/rejected": -0.46235933899879456, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2614681135640696e-06, |
|
"logits/chosen": -1.538914442062378, |
|
"logits/rejected": -0.9134441614151001, |
|
"logps/chosen": -672.8499755859375, |
|
"logps/rejected": -1275.136474609375, |
|
"loss": 0.0585, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17318633198738098, |
|
"rewards/margins": 0.288135290145874, |
|
"rewards/rejected": -0.4613215923309326, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2392701251101172e-06, |
|
"logits/chosen": -1.4427398443222046, |
|
"logits/rejected": -0.8872078061103821, |
|
"logps/chosen": -686.1043701171875, |
|
"logps/rejected": -1366.374755859375, |
|
"loss": 0.0687, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19739912450313568, |
|
"rewards/margins": 0.28218263387680054, |
|
"rewards/rejected": -0.479581743478775, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.217008081777726e-06, |
|
"logits/chosen": -1.3497573137283325, |
|
"logits/rejected": -0.872177004814148, |
|
"logps/chosen": -589.72412109375, |
|
"logps/rejected": -1277.909423828125, |
|
"loss": 0.0563, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13301566243171692, |
|
"rewards/margins": 0.2993132770061493, |
|
"rewards/rejected": -0.4323289394378662, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1946839124862873e-06, |
|
"logits/chosen": -1.3833153247833252, |
|
"logits/rejected": -1.0205485820770264, |
|
"logps/chosen": -539.8531494140625, |
|
"logps/rejected": -1183.308837890625, |
|
"loss": 0.0925, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1366974115371704, |
|
"rewards/margins": 0.2687934637069702, |
|
"rewards/rejected": -0.4054908752441406, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1722995515381644e-06, |
|
"logits/chosen": -1.4850012063980103, |
|
"logits/rejected": -0.8387139439582825, |
|
"logps/chosen": -636.983642578125, |
|
"logps/rejected": -1304.3221435546875, |
|
"loss": 0.0795, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1878032237291336, |
|
"rewards/margins": 0.2711120843887329, |
|
"rewards/rejected": -0.4589153230190277, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.149856938451094e-06, |
|
"logits/chosen": -1.0989512205123901, |
|
"logits/rejected": -0.8349654078483582, |
|
"logps/chosen": -627.0206298828125, |
|
"logps/rejected": -1307.218505859375, |
|
"loss": 0.0903, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1822880506515503, |
|
"rewards/margins": 0.3011923134326935, |
|
"rewards/rejected": -0.4834803640842438, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.127358017790132e-06, |
|
"logits/chosen": -1.485824704170227, |
|
"logits/rejected": -0.8337934613227844, |
|
"logps/chosen": -623.2086791992188, |
|
"logps/rejected": -1302.7957763671875, |
|
"loss": 0.0511, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15663902461528778, |
|
"rewards/margins": 0.3056802451610565, |
|
"rewards/rejected": -0.4623193144798279, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1048047389991693e-06, |
|
"logits/chosen": -1.7094755172729492, |
|
"logits/rejected": -1.041133165359497, |
|
"logps/chosen": -671.2548217773438, |
|
"logps/rejected": -1311.30419921875, |
|
"loss": 0.1018, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.167900949716568, |
|
"rewards/margins": 0.27187058329582214, |
|
"rewards/rejected": -0.43977150321006775, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.082199056232015e-06, |
|
"logits/chosen": -1.6966993808746338, |
|
"logits/rejected": -1.220529556274414, |
|
"logps/chosen": -596.4708251953125, |
|
"logps/rejected": -1238.605224609375, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.12312579154968262, |
|
"rewards/margins": 0.272055447101593, |
|
"rewards/rejected": -0.39518123865127563, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.059542928183079e-06, |
|
"logits/chosen": -1.661625623703003, |
|
"logits/rejected": -1.1181296110153198, |
|
"logps/chosen": -575.0912475585938, |
|
"logps/rejected": -1266.91064453125, |
|
"loss": 0.0801, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.11585699021816254, |
|
"rewards/margins": 0.2694869041442871, |
|
"rewards/rejected": -0.38534384965896606, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0368383179176584e-06, |
|
"logits/chosen": -1.606603980064392, |
|
"logits/rejected": -1.0587247610092163, |
|
"logps/chosen": -512.89013671875, |
|
"logps/rejected": -1128.80859375, |
|
"loss": 0.0617, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10761525481939316, |
|
"rewards/margins": 0.2635376751422882, |
|
"rewards/rejected": -0.37115293741226196, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0140871927018466e-06, |
|
"logits/chosen": -1.5754063129425049, |
|
"logits/rejected": -0.8801782727241516, |
|
"logps/chosen": -655.8692626953125, |
|
"logps/rejected": -1180.616943359375, |
|
"loss": 0.07, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.153736412525177, |
|
"rewards/margins": 0.2592002749443054, |
|
"rewards/rejected": -0.41293662786483765, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.9912915238320755e-06, |
|
"logits/chosen": -1.427549123764038, |
|
"logits/rejected": -1.0166289806365967, |
|
"logps/chosen": -586.8533325195312, |
|
"logps/rejected": -1178.782958984375, |
|
"loss": 0.0907, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15467192232608795, |
|
"rewards/margins": 0.2657596468925476, |
|
"rewards/rejected": -0.420431524515152, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9684532864643123e-06, |
|
"logits/chosen": -1.580718755722046, |
|
"logits/rejected": -1.1227762699127197, |
|
"logps/chosen": -620.2764892578125, |
|
"logps/rejected": -1323.167236328125, |
|
"loss": 0.0675, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.14635826647281647, |
|
"rewards/margins": 0.3277556300163269, |
|
"rewards/rejected": -0.4741138815879822, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.945574459442917e-06, |
|
"logits/chosen": -1.291585922241211, |
|
"logits/rejected": -0.7484699487686157, |
|
"logps/chosen": -530.9237060546875, |
|
"logps/rejected": -1149.4744873046875, |
|
"loss": 0.0707, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1255730241537094, |
|
"rewards/margins": 0.28315025568008423, |
|
"rewards/rejected": -0.40872329473495483, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.922657025129185e-06, |
|
"logits/chosen": -1.3349201679229736, |
|
"logits/rejected": -0.9772024154663086, |
|
"logps/chosen": -620.5244140625, |
|
"logps/rejected": -1263.1776123046875, |
|
"loss": 0.0585, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16309909522533417, |
|
"rewards/margins": 0.273107647895813, |
|
"rewards/rejected": -0.43620675802230835, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.8997029692295875e-06, |
|
"logits/chosen": -1.4110755920410156, |
|
"logits/rejected": -0.9908379316329956, |
|
"logps/chosen": -516.9703369140625, |
|
"logps/rejected": -1351.016357421875, |
|
"loss": 0.0628, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.14381906390190125, |
|
"rewards/margins": 0.3800903558731079, |
|
"rewards/rejected": -0.5239094495773315, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.876714280623708e-06, |
|
"logits/chosen": -1.4153320789337158, |
|
"logits/rejected": -0.820611298084259, |
|
"logps/chosen": -487.5252990722656, |
|
"logps/rejected": -1135.842529296875, |
|
"loss": 0.0915, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10841517150402069, |
|
"rewards/margins": 0.29948437213897705, |
|
"rewards/rejected": -0.40789952874183655, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8536929511919227e-06, |
|
"logits/chosen": -1.4119293689727783, |
|
"logits/rejected": -0.8228232264518738, |
|
"logps/chosen": -627.8701171875, |
|
"logps/rejected": -1294.2296142578125, |
|
"loss": 0.0469, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12193576991558075, |
|
"rewards/margins": 0.32117849588394165, |
|
"rewards/rejected": -0.4431142807006836, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8306409756428067e-06, |
|
"logits/chosen": -1.527777075767517, |
|
"logits/rejected": -0.8934208154678345, |
|
"logps/chosen": -581.7477416992188, |
|
"logps/rejected": -1238.830810546875, |
|
"loss": 0.0816, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1308506429195404, |
|
"rewards/margins": 0.29394230246543884, |
|
"rewards/rejected": -0.42479294538497925, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.807560351340302e-06, |
|
"logits/chosen": -1.3228596448898315, |
|
"logits/rejected": -0.7611247897148132, |
|
"logps/chosen": -601.160400390625, |
|
"logps/rejected": -1213.851806640625, |
|
"loss": 0.0707, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1343916952610016, |
|
"rewards/margins": 0.28090834617614746, |
|
"rewards/rejected": -0.4152999818325043, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7844530781306544e-06, |
|
"logits/chosen": -1.4402358531951904, |
|
"logits/rejected": -0.8715489506721497, |
|
"logps/chosen": -518.7476806640625, |
|
"logps/rejected": -1256.0328369140625, |
|
"loss": 0.0528, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.11650246381759644, |
|
"rewards/margins": 0.3238303065299988, |
|
"rewards/rejected": -0.4403327405452728, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.761321158169134e-06, |
|
"logits/chosen": -1.5251938104629517, |
|
"logits/rejected": -1.1043987274169922, |
|
"logps/chosen": -656.2462158203125, |
|
"logps/rejected": -1206.697021484375, |
|
"loss": 0.0894, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.17089466750621796, |
|
"rewards/margins": 0.22701752185821533, |
|
"rewards/rejected": -0.3979122042655945, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.738166595746554e-06, |
|
"logits/chosen": -1.3905115127563477, |
|
"logits/rejected": -0.9935697317123413, |
|
"logps/chosen": -628.3776245117188, |
|
"logps/rejected": -1096.552734375, |
|
"loss": 0.0795, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15120725333690643, |
|
"rewards/margins": 0.24288305640220642, |
|
"rewards/rejected": -0.39409032464027405, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.7149913971156105e-06, |
|
"logits/chosen": -1.722516655921936, |
|
"logits/rejected": -1.0052350759506226, |
|
"logps/chosen": -496.67657470703125, |
|
"logps/rejected": -1112.5921630859375, |
|
"loss": 0.0733, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.10522119700908661, |
|
"rewards/margins": 0.28908994793891907, |
|
"rewards/rejected": -0.3943111300468445, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6917975703170466e-06, |
|
"logits/chosen": -1.4045814275741577, |
|
"logits/rejected": -0.9305141568183899, |
|
"logps/chosen": -512.0284423828125, |
|
"logps/rejected": -1209.204345703125, |
|
"loss": 0.0672, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1334155797958374, |
|
"rewards/margins": 0.2813461720943451, |
|
"rewards/rejected": -0.4147617220878601, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.668587125005663e-06, |
|
"logits/chosen": -1.343185305595398, |
|
"logits/rejected": -1.0273711681365967, |
|
"logps/chosen": -549.5206298828125, |
|
"logps/rejected": -1240.9766845703125, |
|
"loss": 0.0707, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1306164562702179, |
|
"rewards/margins": 0.29476845264434814, |
|
"rewards/rejected": -0.4253849387168884, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6453620722761897e-06, |
|
"logits/chosen": -1.6594680547714233, |
|
"logits/rejected": -0.850638210773468, |
|
"logps/chosen": -591.6373291015625, |
|
"logps/rejected": -1358.3460693359375, |
|
"loss": 0.0625, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.10016246140003204, |
|
"rewards/margins": 0.33138564229011536, |
|
"rewards/rejected": -0.43154802918434143, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6221244244890336e-06, |
|
"logits/chosen": -1.5611286163330078, |
|
"logits/rejected": -0.799461841583252, |
|
"logps/chosen": -587.806640625, |
|
"logps/rejected": -1161.3482666015625, |
|
"loss": 0.0633, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.12262831628322601, |
|
"rewards/margins": 0.2799530327320099, |
|
"rewards/rejected": -0.4025813639163971, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5988761950959133e-06, |
|
"logits/chosen": -1.6644928455352783, |
|
"logits/rejected": -0.9483749270439148, |
|
"logps/chosen": -535.2498168945312, |
|
"logps/rejected": -1164.773681640625, |
|
"loss": 0.0748, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1117793545126915, |
|
"rewards/margins": 0.281157910823822, |
|
"rewards/rejected": -0.3929373323917389, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.575619398465402e-06, |
|
"logits/chosen": -1.5217034816741943, |
|
"logits/rejected": -0.715064287185669, |
|
"logps/chosen": -589.2874755859375, |
|
"logps/rejected": -1261.570556640625, |
|
"loss": 0.0622, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1293199360370636, |
|
"rewards/margins": 0.3084322214126587, |
|
"rewards/rejected": -0.4377521574497223, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5523560497083927e-06, |
|
"logits/chosen": -1.6095244884490967, |
|
"logits/rejected": -0.9723415374755859, |
|
"logps/chosen": -610.6204223632812, |
|
"logps/rejected": -1346.6697998046875, |
|
"loss": 0.0527, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.18110117316246033, |
|
"rewards/margins": 0.3312448561191559, |
|
"rewards/rejected": -0.5123459696769714, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5290881645034932e-06, |
|
"logits/chosen": -1.4780033826828003, |
|
"logits/rejected": -0.983650803565979, |
|
"logps/chosen": -654.4927978515625, |
|
"logps/rejected": -1217.2103271484375, |
|
"loss": 0.0931, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1973283588886261, |
|
"rewards/margins": 0.2564861476421356, |
|
"rewards/rejected": -0.4538145065307617, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5058177589223766e-06, |
|
"logits/chosen": -1.6766622066497803, |
|
"logits/rejected": -0.907199501991272, |
|
"logps/chosen": -659.3814086914062, |
|
"logps/rejected": -1295.118896484375, |
|
"loss": 0.0769, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.14497703313827515, |
|
"rewards/margins": 0.3324897885322571, |
|
"rewards/rejected": -0.477466881275177, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.482546849255096e-06, |
|
"logits/chosen": -1.5557941198349, |
|
"logits/rejected": -0.8595023155212402, |
|
"logps/chosen": -599.567626953125, |
|
"logps/rejected": -1293.6220703125, |
|
"loss": 0.038, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.14183931052684784, |
|
"rewards/margins": 0.32466521859169006, |
|
"rewards/rejected": -0.4665044844150543, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4592774518353858e-06, |
|
"logits/chosen": -1.3870598077774048, |
|
"logits/rejected": -0.7746745944023132, |
|
"logps/chosen": -578.613525390625, |
|
"logps/rejected": -1236.041748046875, |
|
"loss": 0.0582, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1255788505077362, |
|
"rewards/margins": 0.2901487350463867, |
|
"rewards/rejected": -0.41572752594947815, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.436011582865945e-06, |
|
"logits/chosen": -1.539902925491333, |
|
"logits/rejected": -0.8010295629501343, |
|
"logps/chosen": -680.379638671875, |
|
"logps/rejected": -1223.6986083984375, |
|
"loss": 0.0678, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15240542590618134, |
|
"rewards/margins": 0.2671021819114685, |
|
"rewards/rejected": -0.41950759291648865, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4127512582437486e-06, |
|
"logits/chosen": -1.5265008211135864, |
|
"logits/rejected": -1.3408691883087158, |
|
"logps/chosen": -541.078125, |
|
"logps/rejected": -1169.788818359375, |
|
"loss": 0.0992, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15979784727096558, |
|
"rewards/margins": 0.2517135739326477, |
|
"rewards/rejected": -0.4115114212036133, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3894984933853734e-06, |
|
"logits/chosen": -1.4429771900177002, |
|
"logits/rejected": -0.9880257844924927, |
|
"logps/chosen": -528.2786254882812, |
|
"logps/rejected": -1244.5059814453125, |
|
"loss": 0.0763, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.14250726997852325, |
|
"rewards/margins": 0.30752262473106384, |
|
"rewards/rejected": -0.4500298500061035, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.366255303052377e-06, |
|
"logits/chosen": -1.4017646312713623, |
|
"logits/rejected": -0.8113569021224976, |
|
"logps/chosen": -604.7733154296875, |
|
"logps/rejected": -1221.2369384765625, |
|
"loss": 0.094, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15813498198986053, |
|
"rewards/margins": 0.2982472777366638, |
|
"rewards/rejected": -0.45638221502304077, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3430237011767166e-06, |
|
"logits/chosen": -1.5092931985855103, |
|
"logits/rejected": -0.8236274719238281, |
|
"logps/chosen": -533.9609985351562, |
|
"logps/rejected": -1168.628173828125, |
|
"loss": 0.0722, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13640852272510529, |
|
"rewards/margins": 0.28761720657348633, |
|
"rewards/rejected": -0.4240257740020752, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.319805700686257e-06, |
|
"logits/chosen": -1.5162551403045654, |
|
"logits/rejected": -0.786509096622467, |
|
"logps/chosen": -623.4132080078125, |
|
"logps/rejected": -1179.5946044921875, |
|
"loss": 0.0579, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1392001360654831, |
|
"rewards/margins": 0.26958781480789185, |
|
"rewards/rejected": -0.40878796577453613, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.296603313330355e-06, |
|
"logits/chosen": -1.1993951797485352, |
|
"logits/rejected": -0.6298279166221619, |
|
"logps/chosen": -582.5997314453125, |
|
"logps/rejected": -1334.009765625, |
|
"loss": 0.0706, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.17245307564735413, |
|
"rewards/margins": 0.29515841603279114, |
|
"rewards/rejected": -0.46761149168014526, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2734185495055503e-06, |
|
"logits/chosen": -1.7049709558486938, |
|
"logits/rejected": -0.9636220932006836, |
|
"logps/chosen": -705.0452270507812, |
|
"logps/rejected": -1329.1993408203125, |
|
"loss": 0.0595, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.15186749398708344, |
|
"rewards/margins": 0.3114302158355713, |
|
"rewards/rejected": -0.46329769492149353, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.250253418081373e-06, |
|
"logits/chosen": -1.4931375980377197, |
|
"logits/rejected": -1.0522197484970093, |
|
"logps/chosen": -568.1475830078125, |
|
"logps/rejected": -1231.364013671875, |
|
"loss": 0.0732, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13682815432548523, |
|
"rewards/margins": 0.29910990595817566, |
|
"rewards/rejected": -0.4359380602836609, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.22710992622628e-06, |
|
"logits/chosen": -1.4385493993759155, |
|
"logits/rejected": -1.0058460235595703, |
|
"logps/chosen": -473.60498046875, |
|
"logps/rejected": -1201.9755859375, |
|
"loss": 0.0674, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10265137255191803, |
|
"rewards/margins": 0.32189419865608215, |
|
"rewards/rejected": -0.4245455265045166, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2039900792337477e-06, |
|
"logits/chosen": -1.3410050868988037, |
|
"logits/rejected": -1.1295228004455566, |
|
"logps/chosen": -553.0806274414062, |
|
"logps/rejected": -1231.395263671875, |
|
"loss": 0.0821, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.16361066699028015, |
|
"rewards/margins": 0.2729097008705139, |
|
"rewards/rejected": -0.43652039766311646, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1808958803485134e-06, |
|
"logits/chosen": -1.3688017129898071, |
|
"logits/rejected": -0.9007024765014648, |
|
"logps/chosen": -651.8567504882812, |
|
"logps/rejected": -1406.11572265625, |
|
"loss": 0.0535, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.18758761882781982, |
|
"rewards/margins": 0.3379947543144226, |
|
"rewards/rejected": -0.5255824327468872, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.157829330593008e-06, |
|
"logits/chosen": -1.6441303491592407, |
|
"logits/rejected": -1.0162547826766968, |
|
"logps/chosen": -711.4053955078125, |
|
"logps/rejected": -1393.6256103515625, |
|
"loss": 0.0708, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.2044825553894043, |
|
"rewards/margins": 0.30536073446273804, |
|
"rewards/rejected": -0.5098432302474976, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.134792428593971e-06, |
|
"logits/chosen": -1.6138668060302734, |
|
"logits/rejected": -0.9830889701843262, |
|
"logps/chosen": -678.9330444335938, |
|
"logps/rejected": -1337.688720703125, |
|
"loss": 0.0712, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16158784925937653, |
|
"rewards/margins": 0.30953675508499146, |
|
"rewards/rejected": -0.4711245596408844, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1117871704092818e-06, |
|
"logits/chosen": -1.5652110576629639, |
|
"logits/rejected": -0.7780826687812805, |
|
"logps/chosen": -496.6578674316406, |
|
"logps/rejected": -1144.76171875, |
|
"loss": 0.0663, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11985839903354645, |
|
"rewards/margins": 0.3066459596157074, |
|
"rewards/rejected": -0.42650431394577026, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0888155493550027e-06, |
|
"logits/chosen": -1.516342282295227, |
|
"logits/rejected": -1.1366102695465088, |
|
"logps/chosen": -601.8604125976562, |
|
"logps/rejected": -1439.399658203125, |
|
"loss": 0.0535, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.15253353118896484, |
|
"rewards/margins": 0.3515278697013855, |
|
"rewards/rejected": -0.5040613412857056, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"logits/chosen": -1.4972165822982788, |
|
"logits/rejected": -1.1287825107574463, |
|
"logps/chosen": -551.3253784179688, |
|
"logps/rejected": -1201.3675537109375, |
|
"loss": 0.0887, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.14231975376605988, |
|
"rewards/margins": 0.2838347554206848, |
|
"rewards/rejected": -0.4261545240879059, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0429811771568468e-06, |
|
"logits/chosen": -1.283483862876892, |
|
"logits/rejected": -0.8047486543655396, |
|
"logps/chosen": -674.7145385742188, |
|
"logps/rejected": -1273.704345703125, |
|
"loss": 0.0626, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.19594672322273254, |
|
"rewards/margins": 0.2617323696613312, |
|
"rewards/rejected": -0.4576791226863861, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0201223973828917e-06, |
|
"logits/chosen": -1.3640494346618652, |
|
"logits/rejected": -0.9720889329910278, |
|
"logps/chosen": -654.1539306640625, |
|
"logps/rejected": -1382.0146484375, |
|
"loss": 0.0744, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.17939691245555878, |
|
"rewards/margins": 0.31711509823799133, |
|
"rewards/rejected": -0.4965119957923889, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.997305197135089e-06, |
|
"logits/chosen": -1.557839274406433, |
|
"logits/rejected": -0.9444772601127625, |
|
"logps/chosen": -631.0563354492188, |
|
"logps/rejected": -1341.0086669921875, |
|
"loss": 0.0733, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16753628849983215, |
|
"rewards/margins": 0.30379122495651245, |
|
"rewards/rejected": -0.4713274836540222, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9745315534350157e-06, |
|
"logits/chosen": -1.2147436141967773, |
|
"logits/rejected": -0.681081235408783, |
|
"logps/chosen": -712.3760986328125, |
|
"logps/rejected": -1318.22265625, |
|
"loss": 0.0959, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.23314671218395233, |
|
"rewards/margins": 0.2763899266719818, |
|
"rewards/rejected": -0.509536623954773, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9518034395302413e-06, |
|
"logits/chosen": -1.6583919525146484, |
|
"logits/rejected": -1.0387184619903564, |
|
"logps/chosen": -607.9590454101562, |
|
"logps/rejected": -1096.9937744140625, |
|
"loss": 0.1013, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1655300408601761, |
|
"rewards/margins": 0.24450743198394775, |
|
"rewards/rejected": -0.41003745794296265, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9291228247233607e-06, |
|
"logits/chosen": -1.4778130054473877, |
|
"logits/rejected": -0.7460058331489563, |
|
"logps/chosen": -651.7698974609375, |
|
"logps/rejected": -1243.669921875, |
|
"loss": 0.0769, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1902499794960022, |
|
"rewards/margins": 0.2825292944908142, |
|
"rewards/rejected": -0.4727793335914612, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9064916742013515e-06, |
|
"logits/chosen": -1.3330655097961426, |
|
"logits/rejected": -0.9275220036506653, |
|
"logps/chosen": -523.3305053710938, |
|
"logps/rejected": -1225.2471923828125, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15531578660011292, |
|
"rewards/margins": 0.31203147768974304, |
|
"rewards/rejected": -0.46734723448753357, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.883911948865306e-06, |
|
"logits/chosen": -1.3421717882156372, |
|
"logits/rejected": -1.1515899896621704, |
|
"logps/chosen": -492.34246826171875, |
|
"logps/rejected": -1202.8974609375, |
|
"loss": 0.0831, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.15640634298324585, |
|
"rewards/margins": 0.2934826612472534, |
|
"rewards/rejected": -0.44988900423049927, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8613856051605242e-06, |
|
"logits/chosen": -1.4181009531021118, |
|
"logits/rejected": -0.8086174130439758, |
|
"logps/chosen": -602.2988891601562, |
|
"logps/rejected": -1168.888916015625, |
|
"loss": 0.07, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.15534570813179016, |
|
"rewards/margins": 0.2883809208869934, |
|
"rewards/rejected": -0.44372662901878357, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8389145949069953e-06, |
|
"logits/chosen": -1.6121231317520142, |
|
"logits/rejected": -0.8654192090034485, |
|
"logps/chosen": -598.3814697265625, |
|
"logps/rejected": -1284.2470703125, |
|
"loss": 0.0595, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.14926204085350037, |
|
"rewards/margins": 0.3212565779685974, |
|
"rewards/rejected": -0.47051864862442017, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.816500865130279e-06, |
|
"logits/chosen": -1.4523346424102783, |
|
"logits/rejected": -0.9201906323432922, |
|
"logps/chosen": -600.6221923828125, |
|
"logps/rejected": -1303.9447021484375, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17756803333759308, |
|
"rewards/margins": 0.3046211898326874, |
|
"rewards/rejected": -0.48218923807144165, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7941463578928088e-06, |
|
"logits/chosen": -1.5082757472991943, |
|
"logits/rejected": -0.9013730883598328, |
|
"logps/chosen": -610.1536254882812, |
|
"logps/rejected": -1315.129638671875, |
|
"loss": 0.0639, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.176839679479599, |
|
"rewards/margins": 0.3339093327522278, |
|
"rewards/rejected": -0.5107490420341492, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7718530101256115e-06, |
|
"logits/chosen": -1.6840633153915405, |
|
"logits/rejected": -0.9501806497573853, |
|
"logps/chosen": -662.0902709960938, |
|
"logps/rejected": -1296.606689453125, |
|
"loss": 0.0698, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.17285804450511932, |
|
"rewards/margins": 0.3210682272911072, |
|
"rewards/rejected": -0.4939262866973877, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7496227534604859e-06, |
|
"logits/chosen": -1.4562785625457764, |
|
"logits/rejected": -1.0628981590270996, |
|
"logps/chosen": -594.0131225585938, |
|
"logps/rejected": -1322.223876953125, |
|
"loss": 0.0512, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.185628280043602, |
|
"rewards/margins": 0.31836962699890137, |
|
"rewards/rejected": -0.5039979219436646, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -1.622641921043396, |
|
"logits/rejected": -0.7408018112182617, |
|
"logps/chosen": -671.2892456054688, |
|
"logps/rejected": -1338.8228759765625, |
|
"loss": 0.0585, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19210806488990784, |
|
"rewards/margins": 0.3319090008735657, |
|
"rewards/rejected": -0.5240170359611511, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7053592124637557e-06, |
|
"logits/chosen": -1.6081740856170654, |
|
"logits/rejected": -0.7799841165542603, |
|
"logps/chosen": -656.5260009765625, |
|
"logps/rejected": -1301.16650390625, |
|
"loss": 0.0568, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.21345119178295135, |
|
"rewards/margins": 0.30246636271476746, |
|
"rewards/rejected": -0.5159175992012024, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6833297633956647e-06, |
|
"logits/chosen": -1.5897592306137085, |
|
"logits/rejected": -0.830175518989563, |
|
"logps/chosen": -643.302734375, |
|
"logps/rejected": -1318.0699462890625, |
|
"loss": 0.0593, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.17795221507549286, |
|
"rewards/margins": 0.332580029964447, |
|
"rewards/rejected": -0.5105322599411011, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.661371075624363e-06, |
|
"logits/chosen": -1.5620427131652832, |
|
"logits/rejected": -1.090867519378662, |
|
"logps/chosen": -677.5968627929688, |
|
"logps/rejected": -1375.831787109375, |
|
"loss": 0.0626, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.185464009642601, |
|
"rewards/margins": 0.3168545365333557, |
|
"rewards/rejected": -0.5023185014724731, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6394850517846621e-06, |
|
"logits/chosen": -1.4541980028152466, |
|
"logits/rejected": -0.972217857837677, |
|
"logps/chosen": -705.5538940429688, |
|
"logps/rejected": -1215.7884521484375, |
|
"loss": 0.1066, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.19816702604293823, |
|
"rewards/margins": 0.24639299511909485, |
|
"rewards/rejected": -0.4445599615573883, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6176735882153284e-06, |
|
"logits/chosen": -1.5021053552627563, |
|
"logits/rejected": -0.8954359292984009, |
|
"logps/chosen": -642.7943115234375, |
|
"logps/rejected": -1351.9677734375, |
|
"loss": 0.0746, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.18909773230552673, |
|
"rewards/margins": 0.3134486675262451, |
|
"rewards/rejected": -0.5025463104248047, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5959385747947697e-06, |
|
"logits/chosen": -1.4700965881347656, |
|
"logits/rejected": -0.7698783874511719, |
|
"logps/chosen": -592.5593872070312, |
|
"logps/rejected": -1228.227783203125, |
|
"loss": 0.055, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16944539546966553, |
|
"rewards/margins": 0.2940993309020996, |
|
"rewards/rejected": -0.46354469656944275, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5742818947772875e-06, |
|
"logits/chosen": -1.6665054559707642, |
|
"logits/rejected": -0.948663592338562, |
|
"logps/chosen": -769.6544799804688, |
|
"logps/rejected": -1263.137451171875, |
|
"loss": 0.1006, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.24728581309318542, |
|
"rewards/margins": 0.25588518381118774, |
|
"rewards/rejected": -0.5031709671020508, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.552705424629898e-06, |
|
"logits/chosen": -1.4320557117462158, |
|
"logits/rejected": -0.8846480250358582, |
|
"logps/chosen": -672.8453369140625, |
|
"logps/rejected": -1423.1922607421875, |
|
"loss": 0.0535, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1967121660709381, |
|
"rewards/margins": 0.31136855483055115, |
|
"rewards/rejected": -0.508080780506134, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5312110338697427e-06, |
|
"logits/chosen": -1.596573829650879, |
|
"logits/rejected": -0.9393990635871887, |
|
"logps/chosen": -719.8648681640625, |
|
"logps/rejected": -1458.17919921875, |
|
"loss": 0.0633, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.19690574705600739, |
|
"rewards/margins": 0.33237752318382263, |
|
"rewards/rejected": -0.5292832851409912, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.509800584902108e-06, |
|
"logits/chosen": -1.6579780578613281, |
|
"logits/rejected": -1.0232713222503662, |
|
"logps/chosen": -629.9151611328125, |
|
"logps/rejected": -1246.3726806640625, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1578701287508011, |
|
"rewards/margins": 0.3020227253437042, |
|
"rewards/rejected": -0.45989280939102173, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4884759328590476e-06, |
|
"logits/chosen": -1.6255989074707031, |
|
"logits/rejected": -0.9438311457633972, |
|
"logps/chosen": -571.7470703125, |
|
"logps/rejected": -1234.297119140625, |
|
"loss": 0.0744, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15460793673992157, |
|
"rewards/margins": 0.3083663582801819, |
|
"rewards/rejected": -0.4629742503166199, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.467238925438646e-06, |
|
"logits/chosen": -1.4770749807357788, |
|
"logits/rejected": -0.936165452003479, |
|
"logps/chosen": -617.3800048828125, |
|
"logps/rejected": -1241.4713134765625, |
|
"loss": 0.0783, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15939846634864807, |
|
"rewards/margins": 0.28489136695861816, |
|
"rewards/rejected": -0.44428983330726624, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.446091402744923e-06, |
|
"logits/chosen": -1.6321996450424194, |
|
"logits/rejected": -1.2910716533660889, |
|
"logps/chosen": -621.7635498046875, |
|
"logps/rejected": -1340.450439453125, |
|
"loss": 0.0587, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16068853437900543, |
|
"rewards/margins": 0.31573906540870667, |
|
"rewards/rejected": -0.4764275550842285, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4250351971283937e-06, |
|
"logits/chosen": -1.790464162826538, |
|
"logits/rejected": -0.8707693219184875, |
|
"logps/chosen": -630.2117919921875, |
|
"logps/rejected": -1443.947509765625, |
|
"loss": 0.0503, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.12107650935649872, |
|
"rewards/margins": 0.3619995713233948, |
|
"rewards/rejected": -0.4830760955810547, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4040721330273063e-06, |
|
"logits/chosen": -1.4005483388900757, |
|
"logits/rejected": -0.8707137107849121, |
|
"logps/chosen": -572.6295166015625, |
|
"logps/rejected": -1228.290283203125, |
|
"loss": 0.0709, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13189435005187988, |
|
"rewards/margins": 0.30900174379348755, |
|
"rewards/rejected": -0.44089609384536743, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3832040268095589e-06, |
|
"logits/chosen": -1.5691678524017334, |
|
"logits/rejected": -0.9897274971008301, |
|
"logps/chosen": -598.7499389648438, |
|
"logps/rejected": -1142.5048828125, |
|
"loss": 0.0866, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1460307091474533, |
|
"rewards/margins": 0.24934545159339905, |
|
"rewards/rejected": -0.39537614583969116, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.362432686615316e-06, |
|
"logits/chosen": -1.69058358669281, |
|
"logits/rejected": -1.319437861442566, |
|
"logps/chosen": -554.8807373046875, |
|
"logps/rejected": -1080.8026123046875, |
|
"loss": 0.0886, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.13216087222099304, |
|
"rewards/margins": 0.22610945999622345, |
|
"rewards/rejected": -0.3582703471183777, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3417599122003464e-06, |
|
"logits/chosen": -1.4841969013214111, |
|
"logits/rejected": -0.8709270358085632, |
|
"logps/chosen": -590.7588500976562, |
|
"logps/rejected": -1269.95556640625, |
|
"loss": 0.0861, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.156617671251297, |
|
"rewards/margins": 0.2556094229221344, |
|
"rewards/rejected": -0.4122270941734314, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3211874947800747e-06, |
|
"logits/chosen": -1.7067668437957764, |
|
"logits/rejected": -0.8970105051994324, |
|
"logps/chosen": -637.1722412109375, |
|
"logps/rejected": -1236.7254638671875, |
|
"loss": 0.0759, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15255072712898254, |
|
"rewards/margins": 0.2996431291103363, |
|
"rewards/rejected": -0.45219388604164124, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3007172168743854e-06, |
|
"logits/chosen": -1.722249984741211, |
|
"logits/rejected": -0.9102805256843567, |
|
"logps/chosen": -568.446533203125, |
|
"logps/rejected": -1270.4002685546875, |
|
"loss": 0.0711, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.13264772295951843, |
|
"rewards/margins": 0.33025822043418884, |
|
"rewards/rejected": -0.4629059433937073, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.280350852153168e-06, |
|
"logits/chosen": -1.4858075380325317, |
|
"logits/rejected": -1.0102977752685547, |
|
"logps/chosen": -668.56201171875, |
|
"logps/rejected": -1335.7791748046875, |
|
"loss": 0.0851, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.20782490074634552, |
|
"rewards/margins": 0.29307177662849426, |
|
"rewards/rejected": -0.5008966326713562, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.260090165282645e-06, |
|
"logits/chosen": -1.3048267364501953, |
|
"logits/rejected": -0.79096519947052, |
|
"logps/chosen": -672.3717041015625, |
|
"logps/rejected": -1276.05224609375, |
|
"loss": 0.091, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2136264592409134, |
|
"rewards/margins": 0.26162099838256836, |
|
"rewards/rejected": -0.47524747252464294, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2399369117724582e-06, |
|
"logits/chosen": -1.5877363681793213, |
|
"logits/rejected": -0.9525176882743835, |
|
"logps/chosen": -706.8630981445312, |
|
"logps/rejected": -1383.3653564453125, |
|
"loss": 0.0601, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2066899538040161, |
|
"rewards/margins": 0.30500850081443787, |
|
"rewards/rejected": -0.5116984248161316, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2198928378235717e-06, |
|
"logits/chosen": -1.7394685745239258, |
|
"logits/rejected": -1.1289197206497192, |
|
"logps/chosen": -770.9951171875, |
|
"logps/rejected": -1358.626220703125, |
|
"loss": 0.0663, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2261141985654831, |
|
"rewards/margins": 0.2788589596748352, |
|
"rewards/rejected": -0.5049731135368347, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1999596801769617e-06, |
|
"logits/chosen": -1.5435702800750732, |
|
"logits/rejected": -1.0047051906585693, |
|
"logps/chosen": -634.2347412109375, |
|
"logps/rejected": -1342.857177734375, |
|
"loss": 0.0383, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.18071284890174866, |
|
"rewards/margins": 0.3370632231235504, |
|
"rewards/rejected": -0.5177761316299438, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1801391659631423e-06, |
|
"logits/chosen": -1.4378631114959717, |
|
"logits/rejected": -1.1336383819580078, |
|
"logps/chosen": -650.482177734375, |
|
"logps/rejected": -1228.133056640625, |
|
"loss": 0.1003, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.21179573237895966, |
|
"rewards/margins": 0.25491851568222046, |
|
"rewards/rejected": -0.46671420335769653, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.160433012552508e-06, |
|
"logits/chosen": -1.5026006698608398, |
|
"logits/rejected": -1.0078703165054321, |
|
"logps/chosen": -657.7984619140625, |
|
"logps/rejected": -1241.7266845703125, |
|
"loss": 0.0907, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18873688578605652, |
|
"rewards/margins": 0.2727685272693634, |
|
"rewards/rejected": -0.4615054130554199, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1408429274065418e-06, |
|
"logits/chosen": -1.5700013637542725, |
|
"logits/rejected": -1.132730484008789, |
|
"logps/chosen": -637.8956298828125, |
|
"logps/rejected": -1278.509033203125, |
|
"loss": 0.0772, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.19238412380218506, |
|
"rewards/margins": 0.2825482189655304, |
|
"rewards/rejected": -0.47493234276771545, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1213706079298566e-06, |
|
"logits/chosen": -1.2392457723617554, |
|
"logits/rejected": -0.6231773495674133, |
|
"logps/chosen": -654.0189208984375, |
|
"logps/rejected": -1259.2901611328125, |
|
"loss": 0.0774, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2084679901599884, |
|
"rewards/margins": 0.2823956310749054, |
|
"rewards/rejected": -0.4908636212348938, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1020177413231334e-06, |
|
"logits/chosen": -1.4837418794631958, |
|
"logits/rejected": -1.0266939401626587, |
|
"logps/chosen": -575.24169921875, |
|
"logps/rejected": -1292.1416015625, |
|
"loss": 0.0814, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17716486752033234, |
|
"rewards/margins": 0.29848557710647583, |
|
"rewards/rejected": -0.4756503999233246, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0827860044369226e-06, |
|
"logits/chosen": -1.7130857706069946, |
|
"logits/rejected": -1.1947839260101318, |
|
"logps/chosen": -707.26171875, |
|
"logps/rejected": -1313.96484375, |
|
"loss": 0.0685, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.20284132659435272, |
|
"rewards/margins": 0.28745827078819275, |
|
"rewards/rejected": -0.49029961228370667, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.06367706362636e-06, |
|
"logits/chosen": -1.60333251953125, |
|
"logits/rejected": -1.095365047454834, |
|
"logps/chosen": -590.465576171875, |
|
"logps/rejected": -1228.563720703125, |
|
"loss": 0.0736, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1857512891292572, |
|
"rewards/margins": 0.2781633734703064, |
|
"rewards/rejected": -0.463914692401886, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0446925746067768e-06, |
|
"logits/chosen": -1.579377293586731, |
|
"logits/rejected": -0.8978742361068726, |
|
"logps/chosen": -583.1848754882812, |
|
"logps/rejected": -1341.265380859375, |
|
"loss": 0.0439, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.1685558259487152, |
|
"rewards/margins": 0.35025161504745483, |
|
"rewards/rejected": -0.5188074111938477, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0258341823102418e-06, |
|
"logits/chosen": -1.5291283130645752, |
|
"logits/rejected": -1.0524482727050781, |
|
"logps/chosen": -632.1546020507812, |
|
"logps/rejected": -1308.994384765625, |
|
"loss": 0.0785, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19686022400856018, |
|
"rewards/margins": 0.2875005602836609, |
|
"rewards/rejected": -0.48436084389686584, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0071035207430352e-06, |
|
"logits/chosen": -1.6773452758789062, |
|
"logits/rejected": -0.8837090730667114, |
|
"logps/chosen": -650.5315551757812, |
|
"logps/rejected": -1348.764404296875, |
|
"loss": 0.0673, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.16877111792564392, |
|
"rewards/margins": 0.34151607751846313, |
|
"rewards/rejected": -0.5102871656417847, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.88502212844063e-07, |
|
"logits/chosen": -1.2815361022949219, |
|
"logits/rejected": -0.6873558163642883, |
|
"logps/chosen": -566.9096069335938, |
|
"logps/rejected": -1250.682373046875, |
|
"loss": 0.0637, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.18210506439208984, |
|
"rewards/margins": 0.3151671588420868, |
|
"rewards/rejected": -0.497272253036499, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.700318703442437e-07, |
|
"logits/chosen": -1.480957269668579, |
|
"logits/rejected": -1.109178066253662, |
|
"logps/chosen": -599.2630615234375, |
|
"logps/rejected": -1315.3450927734375, |
|
"loss": 0.0725, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16557563841342926, |
|
"rewards/margins": 0.32593974471092224, |
|
"rewards/rejected": -0.49151545763015747, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.516940936268504e-07, |
|
"logits/chosen": -1.5238648653030396, |
|
"logits/rejected": -0.9741169214248657, |
|
"logps/chosen": -644.1940307617188, |
|
"logps/rejected": -1286.3187255859375, |
|
"loss": 0.0661, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1868639439344406, |
|
"rewards/margins": 0.286797434091568, |
|
"rewards/rejected": -0.4736614227294922, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.334904715888496e-07, |
|
"logits/chosen": -1.5936267375946045, |
|
"logits/rejected": -1.060530424118042, |
|
"logps/chosen": -593.8538208007812, |
|
"logps/rejected": -1255.338623046875, |
|
"loss": 0.0778, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.17814789712429047, |
|
"rewards/margins": 0.27660074830055237, |
|
"rewards/rejected": -0.45474863052368164, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.154225815032242e-07, |
|
"logits/chosen": -1.590341329574585, |
|
"logits/rejected": -0.7449840903282166, |
|
"logps/chosen": -610.4566650390625, |
|
"logps/rejected": -1260.689208984375, |
|
"loss": 0.0591, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1576831042766571, |
|
"rewards/margins": 0.33318689465522766, |
|
"rewards/rejected": -0.49086999893188477, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.974919888823164e-07, |
|
"logits/chosen": -1.3408777713775635, |
|
"logits/rejected": -0.7836991548538208, |
|
"logps/chosen": -589.0643920898438, |
|
"logps/rejected": -1239.4324951171875, |
|
"loss": 0.0914, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15606389939785004, |
|
"rewards/margins": 0.29545897245407104, |
|
"rewards/rejected": -0.45152291655540466, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.797002473421729e-07, |
|
"logits/chosen": -1.6109354496002197, |
|
"logits/rejected": -1.0465881824493408, |
|
"logps/chosen": -709.0189208984375, |
|
"logps/rejected": -1308.754638671875, |
|
"loss": 0.0682, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.20454378426074982, |
|
"rewards/margins": 0.263343870639801, |
|
"rewards/rejected": -0.4678876996040344, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.620488984679378e-07, |
|
"logits/chosen": -1.621872901916504, |
|
"logits/rejected": -0.963768482208252, |
|
"logps/chosen": -608.9706420898438, |
|
"logps/rejected": -1198.4373779296875, |
|
"loss": 0.0642, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16753293573856354, |
|
"rewards/margins": 0.2994327247142792, |
|
"rewards/rejected": -0.4669656753540039, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.445394716802754e-07, |
|
"logits/chosen": -1.4504549503326416, |
|
"logits/rejected": -0.7841233015060425, |
|
"logps/chosen": -669.5154418945312, |
|
"logps/rejected": -1335.7557373046875, |
|
"loss": 0.0643, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.19036135077476501, |
|
"rewards/margins": 0.3035343289375305, |
|
"rewards/rejected": -0.49389567971229553, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.271734841028553e-07, |
|
"logits/chosen": -1.371618390083313, |
|
"logits/rejected": -0.9690683484077454, |
|
"logps/chosen": -578.9434814453125, |
|
"logps/rejected": -1268.2574462890625, |
|
"loss": 0.0852, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17845505475997925, |
|
"rewards/margins": 0.30898019671440125, |
|
"rewards/rejected": -0.4874352812767029, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.099524404308948e-07, |
|
"logits/chosen": -1.3634693622589111, |
|
"logits/rejected": -1.2364251613616943, |
|
"logps/chosen": -655.4860229492188, |
|
"logps/rejected": -1382.656005859375, |
|
"loss": 0.0849, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.21539123356342316, |
|
"rewards/margins": 0.2801755666732788, |
|
"rewards/rejected": -0.49556678533554077, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.928778328007918e-07, |
|
"logits/chosen": -1.6657747030258179, |
|
"logits/rejected": -1.1524592638015747, |
|
"logps/chosen": -609.6968994140625, |
|
"logps/rejected": -1228.130859375, |
|
"loss": 0.1019, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.18815730512142181, |
|
"rewards/margins": 0.2728883922100067, |
|
"rewards/rejected": -0.46104568243026733, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.759511406608255e-07, |
|
"logits/chosen": -1.521481990814209, |
|
"logits/rejected": -1.0144175291061401, |
|
"logps/chosen": -666.2223510742188, |
|
"logps/rejected": -1294.4681396484375, |
|
"loss": 0.0567, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1811828464269638, |
|
"rewards/margins": 0.2964962124824524, |
|
"rewards/rejected": -0.4776790142059326, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.591738306429769e-07, |
|
"logits/chosen": -1.5954043865203857, |
|
"logits/rejected": -0.9687323570251465, |
|
"logps/chosen": -608.4232177734375, |
|
"logps/rejected": -1266.69482421875, |
|
"loss": 0.0699, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1722910851240158, |
|
"rewards/margins": 0.308040052652359, |
|
"rewards/rejected": -0.4803311228752136, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.425473564358457e-07, |
|
"logits/chosen": -1.4824254512786865, |
|
"logits/rejected": -0.898513913154602, |
|
"logps/chosen": -605.569091796875, |
|
"logps/rejected": -1327.9998779296875, |
|
"loss": 0.0638, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16081413626670837, |
|
"rewards/margins": 0.3248310089111328, |
|
"rewards/rejected": -0.4856451451778412, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.260731586586983e-07, |
|
"logits/chosen": -1.2875173091888428, |
|
"logits/rejected": -0.9592302441596985, |
|
"logps/chosen": -512.1587524414062, |
|
"logps/rejected": -1235.3233642578125, |
|
"loss": 0.0696, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.13009069859981537, |
|
"rewards/margins": 0.3230075538158417, |
|
"rewards/rejected": -0.45309823751449585, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.097526647366379e-07, |
|
"logits/chosen": -1.526106357574463, |
|
"logits/rejected": -0.9157294034957886, |
|
"logps/chosen": -611.5623779296875, |
|
"logps/rejected": -1331.778076171875, |
|
"loss": 0.0441, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1644577980041504, |
|
"rewards/margins": 0.3466406762599945, |
|
"rewards/rejected": -0.5110985040664673, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.935872887769299e-07, |
|
"logits/chosen": -1.4740724563598633, |
|
"logits/rejected": -1.2198327779769897, |
|
"logps/chosen": -515.4468994140625, |
|
"logps/rejected": -1197.4677734375, |
|
"loss": 0.0739, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.13031090795993805, |
|
"rewards/margins": 0.29615822434425354, |
|
"rewards/rejected": -0.42646917700767517, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.775784314464717e-07, |
|
"logits/chosen": -1.691954255104065, |
|
"logits/rejected": -1.2317649126052856, |
|
"logps/chosen": -551.2989501953125, |
|
"logps/rejected": -1318.2874755859375, |
|
"loss": 0.0574, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.15544219315052032, |
|
"rewards/margins": 0.31455904245376587, |
|
"rewards/rejected": -0.4700012803077698, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.617274798504286e-07, |
|
"logits/chosen": -1.5783944129943848, |
|
"logits/rejected": -0.9322364926338196, |
|
"logps/chosen": -619.6995849609375, |
|
"logps/rejected": -1310.8206787109375, |
|
"loss": 0.0682, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.13858993351459503, |
|
"rewards/margins": 0.35597696900367737, |
|
"rewards/rejected": -0.4945669174194336, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.460358074120518e-07, |
|
"logits/chosen": -1.6260960102081299, |
|
"logits/rejected": -1.0405943393707275, |
|
"logps/chosen": -588.0743408203125, |
|
"logps/rejected": -1362.0108642578125, |
|
"loss": 0.037, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.14083652198314667, |
|
"rewards/margins": 0.32858163118362427, |
|
"rewards/rejected": -0.46941813826560974, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.305047737536707e-07, |
|
"logits/chosen": -1.509161353111267, |
|
"logits/rejected": -1.0090100765228271, |
|
"logps/chosen": -575.589599609375, |
|
"logps/rejected": -1260.2518310546875, |
|
"loss": 0.0557, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16428951919078827, |
|
"rewards/margins": 0.31566599011421204, |
|
"rewards/rejected": -0.4799554944038391, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.151357245788917e-07, |
|
"logits/chosen": -1.3317458629608154, |
|
"logits/rejected": -0.9225482940673828, |
|
"logps/chosen": -795.397705078125, |
|
"logps/rejected": -1355.7197265625, |
|
"loss": 0.0627, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2432664930820465, |
|
"rewards/margins": 0.2584363520145416, |
|
"rewards/rejected": -0.5017029047012329, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.999299915559956e-07, |
|
"logits/chosen": -1.482460379600525, |
|
"logits/rejected": -1.0576423406600952, |
|
"logps/chosen": -619.4547119140625, |
|
"logps/rejected": -1274.428955078125, |
|
"loss": 0.0574, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16743937134742737, |
|
"rewards/margins": 0.3212641477584839, |
|
"rewards/rejected": -0.48870354890823364, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.848888922025553e-07, |
|
"logits/chosen": -1.5611276626586914, |
|
"logits/rejected": -1.225208044052124, |
|
"logps/chosen": -633.8873901367188, |
|
"logps/rejected": -1392.9622802734375, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.15855436027050018, |
|
"rewards/margins": 0.31555289030075073, |
|
"rewards/rejected": -0.4741072654724121, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.700137297712749e-07, |
|
"logits/chosen": -1.5436227321624756, |
|
"logits/rejected": -0.7761337161064148, |
|
"logps/chosen": -619.49609375, |
|
"logps/rejected": -1352.654052734375, |
|
"loss": 0.0673, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.16255274415016174, |
|
"rewards/margins": 0.3418361246585846, |
|
"rewards/rejected": -0.5043889284133911, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.553057931370729e-07, |
|
"logits/chosen": -1.5770825147628784, |
|
"logits/rejected": -0.8190025091171265, |
|
"logps/chosen": -633.076416015625, |
|
"logps/rejected": -1185.306396484375, |
|
"loss": 0.0764, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.14611086249351501, |
|
"rewards/margins": 0.2796880602836609, |
|
"rewards/rejected": -0.4257989525794983, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.407663566854008e-07, |
|
"logits/chosen": -1.6848455667495728, |
|
"logits/rejected": -0.7886224985122681, |
|
"logps/chosen": -664.6588745117188, |
|
"logps/rejected": -1323.16064453125, |
|
"loss": 0.0494, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.15056583285331726, |
|
"rewards/margins": 0.34144124388694763, |
|
"rewards/rejected": -0.4920070767402649, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.263966802018275e-07, |
|
"logits/chosen": -1.736999750137329, |
|
"logits/rejected": -0.8889113664627075, |
|
"logps/chosen": -563.3355712890625, |
|
"logps/rejected": -1182.220947265625, |
|
"loss": 0.0538, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.10529695451259613, |
|
"rewards/margins": 0.3400992751121521, |
|
"rewards/rejected": -0.4453962445259094, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.121980087628802e-07, |
|
"logits/chosen": -1.3477979898452759, |
|
"logits/rejected": -0.9392274618148804, |
|
"logps/chosen": -666.6458740234375, |
|
"logps/rejected": -1304.849853515625, |
|
"loss": 0.0825, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.18823906779289246, |
|
"rewards/margins": 0.28339654207229614, |
|
"rewards/rejected": -0.471635639667511, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.981715726281666e-07, |
|
"logits/chosen": -1.468207597732544, |
|
"logits/rejected": -0.750462532043457, |
|
"logps/chosen": -549.2374877929688, |
|
"logps/rejected": -1129.191650390625, |
|
"loss": 0.0837, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1359584927558899, |
|
"rewards/margins": 0.2849787175655365, |
|
"rewards/rejected": -0.4209372103214264, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.843185871337722e-07, |
|
"logits/chosen": -1.5334383249282837, |
|
"logits/rejected": -1.020437240600586, |
|
"logps/chosen": -542.3077392578125, |
|
"logps/rejected": -1162.928955078125, |
|
"loss": 0.0704, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.14567705988883972, |
|
"rewards/margins": 0.2968258261680603, |
|
"rewards/rejected": -0.4425028860569, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.706402525869633e-07, |
|
"logits/chosen": -1.614105463027954, |
|
"logits/rejected": -0.9975967407226562, |
|
"logps/chosen": -592.6704711914062, |
|
"logps/rejected": -1195.4998779296875, |
|
"loss": 0.0767, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.14713343977928162, |
|
"rewards/margins": 0.293200820684433, |
|
"rewards/rejected": -0.4403342306613922, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.5713775416217884e-07, |
|
"logits/chosen": -1.5890741348266602, |
|
"logits/rejected": -1.1050251722335815, |
|
"logps/chosen": -633.64990234375, |
|
"logps/rejected": -1316.699462890625, |
|
"loss": 0.0565, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.15724198520183563, |
|
"rewards/margins": 0.3219819962978363, |
|
"rewards/rejected": -0.47922396659851074, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.438122617983442e-07, |
|
"logits/chosen": -1.4880859851837158, |
|
"logits/rejected": -1.0814130306243896, |
|
"logps/chosen": -556.4613647460938, |
|
"logps/rejected": -1175.9158935546875, |
|
"loss": 0.0847, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1583164930343628, |
|
"rewards/margins": 0.27797532081604004, |
|
"rewards/rejected": -0.43629178404808044, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.3066493009749853e-07, |
|
"logits/chosen": -1.5819056034088135, |
|
"logits/rejected": -0.8545023798942566, |
|
"logps/chosen": -625.5091552734375, |
|
"logps/rejected": -1260.5357666015625, |
|
"loss": 0.056, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.14933553338050842, |
|
"rewards/margins": 0.3165150284767151, |
|
"rewards/rejected": -0.4658505916595459, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1769689822475147e-07, |
|
"logits/chosen": -1.4295012950897217, |
|
"logits/rejected": -0.9826697111129761, |
|
"logps/chosen": -595.7457885742188, |
|
"logps/rejected": -1152.125, |
|
"loss": 0.0926, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1720040887594223, |
|
"rewards/margins": 0.25839370489120483, |
|
"rewards/rejected": -0.4303978383541107, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.049092898095816e-07, |
|
"logits/chosen": -1.69967782497406, |
|
"logits/rejected": -1.0549393892288208, |
|
"logps/chosen": -673.6993408203125, |
|
"logps/rejected": -1231.6971435546875, |
|
"loss": 0.0544, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1993969976902008, |
|
"rewards/margins": 0.26704469323158264, |
|
"rewards/rejected": -0.46644172072410583, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9230321284847856e-07, |
|
"logits/chosen": -1.347398042678833, |
|
"logits/rejected": -0.7797093391418457, |
|
"logps/chosen": -616.3978271484375, |
|
"logps/rejected": -1361.00732421875, |
|
"loss": 0.0547, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1867767572402954, |
|
"rewards/margins": 0.33997079730033875, |
|
"rewards/rejected": -0.5267475247383118, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.798797596089351e-07, |
|
"logits/chosen": -1.627018928527832, |
|
"logits/rejected": -0.8644296526908875, |
|
"logps/chosen": -735.9021606445312, |
|
"logps/rejected": -1377.241455078125, |
|
"loss": 0.0526, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.19497773051261902, |
|
"rewards/margins": 0.33351653814315796, |
|
"rewards/rejected": -0.5284942388534546, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6764000653481263e-07, |
|
"logits/chosen": -1.6354873180389404, |
|
"logits/rejected": -0.8436982035636902, |
|
"logps/chosen": -638.96728515625, |
|
"logps/rejected": -1231.9659423828125, |
|
"loss": 0.0799, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.19029106199741364, |
|
"rewards/margins": 0.27815455198287964, |
|
"rewards/rejected": -0.4684455990791321, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.555850141530659e-07, |
|
"logits/chosen": -1.9651466608047485, |
|
"logits/rejected": -1.0768983364105225, |
|
"logps/chosen": -751.0382690429688, |
|
"logps/rejected": -1339.02783203125, |
|
"loss": 0.0791, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1946159154176712, |
|
"rewards/margins": 0.29727649688720703, |
|
"rewards/rejected": -0.49189239740371704, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4371582698185636e-07, |
|
"logits/chosen": -1.511791467666626, |
|
"logits/rejected": -1.1664907932281494, |
|
"logps/chosen": -508.4298400878906, |
|
"logps/rejected": -1147.3773193359375, |
|
"loss": 0.0805, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.149129718542099, |
|
"rewards/margins": 0.2729035019874573, |
|
"rewards/rejected": -0.4220332205295563, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.3203347344004737e-07, |
|
"logits/chosen": -1.4468176364898682, |
|
"logits/rejected": -1.2824242115020752, |
|
"logps/chosen": -515.1135864257812, |
|
"logps/rejected": -1145.444580078125, |
|
"loss": 0.1051, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1785714328289032, |
|
"rewards/margins": 0.23533880710601807, |
|
"rewards/rejected": -0.41391023993492126, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.2053896575809426e-07, |
|
"logits/chosen": -1.5166642665863037, |
|
"logits/rejected": -0.9769018292427063, |
|
"logps/chosen": -721.0452880859375, |
|
"logps/rejected": -1224.28125, |
|
"loss": 0.0988, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.19355639815330505, |
|
"rewards/margins": 0.24941392242908478, |
|
"rewards/rejected": -0.442970335483551, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.092332998903416e-07, |
|
"logits/chosen": -1.5955547094345093, |
|
"logits/rejected": -0.8334843516349792, |
|
"logps/chosen": -624.3068237304688, |
|
"logps/rejected": -1299.734130859375, |
|
"loss": 0.0705, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1941680610179901, |
|
"rewards/margins": 0.31897804141044617, |
|
"rewards/rejected": -0.5131461024284363, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.981174554287239e-07, |
|
"logits/chosen": -1.361081600189209, |
|
"logits/rejected": -0.7655187845230103, |
|
"logps/chosen": -657.2144775390625, |
|
"logps/rejected": -1261.987548828125, |
|
"loss": 0.0805, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19083121418952942, |
|
"rewards/margins": 0.29023870825767517, |
|
"rewards/rejected": -0.481069952249527, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.871923955178918e-07, |
|
"logits/chosen": -1.5999078750610352, |
|
"logits/rejected": -0.7431113719940186, |
|
"logps/chosen": -730.16455078125, |
|
"logps/rejected": -1300.1298828125, |
|
"loss": 0.0675, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21212446689605713, |
|
"rewards/margins": 0.29901638627052307, |
|
"rewards/rejected": -0.5111408829689026, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.764590667717562e-07, |
|
"logits/chosen": -1.7554610967636108, |
|
"logits/rejected": -1.0210330486297607, |
|
"logps/chosen": -691.9470825195312, |
|
"logps/rejected": -1321.851806640625, |
|
"loss": 0.0668, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17403197288513184, |
|
"rewards/margins": 0.3116183876991272, |
|
"rewards/rejected": -0.4856503903865814, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6591839919146963e-07, |
|
"logits/chosen": -1.366562843322754, |
|
"logits/rejected": -0.8948714137077332, |
|
"logps/chosen": -622.9371948242188, |
|
"logps/rejected": -1320.292724609375, |
|
"loss": 0.068, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1862497329711914, |
|
"rewards/margins": 0.32431843876838684, |
|
"rewards/rejected": -0.5105680823326111, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.555713060848433e-07, |
|
"logits/chosen": -1.5601285696029663, |
|
"logits/rejected": -0.8055307269096375, |
|
"logps/chosen": -620.6419067382812, |
|
"logps/rejected": -1285.7567138671875, |
|
"loss": 0.0621, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.187259703874588, |
|
"rewards/margins": 0.3192376494407654, |
|
"rewards/rejected": -0.506497323513031, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.454186839872158e-07, |
|
"logits/chosen": -1.2374000549316406, |
|
"logits/rejected": -0.9511027336120605, |
|
"logps/chosen": -584.9683837890625, |
|
"logps/rejected": -1257.975341796875, |
|
"loss": 0.086, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.20605416595935822, |
|
"rewards/margins": 0.264712929725647, |
|
"rewards/rejected": -0.4707671105861664, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3546141258376786e-07, |
|
"logits/chosen": -1.4687381982803345, |
|
"logits/rejected": -1.0029032230377197, |
|
"logps/chosen": -693.2366943359375, |
|
"logps/rejected": -1394.9901123046875, |
|
"loss": 0.0572, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.20397081971168518, |
|
"rewards/margins": 0.31658852100372314, |
|
"rewards/rejected": -0.5205592513084412, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.257003546333042e-07, |
|
"logits/chosen": -1.8167024850845337, |
|
"logits/rejected": -0.9430249929428101, |
|
"logps/chosen": -649.8558959960938, |
|
"logps/rejected": -1447.1058349609375, |
|
"loss": 0.0456, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.18286794424057007, |
|
"rewards/margins": 0.35680800676345825, |
|
"rewards/rejected": -0.5396759510040283, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1613635589349756e-07, |
|
"logits/chosen": -1.5668996572494507, |
|
"logits/rejected": -0.9328106045722961, |
|
"logps/chosen": -549.9508056640625, |
|
"logps/rejected": -1223.6793212890625, |
|
"loss": 0.0725, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.156307652592659, |
|
"rewards/margins": 0.3133383095264435, |
|
"rewards/rejected": -0.46964597702026367, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0677024504760752e-07, |
|
"logits/chosen": -1.8043934106826782, |
|
"logits/rejected": -1.106671929359436, |
|
"logps/chosen": -603.4810791015625, |
|
"logps/rejected": -1286.2061767578125, |
|
"loss": 0.0659, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16138550639152527, |
|
"rewards/margins": 0.31447383761405945, |
|
"rewards/rejected": -0.4758593440055847, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.9760283363267684e-07, |
|
"logits/chosen": -1.6394857168197632, |
|
"logits/rejected": -1.1434743404388428, |
|
"logps/chosen": -560.7457885742188, |
|
"logps/rejected": -1270.900390625, |
|
"loss": 0.0668, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.18648633360862732, |
|
"rewards/margins": 0.3022904694080353, |
|
"rewards/rejected": -0.4887767732143402, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8863491596921745e-07, |
|
"logits/chosen": -1.4652307033538818, |
|
"logits/rejected": -0.8056305646896362, |
|
"logps/chosen": -588.2001342773438, |
|
"logps/rejected": -1267.324951171875, |
|
"loss": 0.0493, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17455193400382996, |
|
"rewards/margins": 0.3139300048351288, |
|
"rewards/rejected": -0.48848190903663635, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.798672690923828e-07, |
|
"logits/chosen": -1.417265772819519, |
|
"logits/rejected": -0.8639974594116211, |
|
"logps/chosen": -566.65234375, |
|
"logps/rejected": -1115.6005859375, |
|
"loss": 0.0645, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1619497835636139, |
|
"rewards/margins": 0.26331502199172974, |
|
"rewards/rejected": -0.42526477575302124, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.713006526846439e-07, |
|
"logits/chosen": -1.53346848487854, |
|
"logits/rejected": -0.9233430027961731, |
|
"logps/chosen": -593.0252075195312, |
|
"logps/rejected": -1353.1142578125, |
|
"loss": 0.0667, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.17712654173374176, |
|
"rewards/margins": 0.3622104525566101, |
|
"rewards/rejected": -0.5393369793891907, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.629358090099639e-07, |
|
"logits/chosen": -1.4553884267807007, |
|
"logits/rejected": -0.867672324180603, |
|
"logps/chosen": -564.8086547851562, |
|
"logps/rejected": -1151.0306396484375, |
|
"loss": 0.0901, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.18404200673103333, |
|
"rewards/margins": 0.25011754035949707, |
|
"rewards/rejected": -0.4341595768928528, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5477346284948292e-07, |
|
"logits/chosen": -1.5238468647003174, |
|
"logits/rejected": -1.0101631879806519, |
|
"logps/chosen": -620.8792724609375, |
|
"logps/rejected": -1464.6549072265625, |
|
"loss": 0.0397, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1736445277929306, |
|
"rewards/margins": 0.37397870421409607, |
|
"rewards/rejected": -0.5476232171058655, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4681432143872133e-07, |
|
"logits/chosen": -1.4745080471038818, |
|
"logits/rejected": -1.0695136785507202, |
|
"logps/chosen": -757.6289672851562, |
|
"logps/rejected": -1409.22802734375, |
|
"loss": 0.0673, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.22448639571666718, |
|
"rewards/margins": 0.2988077998161316, |
|
"rewards/rejected": -0.5232942700386047, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3905907440629752e-07, |
|
"logits/chosen": -1.6097753047943115, |
|
"logits/rejected": -1.02309250831604, |
|
"logps/chosen": -702.1261596679688, |
|
"logps/rejected": -1313.4244384765625, |
|
"loss": 0.0943, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2056199014186859, |
|
"rewards/margins": 0.2749633491039276, |
|
"rewards/rejected": -0.4805833399295807, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.31508393714177e-07, |
|
"logits/chosen": -1.528530478477478, |
|
"logits/rejected": -1.0698211193084717, |
|
"logps/chosen": -605.3930053710938, |
|
"logps/rejected": -1313.823486328125, |
|
"loss": 0.0457, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1993333399295807, |
|
"rewards/margins": 0.3147328794002533, |
|
"rewards/rejected": -0.514066219329834, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.241629335994471e-07, |
|
"logits/chosen": -1.638108253479004, |
|
"logits/rejected": -0.8232443928718567, |
|
"logps/chosen": -782.85986328125, |
|
"logps/rejected": -1360.0474853515625, |
|
"loss": 0.0739, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2265842854976654, |
|
"rewards/margins": 0.2933647036552429, |
|
"rewards/rejected": -0.5199490785598755, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1702333051763271e-07, |
|
"logits/chosen": -1.5153647661209106, |
|
"logits/rejected": -0.7482441663742065, |
|
"logps/chosen": -806.5430908203125, |
|
"logps/rejected": -1379.1104736328125, |
|
"loss": 0.1037, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.21187452971935272, |
|
"rewards/margins": 0.2870264947414398, |
|
"rewards/rejected": -0.4989010691642761, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1009020308754587e-07, |
|
"logits/chosen": -1.422628402709961, |
|
"logits/rejected": -1.1864253282546997, |
|
"logps/chosen": -631.9682006835938, |
|
"logps/rejected": -1306.8543701171875, |
|
"loss": 0.1056, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.20772810280323029, |
|
"rewards/margins": 0.2739812731742859, |
|
"rewards/rejected": -0.48170939087867737, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0336415203768962e-07, |
|
"logits/chosen": -1.5100951194763184, |
|
"logits/rejected": -0.9938896894454956, |
|
"logps/chosen": -725.32373046875, |
|
"logps/rejected": -1294.2386474609375, |
|
"loss": 0.0828, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20762935280799866, |
|
"rewards/margins": 0.26330476999282837, |
|
"rewards/rejected": -0.47093409299850464, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.684576015420277e-08, |
|
"logits/chosen": -1.4899585247039795, |
|
"logits/rejected": -0.9078477025032043, |
|
"logps/chosen": -672.1417846679688, |
|
"logps/rejected": -1225.94140625, |
|
"loss": 0.0873, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.17601068317890167, |
|
"rewards/margins": 0.2746294140815735, |
|
"rewards/rejected": -0.4506400525569916, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.053559223036746e-08, |
|
"logits/chosen": -1.6049926280975342, |
|
"logits/rejected": -0.8928203582763672, |
|
"logps/chosen": -690.5973510742188, |
|
"logps/rejected": -1238.628173828125, |
|
"loss": 0.0812, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20545156300067902, |
|
"rewards/margins": 0.27740758657455444, |
|
"rewards/rejected": -0.48285919427871704, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.44341950176683e-08, |
|
"logits/chosen": -1.3215292692184448, |
|
"logits/rejected": -0.9712142944335938, |
|
"logps/chosen": -696.58349609375, |
|
"logps/rejected": -1317.867431640625, |
|
"loss": 0.0799, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18639104068279266, |
|
"rewards/margins": 0.28709009289741516, |
|
"rewards/rejected": -0.47348111867904663, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.854209717842231e-08, |
|
"logits/chosen": -1.5110366344451904, |
|
"logits/rejected": -0.9558350443840027, |
|
"logps/chosen": -647.8221435546875, |
|
"logps/rejected": -1398.7818603515625, |
|
"loss": 0.0389, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1730073243379593, |
|
"rewards/margins": 0.34914129972457886, |
|
"rewards/rejected": -0.522148609161377, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.285980923996989e-08, |
|
"logits/chosen": -1.4418294429779053, |
|
"logits/rejected": -0.9980441331863403, |
|
"logps/chosen": -586.1068115234375, |
|
"logps/rejected": -1378.889892578125, |
|
"loss": 0.0612, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17401185631752014, |
|
"rewards/margins": 0.3377479314804077, |
|
"rewards/rejected": -0.5117597579956055, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.738782355044048e-08, |
|
"logits/chosen": -1.775757074356079, |
|
"logits/rejected": -1.016351342201233, |
|
"logps/chosen": -715.5010986328125, |
|
"logps/rejected": -1282.7650146484375, |
|
"loss": 0.0603, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.18584254384040833, |
|
"rewards/margins": 0.28814181685447693, |
|
"rewards/rejected": -0.47398439049720764, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.212661423609184e-08, |
|
"logits/chosen": -1.5122634172439575, |
|
"logits/rejected": -0.9780920743942261, |
|
"logps/chosen": -668.7765502929688, |
|
"logps/rejected": -1279.484619140625, |
|
"loss": 0.0737, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.18126599490642548, |
|
"rewards/margins": 0.28931209444999695, |
|
"rewards/rejected": -0.47057804465293884, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.707663716023021e-08, |
|
"logits/chosen": -1.7019774913787842, |
|
"logits/rejected": -0.9764993786811829, |
|
"logps/chosen": -598.6218872070312, |
|
"logps/rejected": -1202.2509765625, |
|
"loss": 0.0736, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16811831295490265, |
|
"rewards/margins": 0.292553186416626, |
|
"rewards/rejected": -0.4606715142726898, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.22383298837098e-08, |
|
"logits/chosen": -1.589091420173645, |
|
"logits/rejected": -1.0338377952575684, |
|
"logps/chosen": -595.810302734375, |
|
"logps/rejected": -1203.7371826171875, |
|
"loss": 0.08, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1645013988018036, |
|
"rewards/margins": 0.29109999537467957, |
|
"rewards/rejected": -0.45560139417648315, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.761211162702117e-08, |
|
"logits/chosen": -1.7127647399902344, |
|
"logits/rejected": -0.8404116630554199, |
|
"logps/chosen": -608.0611572265625, |
|
"logps/rejected": -1101.7435302734375, |
|
"loss": 0.0955, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.16957123577594757, |
|
"rewards/margins": 0.2522638440132141, |
|
"rewards/rejected": -0.4218350946903229, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.319838323396691e-08, |
|
"logits/chosen": -1.3308387994766235, |
|
"logits/rejected": -0.711550772190094, |
|
"logps/chosen": -609.5896606445312, |
|
"logps/rejected": -1312.685302734375, |
|
"loss": 0.0725, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17916107177734375, |
|
"rewards/margins": 0.2821735739707947, |
|
"rewards/rejected": -0.4613346457481384, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.8997527136930004e-08, |
|
"logits/chosen": -1.469405174255371, |
|
"logits/rejected": -0.9255521893501282, |
|
"logps/chosen": -650.4967041015625, |
|
"logps/rejected": -1233.0570068359375, |
|
"loss": 0.0673, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.19022206962108612, |
|
"rewards/margins": 0.2924087345600128, |
|
"rewards/rejected": -0.48263078927993774, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.5009907323737826e-08, |
|
"logits/chosen": -1.5496537685394287, |
|
"logits/rejected": -1.0329219102859497, |
|
"logps/chosen": -653.6101684570312, |
|
"logps/rejected": -1362.90478515625, |
|
"loss": 0.0495, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19138206541538239, |
|
"rewards/margins": 0.3122704029083252, |
|
"rewards/rejected": -0.5036525130271912, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.1235869306123766e-08, |
|
"logits/chosen": -1.4933403730392456, |
|
"logits/rejected": -0.8053463101387024, |
|
"logps/chosen": -725.1935424804688, |
|
"logps/rejected": -1382.274169921875, |
|
"loss": 0.0651, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.21708261966705322, |
|
"rewards/margins": 0.3156852126121521, |
|
"rewards/rejected": -0.5327678322792053, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.767574008979007e-08, |
|
"logits/chosen": -1.3949609994888306, |
|
"logits/rejected": -0.9586105346679688, |
|
"logps/chosen": -533.8550415039062, |
|
"logps/rejected": -1206.543212890625, |
|
"loss": 0.0737, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15743830800056458, |
|
"rewards/margins": 0.2968447208404541, |
|
"rewards/rejected": -0.4542829990386963, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4329828146074096e-08, |
|
"logits/chosen": -1.6258302927017212, |
|
"logits/rejected": -1.0097240209579468, |
|
"logps/chosen": -639.5706176757812, |
|
"logps/rejected": -1331.8486328125, |
|
"loss": 0.0453, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.17417296767234802, |
|
"rewards/margins": 0.3265232443809509, |
|
"rewards/rejected": -0.5006962418556213, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1198423385220822e-08, |
|
"logits/chosen": -1.5256226062774658, |
|
"logits/rejected": -0.8268339037895203, |
|
"logps/chosen": -662.9381103515625, |
|
"logps/rejected": -1204.4327392578125, |
|
"loss": 0.0715, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.19520241022109985, |
|
"rewards/margins": 0.27870461344718933, |
|
"rewards/rejected": -0.47390708327293396, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.82817971312621e-08, |
|
"logits/chosen": -1.7607667446136475, |
|
"logits/rejected": -1.1123876571655273, |
|
"logps/chosen": -596.0337524414062, |
|
"logps/rejected": -1304.044921875, |
|
"loss": 0.0639, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.14941272139549255, |
|
"rewards/margins": 0.33007779717445374, |
|
"rewards/rejected": -0.47949057817459106, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5580202098509078e-08, |
|
"logits/chosen": -1.6048189401626587, |
|
"logits/rejected": -0.984302818775177, |
|
"logps/chosen": -657.6131591796875, |
|
"logps/rejected": -1466.190185546875, |
|
"loss": 0.0442, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.18197950720787048, |
|
"rewards/margins": 0.34665971994400024, |
|
"rewards/rejected": -0.5286391973495483, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3093872369654148e-08, |
|
"logits/chosen": -1.5646374225616455, |
|
"logits/rejected": -0.8982056379318237, |
|
"logps/chosen": -580.00146484375, |
|
"logps/rejected": -1101.258544921875, |
|
"loss": 0.1123, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1632545292377472, |
|
"rewards/margins": 0.24513199925422668, |
|
"rewards/rejected": -0.4083865284919739, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0823023375489128e-08, |
|
"logits/chosen": -1.5190411806106567, |
|
"logits/rejected": -1.027479887008667, |
|
"logps/chosen": -595.8035888671875, |
|
"logps/rejected": -1325.04443359375, |
|
"loss": 0.0629, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17017218470573425, |
|
"rewards/margins": 0.305239737033844, |
|
"rewards/rejected": -0.47541195154190063, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.767851876239075e-09, |
|
"logits/chosen": -1.6967909336090088, |
|
"logits/rejected": -0.9378561973571777, |
|
"logps/chosen": -561.2920532226562, |
|
"logps/rejected": -1203.197021484375, |
|
"loss": 0.0622, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.14759351313114166, |
|
"rewards/margins": 0.31226032972335815, |
|
"rewards/rejected": -0.4598538279533386, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.9285359445145366e-09, |
|
"logits/chosen": -1.6613948345184326, |
|
"logits/rejected": -1.1440684795379639, |
|
"logps/chosen": -600.6172485351562, |
|
"logps/rejected": -1154.182861328125, |
|
"loss": 0.0852, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.17384423315525055, |
|
"rewards/margins": 0.26200392842292786, |
|
"rewards/rejected": -0.4358481466770172, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.305234949880001e-09, |
|
"logits/chosen": -1.5972778797149658, |
|
"logits/rejected": -0.801485538482666, |
|
"logps/chosen": -688.7052001953125, |
|
"logps/rejected": -1276.092041015625, |
|
"loss": 0.0613, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.18506471812725067, |
|
"rewards/margins": 0.29814431071281433, |
|
"rewards/rejected": -0.4832090437412262, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.8980895450474455e-09, |
|
"logits/chosen": -1.405221700668335, |
|
"logits/rejected": -0.8206748962402344, |
|
"logps/chosen": -693.5679931640625, |
|
"logps/rejected": -1329.2283935546875, |
|
"loss": 0.0479, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.19921013712882996, |
|
"rewards/margins": 0.30057448148727417, |
|
"rewards/rejected": -0.4997846186161041, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7072216536885855e-09, |
|
"logits/chosen": -1.4513555765151978, |
|
"logits/rejected": -0.7634484767913818, |
|
"logps/chosen": -613.1361083984375, |
|
"logps/rejected": -1215.9375, |
|
"loss": 0.0642, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18391281366348267, |
|
"rewards/margins": 0.2894715964794159, |
|
"rewards/rejected": -0.47338438034057617, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.7327344598702667e-09, |
|
"logits/chosen": -1.5481555461883545, |
|
"logits/rejected": -0.75025874376297, |
|
"logps/chosen": -655.292236328125, |
|
"logps/rejected": -1392.68115234375, |
|
"loss": 0.0376, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.18728521466255188, |
|
"rewards/margins": 0.3374475836753845, |
|
"rewards/rejected": -0.524732768535614, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.747123991141193e-10, |
|
"logits/chosen": -1.4917545318603516, |
|
"logits/rejected": -1.0273144245147705, |
|
"logps/chosen": -579.1278686523438, |
|
"logps/rejected": -1233.640869140625, |
|
"loss": 0.0868, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17262960970401764, |
|
"rewards/margins": 0.2730409801006317, |
|
"rewards/rejected": -0.44567054510116577, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.332211510807427e-10, |
|
"logits/chosen": -1.4177316427230835, |
|
"logits/rejected": -0.9999169111251831, |
|
"logps/chosen": -677.0986328125, |
|
"logps/rejected": -1409.077880859375, |
|
"loss": 0.0463, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.19204583764076233, |
|
"rewards/margins": 0.3291808068752289, |
|
"rewards/rejected": -0.5212266445159912, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0830763387897902e-10, |
|
"logits/chosen": -1.4876052141189575, |
|
"logits/rejected": -0.8847878575325012, |
|
"logps/chosen": -651.7437744140625, |
|
"logps/rejected": -1331.395751953125, |
|
"loss": 0.0532, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.15324924886226654, |
|
"rewards/margins": 0.33165210485458374, |
|
"rewards/rejected": -0.4849013388156891, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.6436065435409546, |
|
"logits/rejected": -0.8282138705253601, |
|
"logps/chosen": -606.6455688476562, |
|
"logps/rejected": -1475.3424072265625, |
|
"loss": 0.0392, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.18834789097309113, |
|
"rewards/margins": 0.36257123947143555, |
|
"rewards/rejected": -0.5509191751480103, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3750, |
|
"total_flos": 0.0, |
|
"train_loss": 0.07734704875151316, |
|
"train_runtime": 15655.3296, |
|
"train_samples_per_second": 0.958, |
|
"train_steps_per_second": 0.24 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3750, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|