|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.208333333333334e-07, |
|
"logits/chosen": -2.980285167694092, |
|
"logits/rejected": -2.87275767326355, |
|
"logps/chosen": -313.4390563964844, |
|
"logps/rejected": -236.1754150390625, |
|
"loss": 0.6931, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1, |
|
"use_label": 10.0 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.208333333333334e-06, |
|
"logits/chosen": -2.861464262008667, |
|
"logits/rejected": -2.907951593399048, |
|
"logps/chosen": -323.6517333984375, |
|
"logps/rejected": -284.9451904296875, |
|
"loss": 0.6921, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0027037172112613916, |
|
"rewards/margins": 0.001292458618991077, |
|
"rewards/rejected": 0.0014112575445324183, |
|
"step": 10, |
|
"use_label": 90.0 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666668e-05, |
|
"logits/chosen": -2.7527613639831543, |
|
"logits/rejected": -2.796025037765503, |
|
"logps/chosen": -236.6191864013672, |
|
"logps/rejected": -242.22232055664062, |
|
"loss": 0.6831, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.007748906500637531, |
|
"rewards/margins": 0.018490687012672424, |
|
"rewards/rejected": -0.010741781443357468, |
|
"step": 20, |
|
"use_label": 242.0 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5625e-05, |
|
"logits/chosen": -2.8575313091278076, |
|
"logits/rejected": -2.829209804534912, |
|
"logps/chosen": -278.3554992675781, |
|
"logps/rejected": -252.61123657226562, |
|
"loss": 0.6574, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.026403894647955894, |
|
"rewards/margins": 0.09085250645875931, |
|
"rewards/rejected": -0.06444860994815826, |
|
"step": 30, |
|
"use_label": 402.0 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"logits/chosen": -2.840946674346924, |
|
"logits/rejected": -2.8493659496307373, |
|
"logps/chosen": -281.32928466796875, |
|
"logps/rejected": -277.8607482910156, |
|
"loss": 0.6339, |
|
"pred_label": 0.4749999940395355, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.02641097828745842, |
|
"rewards/margins": 0.2079576551914215, |
|
"rewards/rejected": -0.1815466731786728, |
|
"step": 40, |
|
"use_label": 561.5250244140625 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.604166666666667e-05, |
|
"logits/chosen": -2.8537254333496094, |
|
"logits/rejected": -2.8391127586364746, |
|
"logps/chosen": -266.79296875, |
|
"logps/rejected": -262.0001220703125, |
|
"loss": 0.5836, |
|
"pred_label": 5.775000095367432, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.06846104562282562, |
|
"rewards/margins": 0.33990827202796936, |
|
"rewards/rejected": -0.4083693027496338, |
|
"step": 50, |
|
"use_label": 716.2249755859375 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.125e-05, |
|
"logits/chosen": -2.8152918815612793, |
|
"logits/rejected": -2.804291009902954, |
|
"logps/chosen": -301.41326904296875, |
|
"logps/rejected": -291.53997802734375, |
|
"loss": 0.5613, |
|
"pred_label": 28.600000381469727, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.09980294108390808, |
|
"rewards/margins": 0.4436502456665039, |
|
"rewards/rejected": -0.5434532165527344, |
|
"step": 60, |
|
"use_label": 853.4000244140625 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6458333333333336e-05, |
|
"logits/chosen": -2.8159656524658203, |
|
"logits/rejected": -2.807382345199585, |
|
"logps/chosen": -295.85113525390625, |
|
"logps/rejected": -281.4297180175781, |
|
"loss": 0.4736, |
|
"pred_label": 72.82499694824219, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.15376296639442444, |
|
"rewards/margins": 0.6926594972610474, |
|
"rewards/rejected": -0.8464224934577942, |
|
"step": 70, |
|
"use_label": 969.1749877929688 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.166666666666667e-05, |
|
"logits/chosen": -2.760671377182007, |
|
"logits/rejected": -2.745089292526245, |
|
"logps/chosen": -309.682861328125, |
|
"logps/rejected": -294.1726989746094, |
|
"loss": 0.3682, |
|
"pred_label": 129.4499969482422, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.4377492070198059, |
|
"rewards/margins": 1.0782606601715088, |
|
"rewards/rejected": -1.516010046005249, |
|
"step": 80, |
|
"use_label": 1072.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.6875e-05, |
|
"logits/chosen": -2.689037799835205, |
|
"logits/rejected": -2.7456631660461426, |
|
"logps/chosen": -298.6680603027344, |
|
"logps/rejected": -281.3171081542969, |
|
"loss": 0.3626, |
|
"pred_label": 213.02499389648438, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.28849169611930847, |
|
"rewards/margins": 1.2157728672027588, |
|
"rewards/rejected": -1.5042643547058105, |
|
"step": 90, |
|
"use_label": 1148.9749755859375 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.976717112922003e-05, |
|
"logits/chosen": -2.722339153289795, |
|
"logits/rejected": -2.718428611755371, |
|
"logps/chosen": -287.2553405761719, |
|
"logps/rejected": -325.00335693359375, |
|
"loss": 0.3168, |
|
"pred_label": 303.125, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3123645782470703, |
|
"rewards/margins": 1.8343286514282227, |
|
"rewards/rejected": -3.146693468093872, |
|
"step": 100, |
|
"use_label": 1218.875 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.918509895227008e-05, |
|
"logits/chosen": -2.694249391555786, |
|
"logits/rejected": -2.633723497390747, |
|
"logps/chosen": -288.16387939453125, |
|
"logps/rejected": -293.7809143066406, |
|
"loss": 0.2607, |
|
"pred_label": 402.625, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.3489272594451904, |
|
"rewards/margins": 2.1118221282958984, |
|
"rewards/rejected": -4.46074914932251, |
|
"step": 110, |
|
"use_label": 1279.375 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.860302677532014e-05, |
|
"logits/chosen": -2.718721866607666, |
|
"logits/rejected": -2.699587345123291, |
|
"logps/chosen": -292.71112060546875, |
|
"logps/rejected": -279.4311218261719, |
|
"loss": 0.2879, |
|
"pred_label": 507.5, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.3198258876800537, |
|
"rewards/margins": 1.854914903640747, |
|
"rewards/rejected": -3.1747405529022217, |
|
"step": 120, |
|
"use_label": 1334.5 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.80209545983702e-05, |
|
"logits/chosen": -2.7755086421966553, |
|
"logits/rejected": -2.7087435722351074, |
|
"logps/chosen": -329.43267822265625, |
|
"logps/rejected": -308.2383728027344, |
|
"loss": 0.2811, |
|
"pred_label": 611.7249755859375, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.216526508331299, |
|
"rewards/margins": 1.2961757183074951, |
|
"rewards/rejected": -3.512702226638794, |
|
"step": 130, |
|
"use_label": 1390.2750244140625 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.743888242142026e-05, |
|
"logits/chosen": -2.6767191886901855, |
|
"logits/rejected": -2.643078327178955, |
|
"logps/chosen": -318.53924560546875, |
|
"logps/rejected": -322.80078125, |
|
"loss": 0.1985, |
|
"pred_label": 719.9749755859375, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -6.043245792388916, |
|
"rewards/margins": 2.734940767288208, |
|
"rewards/rejected": -8.778186798095703, |
|
"step": 140, |
|
"use_label": 1442.0250244140625 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.685681024447032e-05, |
|
"logits/chosen": -2.008868932723999, |
|
"logits/rejected": -2.024056911468506, |
|
"logps/chosen": -2590.871337890625, |
|
"logps/rejected": -2381.74951171875, |
|
"loss": 0.037, |
|
"pred_label": 853.2249755859375, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -229.59854125976562, |
|
"rewards/margins": -19.598337173461914, |
|
"rewards/rejected": -210.0001983642578, |
|
"step": 150, |
|
"use_label": 1468.7750244140625 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.6274738067520374e-05, |
|
"logits/chosen": -3.4510104656219482, |
|
"logits/rejected": -3.4814345836639404, |
|
"logps/chosen": -5424.06201171875, |
|
"logps/rejected": -4965.0986328125, |
|
"loss": 0.0229, |
|
"pred_label": 1008.5750122070312, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -516.1680297851562, |
|
"rewards/margins": -46.562461853027344, |
|
"rewards/rejected": -469.6055603027344, |
|
"step": 160, |
|
"use_label": 1473.425048828125 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5692665890570435e-05, |
|
"logits/chosen": -3.6305947303771973, |
|
"logits/rejected": -3.6412110328674316, |
|
"logps/chosen": -5863.26220703125, |
|
"logps/rejected": -4459.16650390625, |
|
"loss": 0.0239, |
|
"pred_label": 1161.25, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -556.6785888671875, |
|
"rewards/margins": -135.7264862060547, |
|
"rewards/rejected": -420.95208740234375, |
|
"step": 170, |
|
"use_label": 1480.75 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.511059371362049e-05, |
|
"logits/chosen": -3.826639175415039, |
|
"logits/rejected": -3.826951503753662, |
|
"logps/chosen": -5895.041015625, |
|
"logps/rejected": -5085.115234375, |
|
"loss": 0.021, |
|
"pred_label": 1315.800048828125, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -563.9112548828125, |
|
"rewards/margins": -82.46792602539062, |
|
"rewards/rejected": -481.443359375, |
|
"step": 180, |
|
"use_label": 1486.199951171875 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.452852153667055e-05, |
|
"logits/chosen": -3.8287880420684814, |
|
"logits/rejected": -3.829810619354248, |
|
"logps/chosen": -6264.6552734375, |
|
"logps/rejected": -4964.57666015625, |
|
"loss": 0.0083, |
|
"pred_label": 1472.0, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -595.7398681640625, |
|
"rewards/margins": -125.386474609375, |
|
"rewards/rejected": -470.3534240722656, |
|
"step": 190, |
|
"use_label": 1490.0 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.394644935972061e-05, |
|
"logits/chosen": -3.800830364227295, |
|
"logits/rejected": -3.8033287525177, |
|
"logps/chosen": -5603.17431640625, |
|
"logps/rejected": -5157.21826171875, |
|
"loss": 0.0242, |
|
"pred_label": 1629.2249755859375, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -534.1845092773438, |
|
"rewards/margins": -45.28679656982422, |
|
"rewards/rejected": -488.897705078125, |
|
"step": 200, |
|
"use_label": 1492.7750244140625 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.336437718277067e-05, |
|
"logits/chosen": -3.7285819053649902, |
|
"logits/rejected": -3.7191810607910156, |
|
"logps/chosen": -6087.337890625, |
|
"logps/rejected": -5075.240234375, |
|
"loss": 0.0165, |
|
"pred_label": 1786.125, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -578.6089477539062, |
|
"rewards/margins": -96.50392150878906, |
|
"rewards/rejected": -482.1050720214844, |
|
"step": 210, |
|
"use_label": 1495.875 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.278230500582072e-05, |
|
"logits/chosen": -3.7653274536132812, |
|
"logits/rejected": -3.7663722038269043, |
|
"logps/chosen": -5865.328125, |
|
"logps/rejected": -5630.29248046875, |
|
"loss": 0.0263, |
|
"pred_label": 1942.125, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -559.0337524414062, |
|
"rewards/margins": -23.070148468017578, |
|
"rewards/rejected": -535.9635620117188, |
|
"step": 220, |
|
"use_label": 1499.875 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.220023282887078e-05, |
|
"logits/chosen": -3.8049216270446777, |
|
"logits/rejected": -3.8088595867156982, |
|
"logps/chosen": -6366.97509765625, |
|
"logps/rejected": -5381.87548828125, |
|
"loss": 0.0175, |
|
"pred_label": 2098.27490234375, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -605.8801879882812, |
|
"rewards/margins": -94.10356140136719, |
|
"rewards/rejected": -511.776611328125, |
|
"step": 230, |
|
"use_label": 1503.7249755859375 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.161816065192084e-05, |
|
"logits/chosen": -3.80168080329895, |
|
"logits/rejected": -3.802356243133545, |
|
"logps/chosen": -5398.353515625, |
|
"logps/rejected": -4512.5625, |
|
"loss": 0.0201, |
|
"pred_label": 2253.375, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -512.3775634765625, |
|
"rewards/margins": -83.42332458496094, |
|
"rewards/rejected": -428.95428466796875, |
|
"step": 240, |
|
"use_label": 1508.625 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.10360884749709e-05, |
|
"logits/chosen": -3.815431594848633, |
|
"logits/rejected": -3.8156495094299316, |
|
"logps/chosen": -6113.8330078125, |
|
"logps/rejected": -5319.52783203125, |
|
"loss": 0.0204, |
|
"pred_label": 2408.97509765625, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -582.3192138671875, |
|
"rewards/margins": -77.30831146240234, |
|
"rewards/rejected": -505.01092529296875, |
|
"step": 250, |
|
"use_label": 1513.0250244140625 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.045401629802096e-05, |
|
"logits/chosen": -3.8084158897399902, |
|
"logits/rejected": -3.8078300952911377, |
|
"logps/chosen": -5415.3056640625, |
|
"logps/rejected": -4981.9599609375, |
|
"loss": 0.0144, |
|
"pred_label": 2563.925048828125, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -516.6696166992188, |
|
"rewards/margins": -43.502445220947266, |
|
"rewards/rejected": -473.16717529296875, |
|
"step": 260, |
|
"use_label": 1518.074951171875 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.9871944121071014e-05, |
|
"logits/chosen": -3.8132598400115967, |
|
"logits/rejected": -3.8127427101135254, |
|
"logps/chosen": -5882.3447265625, |
|
"logps/rejected": -5165.20703125, |
|
"loss": 0.0155, |
|
"pred_label": 2719.97509765625, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -559.0473022460938, |
|
"rewards/margins": -70.0018310546875, |
|
"rewards/rejected": -489.0455017089844, |
|
"step": 270, |
|
"use_label": 1522.0250244140625 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.928987194412107e-05, |
|
"logits/chosen": -3.8188316822052, |
|
"logits/rejected": -3.818444013595581, |
|
"logps/chosen": -5914.48486328125, |
|
"logps/rejected": -5317.22021484375, |
|
"loss": 0.0222, |
|
"pred_label": 2876.02490234375, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -562.0521240234375, |
|
"rewards/margins": -56.552947998046875, |
|
"rewards/rejected": -505.4991760253906, |
|
"step": 280, |
|
"use_label": 1525.9749755859375 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.870779976717113e-05, |
|
"logits/chosen": -3.819366931915283, |
|
"logits/rejected": -3.82012939453125, |
|
"logps/chosen": -5673.76416015625, |
|
"logps/rejected": -4572.4462890625, |
|
"loss": 0.0131, |
|
"pred_label": 3034.27490234375, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -538.0841674804688, |
|
"rewards/margins": -103.86119079589844, |
|
"rewards/rejected": -434.2230529785156, |
|
"step": 290, |
|
"use_label": 1527.7249755859375 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.812572759022119e-05, |
|
"logits/chosen": -3.801610231399536, |
|
"logits/rejected": -3.802950382232666, |
|
"logps/chosen": -5732.44921875, |
|
"logps/rejected": -4702.1435546875, |
|
"loss": 0.0155, |
|
"pred_label": 3192.824951171875, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -546.8770751953125, |
|
"rewards/margins": -100.64713287353516, |
|
"rewards/rejected": -446.2298889160156, |
|
"step": 300, |
|
"use_label": 1529.175048828125 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7543655413271246e-05, |
|
"logits/chosen": -3.7929720878601074, |
|
"logits/rejected": -3.7945361137390137, |
|
"logps/chosen": -5449.23046875, |
|
"logps/rejected": -5404.5537109375, |
|
"loss": 0.0162, |
|
"pred_label": 3350.675048828125, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -518.22998046875, |
|
"rewards/margins": -6.628878593444824, |
|
"rewards/rejected": -511.60107421875, |
|
"step": 310, |
|
"use_label": 1531.324951171875 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.696158323632131e-05, |
|
"logits/chosen": -3.804478883743286, |
|
"logits/rejected": -3.808168411254883, |
|
"logps/chosen": -6255.1689453125, |
|
"logps/rejected": -5367.044921875, |
|
"loss": 0.0127, |
|
"pred_label": 3508.02490234375, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -595.9188842773438, |
|
"rewards/margins": -87.29522705078125, |
|
"rewards/rejected": -508.6236267089844, |
|
"step": 320, |
|
"use_label": 1533.9749755859375 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.637951105937136e-05, |
|
"logits/chosen": -3.806224822998047, |
|
"logits/rejected": -3.809751510620117, |
|
"logps/chosen": -5673.6767578125, |
|
"logps/rejected": -4599.72119140625, |
|
"loss": 0.0221, |
|
"pred_label": 3666.10009765625, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -540.2728881835938, |
|
"rewards/margins": -103.72891998291016, |
|
"rewards/rejected": -436.5439453125, |
|
"step": 330, |
|
"use_label": 1535.9000244140625 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.579743888242142e-05, |
|
"logits/chosen": -3.807875871658325, |
|
"logits/rejected": -3.8099751472473145, |
|
"logps/chosen": -5879.23486328125, |
|
"logps/rejected": -4872.8642578125, |
|
"loss": 0.0213, |
|
"pred_label": 3819.85009765625, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -559.0055541992188, |
|
"rewards/margins": -95.80415344238281, |
|
"rewards/rejected": -463.20135498046875, |
|
"step": 340, |
|
"use_label": 1542.1500244140625 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.5215366705471484e-05, |
|
"logits/chosen": -3.8283824920654297, |
|
"logits/rejected": -3.8290863037109375, |
|
"logps/chosen": -6421.64453125, |
|
"logps/rejected": -5712.4833984375, |
|
"loss": 0.0197, |
|
"pred_label": 3974.35009765625, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -612.4450073242188, |
|
"rewards/margins": -68.9627685546875, |
|
"rewards/rejected": -543.482177734375, |
|
"step": 350, |
|
"use_label": 1547.6500244140625 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.463329452852154e-05, |
|
"logits/chosen": -3.8224472999572754, |
|
"logits/rejected": -3.822279691696167, |
|
"logps/chosen": -5800.58251953125, |
|
"logps/rejected": -5399.095703125, |
|
"loss": 0.0133, |
|
"pred_label": 4133.25, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -552.7788696289062, |
|
"rewards/margins": -39.389373779296875, |
|
"rewards/rejected": -513.3894653320312, |
|
"step": 360, |
|
"use_label": 1548.75 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.40512223515716e-05, |
|
"logits/chosen": -3.8213393688201904, |
|
"logits/rejected": -3.8208725452423096, |
|
"logps/chosen": -5875.4296875, |
|
"logps/rejected": -5105.2080078125, |
|
"loss": 0.0144, |
|
"pred_label": 4289.4501953125, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -559.922607421875, |
|
"rewards/margins": -76.97410583496094, |
|
"rewards/rejected": -482.9485778808594, |
|
"step": 370, |
|
"use_label": 1552.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3469150174621654e-05, |
|
"logits/chosen": -3.786717176437378, |
|
"logits/rejected": -3.7882437705993652, |
|
"logps/chosen": -6002.546875, |
|
"logps/rejected": -5331.99560546875, |
|
"loss": 0.0231, |
|
"pred_label": 4444.1748046875, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -571.731689453125, |
|
"rewards/margins": -64.58997344970703, |
|
"rewards/rejected": -507.1416931152344, |
|
"step": 380, |
|
"use_label": 1557.824951171875 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.288707799767171e-05, |
|
"logits/chosen": -3.6485819816589355, |
|
"logits/rejected": -3.6548709869384766, |
|
"logps/chosen": -5633.61083984375, |
|
"logps/rejected": -4738.9384765625, |
|
"loss": 0.0232, |
|
"pred_label": 4600.875, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -535.0819091796875, |
|
"rewards/margins": -85.61624145507812, |
|
"rewards/rejected": -449.46563720703125, |
|
"step": 390, |
|
"use_label": 1561.125 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2305005820721776e-05, |
|
"logits/chosen": -3.767920732498169, |
|
"logits/rejected": -3.767390489578247, |
|
"logps/chosen": -6094.14697265625, |
|
"logps/rejected": -5175.2177734375, |
|
"loss": 0.0231, |
|
"pred_label": 4752.77490234375, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -578.7096557617188, |
|
"rewards/margins": -87.12177276611328, |
|
"rewards/rejected": -491.58782958984375, |
|
"step": 400, |
|
"use_label": 1569.2249755859375 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.172293364377183e-05, |
|
"logits/chosen": -3.7445671558380127, |
|
"logits/rejected": -3.754565715789795, |
|
"logps/chosen": -6168.5, |
|
"logps/rejected": -5233.85009765625, |
|
"loss": 0.0123, |
|
"pred_label": 4906.02490234375, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -587.0402221679688, |
|
"rewards/margins": -89.51008605957031, |
|
"rewards/rejected": -497.5301818847656, |
|
"step": 410, |
|
"use_label": 1575.9749755859375 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1140861466821885e-05, |
|
"logits/chosen": -3.766185760498047, |
|
"logits/rejected": -3.764925003051758, |
|
"logps/chosen": -4928.68701171875, |
|
"logps/rejected": -4211.3857421875, |
|
"loss": 0.0286, |
|
"pred_label": 5062.2001953125, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -468.8106994628906, |
|
"rewards/margins": -72.5836410522461, |
|
"rewards/rejected": -396.22705078125, |
|
"step": 420, |
|
"use_label": 1579.800048828125 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.055878928987195e-05, |
|
"logits/chosen": -3.76971173286438, |
|
"logits/rejected": -3.766024351119995, |
|
"logps/chosen": -5624.13330078125, |
|
"logps/rejected": -5330.14599609375, |
|
"loss": 0.0128, |
|
"pred_label": 5218.125, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -534.6233520507812, |
|
"rewards/margins": -27.560443878173828, |
|
"rewards/rejected": -507.06292724609375, |
|
"step": 430, |
|
"use_label": 1583.875 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9976717112922005e-05, |
|
"logits/chosen": -3.8054771423339844, |
|
"logits/rejected": -3.8054962158203125, |
|
"logps/chosen": -5717.0419921875, |
|
"logps/rejected": -4923.8671875, |
|
"loss": 0.0159, |
|
"pred_label": 5373.875, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -543.377197265625, |
|
"rewards/margins": -76.2901382446289, |
|
"rewards/rejected": -467.0870666503906, |
|
"step": 440, |
|
"use_label": 1588.125 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.939464493597206e-05, |
|
"logits/chosen": -3.7968783378601074, |
|
"logits/rejected": -3.7904553413391113, |
|
"logps/chosen": -4891.21484375, |
|
"logps/rejected": -4621.8271484375, |
|
"loss": 0.0209, |
|
"pred_label": 5531.77490234375, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -466.149658203125, |
|
"rewards/margins": -26.93206787109375, |
|
"rewards/rejected": -439.21759033203125, |
|
"step": 450, |
|
"use_label": 1590.2249755859375 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.881257275902212e-05, |
|
"logits/chosen": -3.8137125968933105, |
|
"logits/rejected": -3.8143749237060547, |
|
"logps/chosen": -6517.14404296875, |
|
"logps/rejected": -5308.48095703125, |
|
"loss": 0.0172, |
|
"pred_label": 5688.375, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -621.5343017578125, |
|
"rewards/margins": -117.5860595703125, |
|
"rewards/rejected": -503.9481506347656, |
|
"step": 460, |
|
"use_label": 1593.625 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8230500582072178e-05, |
|
"logits/chosen": -3.7992587089538574, |
|
"logits/rejected": -3.799516201019287, |
|
"logps/chosen": -5745.47314453125, |
|
"logps/rejected": -5189.96923828125, |
|
"loss": 0.0155, |
|
"pred_label": 5845.52490234375, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -547.8372802734375, |
|
"rewards/margins": -54.52460861206055, |
|
"rewards/rejected": -493.3126525878906, |
|
"step": 470, |
|
"use_label": 1596.4749755859375 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7648428405122233e-05, |
|
"logits/chosen": -3.761199951171875, |
|
"logits/rejected": -3.7633252143859863, |
|
"logps/chosen": -5170.09765625, |
|
"logps/rejected": -5077.68310546875, |
|
"loss": 0.0168, |
|
"pred_label": 6002.375, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -492.452392578125, |
|
"rewards/margins": -9.535995483398438, |
|
"rewards/rejected": -482.9164123535156, |
|
"step": 480, |
|
"use_label": 1599.625 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7066356228172297e-05, |
|
"logits/chosen": -3.7587084770202637, |
|
"logits/rejected": -3.758279323577881, |
|
"logps/chosen": -5773.9345703125, |
|
"logps/rejected": -4788.09765625, |
|
"loss": 0.0171, |
|
"pred_label": 6158.6748046875, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -550.6905517578125, |
|
"rewards/margins": -95.62019348144531, |
|
"rewards/rejected": -455.0704040527344, |
|
"step": 490, |
|
"use_label": 1603.324951171875 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6484284051222352e-05, |
|
"logits/chosen": -3.767758846282959, |
|
"logits/rejected": -3.7685482501983643, |
|
"logps/chosen": -6388.5419921875, |
|
"logps/rejected": -5069.38916015625, |
|
"loss": 0.0222, |
|
"pred_label": 6314.52490234375, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -609.9085693359375, |
|
"rewards/margins": -128.79580688476562, |
|
"rewards/rejected": -481.1127014160156, |
|
"step": 500, |
|
"use_label": 1607.4749755859375 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.590221187427241e-05, |
|
"logits/chosen": -3.7820258140563965, |
|
"logits/rejected": -3.784348964691162, |
|
"logps/chosen": -5971.962890625, |
|
"logps/rejected": -4760.34912109375, |
|
"loss": 0.0301, |
|
"pred_label": 6469.9501953125, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -569.66259765625, |
|
"rewards/margins": -116.97715759277344, |
|
"rewards/rejected": -452.6853942871094, |
|
"step": 510, |
|
"use_label": 1612.050048828125 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.532013969732247e-05, |
|
"logits/chosen": -3.718219041824341, |
|
"logits/rejected": -3.72932767868042, |
|
"logps/chosen": -6069.69580078125, |
|
"logps/rejected": -5217.16015625, |
|
"loss": 0.0223, |
|
"pred_label": 6623.9248046875, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -577.7742309570312, |
|
"rewards/margins": -83.02960205078125, |
|
"rewards/rejected": -494.74456787109375, |
|
"step": 520, |
|
"use_label": 1618.074951171875 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4738067520372525e-05, |
|
"logits/chosen": -3.7202675342559814, |
|
"logits/rejected": -3.7229580879211426, |
|
"logps/chosen": -6532.5537109375, |
|
"logps/rejected": -5770.68359375, |
|
"loss": 0.0095, |
|
"pred_label": 6780.8251953125, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -623.2237548828125, |
|
"rewards/margins": -75.55280303955078, |
|
"rewards/rejected": -547.6709594726562, |
|
"step": 530, |
|
"use_label": 1621.175048828125 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.4155995343422587e-05, |
|
"logits/chosen": -3.759662628173828, |
|
"logits/rejected": -3.7599411010742188, |
|
"logps/chosen": -6315.06787109375, |
|
"logps/rejected": -5507.916015625, |
|
"loss": 0.01, |
|
"pred_label": 6938.4501953125, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -602.653076171875, |
|
"rewards/margins": -78.45845031738281, |
|
"rewards/rejected": -524.1947021484375, |
|
"step": 540, |
|
"use_label": 1623.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3573923166472644e-05, |
|
"logits/chosen": -3.738492488861084, |
|
"logits/rejected": -3.7378597259521484, |
|
"logps/chosen": -5971.4853515625, |
|
"logps/rejected": -5198.08935546875, |
|
"loss": 0.0129, |
|
"pred_label": 7093.9501953125, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -568.2282104492188, |
|
"rewards/margins": -74.6135025024414, |
|
"rewards/rejected": -493.61468505859375, |
|
"step": 550, |
|
"use_label": 1628.050048828125 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2991850989522702e-05, |
|
"logits/chosen": -3.794232130050659, |
|
"logits/rejected": -3.793727397918701, |
|
"logps/chosen": -5239.75048828125, |
|
"logps/rejected": -4281.4697265625, |
|
"loss": 0.0136, |
|
"pred_label": 7250.25, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -496.53460693359375, |
|
"rewards/margins": -90.13624572753906, |
|
"rewards/rejected": -406.3983459472656, |
|
"step": 560, |
|
"use_label": 1631.75 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.240977881257276e-05, |
|
"logits/chosen": -3.7495296001434326, |
|
"logits/rejected": -3.7504706382751465, |
|
"logps/chosen": -6018.4404296875, |
|
"logps/rejected": -5286.20751953125, |
|
"loss": 0.0237, |
|
"pred_label": 7407.02490234375, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -574.8952026367188, |
|
"rewards/margins": -72.47772979736328, |
|
"rewards/rejected": -502.41748046875, |
|
"step": 570, |
|
"use_label": 1634.9749755859375 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1827706635622818e-05, |
|
"logits/chosen": -3.7940216064453125, |
|
"logits/rejected": -3.794236421585083, |
|
"logps/chosen": -5965.88134765625, |
|
"logps/rejected": -4998.4501953125, |
|
"loss": 0.0206, |
|
"pred_label": 7561.5498046875, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -568.77734375, |
|
"rewards/margins": -95.3790054321289, |
|
"rewards/rejected": -473.3983459472656, |
|
"step": 580, |
|
"use_label": 1640.449951171875 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.124563445867288e-05, |
|
"logits/chosen": -3.725088119506836, |
|
"logits/rejected": -3.7297370433807373, |
|
"logps/chosen": -5610.734375, |
|
"logps/rejected": -5206.3388671875, |
|
"loss": 0.0217, |
|
"pred_label": 7717.9248046875, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -533.9136962890625, |
|
"rewards/margins": -41.3577766418457, |
|
"rewards/rejected": -492.555908203125, |
|
"step": 590, |
|
"use_label": 1644.074951171875 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0663562281722934e-05, |
|
"logits/chosen": -2.4542346000671387, |
|
"logits/rejected": -2.457996129989624, |
|
"logps/chosen": -5316.2861328125, |
|
"logps/rejected": -4824.51171875, |
|
"loss": 0.0176, |
|
"pred_label": 7873.5498046875, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -500.2923889160156, |
|
"rewards/margins": -48.11725616455078, |
|
"rewards/rejected": -452.1751403808594, |
|
"step": 600, |
|
"use_label": 1648.449951171875 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0081490104772992e-05, |
|
"logits/chosen": 1.6535043716430664, |
|
"logits/rejected": 1.6919664144515991, |
|
"logps/chosen": -4125.20458984375, |
|
"logps/rejected": -3309.930419921875, |
|
"loss": 0.019, |
|
"pred_label": 8029.0, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -381.9295349121094, |
|
"rewards/margins": -74.89913177490234, |
|
"rewards/rejected": -307.0304260253906, |
|
"step": 610, |
|
"use_label": 1653.0 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9499417927823053e-05, |
|
"logits/chosen": 3.7263665199279785, |
|
"logits/rejected": 3.714616298675537, |
|
"logps/chosen": -5211.14453125, |
|
"logps/rejected": -4633.3828125, |
|
"loss": 0.0148, |
|
"pred_label": 8184.77490234375, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -493.1163024902344, |
|
"rewards/margins": -54.99699783325195, |
|
"rewards/rejected": -438.11932373046875, |
|
"step": 620, |
|
"use_label": 1657.2249755859375 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8917345750873107e-05, |
|
"logits/chosen": 7.3053741455078125, |
|
"logits/rejected": 7.303783416748047, |
|
"logps/chosen": -7381.1630859375, |
|
"logps/rejected": -6444.02734375, |
|
"loss": 0.009, |
|
"pred_label": 8343.1748046875, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -709.6043701171875, |
|
"rewards/margins": -91.3189697265625, |
|
"rewards/rejected": -618.2854614257812, |
|
"step": 630, |
|
"use_label": 1658.824951171875 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.833527357392317e-05, |
|
"logits/chosen": 8.230302810668945, |
|
"logits/rejected": 8.22825813293457, |
|
"logps/chosen": -7595.42724609375, |
|
"logps/rejected": -7036.8515625, |
|
"loss": 0.0111, |
|
"pred_label": 8500.5, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -734.2236328125, |
|
"rewards/margins": -55.39581298828125, |
|
"rewards/rejected": -678.8277587890625, |
|
"step": 640, |
|
"use_label": 1661.5 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7753201396973227e-05, |
|
"logits/chosen": 8.20081901550293, |
|
"logits/rejected": 8.195457458496094, |
|
"logps/chosen": -9194.9013671875, |
|
"logps/rejected": -7898.6552734375, |
|
"loss": 0.0088, |
|
"pred_label": 8658.349609375, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -889.2952880859375, |
|
"rewards/margins": -127.2280044555664, |
|
"rewards/rejected": -762.0673217773438, |
|
"step": 650, |
|
"use_label": 1663.6500244140625 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.717112922002328e-05, |
|
"logits/chosen": 9.882159233093262, |
|
"logits/rejected": 9.892133712768555, |
|
"logps/chosen": -10026.548828125, |
|
"logps/rejected": -8868.25, |
|
"loss": 0.0147, |
|
"pred_label": 8817.3251953125, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -973.1184692382812, |
|
"rewards/margins": -111.28104400634766, |
|
"rewards/rejected": -861.83740234375, |
|
"step": 660, |
|
"use_label": 1664.675048828125 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6589057043073342e-05, |
|
"logits/chosen": 11.399931907653809, |
|
"logits/rejected": 11.406278610229492, |
|
"logps/chosen": -11008.333984375, |
|
"logps/rejected": -9124.1875, |
|
"loss": 0.0161, |
|
"pred_label": 8974.8251953125, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -1072.7261962890625, |
|
"rewards/margins": -184.44720458984375, |
|
"rewards/rejected": -888.2789916992188, |
|
"step": 670, |
|
"use_label": 1667.175048828125 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.60069848661234e-05, |
|
"logits/chosen": 9.982951164245605, |
|
"logits/rejected": 9.928037643432617, |
|
"logps/chosen": -10043.669921875, |
|
"logps/rejected": -9005.763671875, |
|
"loss": 0.0146, |
|
"pred_label": 9133.150390625, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -975.6735229492188, |
|
"rewards/margins": -100.92012023925781, |
|
"rewards/rejected": -874.75341796875, |
|
"step": 680, |
|
"use_label": 1668.8499755859375 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5424912689173458e-05, |
|
"logits/chosen": 3.900209903717041, |
|
"logits/rejected": 3.7533345222473145, |
|
"logps/chosen": -5247.2783203125, |
|
"logps/rejected": -4165.42138671875, |
|
"loss": 0.015, |
|
"pred_label": 9290.625, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -499.68951416015625, |
|
"rewards/margins": -104.0757827758789, |
|
"rewards/rejected": -395.61370849609375, |
|
"step": 690, |
|
"use_label": 1671.375 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4842840512223516e-05, |
|
"logits/chosen": 3.9459800720214844, |
|
"logits/rejected": 3.741647243499756, |
|
"logps/chosen": -6615.76708984375, |
|
"logps/rejected": -5040.81982421875, |
|
"loss": 0.0158, |
|
"pred_label": 9445.275390625, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -634.0076904296875, |
|
"rewards/margins": -154.7536163330078, |
|
"rewards/rejected": -479.25408935546875, |
|
"step": 700, |
|
"use_label": 1676.7249755859375 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4260768335273575e-05, |
|
"logits/chosen": 5.425192832946777, |
|
"logits/rejected": 5.073692321777344, |
|
"logps/chosen": -8362.833984375, |
|
"logps/rejected": -6741.9013671875, |
|
"loss": 0.0127, |
|
"pred_label": 9602.0498046875, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -807.3259887695312, |
|
"rewards/margins": -157.26266479492188, |
|
"rewards/rejected": -650.0633544921875, |
|
"step": 710, |
|
"use_label": 1679.949951171875 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3678696158323633e-05, |
|
"logits/chosen": 10.10822582244873, |
|
"logits/rejected": 10.002889633178711, |
|
"logps/chosen": -10245.421875, |
|
"logps/rejected": -9104.876953125, |
|
"loss": 0.023, |
|
"pred_label": 9759.0751953125, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -997.3968505859375, |
|
"rewards/margins": -112.24949645996094, |
|
"rewards/rejected": -885.1474609375, |
|
"step": 720, |
|
"use_label": 1682.925048828125 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.309662398137369e-05, |
|
"logits/chosen": 10.97143840789795, |
|
"logits/rejected": 10.992796897888184, |
|
"logps/chosen": -10079.0634765625, |
|
"logps/rejected": -8320.7255859375, |
|
"loss": 0.0134, |
|
"pred_label": 9917.2001953125, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -978.1613159179688, |
|
"rewards/margins": -170.25567626953125, |
|
"rewards/rejected": -807.9056396484375, |
|
"step": 730, |
|
"use_label": 1684.800048828125 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2514551804423749e-05, |
|
"logits/chosen": 12.233144760131836, |
|
"logits/rejected": 12.248846054077148, |
|
"logps/chosen": -12818.298828125, |
|
"logps/rejected": -11287.875, |
|
"loss": 0.0048, |
|
"pred_label": 10076.650390625, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -1251.0927734375, |
|
"rewards/margins": -150.9541473388672, |
|
"rewards/rejected": -1100.138671875, |
|
"step": 740, |
|
"use_label": 1685.3499755859375 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1932479627473807e-05, |
|
"logits/chosen": 12.499679565429688, |
|
"logits/rejected": 12.485097885131836, |
|
"logps/chosen": -11923.8232421875, |
|
"logps/rejected": -10479.5771484375, |
|
"loss": 0.0074, |
|
"pred_label": 10235.875, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -1166.3336181640625, |
|
"rewards/margins": -144.428466796875, |
|
"rewards/rejected": -1021.9050903320312, |
|
"step": 750, |
|
"use_label": 1686.125 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1350407450523866e-05, |
|
"logits/chosen": 7.414717197418213, |
|
"logits/rejected": 7.40515661239624, |
|
"logps/chosen": -9329.333984375, |
|
"logps/rejected": -8092.07177734375, |
|
"loss": 0.0077, |
|
"pred_label": 10393.625, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -903.3870849609375, |
|
"rewards/margins": -120.39128112792969, |
|
"rewards/rejected": -782.995849609375, |
|
"step": 760, |
|
"use_label": 1688.375 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0768335273573923e-05, |
|
"logits/chosen": 3.0171780586242676, |
|
"logits/rejected": 2.9968318939208984, |
|
"logps/chosen": -6287.14453125, |
|
"logps/rejected": -5580.78515625, |
|
"loss": 0.0133, |
|
"pred_label": 10549.275390625, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -598.4849243164062, |
|
"rewards/margins": -68.8787841796875, |
|
"rewards/rejected": -529.6060791015625, |
|
"step": 770, |
|
"use_label": 1692.7249755859375 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0186263096623982e-05, |
|
"logits/chosen": -1.7731034755706787, |
|
"logits/rejected": -1.784906029701233, |
|
"logps/chosen": -4869.2548828125, |
|
"logps/rejected": -4165.048828125, |
|
"loss": 0.0135, |
|
"pred_label": 10705.75, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -458.80615234375, |
|
"rewards/margins": -67.01484680175781, |
|
"rewards/rejected": -391.7913513183594, |
|
"step": 780, |
|
"use_label": 1696.25 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.60419091967404e-06, |
|
"logits/chosen": -0.7930339574813843, |
|
"logits/rejected": -0.8520814180374146, |
|
"logps/chosen": -4772.41162109375, |
|
"logps/rejected": -4426.7998046875, |
|
"loss": 0.0181, |
|
"pred_label": 10861.849609375, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -450.46026611328125, |
|
"rewards/margins": -33.853233337402344, |
|
"rewards/rejected": -416.60699462890625, |
|
"step": 790, |
|
"use_label": 1700.1500244140625 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.022118742724098e-06, |
|
"logits/chosen": -2.1026828289031982, |
|
"logits/rejected": -2.1392974853515625, |
|
"logps/chosen": -5048.7392578125, |
|
"logps/rejected": -4407.5849609375, |
|
"loss": 0.0168, |
|
"pred_label": 11020.3251953125, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -473.4267578125, |
|
"rewards/margins": -60.274497985839844, |
|
"rewards/rejected": -413.1521911621094, |
|
"step": 800, |
|
"use_label": 1701.675048828125 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.440046565774158e-06, |
|
"logits/chosen": -1.4834654331207275, |
|
"logits/rejected": -1.5466824769973755, |
|
"logps/chosen": -3907.274169921875, |
|
"logps/rejected": -3107.385986328125, |
|
"loss": 0.014, |
|
"pred_label": 11177.2001953125, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -362.62506103515625, |
|
"rewards/margins": -74.76776885986328, |
|
"rewards/rejected": -287.8572692871094, |
|
"step": 810, |
|
"use_label": 1704.800048828125 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.857974388824214e-06, |
|
"logits/chosen": -0.9667215347290039, |
|
"logits/rejected": -1.0632926225662231, |
|
"logps/chosen": -3960.51708984375, |
|
"logps/rejected": -3172.82275390625, |
|
"loss": 0.0275, |
|
"pred_label": 11334.25, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -366.71990966796875, |
|
"rewards/margins": -73.59380340576172, |
|
"rewards/rejected": -293.1261291503906, |
|
"step": 820, |
|
"use_label": 1707.75 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.275902211874273e-06, |
|
"logits/chosen": 3.487344264984131, |
|
"logits/rejected": 3.3718509674072266, |
|
"logps/chosen": -5937.5302734375, |
|
"logps/rejected": -6147.76416015625, |
|
"loss": 0.014, |
|
"pred_label": 11490.599609375, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -567.4717407226562, |
|
"rewards/margins": 19.284542083740234, |
|
"rewards/rejected": -586.7562255859375, |
|
"step": 830, |
|
"use_label": 1711.4000244140625 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.693830034924331e-06, |
|
"logits/chosen": 11.813470840454102, |
|
"logits/rejected": 11.792594909667969, |
|
"logps/chosen": -11349.767578125, |
|
"logps/rejected": -10712.0576171875, |
|
"loss": 0.01, |
|
"pred_label": 11647.599609375, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -1108.175048828125, |
|
"rewards/margins": -63.417640686035156, |
|
"rewards/rejected": -1044.7574462890625, |
|
"step": 840, |
|
"use_label": 1714.4000244140625 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.111757857974389e-06, |
|
"logits/chosen": 12.407671928405762, |
|
"logits/rejected": 12.412581443786621, |
|
"logps/chosen": -12044.3359375, |
|
"logps/rejected": -10440.21875, |
|
"loss": 0.0137, |
|
"pred_label": 11806.0498046875, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -1174.930908203125, |
|
"rewards/margins": -155.24510192871094, |
|
"rewards/rejected": -1019.6856689453125, |
|
"step": 850, |
|
"use_label": 1715.949951171875 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.529685681024447e-06, |
|
"logits/chosen": 13.048141479492188, |
|
"logits/rejected": 13.045463562011719, |
|
"logps/chosen": -12283.849609375, |
|
"logps/rejected": -11249.0595703125, |
|
"loss": 0.0112, |
|
"pred_label": 11965.0, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -1199.896728515625, |
|
"rewards/margins": -102.23991394042969, |
|
"rewards/rejected": -1097.6568603515625, |
|
"step": 860, |
|
"use_label": 1717.0 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.947613504074506e-06, |
|
"logits/chosen": 13.306634902954102, |
|
"logits/rejected": 13.326390266418457, |
|
"logps/chosen": -10968.7822265625, |
|
"logps/rejected": -10457.6435546875, |
|
"loss": 0.0108, |
|
"pred_label": 12123.5751953125, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -1072.5794677734375, |
|
"rewards/margins": -50.84003448486328, |
|
"rewards/rejected": -1021.7394409179688, |
|
"step": 870, |
|
"use_label": 1718.425048828125 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.3655413271245635e-06, |
|
"logits/chosen": 13.300872802734375, |
|
"logits/rejected": 13.316276550292969, |
|
"logps/chosen": -13030.8095703125, |
|
"logps/rejected": -11216.4794921875, |
|
"loss": 0.0078, |
|
"pred_label": 12279.9501953125, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -1274.422119140625, |
|
"rewards/margins": -178.9391632080078, |
|
"rewards/rejected": -1095.4830322265625, |
|
"step": 880, |
|
"use_label": 1722.050048828125 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.7834691501746217e-06, |
|
"logits/chosen": 13.323100090026855, |
|
"logits/rejected": 13.341893196105957, |
|
"logps/chosen": -13646.083984375, |
|
"logps/rejected": -12056.134765625, |
|
"loss": 0.0108, |
|
"pred_label": 12438.625, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -1336.4798583984375, |
|
"rewards/margins": -158.06932067871094, |
|
"rewards/rejected": -1178.41064453125, |
|
"step": 890, |
|
"use_label": 1723.375 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.2013969732246805e-06, |
|
"logits/chosen": 13.762173652648926, |
|
"logits/rejected": 13.755559921264648, |
|
"logps/chosen": -13121.990234375, |
|
"logps/rejected": -10966.107421875, |
|
"loss": 0.0193, |
|
"pred_label": 12596.1748046875, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -1284.2850341796875, |
|
"rewards/margins": -213.5261688232422, |
|
"rewards/rejected": -1070.7589111328125, |
|
"step": 900, |
|
"use_label": 1725.824951171875 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.6193247962747383e-06, |
|
"logits/chosen": 13.810602188110352, |
|
"logits/rejected": 13.802284240722656, |
|
"logps/chosen": -13679.8857421875, |
|
"logps/rejected": -11569.97265625, |
|
"loss": 0.0091, |
|
"pred_label": 12752.150390625, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -1338.3470458984375, |
|
"rewards/margins": -207.6631317138672, |
|
"rewards/rejected": -1130.6839599609375, |
|
"step": 910, |
|
"use_label": 1729.8499755859375 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.037252619324796e-06, |
|
"logits/chosen": 13.879419326782227, |
|
"logits/rejected": 13.855003356933594, |
|
"logps/chosen": -14082.59375, |
|
"logps/rejected": -11603.6435546875, |
|
"loss": 0.0145, |
|
"pred_label": 12909.7998046875, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -1378.5706787109375, |
|
"rewards/margins": -244.1646270751953, |
|
"rewards/rejected": -1134.406005859375, |
|
"step": 920, |
|
"use_label": 1732.199951171875 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.4551804423748545e-06, |
|
"logits/chosen": 13.563482284545898, |
|
"logits/rejected": 13.548616409301758, |
|
"logps/chosen": -13257.4296875, |
|
"logps/rejected": -10279.7294921875, |
|
"loss": 0.0103, |
|
"pred_label": 13067.3251953125, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -1298.921630859375, |
|
"rewards/margins": -295.1283264160156, |
|
"rewards/rejected": -1003.79345703125, |
|
"step": 930, |
|
"use_label": 1734.675048828125 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.731082654249127e-07, |
|
"logits/chosen": 13.876431465148926, |
|
"logits/rejected": 13.86772632598877, |
|
"logps/chosen": -14265.5234375, |
|
"logps/rejected": -11806.12890625, |
|
"loss": 0.0096, |
|
"pred_label": 13226.525390625, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -1397.813232421875, |
|
"rewards/margins": -243.88876342773438, |
|
"rewards/rejected": -1153.92431640625, |
|
"step": 940, |
|
"use_label": 1735.4749755859375 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.910360884749709e-07, |
|
"logits/chosen": 13.791536331176758, |
|
"logits/rejected": 13.786079406738281, |
|
"logps/chosen": -12623.412109375, |
|
"logps/rejected": -11098.677734375, |
|
"loss": 0.0144, |
|
"pred_label": 13384.7998046875, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -1235.495849609375, |
|
"rewards/margins": -150.9226837158203, |
|
"rewards/rejected": -1084.572998046875, |
|
"step": 950, |
|
"use_label": 1737.199951171875 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": 13.824411392211914, |
|
"eval_logits/rejected": 13.813151359558105, |
|
"eval_logps/chosen": -13722.0166015625, |
|
"eval_logps/rejected": -11596.5400390625, |
|
"eval_loss": 0.011624496430158615, |
|
"eval_pred_label": 13789.83984375, |
|
"eval_rewards/accuracies": 0.4740000069141388, |
|
"eval_rewards/chosen": -1343.776123046875, |
|
"eval_rewards/margins": -210.05210876464844, |
|
"eval_rewards/rejected": -1133.72412109375, |
|
"eval_runtime": 449.9968, |
|
"eval_samples_per_second": 4.444, |
|
"eval_steps_per_second": 0.278, |
|
"eval_use_label": 1742.1600341796875, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 955, |
|
"total_flos": 0.0, |
|
"train_loss": 0.08065580570807007, |
|
"train_runtime": 25025.0638, |
|
"train_samples_per_second": 2.443, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 955, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|