{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984,
  "eval_steps": 500,
  "global_step": 312,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 1.5625e-07,
      "logits/chosen": 0.5981850028038025,
      "logits/rejected": 0.7171921730041504,
      "logps/chosen": -1114.7255859375,
      "logps/rejected": -1146.7080078125,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.5625e-06,
      "logits/chosen": 0.4006383419036865,
      "logits/rejected": 0.578837513923645,
      "logps/chosen": -1050.73681640625,
      "logps/rejected": -1196.98828125,
      "loss": 0.693,
      "rewards/accuracies": 0.4236111044883728,
      "rewards/chosen": -0.0012154510477557778,
      "rewards/margins": 0.0003596623719204217,
      "rewards/rejected": -0.0015751136234030128,
      "step": 10
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.125e-06,
      "logits/chosen": 0.43995967507362366,
      "logits/rejected": 0.6202905774116516,
      "logps/chosen": -1097.7890625,
      "logps/rejected": -1239.5677490234375,
      "loss": 0.6913,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": -0.02275204099714756,
      "rewards/margins": 0.004399357363581657,
      "rewards/rejected": -0.027151400223374367,
      "step": 20
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.6875000000000004e-06,
      "logits/chosen": 0.5573526620864868,
      "logits/rejected": 0.613277018070221,
      "logps/chosen": -1259.811767578125,
      "logps/rejected": -1435.0015869140625,
      "loss": 0.6871,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -0.11753509938716888,
      "rewards/margins": 0.0262240469455719,
      "rewards/rejected": -0.14375916123390198,
      "step": 30
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.989935734988098e-06,
      "logits/chosen": 0.4959263801574707,
      "logits/rejected": 0.6557127237319946,
      "logps/chosen": -1347.3275146484375,
      "logps/rejected": -1479.0496826171875,
      "loss": 0.6814,
      "rewards/accuracies": 0.4749999940395355,
      "rewards/chosen": -0.24454805254936218,
      "rewards/margins": 0.0380481593310833,
      "rewards/rejected": -0.2825961709022522,
      "step": 40
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.949188496058089e-06,
      "logits/chosen": 0.5846071243286133,
      "logits/rejected": 0.6737171411514282,
      "logps/chosen": -1493.431884765625,
      "logps/rejected": -1723.5257568359375,
      "loss": 0.6683,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.36657729744911194,
      "rewards/margins": 0.11142341792583466,
      "rewards/rejected": -0.4780006408691406,
      "step": 50
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.8776412907378845e-06,
      "logits/chosen": 0.5173619985580444,
      "logits/rejected": 0.784024715423584,
      "logps/chosen": -1601.8973388671875,
      "logps/rejected": -1787.753173828125,
      "loss": 0.6729,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.46918588876724243,
      "rewards/margins": 0.08332391828298569,
      "rewards/rejected": -0.5525098443031311,
      "step": 60
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.7761938666470405e-06,
      "logits/chosen": 0.5953202247619629,
      "logits/rejected": 0.6078029274940491,
      "logps/chosen": -1537.4744873046875,
      "logps/rejected": -1700.5455322265625,
      "loss": 0.6787,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.41651010513305664,
      "rewards/margins": 0.07196612656116486,
      "rewards/rejected": -0.4884762167930603,
      "step": 70
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.646121984004666e-06,
      "logits/chosen": 0.5242379307746887,
      "logits/rejected": 0.600353479385376,
      "logps/chosen": -1465.7352294921875,
      "logps/rejected": -1685.4124755859375,
      "loss": 0.6733,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.37152013182640076,
      "rewards/margins": 0.09310854226350784,
      "rewards/rejected": -0.4646286964416504,
      "step": 80
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.4890613722044526e-06,
      "logits/chosen": 0.5765184164047241,
      "logits/rejected": 0.651719331741333,
      "logps/chosen": -1519.32470703125,
      "logps/rejected": -1666.23828125,
      "loss": 0.6722,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": -0.4189775884151459,
      "rewards/margins": 0.048159025609493256,
      "rewards/rejected": -0.4671366214752197,
      "step": 90
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.3069871595684795e-06,
      "logits/chosen": 0.5278979539871216,
      "logits/rejected": 0.7525848746299744,
      "logps/chosen": -1449.8560791015625,
      "logps/rejected": -1631.3978271484375,
      "loss": 0.6676,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.3548738658428192,
      "rewards/margins": 0.08127720654010773,
      "rewards/rejected": -0.43615108728408813,
      "step": 100
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.102189034962561e-06,
      "logits/chosen": 0.6309102773666382,
      "logits/rejected": 0.6685208082199097,
      "logps/chosen": -1543.900634765625,
      "logps/rejected": -1706.1783447265625,
      "loss": 0.6621,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -0.3921688199043274,
      "rewards/margins": 0.10156150162220001,
      "rewards/rejected": -0.49373024702072144,
      "step": 110
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.8772424536302565e-06,
      "logits/chosen": 0.6601932048797607,
      "logits/rejected": 0.5677546262741089,
      "logps/chosen": -1639.169677734375,
      "logps/rejected": -1795.3013916015625,
      "loss": 0.6627,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.5095900297164917,
      "rewards/margins": 0.09199462831020355,
      "rewards/rejected": -0.6015846133232117,
      "step": 120
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.634976249348867e-06,
      "logits/chosen": 0.569564700126648,
      "logits/rejected": 0.6693593859672546,
      "logps/chosen": -1584.231689453125,
      "logps/rejected": -1769.0833740234375,
      "loss": 0.6718,
      "rewards/accuracies": 0.48124998807907104,
      "rewards/chosen": -0.49371838569641113,
      "rewards/margins": 0.08162926137447357,
      "rewards/rejected": -0.5753476023674011,
      "step": 130
    },
    {
      "epoch": 0.45,
      "learning_rate": 3.3784370602033572e-06,
      "logits/chosen": 0.7315655946731567,
      "logits/rejected": 0.6511383652687073,
      "logps/chosen": -1529.665771484375,
      "logps/rejected": -1825.186767578125,
      "loss": 0.6655,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.4015732705593109,
      "rewards/margins": 0.1370248943567276,
      "rewards/rejected": -0.5385981798171997,
      "step": 140
    },
    {
      "epoch": 0.48,
      "learning_rate": 3.1108510153447352e-06,
      "logits/chosen": 0.5098114013671875,
      "logits/rejected": 0.7448014616966248,
      "logps/chosen": -1524.7633056640625,
      "logps/rejected": -1739.652099609375,
      "loss": 0.6609,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.38486695289611816,
      "rewards/margins": 0.09735463559627533,
      "rewards/rejected": -0.4822216033935547,
      "step": 150
    },
    {
      "epoch": 0.51,
      "learning_rate": 2.835583164544139e-06,
      "logits/chosen": 0.523254930973053,
      "logits/rejected": 0.7159970998764038,
      "logps/chosen": -1473.3905029296875,
      "logps/rejected": -1742.4814453125,
      "loss": 0.6645,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.34661874175071716,
      "rewards/margins": 0.12415720522403717,
      "rewards/rejected": -0.4707759916782379,
      "step": 160
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.556095160739513e-06,
      "logits/chosen": 0.5594567060470581,
      "logits/rejected": 0.7879417538642883,
      "logps/chosen": -1441.941650390625,
      "logps/rejected": -1688.218994140625,
      "loss": 0.6584,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.3497202694416046,
      "rewards/margins": 0.10849998146295547,
      "rewards/rejected": -0.4582202434539795,
      "step": 170
    },
    {
      "epoch": 0.58,
      "learning_rate": 2.2759017277414165e-06,
      "logits/chosen": 0.6102200150489807,
      "logits/rejected": 0.7705193758010864,
      "logps/chosen": -1447.1510009765625,
      "logps/rejected": -1748.801513671875,
      "loss": 0.6584,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.40625113248825073,
      "rewards/margins": 0.17269271612167358,
      "rewards/rejected": -0.5789438486099243,
      "step": 180
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.9985264605418185e-06,
      "logits/chosen": 0.7250276803970337,
      "logits/rejected": 0.779090940952301,
      "logps/chosen": -1608.396484375,
      "logps/rejected": -1889.056640625,
      "loss": 0.6531,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.5311942100524902,
      "rewards/margins": 0.14003470540046692,
      "rewards/rejected": -0.6712288856506348,
      "step": 190
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.7274575140626318e-06,
      "logits/chosen": 0.6869342923164368,
      "logits/rejected": 0.7512118220329285,
      "logps/chosen": -1676.6634521484375,
      "logps/rejected": -1916.425048828125,
      "loss": 0.656,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": -0.5880906581878662,
      "rewards/margins": 0.13318488001823425,
      "rewards/rejected": -0.7212755680084229,
      "step": 200
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.466103737583699e-06,
      "logits/chosen": 0.6197313070297241,
      "logits/rejected": 0.7475897073745728,
      "logps/chosen": -1537.823974609375,
      "logps/rejected": -1862.232421875,
      "loss": 0.6564,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -0.4764162600040436,
      "rewards/margins": 0.189828023314476,
      "rewards/rejected": -0.6662443280220032,
      "step": 210
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.217751806485235e-06,
      "logits/chosen": 0.7158914804458618,
      "logits/rejected": 0.8779485821723938,
      "logps/chosen": -1476.162841796875,
      "logps/rejected": -1704.2161865234375,
      "loss": 0.655,
      "rewards/accuracies": 0.4937500059604645,
      "rewards/chosen": -0.4569624066352844,
      "rewards/margins": 0.09420967102050781,
      "rewards/rejected": -0.5511720180511475,
      "step": 220
    },
    {
      "epoch": 0.74,
      "learning_rate": 9.855248903979505e-07,
      "logits/chosen": 0.6375827193260193,
      "logits/rejected": 0.7420727014541626,
      "logps/chosen": -1576.697509765625,
      "logps/rejected": -1821.4849853515625,
      "loss": 0.6593,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.4542946219444275,
      "rewards/margins": 0.15300381183624268,
      "rewards/rejected": -0.6072984933853149,
      "step": 230
    },
    {
      "epoch": 0.77,
      "learning_rate": 7.723433775328385e-07,
      "logits/chosen": 0.6124081015586853,
      "logits/rejected": 0.7668013572692871,
      "logps/chosen": -1591.124267578125,
      "logps/rejected": -1752.868408203125,
      "loss": 0.6579,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": -0.45945438742637634,
      "rewards/margins": 0.08051095902919769,
      "rewards/rejected": -0.5399652719497681,
      "step": 240
    },
    {
      "epoch": 0.8,
      "learning_rate": 5.808881491049723e-07,
      "logits/chosen": 0.6094003915786743,
      "logits/rejected": 0.7548435926437378,
      "logps/chosen": -1563.33203125,
      "logps/rejected": -1689.942626953125,
      "loss": 0.6653,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.48421138525009155,
      "rewards/margins": 0.04744899272918701,
      "rewards/rejected": -0.5316604375839233,
      "step": 250
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.1356686569674344e-07,
      "logits/chosen": 0.6028648614883423,
      "logits/rejected": 0.711211085319519,
      "logps/chosen": -1524.207763671875,
      "logps/rejected": -1821.697509765625,
      "loss": 0.6525,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.4109669625759125,
      "rewards/margins": 0.14401301741600037,
      "rewards/rejected": -0.5549799799919128,
      "step": 260
    },
    {
      "epoch": 0.86,
      "learning_rate": 2.7248368952908055e-07,
      "logits/chosen": 0.5701503753662109,
      "logits/rejected": 0.771028459072113,
      "logps/chosen": -1587.4259033203125,
      "logps/rejected": -1885.8216552734375,
      "loss": 0.6525,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -0.4386199116706848,
      "rewards/margins": 0.14451175928115845,
      "rewards/rejected": -0.5831316709518433,
      "step": 270
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.59412823400657e-07,
      "logits/chosen": 0.5914828181266785,
      "logits/rejected": 0.6638094186782837,
      "logps/chosen": -1495.4044189453125,
      "logps/rejected": -1771.0677490234375,
      "loss": 0.6518,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.4058058261871338,
      "rewards/margins": 0.1175435408949852,
      "rewards/rejected": -0.5233493447303772,
      "step": 280
    },
    {
      "epoch": 0.93,
      "learning_rate": 7.577619905828281e-08,
      "logits/chosen": 0.6334878206253052,
      "logits/rejected": 0.724925696849823,
      "logps/chosen": -1534.744384765625,
      "logps/rejected": -1730.7095947265625,
      "loss": 0.6648,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.44486203789711,
      "rewards/margins": 0.0799860954284668,
      "rewards/rejected": -0.5248481035232544,
      "step": 290
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.262559558016325e-08,
      "logits/chosen": 0.5741454362869263,
      "logits/rejected": 0.7404450178146362,
      "logps/chosen": -1426.1407470703125,
      "logps/rejected": -1655.0103759765625,
      "loss": 0.6569,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -0.3900262713432312,
      "rewards/margins": 0.10863729566335678,
      "rewards/rejected": -0.4986635744571686,
      "step": 300
    },
    {
      "epoch": 0.99,
      "learning_rate": 6.294126437336734e-10,
      "logits/chosen": 0.603456437587738,
      "logits/rejected": 0.8570040464401245,
      "logps/chosen": -1530.1478271484375,
      "logps/rejected": -1771.2337646484375,
      "loss": 0.6513,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.4091600775718689,
      "rewards/margins": 0.11743543297052383,
      "rewards/rejected": -0.5265954732894897,
      "step": 310
    },
    {
      "epoch": 1.0,
      "step": 312,
      "total_flos": 0.0,
      "train_loss": 0.6651641822014099,
      "train_runtime": 4241.3739,
      "train_samples_per_second": 4.715,
      "train_steps_per_second": 0.074
    }
  ],
  "logging_steps": 10,
  "max_steps": 312,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 20,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}