|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 24.862652137264853, |
|
"learning_rate": 1.25e-08, |
|
"logits/chosen": -0.5811702013015747, |
|
"logits/rejected": -0.11655431985855103, |
|
"logps/chosen": -351.5902099609375, |
|
"logps/rejected": -240.969970703125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 23.69292682023629, |
|
"learning_rate": 1.25e-07, |
|
"logits/chosen": 0.26120826601982117, |
|
"logits/rejected": 0.23706814646720886, |
|
"logps/chosen": -333.1805419921875, |
|
"logps/rejected": -244.67898559570312, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5173611044883728, |
|
"rewards/chosen": 0.0021614907309412956, |
|
"rewards/margins": 0.0021554920822381973, |
|
"rewards/rejected": 5.998538654239383e-06, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 18.203526649945516, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -0.017204787582159042, |
|
"logits/rejected": 0.1991611272096634, |
|
"logps/chosen": -320.430908203125, |
|
"logps/rejected": -234.376220703125, |
|
"loss": 0.669, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.033605434000492096, |
|
"rewards/margins": 0.04716432839632034, |
|
"rewards/rejected": -0.01355889905244112, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 10.096989474079606, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": -0.2575300931930542, |
|
"logits/rejected": -0.4580558240413666, |
|
"logps/chosen": -300.87896728515625, |
|
"logps/rejected": -255.5655517578125, |
|
"loss": 0.5805, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.14600001275539398, |
|
"rewards/margins": 0.2884979844093323, |
|
"rewards/rejected": -0.14249801635742188, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 9.68944337059453, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -0.6759181022644043, |
|
"logits/rejected": -0.6345951557159424, |
|
"logps/chosen": -317.50872802734375, |
|
"logps/rejected": -302.39630126953125, |
|
"loss": 0.4819, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": 0.07102981209754944, |
|
"rewards/margins": 0.6418195366859436, |
|
"rewards/rejected": -0.5707896947860718, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 13.329379682299182, |
|
"learning_rate": 4.990486745229364e-07, |
|
"logits/chosen": -0.12263472378253937, |
|
"logits/rejected": 0.44540151953697205, |
|
"logps/chosen": -374.64556884765625, |
|
"logps/rejected": -388.1717224121094, |
|
"loss": 0.3966, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4549541473388672, |
|
"rewards/margins": 1.0250240564346313, |
|
"rewards/rejected": -1.4799782037734985, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 17.333516248641253, |
|
"learning_rate": 4.96201938253052e-07, |
|
"logits/chosen": -0.30300790071487427, |
|
"logits/rejected": 0.3122316002845764, |
|
"logps/chosen": -394.78106689453125, |
|
"logps/rejected": -432.4813537597656, |
|
"loss": 0.3861, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.7015730142593384, |
|
"rewards/margins": 1.1719900369644165, |
|
"rewards/rejected": -1.8735630512237549, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 15.677534908750197, |
|
"learning_rate": 4.91481456572267e-07, |
|
"logits/chosen": 0.7395630478858948, |
|
"logits/rejected": 1.5376254320144653, |
|
"logps/chosen": -425.17236328125, |
|
"logps/rejected": -448.2694396972656, |
|
"loss": 0.3474, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.8609533309936523, |
|
"rewards/margins": 1.3486477136611938, |
|
"rewards/rejected": -2.2096011638641357, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 17.182808543364636, |
|
"learning_rate": 4.849231551964771e-07, |
|
"logits/chosen": 2.598942995071411, |
|
"logits/rejected": 3.4538092613220215, |
|
"logps/chosen": -448.8929748535156, |
|
"logps/rejected": -540.0630493164062, |
|
"loss": 0.3215, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -1.3736767768859863, |
|
"rewards/margins": 1.7528272867202759, |
|
"rewards/rejected": -3.126504421234131, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 16.648755569621386, |
|
"learning_rate": 4.7657694675916247e-07, |
|
"logits/chosen": 2.8463895320892334, |
|
"logits/rejected": 3.732513427734375, |
|
"logps/chosen": -496.74005126953125, |
|
"logps/rejected": -623.58984375, |
|
"loss": 0.3048, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/chosen": -1.7601783275604248, |
|
"rewards/margins": 1.9939384460449219, |
|
"rewards/rejected": -3.7541167736053467, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 15.972608527062494, |
|
"learning_rate": 4.6650635094610966e-07, |
|
"logits/chosen": 2.0133347511291504, |
|
"logits/rejected": 3.3279690742492676, |
|
"logps/chosen": -554.5970458984375, |
|
"logps/rejected": -683.0777587890625, |
|
"loss": 0.2797, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -2.21871018409729, |
|
"rewards/margins": 2.000453233718872, |
|
"rewards/rejected": -4.219162940979004, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 16.95927334748175, |
|
"learning_rate": 4.5478801107224794e-07, |
|
"logits/chosen": 2.1293346881866455, |
|
"logits/rejected": 3.9433817863464355, |
|
"logps/chosen": -545.55078125, |
|
"logps/rejected": -698.3030395507812, |
|
"loss": 0.2718, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.248697280883789, |
|
"rewards/margins": 2.459144353866577, |
|
"rewards/rejected": -4.707841873168945, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 15.769838259410646, |
|
"learning_rate": 4.415111107797445e-07, |
|
"logits/chosen": 2.2328364849090576, |
|
"logits/rejected": 3.943868637084961, |
|
"logps/chosen": -547.4822998046875, |
|
"logps/rejected": -709.2218017578125, |
|
"loss": 0.2597, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -2.386432409286499, |
|
"rewards/margins": 2.306048631668091, |
|
"rewards/rejected": -4.692481517791748, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 16.240997635455848, |
|
"learning_rate": 4.2677669529663686e-07, |
|
"logits/chosen": 3.3713316917419434, |
|
"logits/rejected": 4.970644950866699, |
|
"logps/chosen": -669.5197143554688, |
|
"logps/rejected": -839.8416748046875, |
|
"loss": 0.2523, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.3710944652557373, |
|
"rewards/margins": 2.5790421962738037, |
|
"rewards/rejected": -5.950136661529541, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 16.664869807154886, |
|
"learning_rate": 4.106969024216348e-07, |
|
"logits/chosen": 3.0220611095428467, |
|
"logits/rejected": 4.610594749450684, |
|
"logps/chosen": -647.0032958984375, |
|
"logps/rejected": -834.1439208984375, |
|
"loss": 0.2514, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.284292221069336, |
|
"rewards/margins": 2.7818052768707275, |
|
"rewards/rejected": -6.066097259521484, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 17.103959159416473, |
|
"learning_rate": 3.933941090877615e-07, |
|
"logits/chosen": 1.9788957834243774, |
|
"logits/rejected": 3.797266721725464, |
|
"logps/chosen": -657.1544799804688, |
|
"logps/rejected": -866.92236328125, |
|
"loss": 0.2465, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.3205840587615967, |
|
"rewards/margins": 2.8848683834075928, |
|
"rewards/rejected": -6.205452919006348, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 22.71759647433438, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": 2.598877429962158, |
|
"logits/rejected": 3.922821044921875, |
|
"logps/chosen": -650.6119995117188, |
|
"logps/rejected": -860.2496337890625, |
|
"loss": 0.2424, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.585509777069092, |
|
"rewards/margins": 2.7439045906066895, |
|
"rewards/rejected": -6.3294148445129395, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 17.154680074008297, |
|
"learning_rate": 3.5565456543517485e-07, |
|
"logits/chosen": 1.2129310369491577, |
|
"logits/rejected": 3.644993543624878, |
|
"logps/chosen": -650.40576171875, |
|
"logps/rejected": -869.5897216796875, |
|
"loss": 0.245, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -3.348802089691162, |
|
"rewards/margins": 3.0448169708251953, |
|
"rewards/rejected": -6.393619537353516, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 15.288786440112402, |
|
"learning_rate": 3.355050358314172e-07, |
|
"logits/chosen": 2.0979018211364746, |
|
"logits/rejected": 3.6165339946746826, |
|
"logps/chosen": -733.31298828125, |
|
"logps/rejected": -946.0720825195312, |
|
"loss": 0.225, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/chosen": -3.9488494396209717, |
|
"rewards/margins": 3.014504909515381, |
|
"rewards/rejected": -6.963354587554932, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 15.20089211524797, |
|
"learning_rate": 3.147047612756302e-07, |
|
"logits/chosen": 1.049578309059143, |
|
"logits/rejected": 3.2230868339538574, |
|
"logps/chosen": -655.8287963867188, |
|
"logps/rejected": -909.56787109375, |
|
"loss": 0.2177, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.187329053878784, |
|
"rewards/margins": 3.4261791706085205, |
|
"rewards/rejected": -6.613508701324463, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 19.21517389497067, |
|
"learning_rate": 2.934120444167326e-07, |
|
"logits/chosen": 2.0917961597442627, |
|
"logits/rejected": 4.381856918334961, |
|
"logps/chosen": -707.9210205078125, |
|
"logps/rejected": -967.8511962890625, |
|
"loss": 0.2291, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.029782772064209, |
|
"rewards/margins": 3.380286455154419, |
|
"rewards/rejected": -7.410069465637207, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 17.876619392703006, |
|
"learning_rate": 2.717889356869146e-07, |
|
"logits/chosen": 2.075894832611084, |
|
"logits/rejected": 3.812873363494873, |
|
"logps/chosen": -664.9110717773438, |
|
"logps/rejected": -898.7711791992188, |
|
"loss": 0.2335, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.6399245262145996, |
|
"rewards/margins": 3.0123069286346436, |
|
"rewards/rejected": -6.652230739593506, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 16.42311250323521, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": 2.077141523361206, |
|
"logits/rejected": 4.0336527824401855, |
|
"logps/chosen": -714.0510864257812, |
|
"logps/rejected": -951.3372802734375, |
|
"loss": 0.2163, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.000287055969238, |
|
"rewards/margins": 3.160945415496826, |
|
"rewards/rejected": -7.161231994628906, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 21.86260854020408, |
|
"learning_rate": 2.2821106431308543e-07, |
|
"logits/chosen": 1.8970081806182861, |
|
"logits/rejected": 3.8517441749572754, |
|
"logps/chosen": -711.104248046875, |
|
"logps/rejected": -952.9786987304688, |
|
"loss": 0.2307, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.102777481079102, |
|
"rewards/margins": 3.2044379711151123, |
|
"rewards/rejected": -7.307215213775635, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 17.415535830140726, |
|
"learning_rate": 2.065879555832674e-07, |
|
"logits/chosen": 1.93063485622406, |
|
"logits/rejected": 3.716691255569458, |
|
"logps/chosen": -733.0238037109375, |
|
"logps/rejected": -995.0330200195312, |
|
"loss": 0.2135, |
|
"rewards/accuracies": 0.8968750238418579, |
|
"rewards/chosen": -4.205197811126709, |
|
"rewards/margins": 3.2796833515167236, |
|
"rewards/rejected": -7.4848809242248535, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 23.48694643420195, |
|
"learning_rate": 1.8529523872436977e-07, |
|
"logits/chosen": 1.754500150680542, |
|
"logits/rejected": 3.7942306995391846, |
|
"logps/chosen": -756.137939453125, |
|
"logps/rejected": -1002.9494018554688, |
|
"loss": 0.2284, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.309305667877197, |
|
"rewards/margins": 3.231421947479248, |
|
"rewards/rejected": -7.5407280921936035, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 16.97115932824073, |
|
"learning_rate": 1.6449496416858282e-07, |
|
"logits/chosen": 2.1520519256591797, |
|
"logits/rejected": 4.258932590484619, |
|
"logps/chosen": -750.8233642578125, |
|
"logps/rejected": -1022.2374267578125, |
|
"loss": 0.2136, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.413332939147949, |
|
"rewards/margins": 3.5410499572753906, |
|
"rewards/rejected": -7.954381465911865, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 20.50346815073402, |
|
"learning_rate": 1.4434543456482518e-07, |
|
"logits/chosen": 1.4445512294769287, |
|
"logits/rejected": 3.574235439300537, |
|
"logps/chosen": -682.8297729492188, |
|
"logps/rejected": -979.3341674804688, |
|
"loss": 0.1978, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/chosen": -3.6296730041503906, |
|
"rewards/margins": 3.7298316955566406, |
|
"rewards/rejected": -7.359505653381348, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 16.82073433691609, |
|
"learning_rate": 1.2500000000000005e-07, |
|
"logits/chosen": 1.172753930091858, |
|
"logits/rejected": 3.4942619800567627, |
|
"logps/chosen": -675.3418579101562, |
|
"logps/rejected": -974.0270385742188, |
|
"loss": 0.2189, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/chosen": -3.6095943450927734, |
|
"rewards/margins": 3.8261497020721436, |
|
"rewards/rejected": -7.435744285583496, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 27.148115499609514, |
|
"learning_rate": 1.0660589091223854e-07, |
|
"logits/chosen": 1.3045436143875122, |
|
"logits/rejected": 3.874147891998291, |
|
"logps/chosen": -724.1644287109375, |
|
"logps/rejected": -1020.5606689453125, |
|
"loss": 0.2159, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -3.9626336097717285, |
|
"rewards/margins": 3.8746466636657715, |
|
"rewards/rejected": -7.837281227111816, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 18.896563678409045, |
|
"learning_rate": 8.930309757836516e-08, |
|
"logits/chosen": 1.5070204734802246, |
|
"logits/rejected": 3.8179619312286377, |
|
"logps/chosen": -756.2379150390625, |
|
"logps/rejected": -1010.8900146484375, |
|
"loss": 0.2118, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.342662811279297, |
|
"rewards/margins": 3.381243944168091, |
|
"rewards/rejected": -7.72390604019165, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 18.302278636631012, |
|
"learning_rate": 7.322330470336313e-08, |
|
"logits/chosen": 2.013995885848999, |
|
"logits/rejected": 4.006863117218018, |
|
"logps/chosen": -755.84326171875, |
|
"logps/rejected": -1039.4290771484375, |
|
"loss": 0.2007, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.390773296356201, |
|
"rewards/margins": 3.617499589920044, |
|
"rewards/rejected": -8.008273124694824, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 19.269057964941258, |
|
"learning_rate": 5.848888922025552e-08, |
|
"logits/chosen": 1.4602447748184204, |
|
"logits/rejected": 3.709857940673828, |
|
"logps/chosen": -771.56640625, |
|
"logps/rejected": -1046.9013671875, |
|
"loss": 0.2211, |
|
"rewards/accuracies": 0.903124988079071, |
|
"rewards/chosen": -4.392203330993652, |
|
"rewards/margins": 3.6757659912109375, |
|
"rewards/rejected": -8.067970275878906, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 16.212116381856944, |
|
"learning_rate": 4.521198892775202e-08, |
|
"logits/chosen": 1.5877026319503784, |
|
"logits/rejected": 3.5275306701660156, |
|
"logps/chosen": -748.9736328125, |
|
"logps/rejected": -1030.6241455078125, |
|
"loss": 0.1902, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/chosen": -4.198099613189697, |
|
"rewards/margins": 3.724585771560669, |
|
"rewards/rejected": -7.922685146331787, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 23.048514547738275, |
|
"learning_rate": 3.349364905389032e-08, |
|
"logits/chosen": 1.2227389812469482, |
|
"logits/rejected": 3.192277193069458, |
|
"logps/chosen": -744.6568603515625, |
|
"logps/rejected": -1009.3792724609375, |
|
"loss": 0.2061, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.285494804382324, |
|
"rewards/margins": 3.405372142791748, |
|
"rewards/rejected": -7.690866947174072, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 18.4900810885199, |
|
"learning_rate": 2.3423053240837514e-08, |
|
"logits/chosen": 1.2598426342010498, |
|
"logits/rejected": 3.358072280883789, |
|
"logps/chosen": -737.2872314453125, |
|
"logps/rejected": -1013.8024291992188, |
|
"loss": 0.2239, |
|
"rewards/accuracies": 0.871874988079071, |
|
"rewards/chosen": -4.232865333557129, |
|
"rewards/margins": 3.510840892791748, |
|
"rewards/rejected": -7.743706703186035, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 16.48305540217272, |
|
"learning_rate": 1.507684480352292e-08, |
|
"logits/chosen": 1.188299298286438, |
|
"logits/rejected": 3.3616530895233154, |
|
"logps/chosen": -726.2432861328125, |
|
"logps/rejected": -1018.1263427734375, |
|
"loss": 0.1958, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.075113773345947, |
|
"rewards/margins": 3.6948330402374268, |
|
"rewards/rejected": -7.769946098327637, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 20.27854050236199, |
|
"learning_rate": 8.518543427732949e-09, |
|
"logits/chosen": 1.1218559741973877, |
|
"logits/rejected": 3.376429319381714, |
|
"logps/chosen": -706.4921875, |
|
"logps/rejected": -1009.96533203125, |
|
"loss": 0.199, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/chosen": -4.078927516937256, |
|
"rewards/margins": 3.7843894958496094, |
|
"rewards/rejected": -7.863317966461182, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 21.44365501017452, |
|
"learning_rate": 3.798061746947995e-09, |
|
"logits/chosen": 1.2741193771362305, |
|
"logits/rejected": 3.5388190746307373, |
|
"logps/chosen": -727.7400512695312, |
|
"logps/rejected": -996.4246215820312, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/chosen": -4.139595985412598, |
|
"rewards/margins": 3.5642218589782715, |
|
"rewards/rejected": -7.703817844390869, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 19.980658149789488, |
|
"learning_rate": 9.513254770636137e-10, |
|
"logits/chosen": 1.3589586019515991, |
|
"logits/rejected": 3.3931171894073486, |
|
"logps/chosen": -751.2428588867188, |
|
"logps/rejected": -1019.2142333984375, |
|
"loss": 0.2117, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.217888832092285, |
|
"rewards/margins": 3.668452739715576, |
|
"rewards/rejected": -7.8863420486450195, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 19.223012927780225, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 1.3647325038909912, |
|
"logits/rejected": 3.651308536529541, |
|
"logps/chosen": -756.8760986328125, |
|
"logps/rejected": -1033.67333984375, |
|
"loss": 0.2003, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.446074485778809, |
|
"rewards/margins": 3.6355972290039062, |
|
"rewards/rejected": -8.081671714782715, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 400, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2785977178812027, |
|
"train_runtime": 11929.9898, |
|
"train_samples_per_second": 8.58, |
|
"train_steps_per_second": 0.034 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|