|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 252, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 16.79678467461835, |
|
"learning_rate": 1.923076923076923e-08, |
|
"logits/chosen": -0.5216625928878784, |
|
"logits/rejected": -1.6251907348632812, |
|
"logps/chosen": -339.42877197265625, |
|
"logps/rejected": -263.98431396484375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 17.217350690399922, |
|
"learning_rate": 1.9230769230769231e-07, |
|
"logits/chosen": -1.7395856380462646, |
|
"logits/rejected": -1.1197137832641602, |
|
"logps/chosen": -194.7418212890625, |
|
"logps/rejected": -322.9896240234375, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5069444179534912, |
|
"rewards/chosen": 0.0007130156154744327, |
|
"rewards/margins": 0.0009009492350742221, |
|
"rewards/rejected": -0.00018793345952872187, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 17.755504451917034, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"logits/chosen": -1.3529917001724243, |
|
"logits/rejected": -1.1581436395645142, |
|
"logps/chosen": -250.0107879638672, |
|
"logps/rejected": -309.7054138183594, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.003061536233872175, |
|
"rewards/margins": 0.0077318595722317696, |
|
"rewards/rejected": -0.004670322872698307, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 15.809698317809115, |
|
"learning_rate": 4.99613632163459e-07, |
|
"logits/chosen": -1.445340633392334, |
|
"logits/rejected": -0.8237818479537964, |
|
"logps/chosen": -262.8525695800781, |
|
"logps/rejected": -319.0481872558594, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 0.011689678765833378, |
|
"rewards/margins": 0.03540501371026039, |
|
"rewards/rejected": -0.02371532842516899, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 17.829217597278124, |
|
"learning_rate": 4.952806974561517e-07, |
|
"logits/chosen": -1.3351449966430664, |
|
"logits/rejected": -0.8461858034133911, |
|
"logps/chosen": -231.9071807861328, |
|
"logps/rejected": -339.33740234375, |
|
"loss": 0.6256, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 0.006707000080496073, |
|
"rewards/margins": 0.18326039612293243, |
|
"rewards/rejected": -0.1765533834695816, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 20.372527502778215, |
|
"learning_rate": 4.862157403595598e-07, |
|
"logits/chosen": -1.0431455373764038, |
|
"logits/rejected": -0.7218812704086304, |
|
"logps/chosen": -242.2100372314453, |
|
"logps/rejected": -352.3864440917969, |
|
"loss": 0.5443, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.04869229719042778, |
|
"rewards/margins": 0.5344938039779663, |
|
"rewards/rejected": -0.5831860303878784, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 24.684374727803654, |
|
"learning_rate": 4.725936445085709e-07, |
|
"logits/chosen": -0.7200717329978943, |
|
"logits/rejected": -0.5803043842315674, |
|
"logps/chosen": -277.9634094238281, |
|
"logps/rejected": -517.9424438476562, |
|
"loss": 0.4741, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.17333514988422394, |
|
"rewards/margins": 1.385983943939209, |
|
"rewards/rejected": -1.559319019317627, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 22.14152988454074, |
|
"learning_rate": 4.5467721110696685e-07, |
|
"logits/chosen": -0.515616774559021, |
|
"logits/rejected": -0.04089225083589554, |
|
"logps/chosen": -258.40606689453125, |
|
"logps/rejected": -408.2468566894531, |
|
"loss": 0.4328, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.24228188395500183, |
|
"rewards/margins": 1.2516155242919922, |
|
"rewards/rejected": -1.493897557258606, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 18.065752819849052, |
|
"learning_rate": 4.328120888946271e-07, |
|
"logits/chosen": -0.7557204961776733, |
|
"logits/rejected": -0.010405841283500195, |
|
"logps/chosen": -258.38092041015625, |
|
"logps/rejected": -523.7661743164062, |
|
"loss": 0.4252, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.33308929204940796, |
|
"rewards/margins": 2.278934955596924, |
|
"rewards/rejected": -2.6120240688323975, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 15.734543008245188, |
|
"learning_rate": 4.074201057973785e-07, |
|
"logits/chosen": -1.038962721824646, |
|
"logits/rejected": 0.15530693531036377, |
|
"logps/chosen": -266.2573547363281, |
|
"logps/rejected": -538.8092041015625, |
|
"loss": 0.3708, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.19907574355602264, |
|
"rewards/margins": 2.0345568656921387, |
|
"rewards/rejected": -2.2336325645446777, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 24.698688306800403, |
|
"learning_rate": 3.789911309071252e-07, |
|
"logits/chosen": -0.2497592717409134, |
|
"logits/rejected": 0.24183444678783417, |
|
"logps/chosen": -277.8612365722656, |
|
"logps/rejected": -582.8795776367188, |
|
"loss": 0.34, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -0.30463331937789917, |
|
"rewards/margins": 2.518612861633301, |
|
"rewards/rejected": -2.823246479034424, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_logits/chosen": -1.5214157104492188, |
|
"eval_logits/rejected": 0.016294823959469795, |
|
"eval_logps/chosen": -229.1130828857422, |
|
"eval_logps/rejected": -291.0631408691406, |
|
"eval_loss": 0.5715546011924744, |
|
"eval_rewards/accuracies": 0.7202380895614624, |
|
"eval_rewards/chosen": -0.2893332839012146, |
|
"eval_rewards/margins": 0.37313312292099, |
|
"eval_rewards/rejected": -0.6624664068222046, |
|
"eval_runtime": 410.8502, |
|
"eval_samples_per_second": 9.789, |
|
"eval_steps_per_second": 0.307, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 20.355882133570365, |
|
"learning_rate": 3.4807362379317026e-07, |
|
"logits/chosen": -0.35275131464004517, |
|
"logits/rejected": 0.4792235493659973, |
|
"logps/chosen": -295.2680358886719, |
|
"logps/rejected": -693.8294677734375, |
|
"loss": 0.3238, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.3359094262123108, |
|
"rewards/margins": 3.903926372528076, |
|
"rewards/rejected": -4.239835739135742, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 21.727867963945446, |
|
"learning_rate": 3.152640534699994e-07, |
|
"logits/chosen": -0.5010538697242737, |
|
"logits/rejected": 0.6018115878105164, |
|
"logps/chosen": -291.17840576171875, |
|
"logps/rejected": -668.4251098632812, |
|
"loss": 0.3106, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.3278064429759979, |
|
"rewards/margins": 3.2680602073669434, |
|
"rewards/rejected": -3.595867156982422, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 37.26634759168863, |
|
"learning_rate": 2.811953911537022e-07, |
|
"logits/chosen": -0.57627934217453, |
|
"logits/rejected": 0.6331204175949097, |
|
"logps/chosen": -227.9058837890625, |
|
"logps/rejected": -711.4694213867188, |
|
"loss": 0.299, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.33088651299476624, |
|
"rewards/margins": 3.8740921020507812, |
|
"rewards/rejected": -4.204977989196777, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 38.01185078661505, |
|
"learning_rate": 2.4652489880792125e-07, |
|
"logits/chosen": 0.14344072341918945, |
|
"logits/rejected": 0.4568979740142822, |
|
"logps/chosen": -297.4254455566406, |
|
"logps/rejected": -691.5115356445312, |
|
"loss": 0.2841, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.29703259468078613, |
|
"rewards/margins": 3.58467173576355, |
|
"rewards/rejected": -3.881704330444336, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 23.175417086747803, |
|
"learning_rate": 2.1192144906604874e-07, |
|
"logits/chosen": -0.35931748151779175, |
|
"logits/rejected": 0.38914966583251953, |
|
"logps/chosen": -250.97146606445312, |
|
"logps/rejected": -848.0675659179688, |
|
"loss": 0.2553, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.24952073395252228, |
|
"rewards/margins": 5.0541534423828125, |
|
"rewards/rejected": -5.303674221038818, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 20.4769087360566, |
|
"learning_rate": 1.780526211572016e-07, |
|
"logits/chosen": -0.17432162165641785, |
|
"logits/rejected": 0.5454779267311096, |
|
"logps/chosen": -261.4326477050781, |
|
"logps/rejected": -773.8187866210938, |
|
"loss": 0.2334, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.21171124279499054, |
|
"rewards/margins": 4.594513416290283, |
|
"rewards/rejected": -4.806224822998047, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 37.16207244417959, |
|
"learning_rate": 1.4557182178490635e-07, |
|
"logits/chosen": -0.05513007566332817, |
|
"logits/rejected": 0.6765660047531128, |
|
"logps/chosen": -286.18804931640625, |
|
"logps/rejected": -864.0691528320312, |
|
"loss": 0.2465, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.3841165602207184, |
|
"rewards/margins": 5.005141258239746, |
|
"rewards/rejected": -5.389257431030273, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 18.25462627863088, |
|
"learning_rate": 1.1510567942602889e-07, |
|
"logits/chosen": -0.39599961042404175, |
|
"logits/rejected": 0.6781516075134277, |
|
"logps/chosen": -263.38983154296875, |
|
"logps/rejected": -744.1962280273438, |
|
"loss": 0.2155, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.31235271692276, |
|
"rewards/margins": 4.253737926483154, |
|
"rewards/rejected": -4.5660905838012695, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 41.13316289752075, |
|
"learning_rate": 8.724195524258688e-08, |
|
"logits/chosen": -0.2813408672809601, |
|
"logits/rejected": 0.8199517130851746, |
|
"logps/chosen": -277.7645263671875, |
|
"logps/rejected": -847.7828369140625, |
|
"loss": 0.2412, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.3673214316368103, |
|
"rewards/margins": 5.06696081161499, |
|
"rewards/rejected": -5.434282302856445, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 29.643198990740647, |
|
"learning_rate": 6.251820383244468e-08, |
|
"logits/chosen": -0.14953655004501343, |
|
"logits/rejected": 0.8468856811523438, |
|
"logps/chosen": -270.6844177246094, |
|
"logps/rejected": -735.6868286132812, |
|
"loss": 0.2189, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2686000466346741, |
|
"rewards/margins": 4.150389194488525, |
|
"rewards/rejected": -4.418989658355713, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": -1.359634280204773, |
|
"eval_logits/rejected": 0.4307384192943573, |
|
"eval_logps/chosen": -248.32972717285156, |
|
"eval_logps/rejected": -347.9086608886719, |
|
"eval_loss": 0.5082926750183105, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -0.48149970173835754, |
|
"eval_rewards/margins": 0.749422013759613, |
|
"eval_rewards/rejected": -1.2309216260910034, |
|
"eval_runtime": 409.9823, |
|
"eval_samples_per_second": 9.81, |
|
"eval_steps_per_second": 0.307, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 18.630491734171713, |
|
"learning_rate": 4.141140257879319e-08, |
|
"logits/chosen": 0.14813140034675598, |
|
"logits/rejected": 0.9779118299484253, |
|
"logps/chosen": -279.84320068359375, |
|
"logps/rejected": -633.3928833007812, |
|
"loss": 0.2247, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.1574528068304062, |
|
"rewards/margins": 3.489206314086914, |
|
"rewards/rejected": -3.6466591358184814, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 28.497946746927017, |
|
"learning_rate": 2.4328749671846117e-08, |
|
"logits/chosen": -0.18863503634929657, |
|
"logits/rejected": 0.7909995317459106, |
|
"logps/chosen": -280.1761779785156, |
|
"logps/rejected": -809.5506591796875, |
|
"loss": 0.2107, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.3041425049304962, |
|
"rewards/margins": 4.856934547424316, |
|
"rewards/rejected": -5.161076545715332, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 23.671408143492002, |
|
"learning_rate": 1.1599808329836174e-08, |
|
"logits/chosen": 0.07416832447052002, |
|
"logits/rejected": 0.9995840787887573, |
|
"logps/chosen": -275.8365783691406, |
|
"logps/rejected": -751.8892822265625, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.24566996097564697, |
|
"rewards/margins": 4.163148403167725, |
|
"rewards/rejected": -4.408819198608398, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 27.856145151796408, |
|
"learning_rate": 3.4701487751534475e-09, |
|
"logits/chosen": -0.11121706664562225, |
|
"logits/rejected": 0.7320507168769836, |
|
"logps/chosen": -341.3576965332031, |
|
"logps/rejected": -890.82421875, |
|
"loss": 0.215, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.3052319586277008, |
|
"rewards/margins": 5.738637924194336, |
|
"rewards/rejected": -6.043869972229004, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 25.41636406288576, |
|
"learning_rate": 9.661062636148743e-11, |
|
"logits/chosen": -0.6346914172172546, |
|
"logits/rejected": 0.8026138544082642, |
|
"logps/chosen": -268.1880187988281, |
|
"logps/rejected": -801.5145263671875, |
|
"loss": 0.186, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.3312497138977051, |
|
"rewards/margins": 3.949364185333252, |
|
"rewards/rejected": -4.280614376068115, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 252, |
|
"total_flos": 0.0, |
|
"train_loss": 0.35703677506673903, |
|
"train_runtime": 4562.5722, |
|
"train_samples_per_second": 3.533, |
|
"train_steps_per_second": 0.055 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 252, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|