|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 350, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3067.8628948133914, |
|
"learning_rate": 4.9998992904271775e-08, |
|
"logits/chosen": -4.185730934143066, |
|
"logits/rejected": -4.509836196899414, |
|
"logps/chosen": -274.000732421875, |
|
"logps/rejected": -205.8054962158203, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3330.3974170986107, |
|
"learning_rate": 4.9899357349880975e-08, |
|
"logits/chosen": -4.211880207061768, |
|
"logits/rejected": -4.48573637008667, |
|
"logps/chosen": -318.31072998046875, |
|
"logps/rejected": -257.18267822265625, |
|
"loss": 0.7459, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.200405091047287, |
|
"rewards/margins": 0.10155472159385681, |
|
"rewards/rejected": 0.09885036945343018, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2932.727170813642, |
|
"learning_rate": 4.959823971496574e-08, |
|
"logits/chosen": -4.2464704513549805, |
|
"logits/rejected": -4.50115966796875, |
|
"logps/chosen": -304.53350830078125, |
|
"logps/rejected": -244.1282501220703, |
|
"loss": 0.6293, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.7030802965164185, |
|
"rewards/margins": 0.6052380800247192, |
|
"rewards/rejected": 0.09784229844808578, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2159.097276891197, |
|
"learning_rate": 4.9099071517396326e-08, |
|
"logits/chosen": -4.3018364906311035, |
|
"logits/rejected": -4.5636820793151855, |
|
"logps/chosen": -305.11822509765625, |
|
"logps/rejected": -258.89215087890625, |
|
"loss": 0.5093, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 1.3964869976043701, |
|
"rewards/margins": 0.9537334442138672, |
|
"rewards/rejected": 0.44275355339050293, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2233.10446662558, |
|
"learning_rate": 4.8405871765993426e-08, |
|
"logits/chosen": -4.304145812988281, |
|
"logits/rejected": -4.571420192718506, |
|
"logps/chosen": -293.4151916503906, |
|
"logps/rejected": -234.4054412841797, |
|
"loss": 0.4371, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 2.119215488433838, |
|
"rewards/margins": 1.3193193674087524, |
|
"rewards/rejected": 0.7998961806297302, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1863.9092640792912, |
|
"learning_rate": 4.7524221697560474e-08, |
|
"logits/chosen": -4.298985481262207, |
|
"logits/rejected": -4.545313835144043, |
|
"logps/chosen": -299.71026611328125, |
|
"logps/rejected": -252.57339477539062, |
|
"loss": 0.4054, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 3.015381336212158, |
|
"rewards/margins": 1.8283360004425049, |
|
"rewards/rejected": 1.1870452165603638, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1861.0742759245438, |
|
"learning_rate": 4.646121984004665e-08, |
|
"logits/chosen": -4.3018717765808105, |
|
"logits/rejected": -4.5299859046936035, |
|
"logps/chosen": -308.25457763671875, |
|
"logps/rejected": -261.1996154785156, |
|
"loss": 0.3815, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 3.097055673599243, |
|
"rewards/margins": 1.6846046447753906, |
|
"rewards/rejected": 1.412451148033142, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2083.1341477087894, |
|
"learning_rate": 4.522542485937369e-08, |
|
"logits/chosen": -4.417206764221191, |
|
"logits/rejected": -4.548245429992676, |
|
"logps/chosen": -285.4747009277344, |
|
"logps/rejected": -236.24136352539062, |
|
"loss": 0.3773, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 3.4294419288635254, |
|
"rewards/margins": 2.4485509395599365, |
|
"rewards/rejected": 0.9808910489082336, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1999.1118673285923, |
|
"learning_rate": 4.3826786650090273e-08, |
|
"logits/chosen": -4.271725177764893, |
|
"logits/rejected": -4.525103569030762, |
|
"logps/chosen": -292.2157897949219, |
|
"logps/rejected": -239.5623321533203, |
|
"loss": 0.3663, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 3.471898317337036, |
|
"rewards/margins": 2.5827386379241943, |
|
"rewards/rejected": 0.8891592025756836, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1543.0151245523064, |
|
"learning_rate": 4.2276566224671614e-08, |
|
"logits/chosen": -4.196888446807861, |
|
"logits/rejected": -4.430451393127441, |
|
"logps/chosen": -303.9364929199219, |
|
"logps/rejected": -258.19708251953125, |
|
"loss": 0.37, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 3.6267776489257812, |
|
"rewards/margins": 2.5005435943603516, |
|
"rewards/rejected": 1.1262344121932983, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2558.2358091969077, |
|
"learning_rate": 4.058724504646834e-08, |
|
"logits/chosen": -4.298203468322754, |
|
"logits/rejected": -4.51765251159668, |
|
"logps/chosen": -291.99151611328125, |
|
"logps/rejected": -240.97909545898438, |
|
"loss": 0.3573, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 3.8364264965057373, |
|
"rewards/margins": 2.6143250465393066, |
|
"rewards/rejected": 1.2221016883850098, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/chosen": -3.2259409427642822, |
|
"eval_logits/rejected": -3.2259409427642822, |
|
"eval_logps/chosen": -157.8415985107422, |
|
"eval_logps/rejected": -157.8415985107422, |
|
"eval_loss": 0.6931471824645996, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -2.2645912170410156, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -2.2645912170410156, |
|
"eval_runtime": 1.5044, |
|
"eval_samples_per_second": 0.665, |
|
"eval_steps_per_second": 0.665, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2075.8470964199623, |
|
"learning_rate": 3.8772424536302564e-08, |
|
"logits/chosen": -4.3160247802734375, |
|
"logits/rejected": -4.557186126708984, |
|
"logps/chosen": -299.556640625, |
|
"logps/rejected": -250.2120361328125, |
|
"loss": 0.3653, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 3.8744053840637207, |
|
"rewards/margins": 2.781764268875122, |
|
"rewards/rejected": 1.0926413536071777, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2129.2578794603846, |
|
"learning_rate": 3.6846716561824964e-08, |
|
"logits/chosen": -4.358242988586426, |
|
"logits/rejected": -4.6036834716796875, |
|
"logps/chosen": -288.9602966308594, |
|
"logps/rejected": -237.98257446289062, |
|
"loss": 0.346, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 3.973881959915161, |
|
"rewards/margins": 2.8389506340026855, |
|
"rewards/rejected": 1.1349313259124756, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1374.3088736284383, |
|
"learning_rate": 3.482562579134809e-08, |
|
"logits/chosen": -4.360684871673584, |
|
"logits/rejected": -4.608490467071533, |
|
"logps/chosen": -278.861572265625, |
|
"logps/rejected": -218.7367706298828, |
|
"loss": 0.3426, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 3.8384926319122314, |
|
"rewards/margins": 2.634833812713623, |
|
"rewards/rejected": 1.2036586999893188, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1741.7465783603645, |
|
"learning_rate": 3.272542485937369e-08, |
|
"logits/chosen": -4.276978969573975, |
|
"logits/rejected": -4.593733787536621, |
|
"logps/chosen": -296.0984191894531, |
|
"logps/rejected": -240.01248168945312, |
|
"loss": 0.3729, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 3.785256862640381, |
|
"rewards/margins": 2.9941701889038086, |
|
"rewards/rejected": 0.79108726978302, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1837.7137132104272, |
|
"learning_rate": 3.056302334890786e-08, |
|
"logits/chosen": -4.245262622833252, |
|
"logits/rejected": -4.510401725769043, |
|
"logps/chosen": -295.3984680175781, |
|
"logps/rejected": -250.73580932617188, |
|
"loss": 0.3235, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 4.011710166931152, |
|
"rewards/margins": 3.0462794303894043, |
|
"rewards/rejected": 0.9654304385185242, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1744.335126050233, |
|
"learning_rate": 2.8355831645441387e-08, |
|
"logits/chosen": -4.277425765991211, |
|
"logits/rejected": -4.570274829864502, |
|
"logps/chosen": -296.66839599609375, |
|
"logps/rejected": -235.6475372314453, |
|
"loss": 0.36, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 4.259499549865723, |
|
"rewards/margins": 3.127065658569336, |
|
"rewards/rejected": 1.1324341297149658, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1875.319827037545, |
|
"learning_rate": 2.6121620758762875e-08, |
|
"logits/chosen": -4.229983329772949, |
|
"logits/rejected": -4.467092990875244, |
|
"logps/chosen": -296.31683349609375, |
|
"logps/rejected": -241.3401336669922, |
|
"loss": 0.3474, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 4.343829154968262, |
|
"rewards/margins": 3.233609437942505, |
|
"rewards/rejected": 1.1102204322814941, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2082.5003671787076, |
|
"learning_rate": 2.3878379241237133e-08, |
|
"logits/chosen": -4.364750862121582, |
|
"logits/rejected": -4.597868919372559, |
|
"logps/chosen": -285.72869873046875, |
|
"logps/rejected": -241.40652465820312, |
|
"loss": 0.3417, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 4.1484293937683105, |
|
"rewards/margins": 3.0738511085510254, |
|
"rewards/rejected": 1.074578046798706, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1597.9774938638957, |
|
"learning_rate": 2.164416835455862e-08, |
|
"logits/chosen": -4.3281121253967285, |
|
"logits/rejected": -4.498069763183594, |
|
"logps/chosen": -308.14776611328125, |
|
"logps/rejected": -257.7415466308594, |
|
"loss": 0.2852, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 4.146700859069824, |
|
"rewards/margins": 3.202249526977539, |
|
"rewards/rejected": 0.9444509744644165, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1601.8580723204816, |
|
"learning_rate": 1.943697665109214e-08, |
|
"logits/chosen": -4.358348846435547, |
|
"logits/rejected": -4.601215839385986, |
|
"logps/chosen": -292.93658447265625, |
|
"logps/rejected": -249.59469604492188, |
|
"loss": 0.3184, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 4.194998741149902, |
|
"rewards/margins": 2.974621534347534, |
|
"rewards/rejected": 1.2203772068023682, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_logits/chosen": -3.2195205688476562, |
|
"eval_logits/rejected": -3.2195205688476562, |
|
"eval_logps/chosen": -157.37933349609375, |
|
"eval_logps/rejected": -157.37933349609375, |
|
"eval_loss": 0.6931471824645996, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -1.8023262023925781, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -1.8023262023925781, |
|
"eval_runtime": 1.4741, |
|
"eval_samples_per_second": 0.678, |
|
"eval_steps_per_second": 0.678, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1818.1510653253358, |
|
"learning_rate": 1.7274575140626317e-08, |
|
"logits/chosen": -4.293700218200684, |
|
"logits/rejected": -4.587708473205566, |
|
"logps/chosen": -306.94647216796875, |
|
"logps/rejected": -254.83981323242188, |
|
"loss": 0.3169, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 4.274092674255371, |
|
"rewards/margins": 3.556690216064453, |
|
"rewards/rejected": 0.7174022793769836, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 2084.9707047014217, |
|
"learning_rate": 1.517437420865191e-08, |
|
"logits/chosen": -4.2438554763793945, |
|
"logits/rejected": -4.590119361877441, |
|
"logps/chosen": -297.3277587890625, |
|
"logps/rejected": -225.09414672851562, |
|
"loss": 0.3117, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 4.186089515686035, |
|
"rewards/margins": 3.6873459815979004, |
|
"rewards/rejected": 0.4987434446811676, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1793.5243127965375, |
|
"learning_rate": 1.3153283438175034e-08, |
|
"logits/chosen": -4.3719801902771, |
|
"logits/rejected": -4.563234806060791, |
|
"logps/chosen": -281.373779296875, |
|
"logps/rejected": -226.25576782226562, |
|
"loss": 0.2879, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 3.584429979324341, |
|
"rewards/margins": 2.8807406425476074, |
|
"rewards/rejected": 0.7036892771720886, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1621.528952660571, |
|
"learning_rate": 1.1227575463697438e-08, |
|
"logits/chosen": -4.3936567306518555, |
|
"logits/rejected": -4.714280128479004, |
|
"logps/chosen": -258.6517639160156, |
|
"logps/rejected": -215.28759765625, |
|
"loss": 0.3042, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 3.862626552581787, |
|
"rewards/margins": 3.0625431537628174, |
|
"rewards/rejected": 0.8000835180282593, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 2231.5682374793205, |
|
"learning_rate": 9.412754953531663e-09, |
|
"logits/chosen": -4.34213924407959, |
|
"logits/rejected": -4.6162428855896, |
|
"logps/chosen": -278.9085388183594, |
|
"logps/rejected": -232.6056365966797, |
|
"loss": 0.3109, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 3.6670002937316895, |
|
"rewards/margins": 2.8657121658325195, |
|
"rewards/rejected": 0.8012881278991699, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1668.5476234310504, |
|
"learning_rate": 7.723433775328384e-09, |
|
"logits/chosen": -4.386145114898682, |
|
"logits/rejected": -4.632050037384033, |
|
"logps/chosen": -271.8704833984375, |
|
"logps/rejected": -240.48257446289062, |
|
"loss": 0.3039, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 3.5878806114196777, |
|
"rewards/margins": 2.9320101737976074, |
|
"rewards/rejected": 0.6558703184127808, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1726.7631750123023, |
|
"learning_rate": 6.173213349909728e-09, |
|
"logits/chosen": -4.517698764801025, |
|
"logits/rejected": -4.687317848205566, |
|
"logps/chosen": -273.4754943847656, |
|
"logps/rejected": -228.2833709716797, |
|
"loss": 0.3356, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 3.6022744178771973, |
|
"rewards/margins": 2.6589503288269043, |
|
"rewards/rejected": 0.9433239698410034, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1197.1122441391342, |
|
"learning_rate": 4.7745751406263165e-09, |
|
"logits/chosen": -4.299304008483887, |
|
"logits/rejected": -4.589285850524902, |
|
"logps/chosen": -274.9901123046875, |
|
"logps/rejected": -229.76449584960938, |
|
"loss": 0.2791, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 4.094995021820068, |
|
"rewards/margins": 3.0975327491760254, |
|
"rewards/rejected": 0.997462272644043, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 2356.4193384705377, |
|
"learning_rate": 3.5387801599533474e-09, |
|
"logits/chosen": -4.320891857147217, |
|
"logits/rejected": -4.508334636688232, |
|
"logps/chosen": -282.45013427734375, |
|
"logps/rejected": -236.50424194335938, |
|
"loss": 0.3316, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 4.115664958953857, |
|
"rewards/margins": 3.2731566429138184, |
|
"rewards/rejected": 0.8425084948539734, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1485.14332328563, |
|
"learning_rate": 2.475778302439524e-09, |
|
"logits/chosen": -4.295617580413818, |
|
"logits/rejected": -4.5400543212890625, |
|
"logps/chosen": -298.4153137207031, |
|
"logps/rejected": -240.1478271484375, |
|
"loss": 0.3594, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 4.416214942932129, |
|
"rewards/margins": 3.1984994411468506, |
|
"rewards/rejected": 1.2177152633666992, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_logits/chosen": -3.220174551010132, |
|
"eval_logits/rejected": -3.220174551010132, |
|
"eval_logps/chosen": -157.367431640625, |
|
"eval_logps/rejected": -157.367431640625, |
|
"eval_loss": 0.6931471824645996, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -1.7904319763183594, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -1.7904319763183594, |
|
"eval_runtime": 1.47, |
|
"eval_samples_per_second": 0.68, |
|
"eval_steps_per_second": 0.68, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 2625.0873445651387, |
|
"learning_rate": 1.5941282340065698e-09, |
|
"logits/chosen": -4.43851900100708, |
|
"logits/rejected": -4.580752372741699, |
|
"logps/chosen": -262.37445068359375, |
|
"logps/rejected": -226.46572875976562, |
|
"loss": 0.3007, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 3.741738796234131, |
|
"rewards/margins": 2.9144444465637207, |
|
"rewards/rejected": 0.8272944688796997, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1589.6112135444553, |
|
"learning_rate": 9.009284826036689e-10, |
|
"logits/chosen": -4.277141094207764, |
|
"logits/rejected": -4.5314412117004395, |
|
"logps/chosen": -292.65875244140625, |
|
"logps/rejected": -243.8509063720703, |
|
"loss": 0.3277, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 4.2250542640686035, |
|
"rewards/margins": 3.1320207118988037, |
|
"rewards/rejected": 1.093034029006958, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 2192.855370501752, |
|
"learning_rate": 4.017602850342583e-10, |
|
"logits/chosen": -4.330888271331787, |
|
"logits/rejected": -4.536975383758545, |
|
"logps/chosen": -305.5764465332031, |
|
"logps/rejected": -252.0467529296875, |
|
"loss": 0.3203, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 4.009243488311768, |
|
"rewards/margins": 3.056270122528076, |
|
"rewards/rejected": 0.9529730677604675, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 2158.7231383937637, |
|
"learning_rate": 1.0064265011902328e-10, |
|
"logits/chosen": -4.319821357727051, |
|
"logits/rejected": -4.614516735076904, |
|
"logps/chosen": -285.194091796875, |
|
"logps/rejected": -227.5124053955078, |
|
"loss": 0.3239, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 3.751185178756714, |
|
"rewards/margins": 2.758882522583008, |
|
"rewards/rejected": 0.9923027753829956, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1350.3403367664616, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -4.290497779846191, |
|
"logits/rejected": -4.4949870109558105, |
|
"logps/chosen": -291.93768310546875, |
|
"logps/rejected": -244.3520965576172, |
|
"loss": 0.3142, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 4.083470344543457, |
|
"rewards/margins": 3.2036800384521484, |
|
"rewards/rejected": 0.8797903060913086, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 350, |
|
"total_flos": 0.0, |
|
"train_loss": 0.36299856867109026, |
|
"train_runtime": 5294.123, |
|
"train_samples_per_second": 8.454, |
|
"train_steps_per_second": 0.066 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 350, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|