|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 391, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2089.5337638761494, |
|
"learning_rate": 2.5e-09, |
|
"logits/chosen": -4.623842239379883, |
|
"logits/rejected": -4.85917854309082, |
|
"logps/chosen": -239.31422424316406, |
|
"logps/rejected": -207.56365966796875, |
|
"loss": 0.6952, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2112.4857671472687, |
|
"learning_rate": 2.5e-08, |
|
"logits/chosen": -4.333562850952148, |
|
"logits/rejected": -4.643319129943848, |
|
"logps/chosen": -265.2981262207031, |
|
"logps/rejected": -215.68804931640625, |
|
"loss": 0.7355, |
|
"rewards/accuracies": 0.3888888955116272, |
|
"rewards/chosen": -0.09561138600111008, |
|
"rewards/margins": -0.10567205399274826, |
|
"rewards/rejected": 0.010060659609735012, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1939.2525079641944, |
|
"learning_rate": 5e-08, |
|
"logits/chosen": -4.508406162261963, |
|
"logits/rejected": -4.7436203956604, |
|
"logps/chosen": -267.76934814453125, |
|
"logps/rejected": -216.88119506835938, |
|
"loss": 0.6656, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.08529385179281235, |
|
"rewards/margins": 0.22122922539710999, |
|
"rewards/rejected": -0.13593538105487823, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1485.5526937989268, |
|
"learning_rate": 7.5e-08, |
|
"logits/chosen": -4.591097354888916, |
|
"logits/rejected": -4.771042823791504, |
|
"logps/chosen": -257.5138244628906, |
|
"logps/rejected": -215.06607055664062, |
|
"loss": 0.4916, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.5094950795173645, |
|
"rewards/margins": 0.7761520147323608, |
|
"rewards/rejected": -0.2666569650173187, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1059.7800988486467, |
|
"learning_rate": 1e-07, |
|
"logits/chosen": -4.61653995513916, |
|
"logits/rejected": -4.705571174621582, |
|
"logps/chosen": -250.05783081054688, |
|
"logps/rejected": -220.47665405273438, |
|
"loss": 0.3139, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.7706722021102905, |
|
"rewards/margins": 2.1734442710876465, |
|
"rewards/rejected": -0.4027720093727112, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 837.9194721075112, |
|
"learning_rate": 9.979985922607475e-08, |
|
"logits/chosen": -4.497745513916016, |
|
"logits/rejected": -4.6963934898376465, |
|
"logps/chosen": -266.4471740722656, |
|
"logps/rejected": -227.05908203125, |
|
"loss": 0.2475, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.7611026763916016, |
|
"rewards/margins": 3.3548762798309326, |
|
"rewards/rejected": -0.5937734246253967, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 912.9246800740217, |
|
"learning_rate": 9.92010391574745e-08, |
|
"logits/chosen": -4.585003852844238, |
|
"logits/rejected": -4.705927848815918, |
|
"logps/chosen": -235.20071411132812, |
|
"logps/rejected": -217.2942352294922, |
|
"loss": 0.2013, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 2.9608712196350098, |
|
"rewards/margins": 4.097281455993652, |
|
"rewards/rejected": -1.1364095211029053, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 634.4685088072516, |
|
"learning_rate": 9.820833372667812e-08, |
|
"logits/chosen": -4.462503910064697, |
|
"logits/rejected": -4.6857805252075195, |
|
"logps/chosen": -246.69186401367188, |
|
"logps/rejected": -220.57937622070312, |
|
"loss": 0.1884, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 3.2250447273254395, |
|
"rewards/margins": 4.633510112762451, |
|
"rewards/rejected": -1.4084659814834595, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 468.8604524803785, |
|
"learning_rate": 9.682969016701356e-08, |
|
"logits/chosen": -4.449667453765869, |
|
"logits/rejected": -4.664923667907715, |
|
"logps/chosen": -253.8452606201172, |
|
"logps/rejected": -233.0582733154297, |
|
"loss": 0.1796, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 3.6873557567596436, |
|
"rewards/margins": 5.057134628295898, |
|
"rewards/rejected": -1.3697788715362549, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 958.5162002808887, |
|
"learning_rate": 9.507614539004081e-08, |
|
"logits/chosen": -4.535862445831299, |
|
"logits/rejected": -4.733909606933594, |
|
"logps/chosen": -243.66317749023438, |
|
"logps/rejected": -206.82388305664062, |
|
"loss": 0.1733, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 3.7747840881347656, |
|
"rewards/margins": 5.771730899810791, |
|
"rewards/rejected": -1.9969465732574463, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 928.8107393024507, |
|
"learning_rate": 9.296173762811083e-08, |
|
"logits/chosen": -4.406120777130127, |
|
"logits/rejected": -4.672289848327637, |
|
"logps/chosen": -248.62539672851562, |
|
"logps/rejected": -231.67758178710938, |
|
"loss": 0.1833, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 4.046411037445068, |
|
"rewards/margins": 6.330681324005127, |
|
"rewards/rejected": -2.2842705249786377, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -4.5091094970703125, |
|
"eval_logits/rejected": -4.724847316741943, |
|
"eval_logps/chosen": -389.6205749511719, |
|
"eval_logps/rejected": -515.4835205078125, |
|
"eval_loss": 1.8368816375732422, |
|
"eval_rewards/accuracies": 0.375, |
|
"eval_rewards/chosen": 0.4269474744796753, |
|
"eval_rewards/margins": -1.0251328945159912, |
|
"eval_rewards/rejected": 1.452080249786377, |
|
"eval_runtime": 97.8781, |
|
"eval_samples_per_second": 20.434, |
|
"eval_steps_per_second": 0.327, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 759.2228167566217, |
|
"learning_rate": 9.050339404945832e-08, |
|
"logits/chosen": -4.45731258392334, |
|
"logits/rejected": -4.700920581817627, |
|
"logps/chosen": -240.77047729492188, |
|
"logps/rejected": -220.7100830078125, |
|
"loss": 0.1645, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 4.207625865936279, |
|
"rewards/margins": 6.219720363616943, |
|
"rewards/rejected": -2.012094259262085, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 615.4147404438793, |
|
"learning_rate": 8.77207952455395e-08, |
|
"logits/chosen": -4.41110897064209, |
|
"logits/rejected": -4.632037162780762, |
|
"logps/chosen": -266.83837890625, |
|
"logps/rejected": -232.83670043945312, |
|
"loss": 0.1648, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 4.575605869293213, |
|
"rewards/margins": 6.689634799957275, |
|
"rewards/rejected": -2.1140289306640625, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1154.0005388666061, |
|
"learning_rate": 8.463621767547997e-08, |
|
"logits/chosen": -4.474618434906006, |
|
"logits/rejected": -4.724778652191162, |
|
"logps/chosen": -250.192626953125, |
|
"logps/rejected": -220.4983673095703, |
|
"loss": 0.1701, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 4.125626564025879, |
|
"rewards/margins": 6.710474967956543, |
|
"rewards/rejected": -2.5848488807678223, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 510.3907389648352, |
|
"learning_rate": 8.127435532896387e-08, |
|
"logits/chosen": -4.497905254364014, |
|
"logits/rejected": -4.757509708404541, |
|
"logps/chosen": -276.1819763183594, |
|
"logps/rejected": -237.9337921142578, |
|
"loss": 0.169, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 4.006547451019287, |
|
"rewards/margins": 6.8867011070251465, |
|
"rewards/rejected": -2.880154848098755, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 616.3949177365913, |
|
"learning_rate": 7.766212203526569e-08, |
|
"logits/chosen": -4.483530521392822, |
|
"logits/rejected": -4.700650691986084, |
|
"logps/chosen": -244.07785034179688, |
|
"logps/rejected": -224.0546417236328, |
|
"loss": 0.1668, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 4.170205116271973, |
|
"rewards/margins": 6.6378936767578125, |
|
"rewards/rejected": -2.4676883220672607, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 759.2665515018776, |
|
"learning_rate": 7.382843600106538e-08, |
|
"logits/chosen": -4.538361072540283, |
|
"logits/rejected": -4.685894966125488, |
|
"logps/chosen": -243.0140380859375, |
|
"logps/rejected": -220.0860137939453, |
|
"loss": 0.1473, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 4.22122859954834, |
|
"rewards/margins": 6.459697723388672, |
|
"rewards/rejected": -2.238469362258911, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 688.1440407430587, |
|
"learning_rate": 6.980398830195784e-08, |
|
"logits/chosen": -4.427027702331543, |
|
"logits/rejected": -4.675489902496338, |
|
"logps/chosen": -251.1200408935547, |
|
"logps/rejected": -225.5527801513672, |
|
"loss": 0.1434, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 4.977096080780029, |
|
"rewards/margins": 7.851990699768066, |
|
"rewards/rejected": -2.874894618988037, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 572.2642343737211, |
|
"learning_rate": 6.562099718102787e-08, |
|
"logits/chosen": -4.530760765075684, |
|
"logits/rejected": -4.731973171234131, |
|
"logps/chosen": -228.52304077148438, |
|
"logps/rejected": -202.01510620117188, |
|
"loss": 0.1552, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 4.1703057289123535, |
|
"rewards/margins": 7.167737007141113, |
|
"rewards/rejected": -2.9974308013916016, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 887.5255514170451, |
|
"learning_rate": 6.131295012148612e-08, |
|
"logits/chosen": -4.499785423278809, |
|
"logits/rejected": -4.621634006500244, |
|
"logps/chosen": -251.9990692138672, |
|
"logps/rejected": -240.3909149169922, |
|
"loss": 0.1634, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 4.446890830993652, |
|
"rewards/margins": 7.0593156814575195, |
|
"rewards/rejected": -2.6124250888824463, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 622.6699519046258, |
|
"learning_rate": 5.691433575823665e-08, |
|
"logits/chosen": -4.48135232925415, |
|
"logits/rejected": -4.617772102355957, |
|
"logps/chosen": -243.34725952148438, |
|
"logps/rejected": -220.18392944335938, |
|
"loss": 0.1786, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 4.395993709564209, |
|
"rewards/margins": 6.814687252044678, |
|
"rewards/rejected": -2.4186930656433105, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_logits/chosen": -4.4697723388671875, |
|
"eval_logits/rejected": -4.687928199768066, |
|
"eval_logps/chosen": -389.2646179199219, |
|
"eval_logps/rejected": -514.896484375, |
|
"eval_loss": 2.016343355178833, |
|
"eval_rewards/accuracies": 0.375, |
|
"eval_rewards/chosen": 0.6049206256866455, |
|
"eval_rewards/margins": -1.140692114830017, |
|
"eval_rewards/rejected": 1.745612621307373, |
|
"eval_runtime": 97.8297, |
|
"eval_samples_per_second": 20.444, |
|
"eval_steps_per_second": 0.327, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 473.28271440178054, |
|
"learning_rate": 5.2460367774593905e-08, |
|
"logits/chosen": -4.541897773742676, |
|
"logits/rejected": -4.740262031555176, |
|
"logps/chosen": -255.6215362548828, |
|
"logps/rejected": -234.78518676757812, |
|
"loss": 0.1232, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 5.131775856018066, |
|
"rewards/margins": 8.201360702514648, |
|
"rewards/rejected": -3.069584846496582, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 670.7484200931372, |
|
"learning_rate": 4.798670299452925e-08, |
|
"logits/chosen": -4.39837646484375, |
|
"logits/rejected": -4.688643455505371, |
|
"logps/chosen": -253.91787719726562, |
|
"logps/rejected": -231.707275390625, |
|
"loss": 0.1672, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 4.446724891662598, |
|
"rewards/margins": 7.963796138763428, |
|
"rewards/rejected": -3.517070770263672, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 763.2480410824999, |
|
"learning_rate": 4.3529155927297226e-08, |
|
"logits/chosen": -4.47940731048584, |
|
"logits/rejected": -4.748034954071045, |
|
"logps/chosen": -252.20700073242188, |
|
"logps/rejected": -230.70425415039062, |
|
"loss": 0.1691, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 4.38104248046875, |
|
"rewards/margins": 7.8776116371154785, |
|
"rewards/rejected": -3.4965691566467285, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 547.6628902362396, |
|
"learning_rate": 3.9123412049691636e-08, |
|
"logits/chosen": -4.450512886047363, |
|
"logits/rejected": -4.651386260986328, |
|
"logps/chosen": -263.7304382324219, |
|
"logps/rejected": -227.78604125976562, |
|
"loss": 0.1511, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 4.546363353729248, |
|
"rewards/margins": 7.972568511962891, |
|
"rewards/rejected": -3.4262046813964844, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 706.6026662780071, |
|
"learning_rate": 3.480474212128766e-08, |
|
"logits/chosen": -4.571944236755371, |
|
"logits/rejected": -4.786678791046143, |
|
"logps/chosen": -240.4440155029297, |
|
"logps/rejected": -212.46694946289062, |
|
"loss": 0.1403, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 4.128727912902832, |
|
"rewards/margins": 6.557607173919678, |
|
"rewards/rejected": -2.428879976272583, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 957.1848027668926, |
|
"learning_rate": 3.060771981975726e-08, |
|
"logits/chosen": -4.445496082305908, |
|
"logits/rejected": -4.674472808837891, |
|
"logps/chosen": -244.96701049804688, |
|
"logps/rejected": -227.3423614501953, |
|
"loss": 0.1506, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 4.540780067443848, |
|
"rewards/margins": 8.337722778320312, |
|
"rewards/rejected": -3.7969424724578857, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1053.6903730937584, |
|
"learning_rate": 2.6565944956764818e-08, |
|
"logits/chosen": -4.53262996673584, |
|
"logits/rejected": -4.71115255355835, |
|
"logps/chosen": -252.1263427734375, |
|
"logps/rejected": -221.7955322265625, |
|
"loss": 0.1551, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 4.453462600708008, |
|
"rewards/margins": 7.838715553283691, |
|
"rewards/rejected": -3.385251998901367, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 661.3688193511013, |
|
"learning_rate": 2.2711774490274766e-08, |
|
"logits/chosen": -4.489356994628906, |
|
"logits/rejected": -4.654987812042236, |
|
"logps/chosen": -254.680908203125, |
|
"logps/rejected": -248.8947296142578, |
|
"loss": 0.1253, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 4.912972450256348, |
|
"rewards/margins": 7.8906402587890625, |
|
"rewards/rejected": -2.9776668548583984, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1908.0202284500367, |
|
"learning_rate": 1.9076063486687256e-08, |
|
"logits/chosen": -4.361441135406494, |
|
"logits/rejected": -4.647955417633057, |
|
"logps/chosen": -262.6406555175781, |
|
"logps/rejected": -221.6370086669922, |
|
"loss": 0.1481, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 4.983874320983887, |
|
"rewards/margins": 7.9876885414123535, |
|
"rewards/rejected": -3.0038130283355713, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 734.7948711655655, |
|
"learning_rate": 1.5687918106563324e-08, |
|
"logits/chosen": -4.47251033782959, |
|
"logits/rejected": -4.634402275085449, |
|
"logps/chosen": -243.4433135986328, |
|
"logps/rejected": -222.4509735107422, |
|
"loss": 0.1648, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 4.982421398162842, |
|
"rewards/margins": 8.225171089172363, |
|
"rewards/rejected": -3.2427496910095215, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_logits/chosen": -4.4838151931762695, |
|
"eval_logits/rejected": -4.69987678527832, |
|
"eval_logps/chosen": -390.0736999511719, |
|
"eval_logps/rejected": -516.0419921875, |
|
"eval_loss": 1.9448436498641968, |
|
"eval_rewards/accuracies": 0.3984375, |
|
"eval_rewards/chosen": 0.2003953605890274, |
|
"eval_rewards/margins": -0.972442626953125, |
|
"eval_rewards/rejected": 1.1728378534317017, |
|
"eval_runtime": 97.9077, |
|
"eval_samples_per_second": 20.427, |
|
"eval_steps_per_second": 0.327, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 701.7075104371141, |
|
"learning_rate": 1.257446259144494e-08, |
|
"logits/chosen": -4.397843360900879, |
|
"logits/rejected": -4.662208557128906, |
|
"logps/chosen": -251.11611938476562, |
|
"logps/rejected": -229.4883270263672, |
|
"loss": 0.1577, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 5.011745929718018, |
|
"rewards/margins": 8.656941413879395, |
|
"rewards/rejected": -3.6451950073242188, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1065.7098800029996, |
|
"learning_rate": 9.760622117187234e-09, |
|
"logits/chosen": -4.4547929763793945, |
|
"logits/rejected": -4.7404327392578125, |
|
"logps/chosen": -235.986083984375, |
|
"logps/rejected": -213.1405029296875, |
|
"loss": 0.1434, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 4.589102745056152, |
|
"rewards/margins": 7.9336113929748535, |
|
"rewards/rejected": -3.344507932662964, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 766.1086061468453, |
|
"learning_rate": 7.2689232521989885e-09, |
|
"logits/chosen": -4.407891750335693, |
|
"logits/rejected": -4.665772914886475, |
|
"logps/chosen": -258.3376159667969, |
|
"logps/rejected": -240.0522003173828, |
|
"loss": 0.1405, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 4.90563440322876, |
|
"rewards/margins": 8.58189868927002, |
|
"rewards/rejected": -3.6762642860412598, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 626.4348398301977, |
|
"learning_rate": 5.119313618049309e-09, |
|
"logits/chosen": -4.429708003997803, |
|
"logits/rejected": -4.715014457702637, |
|
"logps/chosen": -263.54986572265625, |
|
"logps/rejected": -213.69723510742188, |
|
"loss": 0.1494, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 5.298083305358887, |
|
"rewards/margins": 8.755678176879883, |
|
"rewards/rejected": -3.457595109939575, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 564.1193509438065, |
|
"learning_rate": 3.3290021961708158e-09, |
|
"logits/chosen": -4.445944309234619, |
|
"logits/rejected": -4.576190948486328, |
|
"logps/chosen": -247.17697143554688, |
|
"logps/rejected": -233.48477172851562, |
|
"loss": 0.1576, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 4.14711332321167, |
|
"rewards/margins": 7.0045037269592285, |
|
"rewards/rejected": -2.8573899269104004, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 651.5911217726903, |
|
"learning_rate": 1.9123215591052013e-09, |
|
"logits/chosen": -4.436100482940674, |
|
"logits/rejected": -4.62412166595459, |
|
"logps/chosen": -253.4558563232422, |
|
"logps/rejected": -233.94869995117188, |
|
"loss": 0.1582, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 4.089536190032959, |
|
"rewards/margins": 7.332627296447754, |
|
"rewards/rejected": -3.243091583251953, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 610.6809759122384, |
|
"learning_rate": 8.806131292167618e-10, |
|
"logits/chosen": -4.4610724449157715, |
|
"logits/rejected": -4.592678070068359, |
|
"logps/chosen": -247.8229217529297, |
|
"logps/rejected": -242.95114135742188, |
|
"loss": 0.1649, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 4.499147415161133, |
|
"rewards/margins": 7.5511980056762695, |
|
"rewards/rejected": -3.052050828933716, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 661.3141861471707, |
|
"learning_rate": 2.4213638345040867e-10, |
|
"logits/chosen": -4.557965278625488, |
|
"logits/rejected": -4.776811122894287, |
|
"logps/chosen": -252.97561645507812, |
|
"logps/rejected": -227.4269561767578, |
|
"loss": 0.1552, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 4.553546905517578, |
|
"rewards/margins": 8.004728317260742, |
|
"rewards/rejected": -3.4511806964874268, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1178.6266155115975, |
|
"learning_rate": 2.0027310073833516e-12, |
|
"logits/chosen": -4.544768810272217, |
|
"logits/rejected": -4.75381326675415, |
|
"logps/chosen": -250.1166534423828, |
|
"logps/rejected": -226.92916870117188, |
|
"loss": 0.1545, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 4.777965068817139, |
|
"rewards/margins": 7.9850053787231445, |
|
"rewards/rejected": -3.2070395946502686, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 391, |
|
"total_flos": 0.0, |
|
"train_loss": 0.20245660769055263, |
|
"train_runtime": 6146.5091, |
|
"train_samples_per_second": 8.135, |
|
"train_steps_per_second": 0.064 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 391, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|