|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 504, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02976190476190476, |
|
"grad_norm": 1643.52392578125, |
|
"learning_rate": 5.000000000000001e-07, |
|
"log_odds_chosen": -0.34639421105384827, |
|
"log_odds_ratio": -1.0579421520233154, |
|
"logits/chosen": 125.67509460449219, |
|
"logits/rejected": 180.79092407226562, |
|
"logps/chosen": -15.30119514465332, |
|
"logps/rejected": -14.954809188842773, |
|
"loss": 15.2275, |
|
"nll_loss": 14.854708671569824, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -7.65059757232666, |
|
"rewards/margins": -0.1731930673122406, |
|
"rewards/rejected": -7.477404594421387, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.05952380952380952, |
|
"grad_norm": 859.1522827148438, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"log_odds_chosen": -0.04612647369503975, |
|
"log_odds_ratio": -0.8707455396652222, |
|
"logits/chosen": 226.11355590820312, |
|
"logits/rejected": 262.4227600097656, |
|
"logps/chosen": -10.240728378295898, |
|
"logps/rejected": -10.194613456726074, |
|
"loss": 10.6297, |
|
"nll_loss": 10.136636734008789, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -5.120364189147949, |
|
"rewards/margins": -0.023057078942656517, |
|
"rewards/rejected": -5.097306728363037, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08928571428571429, |
|
"grad_norm": 192.85568237304688, |
|
"learning_rate": 1.5e-06, |
|
"log_odds_chosen": -0.1222303956747055, |
|
"log_odds_ratio": -0.8104267120361328, |
|
"logits/chosen": 293.97882080078125, |
|
"logits/rejected": 282.1849670410156, |
|
"logps/chosen": -5.908555507659912, |
|
"logps/rejected": -5.787174224853516, |
|
"loss": 6.2671, |
|
"nll_loss": 5.8525919914245605, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -2.954277753829956, |
|
"rewards/margins": -0.06069115549325943, |
|
"rewards/rejected": -2.893587112426758, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.11904761904761904, |
|
"grad_norm": 181.04627990722656, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"log_odds_chosen": 0.1651725322008133, |
|
"log_odds_ratio": -0.8192933797836304, |
|
"logits/chosen": 280.179931640625, |
|
"logits/rejected": 263.22296142578125, |
|
"logps/chosen": -3.191753625869751, |
|
"logps/rejected": -3.352200984954834, |
|
"loss": 3.8066, |
|
"nll_loss": 3.3215103149414062, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.5958768129348755, |
|
"rewards/margins": 0.08022388815879822, |
|
"rewards/rejected": -1.676100492477417, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1488095238095238, |
|
"grad_norm": 124.88711547851562, |
|
"learning_rate": 2.5e-06, |
|
"log_odds_chosen": 0.21610824763774872, |
|
"log_odds_ratio": -0.6467095613479614, |
|
"logits/chosen": 337.0777587890625, |
|
"logits/rejected": 361.3624267578125, |
|
"logps/chosen": -2.110491991043091, |
|
"logps/rejected": -2.2703230381011963, |
|
"loss": 2.9167, |
|
"nll_loss": 2.676313638687134, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.0552459955215454, |
|
"rewards/margins": 0.07991557568311691, |
|
"rewards/rejected": -1.1351615190505981, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.17857142857142858, |
|
"grad_norm": 120.593994140625, |
|
"learning_rate": 3e-06, |
|
"log_odds_chosen": 0.2517230808734894, |
|
"log_odds_ratio": -0.639795184135437, |
|
"logits/chosen": 322.35894775390625, |
|
"logits/rejected": 409.290771484375, |
|
"logps/chosen": -2.053133964538574, |
|
"logps/rejected": -2.262042999267578, |
|
"loss": 2.4846, |
|
"nll_loss": 2.1683788299560547, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.026566982269287, |
|
"rewards/margins": 0.10445437580347061, |
|
"rewards/rejected": -1.131021499633789, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.20833333333333334, |
|
"grad_norm": 45.12126541137695, |
|
"learning_rate": 3.5e-06, |
|
"log_odds_chosen": 0.5470780730247498, |
|
"log_odds_ratio": -0.5346588492393494, |
|
"logits/chosen": 374.33734130859375, |
|
"logits/rejected": 398.3028259277344, |
|
"logps/chosen": -1.4850542545318604, |
|
"logps/rejected": -1.9500999450683594, |
|
"loss": 2.3067, |
|
"nll_loss": 1.7573463916778564, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7425271272659302, |
|
"rewards/margins": 0.23252280056476593, |
|
"rewards/rejected": -0.9750499725341797, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.23809523809523808, |
|
"grad_norm": 34.55175018310547, |
|
"learning_rate": 4.000000000000001e-06, |
|
"log_odds_chosen": 0.48626986145973206, |
|
"log_odds_ratio": -0.5875475406646729, |
|
"logits/chosen": 343.8048400878906, |
|
"logits/rejected": 384.3130187988281, |
|
"logps/chosen": -1.6461362838745117, |
|
"logps/rejected": -2.0677056312561035, |
|
"loss": 2.2439, |
|
"nll_loss": 1.988368272781372, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8230681419372559, |
|
"rewards/margins": 0.2107846736907959, |
|
"rewards/rejected": -1.0338528156280518, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.26785714285714285, |
|
"grad_norm": 45.629512786865234, |
|
"learning_rate": 4.5e-06, |
|
"log_odds_chosen": -0.03384453058242798, |
|
"log_odds_ratio": -0.7663249969482422, |
|
"logits/chosen": 391.72265625, |
|
"logits/rejected": 376.98858642578125, |
|
"logps/chosen": -1.8141120672225952, |
|
"logps/rejected": -1.77732253074646, |
|
"loss": 2.1803, |
|
"nll_loss": 1.894079566001892, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.9070560336112976, |
|
"rewards/margins": -0.018394792452454567, |
|
"rewards/rejected": -0.88866126537323, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2976190476190476, |
|
"grad_norm": 47.596744537353516, |
|
"learning_rate": 5e-06, |
|
"log_odds_chosen": 0.6274509429931641, |
|
"log_odds_ratio": -0.5308854579925537, |
|
"logits/chosen": 436.2086486816406, |
|
"logits/rejected": 442.98974609375, |
|
"logps/chosen": -1.5697168111801147, |
|
"logps/rejected": -2.105163097381592, |
|
"loss": 2.1885, |
|
"nll_loss": 2.0384533405303955, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7848584055900574, |
|
"rewards/margins": 0.26772308349609375, |
|
"rewards/rejected": -1.052581548690796, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3273809523809524, |
|
"grad_norm": 35.20968246459961, |
|
"learning_rate": 5.500000000000001e-06, |
|
"log_odds_chosen": 0.3415950834751129, |
|
"log_odds_ratio": -0.6474028825759888, |
|
"logits/chosen": 370.7908020019531, |
|
"logits/rejected": 369.24395751953125, |
|
"logps/chosen": -1.539645791053772, |
|
"logps/rejected": -1.8215446472167969, |
|
"loss": 2.2186, |
|
"nll_loss": 1.9263942241668701, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.769822895526886, |
|
"rewards/margins": 0.14094945788383484, |
|
"rewards/rejected": -0.9107723236083984, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 56.7140998840332, |
|
"learning_rate": 6e-06, |
|
"log_odds_chosen": 0.23950794339179993, |
|
"log_odds_ratio": -0.634242057800293, |
|
"logits/chosen": 393.73089599609375, |
|
"logits/rejected": 364.10833740234375, |
|
"logps/chosen": -1.4558594226837158, |
|
"logps/rejected": -1.6270105838775635, |
|
"loss": 2.0389, |
|
"nll_loss": 2.0331270694732666, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7279297113418579, |
|
"rewards/margins": 0.08557556569576263, |
|
"rewards/rejected": -0.8135052919387817, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3869047619047619, |
|
"grad_norm": 35.615726470947266, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"log_odds_chosen": 0.3183743357658386, |
|
"log_odds_ratio": -0.6254990696907043, |
|
"logits/chosen": 390.70111083984375, |
|
"logits/rejected": 394.5395812988281, |
|
"logps/chosen": -1.4384119510650635, |
|
"logps/rejected": -1.6543052196502686, |
|
"loss": 1.9284, |
|
"nll_loss": 1.6583305597305298, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7192059755325317, |
|
"rewards/margins": 0.10794667154550552, |
|
"rewards/rejected": -0.8271526098251343, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 39.04438018798828, |
|
"learning_rate": 7e-06, |
|
"log_odds_chosen": 0.40005454421043396, |
|
"log_odds_ratio": -0.5705705881118774, |
|
"logits/chosen": 397.2193603515625, |
|
"logits/rejected": 392.36126708984375, |
|
"logps/chosen": -1.761604905128479, |
|
"logps/rejected": -2.098273515701294, |
|
"loss": 2.0169, |
|
"nll_loss": 1.894805908203125, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8808024525642395, |
|
"rewards/margins": 0.16833437979221344, |
|
"rewards/rejected": -1.049136757850647, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.44642857142857145, |
|
"grad_norm": 81.70245361328125, |
|
"learning_rate": 7.500000000000001e-06, |
|
"log_odds_chosen": 0.6616519689559937, |
|
"log_odds_ratio": -0.5056720972061157, |
|
"logits/chosen": 399.26202392578125, |
|
"logits/rejected": 387.3080139160156, |
|
"logps/chosen": -1.2724144458770752, |
|
"logps/rejected": -1.7848689556121826, |
|
"loss": 1.9258, |
|
"nll_loss": 1.534188985824585, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6362072229385376, |
|
"rewards/margins": 0.2562272548675537, |
|
"rewards/rejected": -0.8924344778060913, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 50.36211395263672, |
|
"learning_rate": 8.000000000000001e-06, |
|
"log_odds_chosen": 0.6271190643310547, |
|
"log_odds_ratio": -0.5201026797294617, |
|
"logits/chosen": 406.4871520996094, |
|
"logits/rejected": 419.87884521484375, |
|
"logps/chosen": -1.585078239440918, |
|
"logps/rejected": -2.038538694381714, |
|
"loss": 2.0136, |
|
"nll_loss": 1.7463384866714478, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.792539119720459, |
|
"rewards/margins": 0.22673015296459198, |
|
"rewards/rejected": -1.019269347190857, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5059523809523809, |
|
"grad_norm": 63.27934265136719, |
|
"learning_rate": 8.5e-06, |
|
"log_odds_chosen": -0.1369583010673523, |
|
"log_odds_ratio": -0.9432764053344727, |
|
"logits/chosen": 416.71893310546875, |
|
"logits/rejected": 396.90789794921875, |
|
"logps/chosen": -2.0069127082824707, |
|
"logps/rejected": -1.7957664728164673, |
|
"loss": 2.0904, |
|
"nll_loss": 2.148869514465332, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0034563541412354, |
|
"rewards/margins": -0.10557299852371216, |
|
"rewards/rejected": -0.8978832364082336, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.5357142857142857, |
|
"grad_norm": 43.4862060546875, |
|
"learning_rate": 9e-06, |
|
"log_odds_chosen": 1.0741676092147827, |
|
"log_odds_ratio": -0.4483945965766907, |
|
"logits/chosen": 430.83160400390625, |
|
"logits/rejected": 437.00006103515625, |
|
"logps/chosen": -1.3649173974990845, |
|
"logps/rejected": -2.2547924518585205, |
|
"loss": 1.8406, |
|
"nll_loss": 1.6345653533935547, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6824586987495422, |
|
"rewards/margins": 0.444937527179718, |
|
"rewards/rejected": -1.1273962259292603, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5654761904761905, |
|
"grad_norm": 158.6744384765625, |
|
"learning_rate": 9.5e-06, |
|
"log_odds_chosen": 0.3580256700515747, |
|
"log_odds_ratio": -0.5983911752700806, |
|
"logits/chosen": 365.0293273925781, |
|
"logits/rejected": 395.13739013671875, |
|
"logps/chosen": -1.2539499998092651, |
|
"logps/rejected": -1.4669833183288574, |
|
"loss": 1.9122, |
|
"nll_loss": 1.5921684503555298, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6269749999046326, |
|
"rewards/margins": 0.10651664435863495, |
|
"rewards/rejected": -0.7334916591644287, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.5952380952380952, |
|
"grad_norm": 60.67824172973633, |
|
"learning_rate": 1e-05, |
|
"log_odds_chosen": 0.5569584965705872, |
|
"log_odds_ratio": -0.5973536372184753, |
|
"logits/chosen": 427.83270263671875, |
|
"logits/rejected": 444.78289794921875, |
|
"logps/chosen": -1.552487850189209, |
|
"logps/rejected": -2.052288770675659, |
|
"loss": 1.9997, |
|
"nll_loss": 1.694284439086914, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7762439250946045, |
|
"rewards/margins": 0.2499004304409027, |
|
"rewards/rejected": -1.0261443853378296, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 66.58687591552734, |
|
"learning_rate": 9.759000729485331e-06, |
|
"log_odds_chosen": 0.6742503643035889, |
|
"log_odds_ratio": -0.5492368340492249, |
|
"logits/chosen": 421.02435302734375, |
|
"logits/rejected": 404.02593994140625, |
|
"logps/chosen": -1.4385154247283936, |
|
"logps/rejected": -1.9352591037750244, |
|
"loss": 1.9337, |
|
"nll_loss": 1.8405609130859375, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7192577123641968, |
|
"rewards/margins": 0.24837179481983185, |
|
"rewards/rejected": -0.9676295518875122, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.6547619047619048, |
|
"grad_norm": 378.0513916015625, |
|
"learning_rate": 9.534625892455923e-06, |
|
"log_odds_chosen": 0.7585327625274658, |
|
"log_odds_ratio": -0.467681884765625, |
|
"logits/chosen": 386.55181884765625, |
|
"logits/rejected": 365.63861083984375, |
|
"logps/chosen": -1.359151840209961, |
|
"logps/rejected": -1.9564090967178345, |
|
"loss": 2.0541, |
|
"nll_loss": 1.9982373714447021, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6795759201049805, |
|
"rewards/margins": 0.29862865805625916, |
|
"rewards/rejected": -0.9782045483589172, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6845238095238095, |
|
"grad_norm": 68.57817077636719, |
|
"learning_rate": 9.325048082403139e-06, |
|
"log_odds_chosen": 0.9356454610824585, |
|
"log_odds_ratio": -0.41365212202072144, |
|
"logits/chosen": 406.5351867675781, |
|
"logits/rejected": 452.47161865234375, |
|
"logps/chosen": -1.2247552871704102, |
|
"logps/rejected": -1.8875564336776733, |
|
"loss": 2.0096, |
|
"nll_loss": 1.6129186153411865, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6123776435852051, |
|
"rewards/margins": 0.33140069246292114, |
|
"rewards/rejected": -0.9437782168388367, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 70.14920806884766, |
|
"learning_rate": 9.12870929175277e-06, |
|
"log_odds_chosen": 0.6218617558479309, |
|
"log_odds_ratio": -0.5581934452056885, |
|
"logits/chosen": 400.44232177734375, |
|
"logits/rejected": 403.1569519042969, |
|
"logps/chosen": -1.2707955837249756, |
|
"logps/rejected": -1.6316139698028564, |
|
"loss": 2.0261, |
|
"nll_loss": 1.7148948907852173, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6353977918624878, |
|
"rewards/margins": 0.18040914833545685, |
|
"rewards/rejected": -0.8158069849014282, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7440476190476191, |
|
"grad_norm": 28.353559494018555, |
|
"learning_rate": 8.94427190999916e-06, |
|
"log_odds_chosen": 0.06747283786535263, |
|
"log_odds_ratio": -0.7747208476066589, |
|
"logits/chosen": 394.8547668457031, |
|
"logits/rejected": 402.99114990234375, |
|
"logps/chosen": -1.4064565896987915, |
|
"logps/rejected": -1.443865180015564, |
|
"loss": 1.9533, |
|
"nll_loss": 1.529036283493042, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7032282948493958, |
|
"rewards/margins": 0.018704283982515335, |
|
"rewards/rejected": -0.721932590007782, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.7738095238095238, |
|
"grad_norm": 50.000038146972656, |
|
"learning_rate": 8.770580193070294e-06, |
|
"log_odds_chosen": 0.487846702337265, |
|
"log_odds_ratio": -0.62076336145401, |
|
"logits/chosen": 419.7767639160156, |
|
"logits/rejected": 387.2704772949219, |
|
"logps/chosen": -1.2864845991134644, |
|
"logps/rejected": -1.5687696933746338, |
|
"loss": 1.9848, |
|
"nll_loss": 1.5705018043518066, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6432422995567322, |
|
"rewards/margins": 0.14114244282245636, |
|
"rewards/rejected": -0.7843848466873169, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8035714285714286, |
|
"grad_norm": 32.915321350097656, |
|
"learning_rate": 8.606629658238705e-06, |
|
"log_odds_chosen": 0.5996901988983154, |
|
"log_odds_ratio": -0.5842069387435913, |
|
"logits/chosen": 392.77679443359375, |
|
"logits/rejected": 423.60272216796875, |
|
"logps/chosen": -1.3194632530212402, |
|
"logps/rejected": -1.8708839416503906, |
|
"loss": 1.9914, |
|
"nll_loss": 1.6256046295166016, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6597316265106201, |
|
"rewards/margins": 0.2757102847099304, |
|
"rewards/rejected": -0.9354419708251953, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 60.51993179321289, |
|
"learning_rate": 8.451542547285167e-06, |
|
"log_odds_chosen": 0.14726313948631287, |
|
"log_odds_ratio": -0.6693505048751831, |
|
"logits/chosen": 411.6986389160156, |
|
"logits/rejected": 375.326416015625, |
|
"logps/chosen": -1.321712851524353, |
|
"logps/rejected": -1.4512436389923096, |
|
"loss": 1.9797, |
|
"nll_loss": 1.6686627864837646, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6608564257621765, |
|
"rewards/margins": 0.0647653192281723, |
|
"rewards/rejected": -0.7256218194961548, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.8630952380952381, |
|
"grad_norm": 31.022499084472656, |
|
"learning_rate": 8.304547985373997e-06, |
|
"log_odds_chosen": 0.1282106339931488, |
|
"log_odds_ratio": -0.6646836996078491, |
|
"logits/chosen": 381.526611328125, |
|
"logits/rejected": 395.548095703125, |
|
"logps/chosen": -1.3501091003417969, |
|
"logps/rejected": -1.4500309228897095, |
|
"loss": 1.9768, |
|
"nll_loss": 1.590570330619812, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6750545501708984, |
|
"rewards/margins": 0.0499609112739563, |
|
"rewards/rejected": -0.7250154614448547, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.8928571428571429, |
|
"grad_norm": 48.54278564453125, |
|
"learning_rate": 8.164965809277262e-06, |
|
"log_odds_chosen": 1.1463629007339478, |
|
"log_odds_ratio": -0.46680259704589844, |
|
"logits/chosen": 403.89031982421875, |
|
"logits/rejected": 409.86151123046875, |
|
"logps/chosen": -1.3419028520584106, |
|
"logps/rejected": -2.324528217315674, |
|
"loss": 1.901, |
|
"nll_loss": 1.6430673599243164, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6709514260292053, |
|
"rewards/margins": 0.49131274223327637, |
|
"rewards/rejected": -1.162264108657837, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9226190476190477, |
|
"grad_norm": 44.35458755493164, |
|
"learning_rate": 8.03219328902499e-06, |
|
"log_odds_chosen": 0.22458314895629883, |
|
"log_odds_ratio": -0.6567160487174988, |
|
"logits/chosen": 399.331298828125, |
|
"logits/rejected": 395.67767333984375, |
|
"logps/chosen": -1.445773959159851, |
|
"logps/rejected": -1.61488938331604, |
|
"loss": 1.8732, |
|
"nll_loss": 1.569331407546997, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7228869795799255, |
|
"rewards/margins": 0.08455771207809448, |
|
"rewards/rejected": -0.80744469165802, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 44.52214431762695, |
|
"learning_rate": 7.905694150420949e-06, |
|
"log_odds_chosen": 0.3395001292228699, |
|
"log_odds_ratio": -0.6508662104606628, |
|
"logits/chosen": 404.9471435546875, |
|
"logits/rejected": 443.93865966796875, |
|
"logps/chosen": -1.2228091955184937, |
|
"logps/rejected": -1.4965049028396606, |
|
"loss": 1.8769, |
|
"nll_loss": 1.4934804439544678, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.6114045977592468, |
|
"rewards/margins": 0.13684777915477753, |
|
"rewards/rejected": -0.7482524514198303, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.9821428571428571, |
|
"grad_norm": 51.537784576416016, |
|
"learning_rate": 7.78498944161523e-06, |
|
"log_odds_chosen": 0.2777930200099945, |
|
"log_odds_ratio": -0.6248766779899597, |
|
"logits/chosen": 440.94366455078125, |
|
"logits/rejected": 444.90240478515625, |
|
"logps/chosen": -1.3909051418304443, |
|
"logps/rejected": -1.5702444314956665, |
|
"loss": 1.9721, |
|
"nll_loss": 1.7042747735977173, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6954525709152222, |
|
"rewards/margins": 0.08966972678899765, |
|
"rewards/rejected": -0.7851222157478333, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_log_odds_chosen": 0.26492369174957275, |
|
"eval_log_odds_ratio": -0.6573201417922974, |
|
"eval_logits/chosen": 336.2867126464844, |
|
"eval_logits/rejected": 282.1214904785156, |
|
"eval_logps/chosen": -1.2143747806549072, |
|
"eval_logps/rejected": -1.4054285287857056, |
|
"eval_loss": 1.9526195526123047, |
|
"eval_nll_loss": 1.6514703035354614, |
|
"eval_rewards/accuracies": 0.5571428537368774, |
|
"eval_rewards/chosen": -0.6071873903274536, |
|
"eval_rewards/margins": 0.09552692621946335, |
|
"eval_rewards/rejected": -0.7027142643928528, |
|
"eval_runtime": 201.1918, |
|
"eval_samples_per_second": 2.749, |
|
"eval_steps_per_second": 0.348, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.0119047619047619, |
|
"grad_norm": 31.192705154418945, |
|
"learning_rate": 7.669649888473705e-06, |
|
"log_odds_chosen": 0.9334543347358704, |
|
"log_odds_ratio": -0.48719945549964905, |
|
"logits/chosen": 377.3502502441406, |
|
"logits/rejected": 415.46356201171875, |
|
"logps/chosen": -1.0376964807510376, |
|
"logps/rejected": -1.7006464004516602, |
|
"loss": 1.7058, |
|
"nll_loss": 1.2853295803070068, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5188482403755188, |
|
"rewards/margins": 0.3314751386642456, |
|
"rewards/rejected": -0.8503232002258301, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0416666666666667, |
|
"grad_norm": 23.733516693115234, |
|
"learning_rate": 7.559289460184545e-06, |
|
"log_odds_chosen": 1.4257056713104248, |
|
"log_odds_ratio": -0.36321204900741577, |
|
"logits/chosen": 406.3605651855469, |
|
"logits/rejected": 437.1375427246094, |
|
"logps/chosen": -0.8629204034805298, |
|
"logps/rejected": -1.8850151300430298, |
|
"loss": 1.2977, |
|
"nll_loss": 1.387385368347168, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.4314602017402649, |
|
"rewards/margins": 0.5110472440719604, |
|
"rewards/rejected": -0.9425075650215149, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.0714285714285714, |
|
"grad_norm": 24.075864791870117, |
|
"learning_rate": 7.4535599249993e-06, |
|
"log_odds_chosen": 1.4416855573654175, |
|
"log_odds_ratio": -0.30401644110679626, |
|
"logits/chosen": 357.9011535644531, |
|
"logits/rejected": 352.5140380859375, |
|
"logps/chosen": -0.899684727191925, |
|
"logps/rejected": -1.9276078939437866, |
|
"loss": 1.3288, |
|
"nll_loss": 1.3482264280319214, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.4498423635959625, |
|
"rewards/margins": 0.513961672782898, |
|
"rewards/rejected": -0.9638039469718933, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.1011904761904763, |
|
"grad_norm": 39.60210418701172, |
|
"learning_rate": 7.352146220938079e-06, |
|
"log_odds_chosen": 2.3201422691345215, |
|
"log_odds_ratio": -0.20258066058158875, |
|
"logits/chosen": 449.66485595703125, |
|
"logits/rejected": 380.39923095703125, |
|
"logps/chosen": -0.8746849894523621, |
|
"logps/rejected": -2.625277042388916, |
|
"loss": 1.2651, |
|
"nll_loss": 1.2770421504974365, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.43734249472618103, |
|
"rewards/margins": 0.8752959370613098, |
|
"rewards/rejected": -1.312638521194458, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.130952380952381, |
|
"grad_norm": 19.941560745239258, |
|
"learning_rate": 7.254762501100117e-06, |
|
"log_odds_chosen": 1.2973986864089966, |
|
"log_odds_ratio": -0.36347365379333496, |
|
"logits/chosen": 403.78106689453125, |
|
"logits/rejected": 423.5147399902344, |
|
"logps/chosen": -0.9356532096862793, |
|
"logps/rejected": -1.7905277013778687, |
|
"loss": 1.3584, |
|
"nll_loss": 1.2576459646224976, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.46782660484313965, |
|
"rewards/margins": 0.4274372160434723, |
|
"rewards/rejected": -0.8952638506889343, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.1607142857142858, |
|
"grad_norm": 22.545778274536133, |
|
"learning_rate": 7.1611487403943295e-06, |
|
"log_odds_chosen": 1.494046688079834, |
|
"log_odds_ratio": -0.28673815727233887, |
|
"logits/chosen": 383.939697265625, |
|
"logits/rejected": 414.34100341796875, |
|
"logps/chosen": -1.043168067932129, |
|
"logps/rejected": -2.1456902027130127, |
|
"loss": 1.3101, |
|
"nll_loss": 1.2422401905059814, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.5215840339660645, |
|
"rewards/margins": 0.5512610673904419, |
|
"rewards/rejected": -1.0728451013565063, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.1904761904761905, |
|
"grad_norm": 29.15802574157715, |
|
"learning_rate": 7.0710678118654756e-06, |
|
"log_odds_chosen": 2.355477809906006, |
|
"log_odds_ratio": -0.23161384463310242, |
|
"logits/chosen": 443.63848876953125, |
|
"logits/rejected": 404.1456298828125, |
|
"logps/chosen": -0.6128617525100708, |
|
"logps/rejected": -2.293651580810547, |
|
"loss": 1.2885, |
|
"nll_loss": 1.0000107288360596, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3064308762550354, |
|
"rewards/margins": 0.8403949737548828, |
|
"rewards/rejected": -1.1468257904052734, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.2202380952380953, |
|
"grad_norm": 25.433277130126953, |
|
"learning_rate": 6.984302957695783e-06, |
|
"log_odds_chosen": 2.206387758255005, |
|
"log_odds_ratio": -0.20685645937919617, |
|
"logits/chosen": 345.31134033203125, |
|
"logits/rejected": 396.1018371582031, |
|
"logps/chosen": -0.8278995752334595, |
|
"logps/rejected": -2.5073094367980957, |
|
"loss": 1.2908, |
|
"nll_loss": 1.2252819538116455, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.41394978761672974, |
|
"rewards/margins": 0.8397049903869629, |
|
"rewards/rejected": -1.2536547183990479, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 20.637460708618164, |
|
"learning_rate": 6.900655593423542e-06, |
|
"log_odds_chosen": 1.841051697731018, |
|
"log_odds_ratio": -0.25199171900749207, |
|
"logits/chosen": 375.1767272949219, |
|
"logits/rejected": 373.2859191894531, |
|
"logps/chosen": -0.8490890264511108, |
|
"logps/rejected": -2.084282159805298, |
|
"loss": 1.1767, |
|
"nll_loss": 1.2052141427993774, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4245445132255554, |
|
"rewards/margins": 0.617596447467804, |
|
"rewards/rejected": -1.042141079902649, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.2797619047619047, |
|
"grad_norm": 44.84423065185547, |
|
"learning_rate": 6.819943394704736e-06, |
|
"log_odds_chosen": 2.0608296394348145, |
|
"log_odds_ratio": -0.26352304220199585, |
|
"logits/chosen": 417.9048767089844, |
|
"logits/rejected": 423.32275390625, |
|
"logps/chosen": -0.8630622029304504, |
|
"logps/rejected": -2.449791669845581, |
|
"loss": 1.2804, |
|
"nll_loss": 1.2531594038009644, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.4315311014652252, |
|
"rewards/margins": 0.7933648824691772, |
|
"rewards/rejected": -1.2248958349227905, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.3095238095238095, |
|
"grad_norm": 27.052371978759766, |
|
"learning_rate": 6.741998624632421e-06, |
|
"log_odds_chosen": 1.9642305374145508, |
|
"log_odds_ratio": -0.2985631823539734, |
|
"logits/chosen": 365.51226806640625, |
|
"logits/rejected": 383.2918395996094, |
|
"logps/chosen": -0.7986623048782349, |
|
"logps/rejected": -2.1426806449890137, |
|
"loss": 1.2905, |
|
"nll_loss": 1.1290483474731445, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.39933115243911743, |
|
"rewards/margins": 0.6720091700553894, |
|
"rewards/rejected": -1.0713403224945068, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.3392857142857144, |
|
"grad_norm": 33.49964141845703, |
|
"learning_rate": 6.666666666666667e-06, |
|
"log_odds_chosen": 1.2250487804412842, |
|
"log_odds_ratio": -0.34452176094055176, |
|
"logits/chosen": 399.16046142578125, |
|
"logits/rejected": 376.3241882324219, |
|
"logps/chosen": -1.1176211833953857, |
|
"logps/rejected": -2.0609307289123535, |
|
"loss": 1.2819, |
|
"nll_loss": 1.3445219993591309, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.5588105916976929, |
|
"rewards/margins": 0.47165459394454956, |
|
"rewards/rejected": -1.0304653644561768, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.369047619047619, |
|
"grad_norm": 20.881301879882812, |
|
"learning_rate": 6.593804733957872e-06, |
|
"log_odds_chosen": 2.1563384532928467, |
|
"log_odds_ratio": -0.24819080531597137, |
|
"logits/chosen": 367.4872131347656, |
|
"logits/rejected": 376.99822998046875, |
|
"logps/chosen": -0.9552156329154968, |
|
"logps/rejected": -2.5535261631011963, |
|
"loss": 1.2569, |
|
"nll_loss": 0.9765374064445496, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.4776078164577484, |
|
"rewards/margins": 0.7991552948951721, |
|
"rewards/rejected": -1.2767630815505981, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.3988095238095237, |
|
"grad_norm": 20.453393936157227, |
|
"learning_rate": 6.523280730534423e-06, |
|
"log_odds_chosen": 2.148529529571533, |
|
"log_odds_ratio": -0.2209801971912384, |
|
"logits/chosen": 396.610595703125, |
|
"logits/rejected": 402.3414306640625, |
|
"logps/chosen": -0.6876020431518555, |
|
"logps/rejected": -2.1799914836883545, |
|
"loss": 1.2833, |
|
"nll_loss": 1.0352197885513306, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.34380102157592773, |
|
"rewards/margins": 0.7461946606636047, |
|
"rewards/rejected": -1.0899957418441772, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 23.43726348876953, |
|
"learning_rate": 6.4549722436790284e-06, |
|
"log_odds_chosen": 1.4042203426361084, |
|
"log_odds_ratio": -0.30760836601257324, |
|
"logits/chosen": 426.6971740722656, |
|
"logits/rejected": 434.68292236328125, |
|
"logps/chosen": -0.9825772047042847, |
|
"logps/rejected": -2.0324692726135254, |
|
"loss": 1.258, |
|
"nll_loss": 1.1937551498413086, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.49128860235214233, |
|
"rewards/margins": 0.5249461531639099, |
|
"rewards/rejected": -1.0162346363067627, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.4583333333333333, |
|
"grad_norm": 20.512117385864258, |
|
"learning_rate": 6.3887656499994e-06, |
|
"log_odds_chosen": 1.7208242416381836, |
|
"log_odds_ratio": -0.2485727071762085, |
|
"logits/chosen": 406.27337646484375, |
|
"logits/rejected": 381.36297607421875, |
|
"logps/chosen": -0.7839127779006958, |
|
"logps/rejected": -2.025153398513794, |
|
"loss": 1.3187, |
|
"nll_loss": 1.2436693906784058, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.3919563889503479, |
|
"rewards/margins": 0.6206203699111938, |
|
"rewards/rejected": -1.012576699256897, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.4880952380952381, |
|
"grad_norm": 31.212268829345703, |
|
"learning_rate": 6.324555320336759e-06, |
|
"log_odds_chosen": 2.171504497528076, |
|
"log_odds_ratio": -0.2626289427280426, |
|
"logits/chosen": 385.3293151855469, |
|
"logits/rejected": 403.4792785644531, |
|
"logps/chosen": -0.912436842918396, |
|
"logps/rejected": -2.581498146057129, |
|
"loss": 1.2409, |
|
"nll_loss": 0.9795435070991516, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.456218421459198, |
|
"rewards/margins": 0.8345306515693665, |
|
"rewards/rejected": -1.2907490730285645, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.5178571428571428, |
|
"grad_norm": 15.0794095993042, |
|
"learning_rate": 6.262242910851496e-06, |
|
"log_odds_chosen": 1.983006238937378, |
|
"log_odds_ratio": -0.19495443999767303, |
|
"logits/chosen": 411.68487548828125, |
|
"logits/rejected": 416.602783203125, |
|
"logps/chosen": -0.9126818776130676, |
|
"logps/rejected": -2.4313483238220215, |
|
"loss": 1.2696, |
|
"nll_loss": 1.102388620376587, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4563409388065338, |
|
"rewards/margins": 0.7593332529067993, |
|
"rewards/rejected": -1.2156741619110107, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.5476190476190477, |
|
"grad_norm": 27.49896812438965, |
|
"learning_rate": 6.2017367294604225e-06, |
|
"log_odds_chosen": 1.9481725692749023, |
|
"log_odds_ratio": -0.2753888964653015, |
|
"logits/chosen": 386.07427978515625, |
|
"logits/rejected": 435.198974609375, |
|
"logps/chosen": -0.995970606803894, |
|
"logps/rejected": -2.5512473583221436, |
|
"loss": 1.2264, |
|
"nll_loss": 1.1618086099624634, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.497985303401947, |
|
"rewards/margins": 0.7776384353637695, |
|
"rewards/rejected": -1.2756236791610718, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.5773809523809523, |
|
"grad_norm": 23.22437286376953, |
|
"learning_rate": 6.142951168339513e-06, |
|
"log_odds_chosen": 1.3944361209869385, |
|
"log_odds_ratio": -0.4270511567592621, |
|
"logits/chosen": 435.14678955078125, |
|
"logits/rejected": 427.51226806640625, |
|
"logps/chosen": -1.104048252105713, |
|
"logps/rejected": -2.160891056060791, |
|
"loss": 1.4157, |
|
"nll_loss": 1.3079249858856201, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5520241260528564, |
|
"rewards/margins": 0.5284214019775391, |
|
"rewards/rejected": -1.0804455280303955, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.6071428571428572, |
|
"grad_norm": 16.965179443359375, |
|
"learning_rate": 6.0858061945018455e-06, |
|
"log_odds_chosen": 1.3110793828964233, |
|
"log_odds_ratio": -0.33642929792404175, |
|
"logits/chosen": 370.2662658691406, |
|
"logits/rejected": 376.83587646484375, |
|
"logps/chosen": -1.1280148029327393, |
|
"logps/rejected": -2.0509979724884033, |
|
"loss": 1.3064, |
|
"nll_loss": 1.3204824924468994, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5640074014663696, |
|
"rewards/margins": 0.46149152517318726, |
|
"rewards/rejected": -1.0254989862442017, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.6369047619047619, |
|
"grad_norm": 16.231523513793945, |
|
"learning_rate": 6.030226891555273e-06, |
|
"log_odds_chosen": 1.2828176021575928, |
|
"log_odds_ratio": -0.31571871042251587, |
|
"logits/chosen": 336.78143310546875, |
|
"logits/rejected": 352.0708312988281, |
|
"logps/chosen": -0.8390641212463379, |
|
"logps/rejected": -1.6753209829330444, |
|
"loss": 1.2993, |
|
"nll_loss": 1.0436512231826782, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.41953206062316895, |
|
"rewards/margins": 0.4181283414363861, |
|
"rewards/rejected": -0.8376604914665222, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 19.623210906982422, |
|
"learning_rate": 5.976143046671968e-06, |
|
"log_odds_chosen": 1.4294779300689697, |
|
"log_odds_ratio": -0.3383074104785919, |
|
"logits/chosen": 406.8611145019531, |
|
"logits/rejected": 369.0583190917969, |
|
"logps/chosen": -0.7519195675849915, |
|
"logps/rejected": -1.7197290658950806, |
|
"loss": 1.3075, |
|
"nll_loss": 1.0520720481872559, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3759597837924957, |
|
"rewards/margins": 0.4839046597480774, |
|
"rewards/rejected": -0.8598645329475403, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.6964285714285714, |
|
"grad_norm": 27.15911293029785, |
|
"learning_rate": 5.923488777590924e-06, |
|
"log_odds_chosen": 1.6469166278839111, |
|
"log_odds_ratio": -0.31998661160469055, |
|
"logits/chosen": 351.8115234375, |
|
"logits/rejected": 378.2767333984375, |
|
"logps/chosen": -0.8496532440185547, |
|
"logps/rejected": -2.034947156906128, |
|
"loss": 1.2238, |
|
"nll_loss": 1.0820589065551758, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.42482662200927734, |
|
"rewards/margins": 0.5926468968391418, |
|
"rewards/rejected": -1.017473578453064, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.7261904761904763, |
|
"grad_norm": 20.430383682250977, |
|
"learning_rate": 5.8722021951470355e-06, |
|
"log_odds_chosen": 2.2868614196777344, |
|
"log_odds_ratio": -0.23290471732616425, |
|
"logits/chosen": 376.9328918457031, |
|
"logits/rejected": 444.3408203125, |
|
"logps/chosen": -0.8962064981460571, |
|
"logps/rejected": -2.7214858531951904, |
|
"loss": 1.2798, |
|
"nll_loss": 1.1513203382492065, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.44810324907302856, |
|
"rewards/margins": 0.9126396179199219, |
|
"rewards/rejected": -1.3607429265975952, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.755952380952381, |
|
"grad_norm": 23.614900588989258, |
|
"learning_rate": 5.822225097395821e-06, |
|
"log_odds_chosen": 2.100496768951416, |
|
"log_odds_ratio": -0.2871156930923462, |
|
"logits/chosen": 391.25726318359375, |
|
"logits/rejected": 399.76214599609375, |
|
"logps/chosen": -1.0318877696990967, |
|
"logps/rejected": -2.5897607803344727, |
|
"loss": 1.318, |
|
"nll_loss": 1.339220404624939, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.5159438848495483, |
|
"rewards/margins": 0.778936505317688, |
|
"rewards/rejected": -1.2948803901672363, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"grad_norm": 24.594526290893555, |
|
"learning_rate": 5.773502691896259e-06, |
|
"log_odds_chosen": 1.4617587327957153, |
|
"log_odds_ratio": -0.268736869096756, |
|
"logits/chosen": 392.9894714355469, |
|
"logits/rejected": 392.593994140625, |
|
"logps/chosen": -0.716581404209137, |
|
"logps/rejected": -1.6443252563476562, |
|
"loss": 1.2342, |
|
"nll_loss": 1.0071418285369873, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3582907021045685, |
|
"rewards/margins": 0.46387186646461487, |
|
"rewards/rejected": -0.8221626281738281, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.8154761904761905, |
|
"grad_norm": 18.39592742919922, |
|
"learning_rate": 5.725983343138682e-06, |
|
"log_odds_chosen": 1.3188327550888062, |
|
"log_odds_ratio": -0.39420002698898315, |
|
"logits/chosen": 443.49505615234375, |
|
"logits/rejected": 423.3663635253906, |
|
"logps/chosen": -1.0477676391601562, |
|
"logps/rejected": -2.009697914123535, |
|
"loss": 1.2633, |
|
"nll_loss": 1.2299957275390625, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.5238838195800781, |
|
"rewards/margins": 0.4809652864933014, |
|
"rewards/rejected": -1.0048489570617676, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.8452380952380953, |
|
"grad_norm": 16.925289154052734, |
|
"learning_rate": 5.679618342470648e-06, |
|
"log_odds_chosen": 1.8564685583114624, |
|
"log_odds_ratio": -0.2928754985332489, |
|
"logits/chosen": 358.899169921875, |
|
"logits/rejected": 364.58160400390625, |
|
"logps/chosen": -0.94548100233078, |
|
"logps/rejected": -2.3730039596557617, |
|
"loss": 1.2961, |
|
"nll_loss": 1.2056444883346558, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.47274050116539, |
|
"rewards/margins": 0.7137616276741028, |
|
"rewards/rejected": -1.1865019798278809, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.875, |
|
"grad_norm": 27.0323543548584, |
|
"learning_rate": 5.63436169819011e-06, |
|
"log_odds_chosen": 1.889154076576233, |
|
"log_odds_ratio": -0.3951663076877594, |
|
"logits/chosen": 385.2926025390625, |
|
"logits/rejected": 391.605712890625, |
|
"logps/chosen": -1.166025161743164, |
|
"logps/rejected": -2.7089405059814453, |
|
"loss": 1.3311, |
|
"nll_loss": 1.3940035104751587, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.583012580871582, |
|
"rewards/margins": 0.7714576125144958, |
|
"rewards/rejected": -1.3544702529907227, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.9047619047619047, |
|
"grad_norm": 18.699766159057617, |
|
"learning_rate": 5.590169943749475e-06, |
|
"log_odds_chosen": 1.6885192394256592, |
|
"log_odds_ratio": -0.2244713306427002, |
|
"logits/chosen": 402.7138366699219, |
|
"logits/rejected": 435.62646484375, |
|
"logps/chosen": -0.8159521222114563, |
|
"logps/rejected": -2.033271551132202, |
|
"loss": 1.2117, |
|
"nll_loss": 0.9898951649665833, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40797606110572815, |
|
"rewards/margins": 0.6086598634719849, |
|
"rewards/rejected": -1.016635775566101, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.9345238095238095, |
|
"grad_norm": 29.9803409576416, |
|
"learning_rate": 5.547001962252292e-06, |
|
"log_odds_chosen": 2.429278612136841, |
|
"log_odds_ratio": -0.19841960072517395, |
|
"logits/chosen": 411.4713439941406, |
|
"logits/rejected": 433.3556213378906, |
|
"logps/chosen": -0.8200858235359192, |
|
"logps/rejected": -2.58778977394104, |
|
"loss": 1.1848, |
|
"nll_loss": 1.0703952312469482, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.4100429117679596, |
|
"rewards/margins": 0.8838518857955933, |
|
"rewards/rejected": -1.29389488697052, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.9642857142857144, |
|
"grad_norm": 15.92293643951416, |
|
"learning_rate": 5.504818825631804e-06, |
|
"log_odds_chosen": 1.846983551979065, |
|
"log_odds_ratio": -0.29716789722442627, |
|
"logits/chosen": 407.4903869628906, |
|
"logits/rejected": 413.0613708496094, |
|
"logps/chosen": -0.9037710428237915, |
|
"logps/rejected": -2.2958996295928955, |
|
"loss": 1.245, |
|
"nll_loss": 0.9782114028930664, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.45188552141189575, |
|
"rewards/margins": 0.6960643529891968, |
|
"rewards/rejected": -1.1479498147964478, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.994047619047619, |
|
"grad_norm": 15.895071029663086, |
|
"learning_rate": 5.4635836470815305e-06, |
|
"log_odds_chosen": 1.2502014636993408, |
|
"log_odds_ratio": -0.3804013133049011, |
|
"logits/chosen": 390.8824157714844, |
|
"logits/rejected": 404.8577575683594, |
|
"logps/chosen": -1.2015631198883057, |
|
"logps/rejected": -2.143710136413574, |
|
"loss": 1.3299, |
|
"nll_loss": 1.439396619796753, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6007815599441528, |
|
"rewards/margins": 0.47107353806495667, |
|
"rewards/rejected": -1.071855068206787, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_log_odds_chosen": 0.2173391729593277, |
|
"eval_log_odds_ratio": -0.6792100071907043, |
|
"eval_logits/chosen": 345.2332763671875, |
|
"eval_logits/rejected": 293.2820129394531, |
|
"eval_logps/chosen": -1.1971725225448608, |
|
"eval_logps/rejected": -1.3610831499099731, |
|
"eval_loss": 1.9015214443206787, |
|
"eval_nll_loss": 1.5933443307876587, |
|
"eval_rewards/accuracies": 0.5, |
|
"eval_rewards/chosen": -0.5985862612724304, |
|
"eval_rewards/margins": 0.08195527642965317, |
|
"eval_rewards/rejected": -0.6805415749549866, |
|
"eval_runtime": 201.3424, |
|
"eval_samples_per_second": 2.747, |
|
"eval_steps_per_second": 0.348, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 2.0238095238095237, |
|
"grad_norm": 18.064809799194336, |
|
"learning_rate": 5.423261445466404e-06, |
|
"log_odds_chosen": 3.1630072593688965, |
|
"log_odds_ratio": -0.14342114329338074, |
|
"logits/chosen": 407.103759765625, |
|
"logits/rejected": 396.9122009277344, |
|
"logps/chosen": -0.5497515201568604, |
|
"logps/rejected": -2.797853946685791, |
|
"loss": 0.7506, |
|
"nll_loss": 0.7164371609687805, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.2748757600784302, |
|
"rewards/margins": 1.1240513324737549, |
|
"rewards/rejected": -1.3989269733428955, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.0535714285714284, |
|
"grad_norm": 12.874629974365234, |
|
"learning_rate": 5.383819020581656e-06, |
|
"log_odds_chosen": 4.300416946411133, |
|
"log_odds_ratio": -0.10298861563205719, |
|
"logits/chosen": 406.04144287109375, |
|
"logits/rejected": 365.400390625, |
|
"logps/chosen": -0.5357804894447327, |
|
"logps/rejected": -3.574709415435791, |
|
"loss": 0.6675, |
|
"nll_loss": 0.8001909255981445, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.26789024472236633, |
|
"rewards/margins": 1.5194646120071411, |
|
"rewards/rejected": -1.7873547077178955, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.0833333333333335, |
|
"grad_norm": 13.677016258239746, |
|
"learning_rate": 5.345224838248489e-06, |
|
"log_odds_chosen": 4.409786224365234, |
|
"log_odds_ratio": -0.03061295673251152, |
|
"logits/chosen": 372.30560302734375, |
|
"logits/rejected": 401.3008728027344, |
|
"logps/chosen": -0.46024495363235474, |
|
"logps/rejected": -3.737445116043091, |
|
"loss": 0.5993, |
|
"nll_loss": 0.6028808355331421, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23012247681617737, |
|
"rewards/margins": 1.6386003494262695, |
|
"rewards/rejected": -1.8687225580215454, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.113095238095238, |
|
"grad_norm": 10.955292701721191, |
|
"learning_rate": 5.307448924342753e-06, |
|
"log_odds_chosen": 4.361363410949707, |
|
"log_odds_ratio": -0.04152694344520569, |
|
"logits/chosen": 373.2965393066406, |
|
"logits/rejected": 396.0318603515625, |
|
"logps/chosen": -0.4621841311454773, |
|
"logps/rejected": -3.721182346343994, |
|
"loss": 0.5932, |
|
"nll_loss": 0.5964406132698059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23109206557273865, |
|
"rewards/margins": 1.6294991970062256, |
|
"rewards/rejected": -1.860591173171997, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 2.142857142857143, |
|
"grad_norm": 28.92525863647461, |
|
"learning_rate": 5.270462766947299e-06, |
|
"log_odds_chosen": 4.147698402404785, |
|
"log_odds_ratio": -0.032929155975580215, |
|
"logits/chosen": 380.5809631347656, |
|
"logits/rejected": 416.3023986816406, |
|
"logps/chosen": -0.381146639585495, |
|
"logps/rejected": -3.1521759033203125, |
|
"loss": 0.612, |
|
"nll_loss": 0.6254645586013794, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1905733197927475, |
|
"rewards/margins": 1.3855146169662476, |
|
"rewards/rejected": -1.5760879516601562, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.1726190476190474, |
|
"grad_norm": 12.443119049072266, |
|
"learning_rate": 5.234239225902137e-06, |
|
"log_odds_chosen": 4.118858337402344, |
|
"log_odds_ratio": -0.03749427944421768, |
|
"logits/chosen": 345.683349609375, |
|
"logits/rejected": 312.6221923828125, |
|
"logps/chosen": -0.32834386825561523, |
|
"logps/rejected": -3.117326498031616, |
|
"loss": 0.5807, |
|
"nll_loss": 0.531657338142395, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.16417193412780762, |
|
"rewards/margins": 1.3944913148880005, |
|
"rewards/rejected": -1.558663249015808, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 2.2023809523809526, |
|
"grad_norm": 10.90052318572998, |
|
"learning_rate": 5.198752449100364e-06, |
|
"log_odds_chosen": 3.4755806922912598, |
|
"log_odds_ratio": -0.04619182273745537, |
|
"logits/chosen": 332.7835693359375, |
|
"logits/rejected": 340.1016845703125, |
|
"logps/chosen": -0.4380973279476166, |
|
"logps/rejected": -2.8499865531921387, |
|
"loss": 0.6158, |
|
"nll_loss": 0.6762995719909668, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2190486639738083, |
|
"rewards/margins": 1.2059446573257446, |
|
"rewards/rejected": -1.4249932765960693, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.232142857142857, |
|
"grad_norm": 14.661003112792969, |
|
"learning_rate": 5.163977794943223e-06, |
|
"log_odds_chosen": 3.5086026191711426, |
|
"log_odds_ratio": -0.11584819853305817, |
|
"logits/chosen": 396.9617614746094, |
|
"logits/rejected": 412.2447814941406, |
|
"logps/chosen": -0.4914408326148987, |
|
"logps/rejected": -2.892894744873047, |
|
"loss": 0.6302, |
|
"nll_loss": 0.5993391275405884, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.24572041630744934, |
|
"rewards/margins": 1.2007267475128174, |
|
"rewards/rejected": -1.4464473724365234, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.261904761904762, |
|
"grad_norm": 13.687707901000977, |
|
"learning_rate": 5.129891760425772e-06, |
|
"log_odds_chosen": 3.4452381134033203, |
|
"log_odds_ratio": -0.04614276438951492, |
|
"logits/chosen": 386.617919921875, |
|
"logits/rejected": 348.6525573730469, |
|
"logps/chosen": -0.43144264817237854, |
|
"logps/rejected": -2.679448366165161, |
|
"loss": 0.5916, |
|
"nll_loss": 0.61055588722229, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.21572132408618927, |
|
"rewards/margins": 1.1240026950836182, |
|
"rewards/rejected": -1.3397241830825806, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.2916666666666665, |
|
"grad_norm": 12.487543106079102, |
|
"learning_rate": 5.096471914376255e-06, |
|
"log_odds_chosen": 4.143012046813965, |
|
"log_odds_ratio": -0.055028241127729416, |
|
"logits/chosen": 369.7757873535156, |
|
"logits/rejected": 385.3197326660156, |
|
"logps/chosen": -0.42009004950523376, |
|
"logps/rejected": -3.1088523864746094, |
|
"loss": 0.5661, |
|
"nll_loss": 0.5417618155479431, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.21004502475261688, |
|
"rewards/margins": 1.3443810939788818, |
|
"rewards/rejected": -1.5544261932373047, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.3214285714285716, |
|
"grad_norm": 14.711711883544922, |
|
"learning_rate": 5.0636968354183334e-06, |
|
"log_odds_chosen": 3.86775541305542, |
|
"log_odds_ratio": -0.051612865179777145, |
|
"logits/chosen": 386.55340576171875, |
|
"logits/rejected": 420.259033203125, |
|
"logps/chosen": -0.40563899278640747, |
|
"logps/rejected": -3.13773775100708, |
|
"loss": 0.6336, |
|
"nll_loss": 0.5235159993171692, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20281949639320374, |
|
"rewards/margins": 1.3660494089126587, |
|
"rewards/rejected": -1.56886887550354, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.3511904761904763, |
|
"grad_norm": 14.341813087463379, |
|
"learning_rate": 5.031546054266276e-06, |
|
"log_odds_chosen": 4.3614888191223145, |
|
"log_odds_ratio": -0.02659059502184391, |
|
"logits/chosen": 381.09478759765625, |
|
"logits/rejected": 371.54730224609375, |
|
"logps/chosen": -0.3334965705871582, |
|
"logps/rejected": -3.2248425483703613, |
|
"loss": 0.6006, |
|
"nll_loss": 0.5441581010818481, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1667482852935791, |
|
"rewards/margins": 1.4456731081008911, |
|
"rewards/rejected": -1.6124212741851807, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 2.380952380952381, |
|
"grad_norm": 10.141727447509766, |
|
"learning_rate": 5e-06, |
|
"log_odds_chosen": 5.030593395233154, |
|
"log_odds_ratio": -0.060914844274520874, |
|
"logits/chosen": 360.45758056640625, |
|
"logits/rejected": 407.6477966308594, |
|
"logps/chosen": -0.41616684198379517, |
|
"logps/rejected": -4.148199558258057, |
|
"loss": 0.5815, |
|
"nll_loss": 0.6026356220245361, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.20808342099189758, |
|
"rewards/margins": 1.8660163879394531, |
|
"rewards/rejected": -2.0740997791290283, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.4107142857142856, |
|
"grad_norm": 12.312828063964844, |
|
"learning_rate": 4.969039949999534e-06, |
|
"log_odds_chosen": 3.7259132862091064, |
|
"log_odds_ratio": -0.05210161954164505, |
|
"logits/chosen": 441.5244140625, |
|
"logits/rejected": 379.39215087890625, |
|
"logps/chosen": -0.31666994094848633, |
|
"logps/rejected": -2.7463951110839844, |
|
"loss": 0.6506, |
|
"nll_loss": 0.5460222959518433, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.15833497047424316, |
|
"rewards/margins": 1.214862585067749, |
|
"rewards/rejected": -1.3731975555419922, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.4404761904761907, |
|
"grad_norm": 10.084498405456543, |
|
"learning_rate": 4.938647983247949e-06, |
|
"log_odds_chosen": 4.13731575012207, |
|
"log_odds_ratio": -0.07053720951080322, |
|
"logits/chosen": 375.71875, |
|
"logits/rejected": 404.51898193359375, |
|
"logps/chosen": -0.4222637116909027, |
|
"logps/rejected": -3.3887131214141846, |
|
"loss": 0.5941, |
|
"nll_loss": 0.5796958804130554, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.21113185584545135, |
|
"rewards/margins": 1.483224630355835, |
|
"rewards/rejected": -1.6943565607070923, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.4702380952380953, |
|
"grad_norm": 10.309317588806152, |
|
"learning_rate": 4.9088069367381605e-06, |
|
"log_odds_chosen": 4.0256781578063965, |
|
"log_odds_ratio": -0.02946905419230461, |
|
"logits/chosen": 410.3990783691406, |
|
"logits/rejected": 388.10638427734375, |
|
"logps/chosen": -0.34235674142837524, |
|
"logps/rejected": -2.9506092071533203, |
|
"loss": 0.6024, |
|
"nll_loss": 0.5277774333953857, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17117837071418762, |
|
"rewards/margins": 1.3041261434555054, |
|
"rewards/rejected": -1.4753046035766602, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 17.37739372253418, |
|
"learning_rate": 4.8795003647426654e-06, |
|
"log_odds_chosen": 4.524823188781738, |
|
"log_odds_ratio": -0.054420001804828644, |
|
"logits/chosen": 391.82403564453125, |
|
"logits/rejected": 331.8238220214844, |
|
"logps/chosen": -0.296555757522583, |
|
"logps/rejected": -3.383528470993042, |
|
"loss": 0.5781, |
|
"nll_loss": 0.6599777340888977, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.1482778787612915, |
|
"rewards/margins": 1.5434863567352295, |
|
"rewards/rejected": -1.691764235496521, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.5297619047619047, |
|
"grad_norm": 14.520256042480469, |
|
"learning_rate": 4.850712500726659e-06, |
|
"log_odds_chosen": 4.5146284103393555, |
|
"log_odds_ratio": -0.020355457440018654, |
|
"logits/chosen": 364.44293212890625, |
|
"logits/rejected": 381.6509094238281, |
|
"logps/chosen": -0.4376618266105652, |
|
"logps/rejected": -3.6549148559570312, |
|
"loss": 0.6064, |
|
"nll_loss": 0.7541596293449402, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2188309133052826, |
|
"rewards/margins": 1.6086266040802002, |
|
"rewards/rejected": -1.8274574279785156, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.5595238095238093, |
|
"grad_norm": 18.008617401123047, |
|
"learning_rate": 4.822428221704122e-06, |
|
"log_odds_chosen": 4.2900166511535645, |
|
"log_odds_ratio": -0.05251041799783707, |
|
"logits/chosen": 418.76812744140625, |
|
"logits/rejected": 422.1298828125, |
|
"logps/chosen": -0.3535473942756653, |
|
"logps/rejected": -3.3458237648010254, |
|
"loss": 0.5961, |
|
"nll_loss": 0.49905306100845337, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17677369713783264, |
|
"rewards/margins": 1.496138334274292, |
|
"rewards/rejected": -1.6729118824005127, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.5892857142857144, |
|
"grad_norm": 14.501811027526855, |
|
"learning_rate": 4.794633014853843e-06, |
|
"log_odds_chosen": 3.7202048301696777, |
|
"log_odds_ratio": -0.08040798455476761, |
|
"logits/chosen": 378.04962158203125, |
|
"logits/rejected": 374.47149658203125, |
|
"logps/chosen": -0.4141341745853424, |
|
"logps/rejected": -3.029280424118042, |
|
"loss": 0.6443, |
|
"nll_loss": 0.5974027514457703, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2070670872926712, |
|
"rewards/margins": 1.3075730800628662, |
|
"rewards/rejected": -1.514640212059021, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 2.619047619047619, |
|
"grad_norm": 13.21033000946045, |
|
"learning_rate": 4.767312946227961e-06, |
|
"log_odds_chosen": 4.036019325256348, |
|
"log_odds_ratio": -0.0349675677716732, |
|
"logits/chosen": 374.5437927246094, |
|
"logits/rejected": 379.2315368652344, |
|
"logps/chosen": -0.357850581407547, |
|
"logps/rejected": -3.0918896198272705, |
|
"loss": 0.6481, |
|
"nll_loss": 0.5479155778884888, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1789252907037735, |
|
"rewards/margins": 1.3670194149017334, |
|
"rewards/rejected": -1.5459448099136353, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.6488095238095237, |
|
"grad_norm": 10.330572128295898, |
|
"learning_rate": 4.740454631399773e-06, |
|
"log_odds_chosen": 4.903168678283691, |
|
"log_odds_ratio": -0.02322390116751194, |
|
"logits/chosen": 356.780029296875, |
|
"logits/rejected": 385.78631591796875, |
|
"logps/chosen": -0.3364141881465912, |
|
"logps/rejected": -3.9139695167541504, |
|
"loss": 0.5982, |
|
"nll_loss": 0.5599361658096313, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1682070940732956, |
|
"rewards/margins": 1.7887779474258423, |
|
"rewards/rejected": -1.9569847583770752, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 2.678571428571429, |
|
"grad_norm": 11.032569885253906, |
|
"learning_rate": 4.714045207910318e-06, |
|
"log_odds_chosen": 4.497644901275635, |
|
"log_odds_ratio": -0.030209308490157127, |
|
"logits/chosen": 394.76336669921875, |
|
"logits/rejected": 376.0134582519531, |
|
"logps/chosen": -0.41811317205429077, |
|
"logps/rejected": -3.552691698074341, |
|
"loss": 0.6268, |
|
"nll_loss": 0.5461404919624329, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20905658602714539, |
|
"rewards/margins": 1.567289113998413, |
|
"rewards/rejected": -1.7763458490371704, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.7083333333333335, |
|
"grad_norm": 17.51761245727539, |
|
"learning_rate": 4.688072309384955e-06, |
|
"log_odds_chosen": 4.075136661529541, |
|
"log_odds_ratio": -0.025673285126686096, |
|
"logits/chosen": 384.58154296875, |
|
"logits/rejected": 360.6435546875, |
|
"logps/chosen": -0.29184406995773315, |
|
"logps/rejected": -2.7901484966278076, |
|
"loss": 0.6721, |
|
"nll_loss": 0.5232545733451843, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.14592203497886658, |
|
"rewards/margins": 1.249152421951294, |
|
"rewards/rejected": -1.3950742483139038, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 2.738095238095238, |
|
"grad_norm": 13.356114387512207, |
|
"learning_rate": 4.662524041201569e-06, |
|
"log_odds_chosen": 3.7381584644317627, |
|
"log_odds_ratio": -0.041400760412216187, |
|
"logits/chosen": 424.15606689453125, |
|
"logits/rejected": 436.59417724609375, |
|
"logps/chosen": -0.3177054226398468, |
|
"logps/rejected": -2.7719643115997314, |
|
"loss": 0.6171, |
|
"nll_loss": 0.5559448599815369, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1588527113199234, |
|
"rewards/margins": 1.2271292209625244, |
|
"rewards/rejected": -1.3859821557998657, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.767857142857143, |
|
"grad_norm": 16.436243057250977, |
|
"learning_rate": 4.6373889576016826e-06, |
|
"log_odds_chosen": 3.9720139503479004, |
|
"log_odds_ratio": -0.04150586202740669, |
|
"logits/chosen": 419.3877868652344, |
|
"logits/rejected": 399.4032287597656, |
|
"logps/chosen": -0.3887273669242859, |
|
"logps/rejected": -3.1958889961242676, |
|
"loss": 0.6517, |
|
"nll_loss": 0.7023509740829468, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.19436368346214294, |
|
"rewards/margins": 1.403580665588379, |
|
"rewards/rejected": -1.5979444980621338, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 2.7976190476190474, |
|
"grad_norm": 13.92647647857666, |
|
"learning_rate": 4.6126560401444256e-06, |
|
"log_odds_chosen": 3.6036312580108643, |
|
"log_odds_ratio": -0.0841737613081932, |
|
"logits/chosen": 376.7361145019531, |
|
"logits/rejected": 346.24578857421875, |
|
"logps/chosen": -0.38905754685401917, |
|
"logps/rejected": -2.8610401153564453, |
|
"loss": 0.5864, |
|
"nll_loss": 0.5663691163063049, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.19452877342700958, |
|
"rewards/margins": 1.235991358757019, |
|
"rewards/rejected": -1.4305200576782227, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.8273809523809526, |
|
"grad_norm": 22.44719123840332, |
|
"learning_rate": 4.588314677411235e-06, |
|
"log_odds_chosen": 4.282876014709473, |
|
"log_odds_ratio": -0.02305762842297554, |
|
"logits/chosen": 351.80535888671875, |
|
"logits/rejected": 435.5419006347656, |
|
"logps/chosen": -0.376276433467865, |
|
"logps/rejected": -3.4008407592773438, |
|
"loss": 0.6159, |
|
"nll_loss": 0.5037115216255188, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1881382167339325, |
|
"rewards/margins": 1.512282133102417, |
|
"rewards/rejected": -1.7004203796386719, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 11.814839363098145, |
|
"learning_rate": 4.564354645876385e-06, |
|
"log_odds_chosen": 5.438357353210449, |
|
"log_odds_ratio": -0.03407539427280426, |
|
"logits/chosen": 393.2117614746094, |
|
"logits/rejected": 400.9731140136719, |
|
"logps/chosen": -0.38579824566841125, |
|
"logps/rejected": -4.569952964782715, |
|
"loss": 0.5976, |
|
"nll_loss": 0.6172084212303162, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.19289912283420563, |
|
"rewards/margins": 2.0920770168304443, |
|
"rewards/rejected": -2.2849764823913574, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.886904761904762, |
|
"grad_norm": 12.605895042419434, |
|
"learning_rate": 4.540766091864998e-06, |
|
"log_odds_chosen": 3.7765631675720215, |
|
"log_odds_ratio": -0.03285397216677666, |
|
"logits/chosen": 376.1605529785156, |
|
"logits/rejected": 400.92620849609375, |
|
"logps/chosen": -0.4142071604728699, |
|
"logps/rejected": -3.060633659362793, |
|
"loss": 0.5919, |
|
"nll_loss": 0.5258246660232544, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20710358023643494, |
|
"rewards/margins": 1.3232133388519287, |
|
"rewards/rejected": -1.5303168296813965, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 2.9166666666666665, |
|
"grad_norm": 10.655911445617676, |
|
"learning_rate": 4.517539514526257e-06, |
|
"log_odds_chosen": 3.3025474548339844, |
|
"log_odds_ratio": -0.07940138876438141, |
|
"logits/chosen": 416.90509033203125, |
|
"logits/rejected": 319.48687744140625, |
|
"logps/chosen": -0.6181944012641907, |
|
"logps/rejected": -2.8924221992492676, |
|
"loss": 0.6671, |
|
"nll_loss": 0.5718962550163269, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.30909720063209534, |
|
"rewards/margins": 1.1371139287948608, |
|
"rewards/rejected": -1.4462110996246338, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.946428571428571, |
|
"grad_norm": 11.349733352661133, |
|
"learning_rate": 4.4946657497549474e-06, |
|
"log_odds_chosen": 4.556464672088623, |
|
"log_odds_ratio": -0.033807143568992615, |
|
"logits/chosen": 363.4445495605469, |
|
"logits/rejected": 406.9335021972656, |
|
"logps/chosen": -0.3999672532081604, |
|
"logps/rejected": -3.7019195556640625, |
|
"loss": 0.6043, |
|
"nll_loss": 0.5209922790527344, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1999836266040802, |
|
"rewards/margins": 1.6509761810302734, |
|
"rewards/rejected": -1.8509597778320312, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 2.9761904761904763, |
|
"grad_norm": 10.738035202026367, |
|
"learning_rate": 4.47213595499958e-06, |
|
"log_odds_chosen": 4.959429740905762, |
|
"log_odds_ratio": -0.03518088907003403, |
|
"logits/chosen": 417.14154052734375, |
|
"logits/rejected": 379.5878601074219, |
|
"logps/chosen": -0.2811127007007599, |
|
"logps/rejected": -3.7553272247314453, |
|
"loss": 0.6266, |
|
"nll_loss": 0.42872363328933716, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.14055635035037994, |
|
"rewards/margins": 1.7371070384979248, |
|
"rewards/rejected": -1.8776636123657227, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_log_odds_chosen": 0.44554343819618225, |
|
"eval_log_odds_ratio": -0.6395100355148315, |
|
"eval_logits/chosen": 319.3428649902344, |
|
"eval_logits/rejected": 262.5517883300781, |
|
"eval_logps/chosen": -1.3908888101577759, |
|
"eval_logps/rejected": -1.6692452430725098, |
|
"eval_loss": 2.1094672679901123, |
|
"eval_nll_loss": 1.7835990190505981, |
|
"eval_rewards/accuracies": 0.5571428537368774, |
|
"eval_rewards/chosen": -0.6954444050788879, |
|
"eval_rewards/margins": 0.13917820155620575, |
|
"eval_rewards/rejected": -0.8346226215362549, |
|
"eval_runtime": 201.4063, |
|
"eval_samples_per_second": 2.746, |
|
"eval_steps_per_second": 0.348, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 504, |
|
"total_flos": 0.0, |
|
"train_loss": 1.593297282854716, |
|
"train_runtime": 15168.4679, |
|
"train_samples_per_second": 1.061, |
|
"train_steps_per_second": 0.033 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 504, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|