|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 42, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"debug/policy_chosen_logits": 0.09027537703514099, |
|
"debug/policy_chosen_logps": -162.730224609375, |
|
"debug/policy_rejected_logits": 0.5158556699752808, |
|
"debug/policy_rejected_logps": -184.16571044921875, |
|
"debug/reference_chosen_logps": -162.730224609375, |
|
"debug/reference_rejected_logps": -184.16571044921875, |
|
"epoch": 0.023809523809523808, |
|
"grad_norm": 4.685966665550777, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.09027537703514099, |
|
"logits/rejected": 0.5158556699752808, |
|
"logps/chosen": -162.730224609375, |
|
"logps/rejected": -184.16571044921875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.7074397802352905, |
|
"debug/policy_chosen_logps": -150.46807861328125, |
|
"debug/policy_rejected_logits": 0.43174317479133606, |
|
"debug/policy_rejected_logps": -140.48440551757812, |
|
"debug/reference_chosen_logps": -153.92564392089844, |
|
"debug/reference_rejected_logps": -142.85406494140625, |
|
"epoch": 0.047619047619047616, |
|
"grad_norm": 15.044621835270732, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.7074397802352905, |
|
"logits/rejected": 0.43174317479133606, |
|
"logps/chosen": -150.46807861328125, |
|
"logps/rejected": -140.48440551757812, |
|
"loss": 0.6973, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.034575510770082474, |
|
"rewards/margins": 0.010879031382501125, |
|
"rewards/rejected": 0.023696478456258774, |
|
"step": 2 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.4280781149864197, |
|
"debug/policy_chosen_logps": -153.80137634277344, |
|
"debug/policy_rejected_logits": 1.081570029258728, |
|
"debug/policy_rejected_logps": -173.27056884765625, |
|
"debug/reference_chosen_logps": -151.54473876953125, |
|
"debug/reference_rejected_logps": -169.95703125, |
|
"epoch": 0.07142857142857142, |
|
"grad_norm": 4.933146485934281, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.4280781149864197, |
|
"logits/rejected": 1.081570029258728, |
|
"logps/chosen": -153.80137634277344, |
|
"logps/rejected": -173.27056884765625, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.022566460072994232, |
|
"rewards/margins": 0.010568867437541485, |
|
"rewards/rejected": -0.03313532844185829, |
|
"step": 3 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.49108535051345825, |
|
"debug/policy_chosen_logps": -155.66534423828125, |
|
"debug/policy_rejected_logits": 0.4322296977043152, |
|
"debug/policy_rejected_logps": -149.7783203125, |
|
"debug/reference_chosen_logps": -153.9282989501953, |
|
"debug/reference_rejected_logps": -148.75108337402344, |
|
"epoch": 0.09523809523809523, |
|
"grad_norm": 6.198349828531256, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.49108535051345825, |
|
"logits/rejected": 0.4322296977043152, |
|
"logps/chosen": -155.66534423828125, |
|
"logps/rejected": -149.7783203125, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0173704344779253, |
|
"rewards/margins": -0.0070981215685606, |
|
"rewards/rejected": -0.010272311978042126, |
|
"step": 4 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.0142210721969604, |
|
"debug/policy_chosen_logps": -158.026611328125, |
|
"debug/policy_rejected_logits": 1.0418132543563843, |
|
"debug/policy_rejected_logps": -199.23785400390625, |
|
"debug/reference_chosen_logps": -155.4060516357422, |
|
"debug/reference_rejected_logps": -194.74618530273438, |
|
"epoch": 0.11904761904761904, |
|
"grad_norm": 4.004423522491588, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 1.0142210721969604, |
|
"logits/rejected": 1.0418132543563843, |
|
"logps/chosen": -158.026611328125, |
|
"logps/rejected": -199.23785400390625, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.026205480098724365, |
|
"rewards/margins": 0.018711339682340622, |
|
"rewards/rejected": -0.04491681978106499, |
|
"step": 5 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.803213894367218, |
|
"debug/policy_chosen_logps": -156.5592041015625, |
|
"debug/policy_rejected_logits": 0.9607799649238586, |
|
"debug/policy_rejected_logps": -173.58987426757812, |
|
"debug/reference_chosen_logps": -146.985595703125, |
|
"debug/reference_rejected_logps": -163.15786743164062, |
|
"epoch": 0.14285714285714285, |
|
"grad_norm": 10.059361542630269, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.803213894367218, |
|
"logits/rejected": 0.9607799649238586, |
|
"logps/chosen": -156.5592041015625, |
|
"logps/rejected": -173.58987426757812, |
|
"loss": 0.6869, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.09573620557785034, |
|
"rewards/margins": 0.008583765476942062, |
|
"rewards/rejected": -0.1043199747800827, |
|
"step": 6 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.5479093194007874, |
|
"debug/policy_chosen_logps": -176.60946655273438, |
|
"debug/policy_rejected_logits": 0.11254727840423584, |
|
"debug/policy_rejected_logps": -175.84767150878906, |
|
"debug/reference_chosen_logps": -174.020751953125, |
|
"debug/reference_rejected_logps": -170.24949645996094, |
|
"epoch": 0.16666666666666666, |
|
"grad_norm": 20.07889018294013, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.5479093194007874, |
|
"logits/rejected": 0.11254727840423584, |
|
"logps/chosen": -176.60946655273438, |
|
"logps/rejected": -175.84767150878906, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.025887146592140198, |
|
"rewards/margins": 0.030094660818576813, |
|
"rewards/rejected": -0.05598180741071701, |
|
"step": 7 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.5916139483451843, |
|
"debug/policy_chosen_logps": -164.50384521484375, |
|
"debug/policy_rejected_logits": 0.5801162123680115, |
|
"debug/policy_rejected_logps": -156.9475860595703, |
|
"debug/reference_chosen_logps": -153.96173095703125, |
|
"debug/reference_rejected_logps": -147.9217071533203, |
|
"epoch": 0.19047619047619047, |
|
"grad_norm": 4.5921375475915776, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.5916139483451843, |
|
"logits/rejected": 0.5801162123680115, |
|
"logps/chosen": -164.50384521484375, |
|
"logps/rejected": -156.9475860595703, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.10542111098766327, |
|
"rewards/margins": -0.015162268653512001, |
|
"rewards/rejected": -0.09025884419679642, |
|
"step": 8 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.2481817752122879, |
|
"debug/policy_chosen_logps": -169.9642333984375, |
|
"debug/policy_rejected_logits": 0.48347601294517517, |
|
"debug/policy_rejected_logps": -190.4657440185547, |
|
"debug/reference_chosen_logps": -162.74264526367188, |
|
"debug/reference_rejected_logps": -181.60940551757812, |
|
"epoch": 0.21428571428571427, |
|
"grad_norm": 5.023010276429253, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.2481817752122879, |
|
"logits/rejected": 0.48347601294517517, |
|
"logps/chosen": -169.9642333984375, |
|
"logps/rejected": -190.4657440185547, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.07221580296754837, |
|
"rewards/margins": 0.01634763740003109, |
|
"rewards/rejected": -0.08856344223022461, |
|
"step": 9 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.2575492560863495, |
|
"debug/policy_chosen_logps": -184.9705810546875, |
|
"debug/policy_rejected_logits": 0.5005592703819275, |
|
"debug/policy_rejected_logps": -172.09518432617188, |
|
"debug/reference_chosen_logps": -172.8156280517578, |
|
"debug/reference_rejected_logps": -160.72515869140625, |
|
"epoch": 0.23809523809523808, |
|
"grad_norm": 8.326836844842857, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.2575492560863495, |
|
"logits/rejected": 0.5005592703819275, |
|
"logps/chosen": -184.9705810546875, |
|
"logps/rejected": -172.09518432617188, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.12154942750930786, |
|
"rewards/margins": -0.00784902460873127, |
|
"rewards/rejected": -0.11370040476322174, |
|
"step": 10 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.02329457737505436, |
|
"debug/policy_chosen_logps": -151.5535888671875, |
|
"debug/policy_rejected_logits": 0.5039985179901123, |
|
"debug/policy_rejected_logps": -164.93890380859375, |
|
"debug/reference_chosen_logps": -145.47381591796875, |
|
"debug/reference_rejected_logps": -155.04107666015625, |
|
"epoch": 0.2619047619047619, |
|
"grad_norm": 8.956479762651878, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.02329457737505436, |
|
"logits/rejected": 0.5039985179901123, |
|
"logps/chosen": -151.5535888671875, |
|
"logps/rejected": -164.93890380859375, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.06079769879579544, |
|
"rewards/margins": 0.038180749863386154, |
|
"rewards/rejected": -0.0989784449338913, |
|
"step": 11 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.47942259907722473, |
|
"debug/policy_chosen_logps": -159.60877990722656, |
|
"debug/policy_rejected_logits": 0.5704939365386963, |
|
"debug/policy_rejected_logps": -154.61744689941406, |
|
"debug/reference_chosen_logps": -147.24301147460938, |
|
"debug/reference_rejected_logps": -141.2715301513672, |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 11.992421788281984, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.47942259907722473, |
|
"logits/rejected": 0.5704939365386963, |
|
"logps/chosen": -159.60877990722656, |
|
"logps/rejected": -154.61744689941406, |
|
"loss": 0.6853, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.12365761399269104, |
|
"rewards/margins": 0.009801514446735382, |
|
"rewards/rejected": -0.13345913589000702, |
|
"step": 12 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -0.20997528731822968, |
|
"debug/policy_chosen_logps": -142.75146484375, |
|
"debug/policy_rejected_logits": 0.3726802468299866, |
|
"debug/policy_rejected_logps": -175.70962524414062, |
|
"debug/reference_chosen_logps": -134.545166015625, |
|
"debug/reference_rejected_logps": -164.0076141357422, |
|
"epoch": 0.30952380952380953, |
|
"grad_norm": 5.358671233182435, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -0.20997528731822968, |
|
"logits/rejected": 0.3726802468299866, |
|
"logps/chosen": -142.75146484375, |
|
"logps/rejected": -175.70962524414062, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.08206304907798767, |
|
"rewards/margins": 0.0349571518599987, |
|
"rewards/rejected": -0.11702020466327667, |
|
"step": 13 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.20442210137844086, |
|
"debug/policy_chosen_logps": -177.51991271972656, |
|
"debug/policy_rejected_logits": 0.34923601150512695, |
|
"debug/policy_rejected_logps": -161.62881469726562, |
|
"debug/reference_chosen_logps": -164.69485473632812, |
|
"debug/reference_rejected_logps": -150.70733642578125, |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 6.471200581198782, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.20442210137844086, |
|
"logits/rejected": 0.34923601150512695, |
|
"logps/chosen": -177.51991271972656, |
|
"logps/rejected": -161.62881469726562, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.12825068831443787, |
|
"rewards/margins": -0.019035786390304565, |
|
"rewards/rejected": -0.1092148944735527, |
|
"step": 14 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.20042140781879425, |
|
"debug/policy_chosen_logps": -198.75933837890625, |
|
"debug/policy_rejected_logits": 0.24180738627910614, |
|
"debug/policy_rejected_logps": -177.14825439453125, |
|
"debug/reference_chosen_logps": -178.46697998046875, |
|
"debug/reference_rejected_logps": -158.2596435546875, |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 15.202276429910315, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.20042140781879425, |
|
"logits/rejected": 0.24180738627910614, |
|
"logps/chosen": -198.75933837890625, |
|
"logps/rejected": -177.14825439453125, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.2029237002134323, |
|
"rewards/margins": -0.014037556946277618, |
|
"rewards/rejected": -0.1888861358165741, |
|
"step": 15 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.3575197160243988, |
|
"debug/policy_chosen_logps": -159.78720092773438, |
|
"debug/policy_rejected_logits": 0.6836833953857422, |
|
"debug/policy_rejected_logps": -167.8487548828125, |
|
"debug/reference_chosen_logps": -148.35433959960938, |
|
"debug/reference_rejected_logps": -153.20465087890625, |
|
"epoch": 0.38095238095238093, |
|
"grad_norm": 5.011772584899409, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.3575197160243988, |
|
"logits/rejected": 0.6836833953857422, |
|
"logps/chosen": -159.78720092773438, |
|
"logps/rejected": -167.8487548828125, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.11432872712612152, |
|
"rewards/margins": 0.0321124903857708, |
|
"rewards/rejected": -0.14644122123718262, |
|
"step": 16 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.3418176472187042, |
|
"debug/policy_chosen_logps": -153.46934509277344, |
|
"debug/policy_rejected_logits": 0.33436495065689087, |
|
"debug/policy_rejected_logps": -164.7410888671875, |
|
"debug/reference_chosen_logps": -145.3973388671875, |
|
"debug/reference_rejected_logps": -149.6763458251953, |
|
"epoch": 0.40476190476190477, |
|
"grad_norm": 20.39016346970483, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.3418176472187042, |
|
"logits/rejected": 0.33436495065689087, |
|
"logps/chosen": -153.46934509277344, |
|
"logps/rejected": -164.7410888671875, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.08072000741958618, |
|
"rewards/margins": 0.06992734968662262, |
|
"rewards/rejected": -0.1506473571062088, |
|
"step": 17 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.21861077845096588, |
|
"debug/policy_chosen_logps": -184.25680541992188, |
|
"debug/policy_rejected_logits": 0.3139030635356903, |
|
"debug/policy_rejected_logps": -186.92042541503906, |
|
"debug/reference_chosen_logps": -167.13250732421875, |
|
"debug/reference_rejected_logps": -167.22145080566406, |
|
"epoch": 0.42857142857142855, |
|
"grad_norm": 9.604003163834712, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.21861077845096588, |
|
"logits/rejected": 0.3139030635356903, |
|
"logps/chosen": -184.25680541992188, |
|
"logps/rejected": -186.92042541503906, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.17124298214912415, |
|
"rewards/margins": 0.025746773928403854, |
|
"rewards/rejected": -0.1969897449016571, |
|
"step": 18 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.4868623614311218, |
|
"debug/policy_chosen_logps": -164.40745544433594, |
|
"debug/policy_rejected_logits": 0.407953143119812, |
|
"debug/policy_rejected_logps": -192.24801635742188, |
|
"debug/reference_chosen_logps": -149.22422790527344, |
|
"debug/reference_rejected_logps": -176.274658203125, |
|
"epoch": 0.4523809523809524, |
|
"grad_norm": 5.467573091749328, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.4868623614311218, |
|
"logits/rejected": 0.407953143119812, |
|
"logps/chosen": -164.40745544433594, |
|
"logps/rejected": -192.24801635742188, |
|
"loss": 0.6788, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1518322378396988, |
|
"rewards/margins": 0.007901255041360855, |
|
"rewards/rejected": -0.15973350405693054, |
|
"step": 19 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.059103213250637054, |
|
"debug/policy_chosen_logps": -174.8370361328125, |
|
"debug/policy_rejected_logits": 0.5786897540092468, |
|
"debug/policy_rejected_logps": -199.56698608398438, |
|
"debug/reference_chosen_logps": -161.7840118408203, |
|
"debug/reference_rejected_logps": -185.17050170898438, |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 5.234500875119642, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.059103213250637054, |
|
"logits/rejected": 0.5786897540092468, |
|
"logps/chosen": -174.8370361328125, |
|
"logps/rejected": -199.56698608398438, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.13053017854690552, |
|
"rewards/margins": 0.013434587977826595, |
|
"rewards/rejected": -0.1439647674560547, |
|
"step": 20 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.5743213891983032, |
|
"debug/policy_chosen_logps": -178.57594299316406, |
|
"debug/policy_rejected_logits": 0.20286375284194946, |
|
"debug/policy_rejected_logps": -179.84762573242188, |
|
"debug/reference_chosen_logps": -158.3350067138672, |
|
"debug/reference_rejected_logps": -160.45053100585938, |
|
"epoch": 0.5, |
|
"grad_norm": 6.102878645835385, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.5743213891983032, |
|
"logits/rejected": 0.20286375284194946, |
|
"logps/chosen": -178.57594299316406, |
|
"logps/rejected": -179.84762573242188, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.20240947604179382, |
|
"rewards/margins": -0.008438415825366974, |
|
"rewards/rejected": -0.19397103786468506, |
|
"step": 21 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.3122093677520752, |
|
"debug/policy_chosen_logps": -164.51431274414062, |
|
"debug/policy_rejected_logits": 0.3530707061290741, |
|
"debug/policy_rejected_logps": -176.93601989746094, |
|
"debug/reference_chosen_logps": -147.92042541503906, |
|
"debug/reference_rejected_logps": -156.74851989746094, |
|
"epoch": 0.5238095238095238, |
|
"grad_norm": 6.315550057559611, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.3122093677520752, |
|
"logits/rejected": 0.3530707061290741, |
|
"logps/chosen": -164.51431274414062, |
|
"logps/rejected": -176.93601989746094, |
|
"loss": 0.6815, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.16593879461288452, |
|
"rewards/margins": 0.03593616560101509, |
|
"rewards/rejected": -0.20187495648860931, |
|
"step": 22 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -0.044268831610679626, |
|
"debug/policy_chosen_logps": -157.81039428710938, |
|
"debug/policy_rejected_logits": 0.3559872508049011, |
|
"debug/policy_rejected_logps": -147.35525512695312, |
|
"debug/reference_chosen_logps": -150.7872314453125, |
|
"debug/reference_rejected_logps": -138.82229614257812, |
|
"epoch": 0.5476190476190477, |
|
"grad_norm": 23.620495593781406, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -0.044268831610679626, |
|
"logits/rejected": 0.3559872508049011, |
|
"logps/chosen": -157.81039428710938, |
|
"logps/rejected": -147.35525512695312, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.07023164629936218, |
|
"rewards/margins": 0.015097856521606445, |
|
"rewards/rejected": -0.08532950282096863, |
|
"step": 23 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.018966468051075935, |
|
"debug/policy_chosen_logps": -159.8534698486328, |
|
"debug/policy_rejected_logits": 0.1428254395723343, |
|
"debug/policy_rejected_logps": -164.7286834716797, |
|
"debug/reference_chosen_logps": -147.0450439453125, |
|
"debug/reference_rejected_logps": -149.85382080078125, |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 4.677349017880479, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.018966468051075935, |
|
"logits/rejected": 0.1428254395723343, |
|
"logps/chosen": -159.8534698486328, |
|
"logps/rejected": -164.7286834716797, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1280841827392578, |
|
"rewards/margins": 0.02066453918814659, |
|
"rewards/rejected": -0.1487487256526947, |
|
"step": 24 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.2853807806968689, |
|
"debug/policy_chosen_logps": -176.42611694335938, |
|
"debug/policy_rejected_logits": 0.13074414432048798, |
|
"debug/policy_rejected_logps": -155.15415954589844, |
|
"debug/reference_chosen_logps": -158.4669647216797, |
|
"debug/reference_rejected_logps": -136.29364013671875, |
|
"epoch": 0.5952380952380952, |
|
"grad_norm": 11.079802876278416, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.2853807806968689, |
|
"logits/rejected": 0.13074414432048798, |
|
"logps/chosen": -176.42611694335938, |
|
"logps/rejected": -155.15415954589844, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.179591566324234, |
|
"rewards/margins": 0.00901371892541647, |
|
"rewards/rejected": -0.18860529363155365, |
|
"step": 25 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.40262606739997864, |
|
"debug/policy_chosen_logps": -153.15701293945312, |
|
"debug/policy_rejected_logits": 0.7936873435974121, |
|
"debug/policy_rejected_logps": -169.62689208984375, |
|
"debug/reference_chosen_logps": -140.70889282226562, |
|
"debug/reference_rejected_logps": -152.8561248779297, |
|
"epoch": 0.6190476190476191, |
|
"grad_norm": 5.32694876840012, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.40262606739997864, |
|
"logits/rejected": 0.7936873435974121, |
|
"logps/chosen": -153.15701293945312, |
|
"logps/rejected": -169.62689208984375, |
|
"loss": 0.6733, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12448111921548843, |
|
"rewards/margins": 0.04322664067149162, |
|
"rewards/rejected": -0.16770777106285095, |
|
"step": 26 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.4326091408729553, |
|
"debug/policy_chosen_logps": -157.39126586914062, |
|
"debug/policy_rejected_logits": 0.607225239276886, |
|
"debug/policy_rejected_logps": -188.34918212890625, |
|
"debug/reference_chosen_logps": -145.33380126953125, |
|
"debug/reference_rejected_logps": -162.48890686035156, |
|
"epoch": 0.6428571428571429, |
|
"grad_norm": 4.700163204340666, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.4326091408729553, |
|
"logits/rejected": 0.607225239276886, |
|
"logps/chosen": -157.39126586914062, |
|
"logps/rejected": -188.34918212890625, |
|
"loss": 0.6642, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.12057456374168396, |
|
"rewards/margins": 0.13802826404571533, |
|
"rewards/rejected": -0.2586028277873993, |
|
"step": 27 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.031958021223545074, |
|
"debug/policy_chosen_logps": -159.67169189453125, |
|
"debug/policy_rejected_logits": 0.1899116486310959, |
|
"debug/policy_rejected_logps": -187.28189086914062, |
|
"debug/reference_chosen_logps": -141.6376953125, |
|
"debug/reference_rejected_logps": -162.19659423828125, |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 6.569512152499291, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.031958021223545074, |
|
"logits/rejected": 0.1899116486310959, |
|
"logps/chosen": -159.67169189453125, |
|
"logps/rejected": -187.28189086914062, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.180339977145195, |
|
"rewards/margins": 0.07051312178373337, |
|
"rewards/rejected": -0.2508530914783478, |
|
"step": 28 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.10809233784675598, |
|
"debug/policy_chosen_logps": -176.20724487304688, |
|
"debug/policy_rejected_logits": 0.41801854968070984, |
|
"debug/policy_rejected_logps": -197.15541076660156, |
|
"debug/reference_chosen_logps": -158.1036834716797, |
|
"debug/reference_rejected_logps": -176.26634216308594, |
|
"epoch": 0.6904761904761905, |
|
"grad_norm": 6.541965417685165, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.10809233784675598, |
|
"logits/rejected": 0.41801854968070984, |
|
"logps/chosen": -176.20724487304688, |
|
"logps/rejected": -197.15541076660156, |
|
"loss": 0.6681, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.181035578250885, |
|
"rewards/margins": 0.02785516157746315, |
|
"rewards/rejected": -0.20889073610305786, |
|
"step": 29 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -0.15116974711418152, |
|
"debug/policy_chosen_logps": -164.75247192382812, |
|
"debug/policy_rejected_logits": 0.12009341269731522, |
|
"debug/policy_rejected_logps": -175.12867736816406, |
|
"debug/reference_chosen_logps": -146.2644805908203, |
|
"debug/reference_rejected_logps": -149.75460815429688, |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 4.855645858354973, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -0.15116974711418152, |
|
"logits/rejected": 0.12009341269731522, |
|
"logps/chosen": -164.75247192382812, |
|
"logps/rejected": -175.12867736816406, |
|
"loss": 0.6786, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.18488001823425293, |
|
"rewards/margins": 0.06886060535907745, |
|
"rewards/rejected": -0.2537406086921692, |
|
"step": 30 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.17149780690670013, |
|
"debug/policy_chosen_logps": -188.273193359375, |
|
"debug/policy_rejected_logits": 0.3072517216205597, |
|
"debug/policy_rejected_logps": -187.52651977539062, |
|
"debug/reference_chosen_logps": -165.44805908203125, |
|
"debug/reference_rejected_logps": -158.7125244140625, |
|
"epoch": 0.7380952380952381, |
|
"grad_norm": 6.097067478609718, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.17149780690670013, |
|
"logits/rejected": 0.3072517216205597, |
|
"logps/chosen": -188.273193359375, |
|
"logps/rejected": -187.52651977539062, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.22825142741203308, |
|
"rewards/margins": 0.059888482093811035, |
|
"rewards/rejected": -0.2881399095058441, |
|
"step": 31 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.46266281604766846, |
|
"debug/policy_chosen_logps": -169.596435546875, |
|
"debug/policy_rejected_logits": 0.4477306604385376, |
|
"debug/policy_rejected_logps": -173.21981811523438, |
|
"debug/reference_chosen_logps": -152.52273559570312, |
|
"debug/reference_rejected_logps": -151.03050231933594, |
|
"epoch": 0.7619047619047619, |
|
"grad_norm": 10.520633954833, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.46266281604766846, |
|
"logits/rejected": 0.4477306604385376, |
|
"logps/chosen": -169.596435546875, |
|
"logps/rejected": -173.21981811523438, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.17073702812194824, |
|
"rewards/margins": 0.05115606635808945, |
|
"rewards/rejected": -0.2218931019306183, |
|
"step": 32 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.7084560394287109, |
|
"debug/policy_chosen_logps": -161.38735961914062, |
|
"debug/policy_rejected_logits": 0.6357196569442749, |
|
"debug/policy_rejected_logps": -164.42718505859375, |
|
"debug/reference_chosen_logps": -154.86412048339844, |
|
"debug/reference_rejected_logps": -157.79238891601562, |
|
"epoch": 0.7857142857142857, |
|
"grad_norm": 15.939572697711974, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.7084560394287109, |
|
"logits/rejected": 0.6357196569442749, |
|
"logps/chosen": -161.38735961914062, |
|
"logps/rejected": -164.42718505859375, |
|
"loss": 0.6803, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.06523235142230988, |
|
"rewards/margins": 0.001115655992180109, |
|
"rewards/rejected": -0.06634800881147385, |
|
"step": 33 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.12151144444942474, |
|
"debug/policy_chosen_logps": -161.50515747070312, |
|
"debug/policy_rejected_logits": 1.0420786142349243, |
|
"debug/policy_rejected_logps": -164.21615600585938, |
|
"debug/reference_chosen_logps": -150.59689331054688, |
|
"debug/reference_rejected_logps": -152.6244354248047, |
|
"epoch": 0.8095238095238095, |
|
"grad_norm": 9.868780868712001, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.12151144444942474, |
|
"logits/rejected": 1.0420786142349243, |
|
"logps/chosen": -161.50515747070312, |
|
"logps/rejected": -164.21615600585938, |
|
"loss": 0.6786, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.10908253490924835, |
|
"rewards/margins": 0.006834707222878933, |
|
"rewards/rejected": -0.11591724306344986, |
|
"step": 34 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.027947237715125084, |
|
"debug/policy_chosen_logps": -157.77456665039062, |
|
"debug/policy_rejected_logits": 0.31475889682769775, |
|
"debug/policy_rejected_logps": -159.06002807617188, |
|
"debug/reference_chosen_logps": -136.69996643066406, |
|
"debug/reference_rejected_logps": -138.58349609375, |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 8.177811902764494, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.027947237715125084, |
|
"logits/rejected": 0.31475889682769775, |
|
"logps/chosen": -157.77456665039062, |
|
"logps/rejected": -159.06002807617188, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.21074604988098145, |
|
"rewards/margins": -0.005980661138892174, |
|
"rewards/rejected": -0.20476537942886353, |
|
"step": 35 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.2593013346195221, |
|
"debug/policy_chosen_logps": -165.31509399414062, |
|
"debug/policy_rejected_logits": -0.04938528686761856, |
|
"debug/policy_rejected_logps": -168.42660522460938, |
|
"debug/reference_chosen_logps": -145.93374633789062, |
|
"debug/reference_rejected_logps": -145.29168701171875, |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 12.91232630873052, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.2593013346195221, |
|
"logits/rejected": -0.04938528686761856, |
|
"logps/chosen": -165.31509399414062, |
|
"logps/rejected": -168.42660522460938, |
|
"loss": 0.6712, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.19381342828273773, |
|
"rewards/margins": 0.03753571957349777, |
|
"rewards/rejected": -0.2313491404056549, |
|
"step": 36 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.0012390613555908, |
|
"debug/policy_chosen_logps": -165.09347534179688, |
|
"debug/policy_rejected_logits": 1.0178093910217285, |
|
"debug/policy_rejected_logps": -171.16152954101562, |
|
"debug/reference_chosen_logps": -150.7286834716797, |
|
"debug/reference_rejected_logps": -150.47354125976562, |
|
"epoch": 0.8809523809523809, |
|
"grad_norm": 4.747576829430422, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 1.0012390613555908, |
|
"logits/rejected": 1.0178093910217285, |
|
"logps/chosen": -165.09347534179688, |
|
"logps/rejected": -171.16152954101562, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1436479538679123, |
|
"rewards/margins": 0.06323190778493881, |
|
"rewards/rejected": -0.2068798542022705, |
|
"step": 37 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -0.11002390831708908, |
|
"debug/policy_chosen_logps": -173.82029724121094, |
|
"debug/policy_rejected_logits": 0.446510910987854, |
|
"debug/policy_rejected_logps": -212.50643920898438, |
|
"debug/reference_chosen_logps": -150.2817840576172, |
|
"debug/reference_rejected_logps": -185.846923828125, |
|
"epoch": 0.9047619047619048, |
|
"grad_norm": 5.476549746954031, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -0.11002390831708908, |
|
"logits/rejected": 0.446510910987854, |
|
"logps/chosen": -173.82029724121094, |
|
"logps/rejected": -212.50643920898438, |
|
"loss": 0.6724, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.23538516461849213, |
|
"rewards/margins": 0.031209895387291908, |
|
"rewards/rejected": -0.2665950655937195, |
|
"step": 38 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -0.20052273571491241, |
|
"debug/policy_chosen_logps": -163.867431640625, |
|
"debug/policy_rejected_logits": 0.5886087417602539, |
|
"debug/policy_rejected_logps": -185.58941650390625, |
|
"debug/reference_chosen_logps": -141.18801879882812, |
|
"debug/reference_rejected_logps": -159.76058959960938, |
|
"epoch": 0.9285714285714286, |
|
"grad_norm": 12.5233373844755, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -0.20052273571491241, |
|
"logits/rejected": 0.5886087417602539, |
|
"logps/chosen": -163.867431640625, |
|
"logps/rejected": -185.58941650390625, |
|
"loss": 0.6685, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.22679391503334045, |
|
"rewards/margins": 0.0314943790435791, |
|
"rewards/rejected": -0.25828829407691956, |
|
"step": 39 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.25073912739753723, |
|
"debug/policy_chosen_logps": -157.51876831054688, |
|
"debug/policy_rejected_logits": 0.30381596088409424, |
|
"debug/policy_rejected_logps": -176.589599609375, |
|
"debug/reference_chosen_logps": -138.98110961914062, |
|
"debug/reference_rejected_logps": -154.03880310058594, |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 5.529655141820795, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.25073912739753723, |
|
"logits/rejected": 0.30381596088409424, |
|
"logps/chosen": -157.51876831054688, |
|
"logps/rejected": -176.589599609375, |
|
"loss": 0.6694, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1853766143321991, |
|
"rewards/margins": 0.040131378918886185, |
|
"rewards/rejected": -0.22550798952579498, |
|
"step": 40 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.23757268488407135, |
|
"debug/policy_chosen_logps": -167.54112243652344, |
|
"debug/policy_rejected_logits": 0.40399065613746643, |
|
"debug/policy_rejected_logps": -191.4041748046875, |
|
"debug/reference_chosen_logps": -145.42752075195312, |
|
"debug/reference_rejected_logps": -164.51507568359375, |
|
"epoch": 0.9761904761904762, |
|
"grad_norm": 5.959237498635192, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.23757268488407135, |
|
"logits/rejected": 0.40399065613746643, |
|
"logps/chosen": -167.54112243652344, |
|
"logps/rejected": -191.4041748046875, |
|
"loss": 0.6706, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.22113589942455292, |
|
"rewards/margins": 0.047754913568496704, |
|
"rewards/rejected": -0.2688907980918884, |
|
"step": 41 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.029359659180045128, |
|
"debug/policy_chosen_logps": -178.32794189453125, |
|
"debug/policy_rejected_logits": 0.20693586766719818, |
|
"debug/policy_rejected_logps": -185.86007690429688, |
|
"debug/reference_chosen_logps": -151.25294494628906, |
|
"debug/reference_rejected_logps": -157.61886596679688, |
|
"epoch": 1.0, |
|
"grad_norm": 15.740573011032756, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.029359659180045128, |
|
"logits/rejected": 0.20693586766719818, |
|
"logps/chosen": -178.32794189453125, |
|
"logps/rejected": -185.86007690429688, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.27075010538101196, |
|
"rewards/margins": 0.01166202500462532, |
|
"rewards/rejected": -0.2824121117591858, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 42, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6828021520660037, |
|
"train_runtime": 390.3052, |
|
"train_samples_per_second": 6.784, |
|
"train_steps_per_second": 0.108 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 42, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|