|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.0010206338135982445, |
|
"eval_steps": 500, |
|
"global_step": 30, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.4021127119941484e-05, |
|
"grad_norm": 33.091712951660156, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"logits/chosen": -3.133824586868286, |
|
"logits/rejected": -3.1356313228607178, |
|
"logps/chosen": -317.8847961425781, |
|
"logps/rejected": -306.8866271972656, |
|
"loss": 0.8544, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10576057434082031, |
|
"rewards/margins": 0.2207910716533661, |
|
"rewards/rejected": -0.3265516459941864, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 6.804225423988297e-05, |
|
"grad_norm": 38.49541473388672, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -3.1433358192443848, |
|
"logits/rejected": -3.1124918460845947, |
|
"logps/chosen": -212.93820190429688, |
|
"logps/rejected": -198.22511291503906, |
|
"loss": 1.02, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.28638574481010437, |
|
"rewards/margins": -0.37813109159469604, |
|
"rewards/rejected": 0.0917452871799469, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00010206338135982445, |
|
"grad_norm": 27.203516006469727, |
|
"learning_rate": 3e-06, |
|
"logits/chosen": -3.2104451656341553, |
|
"logits/rejected": -3.2103281021118164, |
|
"logps/chosen": -235.89254760742188, |
|
"logps/rejected": -206.0758514404297, |
|
"loss": 0.605, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.40017586946487427, |
|
"rewards/margins": 0.24887371063232422, |
|
"rewards/rejected": 0.15130215883255005, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00013608450847976594, |
|
"grad_norm": 26.42736053466797, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -3.0741634368896484, |
|
"logits/rejected": -2.953117847442627, |
|
"logps/chosen": -428.7364196777344, |
|
"logps/rejected": -192.357666015625, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.4332427978515625, |
|
"rewards/margins": 0.5804362893104553, |
|
"rewards/rejected": -0.1471935510635376, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00017010563559970743, |
|
"grad_norm": 35.623634338378906, |
|
"learning_rate": 5e-06, |
|
"logits/chosen": -2.985285758972168, |
|
"logits/rejected": -2.8854246139526367, |
|
"logps/chosen": -342.38604736328125, |
|
"logps/rejected": -211.78237915039062, |
|
"loss": 0.8273, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.3475755751132965, |
|
"rewards/margins": -0.19371166825294495, |
|
"rewards/rejected": -0.15386392176151276, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0002041267627196489, |
|
"grad_norm": 31.70409393310547, |
|
"learning_rate": 6e-06, |
|
"logits/chosen": -3.074589967727661, |
|
"logits/rejected": -2.85581636428833, |
|
"logps/chosen": -593.6435546875, |
|
"logps/rejected": -259.5284729003906, |
|
"loss": 0.5473, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.2495414763689041, |
|
"rewards/margins": 0.35888367891311646, |
|
"rewards/rejected": -0.10934218764305115, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.00023814788983959038, |
|
"grad_norm": 32.41214370727539, |
|
"learning_rate": 7.000000000000001e-06, |
|
"logits/chosen": -2.976675033569336, |
|
"logits/rejected": -2.978133201599121, |
|
"logps/chosen": -218.11688232421875, |
|
"logps/rejected": -231.7698974609375, |
|
"loss": 0.7124, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.08671779930591583, |
|
"rewards/margins": 0.21950224041938782, |
|
"rewards/rejected": -0.30622005462646484, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.00027216901695953187, |
|
"grad_norm": 27.637351989746094, |
|
"learning_rate": 8.000000000000001e-06, |
|
"logits/chosen": -2.997600793838501, |
|
"logits/rejected": -3.1078853607177734, |
|
"logps/chosen": -165.71444702148438, |
|
"logps/rejected": -335.5938720703125, |
|
"loss": 0.6085, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.057393088936805725, |
|
"rewards/margins": 0.34109365940093994, |
|
"rewards/rejected": -0.3984867036342621, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.00030619014407947336, |
|
"grad_norm": 32.246559143066406, |
|
"learning_rate": 9e-06, |
|
"logits/chosen": -3.0760390758514404, |
|
"logits/rejected": -2.938197135925293, |
|
"logps/chosen": -463.1333312988281, |
|
"logps/rejected": -173.99951171875, |
|
"loss": 0.6398, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.5581258535385132, |
|
"rewards/margins": 0.7653559446334839, |
|
"rewards/rejected": -0.20723000168800354, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00034021127119941485, |
|
"grad_norm": 39.84626388549805, |
|
"learning_rate": 1e-05, |
|
"logits/chosen": -3.192596673965454, |
|
"logits/rejected": -2.9751501083374023, |
|
"logps/chosen": -473.14532470703125, |
|
"logps/rejected": -110.90155029296875, |
|
"loss": 0.9794, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.3494076132774353, |
|
"rewards/margins": -0.31546899676322937, |
|
"rewards/rejected": -0.03393859416246414, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00037423239831935634, |
|
"grad_norm": 39.444889068603516, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"logits/chosen": -3.096883535385132, |
|
"logits/rejected": -3.043616771697998, |
|
"logps/chosen": -391.7703552246094, |
|
"logps/rejected": -271.8331604003906, |
|
"loss": 0.8029, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.6240158081054688, |
|
"rewards/margins": 0.4651091992855072, |
|
"rewards/rejected": 0.15890654921531677, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0004082535254392978, |
|
"grad_norm": 40.78356170654297, |
|
"learning_rate": 1.2e-05, |
|
"logits/chosen": -3.000021457672119, |
|
"logits/rejected": -3.0893115997314453, |
|
"logps/chosen": -255.94607543945312, |
|
"logps/rejected": -405.3093566894531, |
|
"loss": 0.6297, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.33697396516799927, |
|
"rewards/margins": 0.3315456509590149, |
|
"rewards/rejected": 0.005428321659564972, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.00044227465255923927, |
|
"grad_norm": 35.69674301147461, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"logits/chosen": -3.011014699935913, |
|
"logits/rejected": -3.0214414596557617, |
|
"logps/chosen": -223.4480438232422, |
|
"logps/rejected": -261.62469482421875, |
|
"loss": 0.815, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.7416718006134033, |
|
"rewards/margins": 0.19626998901367188, |
|
"rewards/rejected": 0.5454018115997314, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.00047629577967918076, |
|
"grad_norm": 32.88405990600586, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"logits/chosen": -3.065549373626709, |
|
"logits/rejected": -3.1094508171081543, |
|
"logps/chosen": -169.08047485351562, |
|
"logps/rejected": -227.85162353515625, |
|
"loss": 0.8115, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 0.37659722566604614, |
|
"rewards/margins": -0.11390496790409088, |
|
"rewards/rejected": 0.4905022084712982, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0005103169067991223, |
|
"grad_norm": 20.918012619018555, |
|
"learning_rate": 1.5e-05, |
|
"logits/chosen": -3.045846462249756, |
|
"logits/rejected": -2.9114990234375, |
|
"logps/chosen": -573.1253662109375, |
|
"logps/rejected": -273.9326477050781, |
|
"loss": 0.3105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7755428552627563, |
|
"rewards/margins": 1.082038164138794, |
|
"rewards/rejected": -0.3064952790737152, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0005443380339190637, |
|
"grad_norm": 38.434749603271484, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"logits/chosen": -3.1250810623168945, |
|
"logits/rejected": -2.96527099609375, |
|
"logps/chosen": -396.9795837402344, |
|
"logps/rejected": -174.92918395996094, |
|
"loss": 0.9971, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.2244594693183899, |
|
"rewards/margins": -0.37520748376846313, |
|
"rewards/rejected": 0.5996670126914978, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0005783591610390052, |
|
"grad_norm": 37.71885681152344, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"logits/chosen": -2.970050096511841, |
|
"logits/rejected": -3.0346829891204834, |
|
"logps/chosen": -236.67071533203125, |
|
"logps/rejected": -403.0212097167969, |
|
"loss": 1.0307, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.9259212613105774, |
|
"rewards/margins": -0.23367927968502045, |
|
"rewards/rejected": 1.1596004962921143, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0006123802881589467, |
|
"grad_norm": 25.964290618896484, |
|
"learning_rate": 1.8e-05, |
|
"logits/chosen": -3.0690722465515137, |
|
"logits/rejected": -3.0348947048187256, |
|
"logps/chosen": -312.81317138671875, |
|
"logps/rejected": -259.230712890625, |
|
"loss": 0.6989, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.6348915100097656, |
|
"rewards/margins": 0.6418477892875671, |
|
"rewards/rejected": -0.006956290453672409, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0006464014152788882, |
|
"grad_norm": 27.076108932495117, |
|
"learning_rate": 1.9e-05, |
|
"logits/chosen": -3.135136842727661, |
|
"logits/rejected": -2.911625862121582, |
|
"logps/chosen": -473.7462158203125, |
|
"logps/rejected": -141.9491729736328, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.9424793720245361, |
|
"rewards/margins": 0.9380611181259155, |
|
"rewards/rejected": 0.00441819429397583, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0006804225423988297, |
|
"grad_norm": 51.820159912109375, |
|
"learning_rate": 2e-05, |
|
"logits/chosen": -2.8403027057647705, |
|
"logits/rejected": -3.0664803981781006, |
|
"logps/chosen": -219.79153442382812, |
|
"logps/rejected": -231.70004272460938, |
|
"loss": 1.1022, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.052263081073760986, |
|
"rewards/margins": -0.6957410573959351, |
|
"rewards/rejected": 0.7480041980743408, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0007144436695187712, |
|
"grad_norm": 45.9146614074707, |
|
"learning_rate": 2.1e-05, |
|
"logits/chosen": -2.9764764308929443, |
|
"logits/rejected": -2.9391565322875977, |
|
"logps/chosen": -458.0147705078125, |
|
"logps/rejected": -218.37648010253906, |
|
"loss": 1.0772, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.8049196004867554, |
|
"rewards/margins": -0.448437362909317, |
|
"rewards/rejected": 1.2533570528030396, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0007484647966387127, |
|
"grad_norm": 22.854082107543945, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"logits/chosen": -3.1029181480407715, |
|
"logits/rejected": -3.044419527053833, |
|
"logps/chosen": -154.8139190673828, |
|
"logps/rejected": -155.980712890625, |
|
"loss": 0.4065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.45841503143310547, |
|
"rewards/margins": 0.8753025531768799, |
|
"rewards/rejected": -0.41688746213912964, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0007824859237586541, |
|
"grad_norm": 43.94650650024414, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"logits/chosen": -2.9984076023101807, |
|
"logits/rejected": -3.114562511444092, |
|
"logps/chosen": -263.8316650390625, |
|
"logps/rejected": -340.1427307128906, |
|
"loss": 1.0427, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 0.6933631896972656, |
|
"rewards/margins": -0.14637237787246704, |
|
"rewards/rejected": 0.8397356271743774, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0008165070508785956, |
|
"grad_norm": 49.11759948730469, |
|
"learning_rate": 2.4e-05, |
|
"logits/chosen": -3.025609016418457, |
|
"logits/rejected": -3.0236239433288574, |
|
"logps/chosen": -412.2383117675781, |
|
"logps/rejected": -643.138427734375, |
|
"loss": 1.0374, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 1.1175220012664795, |
|
"rewards/margins": -0.31503716111183167, |
|
"rewards/rejected": 1.4325592517852783, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.000850528177998537, |
|
"grad_norm": 37.57383728027344, |
|
"learning_rate": 2.5e-05, |
|
"logits/chosen": -3.024618625640869, |
|
"logits/rejected": -3.123701572418213, |
|
"logps/chosen": -201.70530700683594, |
|
"logps/rejected": -224.99038696289062, |
|
"loss": 0.7477, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 0.9206753373146057, |
|
"rewards/margins": 0.5947309732437134, |
|
"rewards/rejected": 0.32594436407089233, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0008845493051184785, |
|
"grad_norm": 40.297794342041016, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"logits/chosen": -3.1100215911865234, |
|
"logits/rejected": -3.0195024013519287, |
|
"logps/chosen": -226.2320098876953, |
|
"logps/rejected": -193.0676727294922, |
|
"loss": 0.8151, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.9122087359428406, |
|
"rewards/margins": 0.0203973650932312, |
|
"rewards/rejected": 0.8918113708496094, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.00091857043223842, |
|
"grad_norm": 43.394229888916016, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"logits/chosen": -3.0187630653381348, |
|
"logits/rejected": -3.053678035736084, |
|
"logps/chosen": -153.02883911132812, |
|
"logps/rejected": -260.0540771484375, |
|
"loss": 1.3457, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.7145153284072876, |
|
"rewards/margins": -0.806611180305481, |
|
"rewards/rejected": 1.5211265087127686, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0009525915593583615, |
|
"grad_norm": 28.59680938720703, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"logits/chosen": -3.032409191131592, |
|
"logits/rejected": -3.0502161979675293, |
|
"logps/chosen": -240.1644287109375, |
|
"logps/rejected": -245.81436157226562, |
|
"loss": 0.5501, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 2.0826761722564697, |
|
"rewards/margins": 0.3433869779109955, |
|
"rewards/rejected": 1.7392891645431519, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.000986612686478303, |
|
"grad_norm": 37.069583892822266, |
|
"learning_rate": 2.9e-05, |
|
"logits/chosen": -3.0516104698181152, |
|
"logits/rejected": -3.144774913787842, |
|
"logps/chosen": -296.03125, |
|
"logps/rejected": -292.9052429199219, |
|
"loss": 1.1087, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.8324771523475647, |
|
"rewards/margins": -0.34529268741607666, |
|
"rewards/rejected": 1.1777698993682861, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0010206338135982445, |
|
"grad_norm": 30.238658905029297, |
|
"learning_rate": 3e-05, |
|
"logits/chosen": -3.064671039581299, |
|
"logits/rejected": -2.8963613510131836, |
|
"logps/chosen": -415.6085510253906, |
|
"logps/rejected": -149.9669952392578, |
|
"loss": 0.4892, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.0249770879745483, |
|
"rewards/margins": 0.7549355030059814, |
|
"rewards/rejected": 0.27004164457321167, |
|
"step": 30 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|