| { |
| "best_global_step": 98, |
| "best_metric": 0.14128435, |
| "best_model_checkpoint": "./output_dpo/v0-20260226-085120/checkpoint-98", |
| "epoch": 1.9861635220125786, |
| "eval_steps": 50, |
| "global_step": 98, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02012578616352201, |
| "grad_norm": 1.4377635717391968, |
| "learning_rate": 2e-05, |
| "logits/chosen": -1.7360858917236328, |
| "logits/rejected": -1.7113451957702637, |
| "logps/chosen": -111.01881408691406, |
| "logps/rejected": -147.11973571777344, |
| "loss": 1.319612741470337, |
| "memory(GiB)": 239.65, |
| "nll_loss": 0.6264656782150269, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1, |
| "train_speed(iter/s)": 0.004523 |
| }, |
| { |
| "epoch": 0.10062893081761007, |
| "grad_norm": 1.4525984525680542, |
| "learning_rate": 0.0001, |
| "logits/chosen": -1.6965384483337402, |
| "logits/rejected": -1.681287407875061, |
| "logps/chosen": -111.46014404296875, |
| "logps/rejected": -143.75, |
| "loss": 1.361119270324707, |
| "memory(GiB)": 239.65, |
| "nll_loss": 0.6818519830703735, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": 0.04633765667676926, |
| "rewards/margins": 0.02939797379076481, |
| "rewards/rejected": 0.01693967543542385, |
| "step": 5, |
| "train_speed(iter/s)": 0.003936 |
| }, |
| { |
| "epoch": 0.20125786163522014, |
| "grad_norm": 1.0078742504119873, |
| "learning_rate": 9.928848976574019e-05, |
| "logits/chosen": -1.7403156757354736, |
| "logits/rejected": -1.726575255393982, |
| "logps/chosen": -92.17589569091797, |
| "logps/rejected": -137.906005859375, |
| "loss": 0.9127995491027832, |
| "memory(GiB)": 284.39, |
| "nll_loss": 0.5469792485237122, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": 2.0497705936431885, |
| "rewards/margins": 1.318472146987915, |
| "rewards/rejected": 0.7312980890274048, |
| "step": 10, |
| "train_speed(iter/s)": 0.003899 |
| }, |
| { |
| "epoch": 0.3018867924528302, |
| "grad_norm": 1.1189488172531128, |
| "learning_rate": 9.717420893549902e-05, |
| "logits/chosen": -1.8927457332611084, |
| "logits/rejected": -1.8742872476577759, |
| "logps/chosen": -56.06190872192383, |
| "logps/rejected": -129.63563537597656, |
| "loss": 0.5759311199188233, |
| "memory(GiB)": 284.39, |
| "nll_loss": 0.3772023618221283, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": 5.639416694641113, |
| "rewards/margins": 3.9854512214660645, |
| "rewards/rejected": 1.6539649963378906, |
| "step": 15, |
| "train_speed(iter/s)": 0.003905 |
| }, |
| { |
| "epoch": 0.4025157232704403, |
| "grad_norm": 2.065215826034546, |
| "learning_rate": 9.371733080722911e-05, |
| "logits/chosen": -2.0726945400238037, |
| "logits/rejected": -2.0517024993896484, |
| "logps/chosen": -38.782867431640625, |
| "logps/rejected": -141.28872680664062, |
| "loss": 0.34540715217590334, |
| "memory(GiB)": 284.39, |
| "nll_loss": 0.24602404236793518, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": 7.165956020355225, |
| "rewards/margins": 7.171680450439453, |
| "rewards/rejected": -0.005724119953811169, |
| "step": 20, |
| "train_speed(iter/s)": 0.003816 |
| }, |
| { |
| "epoch": 0.5031446540880503, |
| "grad_norm": 0.9638963937759399, |
| "learning_rate": 8.90162395476046e-05, |
| "logits/chosen": -2.205498456954956, |
| "logits/rejected": -2.182650089263916, |
| "logps/chosen": -34.5748405456543, |
| "logps/rejected": -168.0699462890625, |
| "loss": 0.29475107192993166, |
| "memory(GiB)": 284.39, |
| "nll_loss": 0.2225954234600067, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 7.66351842880249, |
| "rewards/margins": 9.37935733795166, |
| "rewards/rejected": -1.7158397436141968, |
| "step": 25, |
| "train_speed(iter/s)": 0.00375 |
| }, |
| { |
| "epoch": 0.6037735849056604, |
| "grad_norm": 0.5872039794921875, |
| "learning_rate": 8.320473013836196e-05, |
| "logits/chosen": -2.2474639415740967, |
| "logits/rejected": -2.2216179370880127, |
| "logps/chosen": -23.524024963378906, |
| "logps/rejected": -159.84942626953125, |
| "loss": 0.23147854804992676, |
| "memory(GiB)": 284.39, |
| "nll_loss": 0.18826261162757874, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": 8.869623184204102, |
| "rewards/margins": 10.225828170776367, |
| "rewards/rejected": -1.3562055826187134, |
| "step": 30, |
| "train_speed(iter/s)": 0.003835 |
| }, |
| { |
| "epoch": 0.7044025157232704, |
| "grad_norm": 0.8212366700172424, |
| "learning_rate": 7.644820051634812e-05, |
| "logits/chosen": -2.2804150581359863, |
| "logits/rejected": -2.2608768939971924, |
| "logps/chosen": -20.996126174926758, |
| "logps/rejected": -161.36029052734375, |
| "loss": 0.1881607413291931, |
| "memory(GiB)": 284.39, |
| "nll_loss": 0.13474711775779724, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 9.420888900756836, |
| "rewards/margins": 10.558382987976074, |
| "rewards/rejected": -1.137495517730713, |
| "step": 35, |
| "train_speed(iter/s)": 0.003906 |
| }, |
| { |
| "epoch": 0.8050314465408805, |
| "grad_norm": 0.9303659200668335, |
| "learning_rate": 6.89389442805288e-05, |
| "logits/chosen": -2.2562363147735596, |
| "logits/rejected": -2.2325804233551025, |
| "logps/chosen": -26.601587295532227, |
| "logps/rejected": -155.21389770507812, |
| "loss": 0.21106297969818116, |
| "memory(GiB)": 284.39, |
| "nll_loss": 0.15431135892868042, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 9.138971328735352, |
| "rewards/margins": 9.48228931427002, |
| "rewards/rejected": -0.34331730008125305, |
| "step": 40, |
| "train_speed(iter/s)": 0.003892 |
| }, |
| { |
| "epoch": 0.9056603773584906, |
| "grad_norm": 0.8759572505950928, |
| "learning_rate": 6.0890677937442574e-05, |
| "logits/chosen": -2.2504515647888184, |
| "logits/rejected": -2.236832618713379, |
| "logps/chosen": -24.932228088378906, |
| "logps/rejected": -150.9632110595703, |
| "loss": 0.21578831672668458, |
| "memory(GiB)": 284.39, |
| "nll_loss": 0.1573367863893509, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.940786361694336, |
| "rewards/margins": 9.429086685180664, |
| "rewards/rejected": -0.488300621509552, |
| "step": 45, |
| "train_speed(iter/s)": 0.003836 |
| }, |
| { |
| "epoch": 1.020125786163522, |
| "grad_norm": 1.6238784790039062, |
| "learning_rate": 5.2532458441935636e-05, |
| "logits/chosen": -2.3447046279907227, |
| "logits/rejected": -2.316112995147705, |
| "logps/chosen": -17.97600746154785, |
| "logps/rejected": -169.5856475830078, |
| "loss": 0.1865710735321045, |
| "memory(GiB)": 284.39, |
| "nll_loss": 0.1159815713763237, |
| "rewards/accuracies": 0.9767441749572754, |
| "rewards/chosen": 9.204967498779297, |
| "rewards/margins": 11.411535263061523, |
| "rewards/rejected": -2.2065672874450684, |
| "step": 50, |
| "train_speed(iter/s)": 0.003798 |
| }, |
| { |
| "epoch": 1.020125786163522, |
| "eval_logits/chosen": -2.462606191635132, |
| "eval_logits/rejected": -2.437251091003418, |
| "eval_logps/chosen": -19.061992645263672, |
| "eval_logps/rejected": -184.38104248046875, |
| "eval_loss": 0.1830219328403473, |
| "eval_nll_loss": 0.17293420433998108, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": 8.904085159301758, |
| "eval_rewards/margins": 12.307174682617188, |
| "eval_rewards/rejected": -3.4030885696411133, |
| "eval_runtime": 55.6446, |
| "eval_samples_per_second": 0.288, |
| "eval_steps_per_second": 0.144, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.120754716981132, |
| "grad_norm": 0.5176746249198914, |
| "learning_rate": 4.410216414245771e-05, |
| "logits/chosen": -2.3740134239196777, |
| "logits/rejected": -2.3573694229125977, |
| "logps/chosen": -26.2227840423584, |
| "logps/rejected": -179.9822540283203, |
| "loss": 0.19258421659469604, |
| "memory(GiB)": 284.39, |
| "nll_loss": 0.17000555992126465, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.813023567199707, |
| "rewards/margins": 11.842904090881348, |
| "rewards/rejected": -3.029881715774536, |
| "step": 55, |
| "train_speed(iter/s)": 0.00372 |
| }, |
| { |
| "epoch": 1.221383647798742, |
| "grad_norm": 0.6022250056266785, |
| "learning_rate": 3.58397246658848e-05, |
| "logits/chosen": -2.4972939491271973, |
| "logits/rejected": -2.4699082374572754, |
| "logps/chosen": -14.000228881835938, |
| "logps/rejected": -196.9097442626953, |
| "loss": 0.10635790824890137, |
| "memory(GiB)": 284.39, |
| "nll_loss": 0.08761530369520187, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.721292495727539, |
| "rewards/margins": 14.493858337402344, |
| "rewards/rejected": -4.772566795349121, |
| "step": 60, |
| "train_speed(iter/s)": 0.003743 |
| }, |
| { |
| "epoch": 1.3220125786163521, |
| "grad_norm": 0.2501760721206665, |
| "learning_rate": 2.798029242211828e-05, |
| "logits/chosen": -2.5347957611083984, |
| "logits/rejected": -2.50445818901062, |
| "logps/chosen": -23.887548446655273, |
| "logps/rejected": -183.65591430664062, |
| "loss": 0.18030774593353271, |
| "memory(GiB)": 284.39, |
| "nll_loss": 0.14212127029895782, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": 9.50097370147705, |
| "rewards/margins": 12.938058853149414, |
| "rewards/rejected": -3.4370861053466797, |
| "step": 65, |
| "train_speed(iter/s)": 0.003757 |
| }, |
| { |
| "epoch": 1.4226415094339622, |
| "grad_norm": 0.42134493589401245, |
| "learning_rate": 2.074755007023461e-05, |
| "logits/chosen": -2.5006675720214844, |
| "logits/rejected": -2.478884220123291, |
| "logps/chosen": -12.177281379699707, |
| "logps/rejected": -190.2030487060547, |
| "loss": 0.09010829329490662, |
| "memory(GiB)": 284.39, |
| "nll_loss": 0.07332514226436615, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": 10.052157402038574, |
| "rewards/margins": 13.963111877441406, |
| "rewards/rejected": -3.910953998565674, |
| "step": 70, |
| "train_speed(iter/s)": 0.003774 |
| }, |
| { |
| "epoch": 1.5232704402515722, |
| "grad_norm": 0.5933993458747864, |
| "learning_rate": 1.434734441843899e-05, |
| "logits/chosen": -2.502887487411499, |
| "logits/rejected": -2.486396551132202, |
| "logps/chosen": -18.57794189453125, |
| "logps/rejected": -170.333740234375, |
| "loss": 0.13938431739807128, |
| "memory(GiB)": 284.39, |
| "nll_loss": 0.11240720748901367, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.312703132629395, |
| "rewards/margins": 11.638362884521484, |
| "rewards/rejected": -2.325660467147827, |
| "step": 75, |
| "train_speed(iter/s)": 0.00376 |
| }, |
| { |
| "epoch": 1.6238993710691823, |
| "grad_norm": 0.2634561061859131, |
| "learning_rate": 8.961827939636196e-06, |
| "logits/chosen": -2.5577776432037354, |
| "logits/rejected": -2.5379796028137207, |
| "logps/chosen": -16.603967666625977, |
| "logps/rejected": -171.06466674804688, |
| "loss": 0.10857141017913818, |
| "memory(GiB)": 284.39, |
| "nll_loss": 0.09158992022275925, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.913006782531738, |
| "rewards/margins": 12.598286628723145, |
| "rewards/rejected": -2.685279369354248, |
| "step": 80, |
| "train_speed(iter/s)": 0.003778 |
| }, |
| { |
| "epoch": 1.7245283018867923, |
| "grad_norm": 0.3385748267173767, |
| "learning_rate": 4.744274637483936e-06, |
| "logits/chosen": -2.562164783477783, |
| "logits/rejected": -2.5376689434051514, |
| "logps/chosen": -14.094012260437012, |
| "logps/rejected": -163.73416137695312, |
| "loss": 0.11240246295928955, |
| "memory(GiB)": 284.39, |
| "nll_loss": 0.09068052470684052, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": 9.352496147155762, |
| "rewards/margins": 11.771881103515625, |
| "rewards/rejected": -2.419384479522705, |
| "step": 85, |
| "train_speed(iter/s)": 0.003777 |
| }, |
| { |
| "epoch": 1.8251572327044026, |
| "grad_norm": 0.3210693895816803, |
| "learning_rate": 1.8147178055029579e-06, |
| "logits/chosen": -2.602306842803955, |
| "logits/rejected": -2.567457675933838, |
| "logps/chosen": -17.956844329833984, |
| "logps/rejected": -175.5157470703125, |
| "loss": 0.11938213109970093, |
| "memory(GiB)": 284.39, |
| "nll_loss": 0.09758913516998291, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": 9.622145652770996, |
| "rewards/margins": 12.495707511901855, |
| "rewards/rejected": -2.873561382293701, |
| "step": 90, |
| "train_speed(iter/s)": 0.003796 |
| }, |
| { |
| "epoch": 1.9257861635220126, |
| "grad_norm": 0.3329070210456848, |
| "learning_rate": 2.5653383040524227e-07, |
| "logits/chosen": -2.591177463531494, |
| "logits/rejected": -2.568394422531128, |
| "logps/chosen": -17.04227638244629, |
| "logps/rejected": -188.1129913330078, |
| "loss": 0.11813113689422608, |
| "memory(GiB)": 284.39, |
| "nll_loss": 0.10378739982843399, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.477154731750488, |
| "rewards/margins": 13.704524040222168, |
| "rewards/rejected": -4.227367877960205, |
| "step": 95, |
| "train_speed(iter/s)": 0.003803 |
| }, |
| { |
| "epoch": 1.9861635220125786, |
| "eval_logits/chosen": -2.6464767456054688, |
| "eval_logits/rejected": -2.6153650283813477, |
| "eval_logps/chosen": -15.376700401306152, |
| "eval_logps/rejected": -193.30332946777344, |
| "eval_loss": 0.14128434658050537, |
| "eval_nll_loss": 0.14024823904037476, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": 9.272613525390625, |
| "eval_rewards/margins": 13.567930221557617, |
| "eval_rewards/rejected": -4.295315742492676, |
| "eval_runtime": 55.5933, |
| "eval_samples_per_second": 0.288, |
| "eval_steps_per_second": 0.144, |
| "step": 98 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 98, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.261229460544324e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|