{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9905213270142181,
  "eval_steps": 100,
  "global_step": 210,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.009478672985781991,
      "grad_norm": 31.999091462105085,
      "learning_rate": 2.3809523809523807e-08,
      "logits/chosen": -1.3901093006134033,
      "logits/rejected": -1.3982200622558594,
      "logps/chosen": -439.7777099609375,
      "logps/rejected": -517.9480590820312,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.0947867298578199,
      "grad_norm": 31.526233424514775,
      "learning_rate": 2.3809523809523806e-07,
      "logits/chosen": -1.1840474605560303,
      "logits/rejected": -1.2023670673370361,
      "logps/chosen": -318.02642822265625,
      "logps/rejected": -345.5296325683594,
      "loss": 0.6944,
      "rewards/accuracies": 0.4444444477558136,
      "rewards/chosen": 0.0005787869449704885,
      "rewards/margins": 0.002175838453695178,
      "rewards/rejected": -0.0015970510430634022,
      "step": 10
    },
    {
      "epoch": 0.1895734597156398,
      "grad_norm": 29.460026918462425,
      "learning_rate": 4.761904761904761e-07,
      "logits/chosen": -1.2405064105987549,
      "logits/rejected": -1.2777436971664429,
      "logps/chosen": -325.66754150390625,
      "logps/rejected": -444.10162353515625,
      "loss": 0.6877,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.0043294974602758884,
      "rewards/margins": 0.01897953823208809,
      "rewards/rejected": -0.014650041237473488,
      "step": 20
    },
    {
      "epoch": 0.2843601895734597,
      "grad_norm": 29.858845825573614,
      "learning_rate": 4.972077065562821e-07,
      "logits/chosen": -1.2667722702026367,
      "logits/rejected": -1.2541126012802124,
      "logps/chosen": -379.63861083984375,
      "logps/rejected": -386.56842041015625,
      "loss": 0.6743,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.015792345628142357,
      "rewards/margins": 0.05691219121217728,
      "rewards/rejected": -0.04111984372138977,
      "step": 30
    },
    {
      "epoch": 0.3791469194312796,
      "grad_norm": 28.04723285384618,
      "learning_rate": 4.876353872369572e-07,
      "logits/chosen": -1.2606487274169922,
      "logits/rejected": -1.2776422500610352,
      "logps/chosen": -330.6627197265625,
      "logps/rejected": -432.9537658691406,
      "loss": 0.6487,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.060253970324993134,
      "rewards/margins": 0.0897565707564354,
      "rewards/rejected": -0.029502594843506813,
      "step": 40
    },
    {
      "epoch": 0.47393364928909953,
      "grad_norm": 29.04625829417335,
      "learning_rate": 4.715123776075336e-07,
      "logits/chosen": -1.2534068822860718,
      "logits/rejected": -1.1865966320037842,
      "logps/chosen": -307.3436279296875,
      "logps/rejected": -284.7574462890625,
      "loss": 0.6288,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.020858120173215866,
      "rewards/margins": 0.09515110403299332,
      "rewards/rejected": -0.07429297268390656,
      "step": 50
    },
    {
      "epoch": 0.5687203791469194,
      "grad_norm": 28.336384421687487,
      "learning_rate": 4.492831268057306e-07,
      "logits/chosen": -1.2906352281570435,
      "logits/rejected": -1.3070456981658936,
      "logps/chosen": -324.20489501953125,
      "logps/rejected": -374.00274658203125,
      "loss": 0.5761,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 0.08636633306741714,
      "rewards/margins": 0.28788790106773376,
      "rewards/rejected": -0.20152156054973602,
      "step": 60
    },
    {
      "epoch": 0.6635071090047393,
      "grad_norm": 24.885818315067937,
      "learning_rate": 4.2156040946718343e-07,
      "logits/chosen": -1.330127477645874,
      "logits/rejected": -1.276735782623291,
      "logps/chosen": -344.3301086425781,
      "logps/rejected": -326.61151123046875,
      "loss": 0.5866,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.132747620344162,
      "rewards/margins": 0.28044360876083374,
      "rewards/rejected": -0.14769601821899414,
      "step": 70
    },
    {
      "epoch": 0.7582938388625592,
      "grad_norm": 25.726269521197576,
      "learning_rate": 3.891084338941603e-07,
      "logits/chosen": -1.2656139135360718,
      "logits/rejected": -1.2805755138397217,
      "logps/chosen": -338.24822998046875,
      "logps/rejected": -349.0716857910156,
      "loss": 0.5703,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": 0.06792763620615005,
      "rewards/margins": 0.5581067204475403,
      "rewards/rejected": -0.4901791214942932,
      "step": 80
    },
    {
      "epoch": 0.8530805687203792,
      "grad_norm": 23.99960856337552,
      "learning_rate": 3.528217757826529e-07,
      "logits/chosen": -1.303812026977539,
      "logits/rejected": -1.2814157009124756,
      "logps/chosen": -288.6119689941406,
      "logps/rejected": -334.3503723144531,
      "loss": 0.5343,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": 0.0316891223192215,
      "rewards/margins": 0.408654123544693,
      "rewards/rejected": -0.37696507573127747,
      "step": 90
    },
    {
      "epoch": 0.9478672985781991,
      "grad_norm": 24.512409938030444,
      "learning_rate": 3.137007182236637e-07,
      "logits/chosen": -1.3553069829940796,
      "logits/rejected": -1.3930418491363525,
      "logps/chosen": -394.2057800292969,
      "logps/rejected": -524.2303466796875,
      "loss": 0.528,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.024631375446915627,
      "rewards/margins": 0.491068035364151,
      "rewards/rejected": -0.5156994462013245,
      "step": 100
    },
    {
      "epoch": 0.9478672985781991,
      "eval_logits/chosen": -1.3159226179122925,
      "eval_logits/rejected": -1.3169375658035278,
      "eval_logps/chosen": -320.6200866699219,
      "eval_logps/rejected": -330.54302978515625,
      "eval_loss": 0.5266835689544678,
      "eval_rewards/accuracies": 0.7604166865348816,
      "eval_rewards/chosen": -0.025471201166510582,
      "eval_rewards/margins": 0.5800454020500183,
      "eval_rewards/rejected": -0.6055166125297546,
      "eval_runtime": 37.7626,
      "eval_samples_per_second": 19.861,
      "eval_steps_per_second": 0.636,
      "step": 100
    },
    {
      "epoch": 1.042654028436019,
      "grad_norm": 18.302035950206864,
      "learning_rate": 2.728236777596621e-07,
      "logits/chosen": -1.3288469314575195,
      "logits/rejected": -1.3007264137268066,
      "logps/chosen": -330.83392333984375,
      "logps/rejected": -358.9930419921875,
      "loss": 0.4808,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 0.03551309555768967,
      "rewards/margins": 0.604155421257019,
      "rewards/rejected": -0.5686423778533936,
      "step": 110
    },
    {
      "epoch": 1.1374407582938388,
      "grad_norm": 21.060796057076118,
      "learning_rate": 2.3131747660339394e-07,
      "logits/chosen": -1.3152925968170166,
      "logits/rejected": -1.2728514671325684,
      "logps/chosen": -354.1836242675781,
      "logps/rejected": -403.5113525390625,
      "loss": 0.4193,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": 0.02837497368454933,
      "rewards/margins": 0.9122093915939331,
      "rewards/rejected": -0.8838345408439636,
      "step": 120
    },
    {
      "epoch": 1.2322274881516588,
      "grad_norm": 20.045154065240116,
      "learning_rate": 1.9032628049921556e-07,
      "logits/chosen": -1.2807761430740356,
      "logits/rejected": -1.3415155410766602,
      "logps/chosen": -319.76373291015625,
      "logps/rejected": -381.88702392578125,
      "loss": 0.4052,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": 0.052242428064346313,
      "rewards/margins": 1.2852985858917236,
      "rewards/rejected": -1.2330560684204102,
      "step": 130
    },
    {
      "epoch": 1.3270142180094786,
      "grad_norm": 20.731255319297293,
      "learning_rate": 1.5098005849021078e-07,
      "logits/chosen": -1.3120365142822266,
      "logits/rejected": -1.2953948974609375,
      "logps/chosen": -295.6787109375,
      "logps/rejected": -355.78753662109375,
      "loss": 0.4188,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -0.07798402011394501,
      "rewards/margins": 0.8329852819442749,
      "rewards/rejected": -0.9109692573547363,
      "step": 140
    },
    {
      "epoch": 1.4218009478672986,
      "grad_norm": 22.334784640837082,
      "learning_rate": 1.1436343403356016e-07,
      "logits/chosen": -1.3462207317352295,
      "logits/rejected": -1.3286292552947998,
      "logps/chosen": -368.55035400390625,
      "logps/rejected": -399.04498291015625,
      "loss": 0.3699,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -0.05135868862271309,
      "rewards/margins": 1.201449990272522,
      "rewards/rejected": -1.2528085708618164,
      "step": 150
    },
    {
      "epoch": 1.5165876777251186,
      "grad_norm": 20.701829718895063,
      "learning_rate": 8.148578611867113e-08,
      "logits/chosen": -1.3034837245941162,
      "logits/rejected": -1.3216516971588135,
      "logps/chosen": -353.6603088378906,
      "logps/rejected": -452.823486328125,
      "loss": 0.3858,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -0.15693092346191406,
      "rewards/margins": 1.356492519378662,
      "rewards/rejected": -1.5134233236312866,
      "step": 160
    },
    {
      "epoch": 1.6113744075829384,
      "grad_norm": 19.451153050692426,
      "learning_rate": 5.325342458482779e-08,
      "logits/chosen": -1.2586638927459717,
      "logits/rejected": -1.292633295059204,
      "logps/chosen": -292.8143310546875,
      "logps/rejected": -365.19879150390625,
      "loss": 0.3721,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -0.14380544424057007,
      "rewards/margins": 1.1733076572418213,
      "rewards/rejected": -1.317112922668457,
      "step": 170
    },
    {
      "epoch": 1.7061611374407581,
      "grad_norm": 21.916474338608154,
      "learning_rate": 3.044460665744283e-08,
      "logits/chosen": -1.3701001405715942,
      "logits/rejected": -1.3554545640945435,
      "logps/chosen": -404.88909912109375,
      "logps/rejected": -457.29754638671875,
      "loss": 0.3622,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -0.04532099887728691,
      "rewards/margins": 1.295754075050354,
      "rewards/rejected": -1.3410749435424805,
      "step": 180
    },
    {
      "epoch": 1.8009478672985781,
      "grad_norm": 18.57991978572858,
      "learning_rate": 1.368808340056879e-08,
      "logits/chosen": -1.3218441009521484,
      "logits/rejected": -1.3573790788650513,
      "logps/chosen": -320.82342529296875,
      "logps/rejected": -399.82952880859375,
      "loss": 0.3755,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -0.12398891150951385,
      "rewards/margins": 1.0439832210540771,
      "rewards/rejected": -1.1679723262786865,
      "step": 190
    },
    {
      "epoch": 1.8957345971563981,
      "grad_norm": 19.884917777757575,
      "learning_rate": 3.4457674771554422e-09,
      "logits/chosen": -1.2637640237808228,
      "logits/rejected": -1.3061563968658447,
      "logps/chosen": -331.66986083984375,
      "logps/rejected": -383.78948974609375,
      "loss": 0.3731,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -0.10066553205251694,
      "rewards/margins": 1.0765092372894287,
      "rewards/rejected": -1.177174687385559,
      "step": 200
    },
    {
      "epoch": 1.8957345971563981,
      "eval_logits/chosen": -1.3081656694412231,
      "eval_logits/rejected": -1.3098862171173096,
      "eval_logps/chosen": -325.0733337402344,
      "eval_logps/rejected": -340.2323303222656,
      "eval_loss": 0.4820757508277893,
      "eval_rewards/accuracies": 0.7604166865348816,
      "eval_rewards/chosen": -0.2481323480606079,
      "eval_rewards/margins": 0.8418500423431396,
      "eval_rewards/rejected": -1.0899823904037476,
      "eval_runtime": 37.8271,
      "eval_samples_per_second": 19.827,
      "eval_steps_per_second": 0.634,
      "step": 200
    },
    {
      "epoch": 1.9905213270142181,
      "grad_norm": 21.350222142767173,
      "learning_rate": 0.0,
      "logits/chosen": -1.3175251483917236,
      "logits/rejected": -1.3090837001800537,
      "logps/chosen": -369.90289306640625,
      "logps/rejected": -375.2828674316406,
      "loss": 0.4175,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -0.085756316781044,
      "rewards/margins": 0.9539656639099121,
      "rewards/rejected": -1.039721965789795,
      "step": 210
    },
    {
      "epoch": 1.9905213270142181,
      "step": 210,
      "total_flos": 0.0,
      "train_loss": 0.5004417010716029,
      "train_runtime": 1390.6464,
      "train_samples_per_second": 9.708,
      "train_steps_per_second": 0.151
    }
  ],
  "logging_steps": 10,
  "max_steps": 210,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}