|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9968652037617555, |
|
"eval_steps": 500, |
|
"global_step": 159, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -2.072941541671753, |
|
"logits/rejected": -2.0026817321777344, |
|
"logps/chosen": -474.7008361816406, |
|
"logps/pi_response": -295.3243408203125, |
|
"logps/ref_response": -295.3243408203125, |
|
"logps/rejected": -399.129638671875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.0674960613250732, |
|
"logits/rejected": -2.0091359615325928, |
|
"logps/chosen": -293.94415283203125, |
|
"logps/pi_response": -184.32736206054688, |
|
"logps/ref_response": -184.39166259765625, |
|
"logps/rejected": -331.1109924316406, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.5069444179534912, |
|
"rewards/chosen": -0.005304105579853058, |
|
"rewards/margins": 0.014647711999714375, |
|
"rewards/rejected": -0.019951816648244858, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990353313429303e-07, |
|
"logits/chosen": -1.976543664932251, |
|
"logits/rejected": -1.9456901550292969, |
|
"logps/chosen": -287.80316162109375, |
|
"logps/pi_response": -200.89390563964844, |
|
"logps/ref_response": -194.6990509033203, |
|
"logps/rejected": -415.5035095214844, |
|
"loss": 0.6721, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.28011927008628845, |
|
"rewards/margins": 0.24019071459770203, |
|
"rewards/rejected": -0.5203099846839905, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.882681251368548e-07, |
|
"logits/chosen": -1.948282241821289, |
|
"logits/rejected": -1.9048038721084595, |
|
"logps/chosen": -322.1227111816406, |
|
"logps/pi_response": -202.88558959960938, |
|
"logps/ref_response": -198.0297088623047, |
|
"logps/rejected": -393.98382568359375, |
|
"loss": 0.6566, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.31841763854026794, |
|
"rewards/margins": 0.2520221173763275, |
|
"rewards/rejected": -0.5704396963119507, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6604720940421207e-07, |
|
"logits/chosen": -1.99917471408844, |
|
"logits/rejected": -1.9424034357070923, |
|
"logps/chosen": -306.33917236328125, |
|
"logps/pi_response": -221.41934204101562, |
|
"logps/ref_response": -196.04620361328125, |
|
"logps/rejected": -473.10430908203125, |
|
"loss": 0.6508, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.4164486825466156, |
|
"rewards/margins": 0.34302154183387756, |
|
"rewards/rejected": -0.7594702243804932, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3344075855595097e-07, |
|
"logits/chosen": -2.022153615951538, |
|
"logits/rejected": -1.8780553340911865, |
|
"logps/chosen": -300.8150634765625, |
|
"logps/pi_response": -213.9830322265625, |
|
"logps/ref_response": -212.6190185546875, |
|
"logps/rejected": -455.1544494628906, |
|
"loss": 0.6199, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.07085980474948883, |
|
"rewards/margins": 0.4123227000236511, |
|
"rewards/rejected": -0.48318248987197876, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.920161866827889e-07, |
|
"logits/chosen": -1.623453140258789, |
|
"logits/rejected": -1.5108482837677002, |
|
"logps/chosen": -327.77496337890625, |
|
"logps/pi_response": -236.1122589111328, |
|
"logps/ref_response": -207.8074493408203, |
|
"logps/rejected": -478.13677978515625, |
|
"loss": 0.6015, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4635746479034424, |
|
"rewards/margins": 0.47717157006263733, |
|
"rewards/rejected": -0.9407461285591125, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4376480090239047e-07, |
|
"logits/chosen": -1.3422645330429077, |
|
"logits/rejected": -1.035585641860962, |
|
"logps/chosen": -311.18182373046875, |
|
"logps/pi_response": -238.19351196289062, |
|
"logps/ref_response": -195.33450317382812, |
|
"logps/rejected": -481.33416748046875, |
|
"loss": 0.5778, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.45635801553726196, |
|
"rewards/margins": 0.5854658484458923, |
|
"rewards/rejected": -1.0418239831924438, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.910060778827554e-07, |
|
"logits/chosen": -1.1403189897537231, |
|
"logits/rejected": -0.7611511945724487, |
|
"logps/chosen": -358.22308349609375, |
|
"logps/pi_response": -249.77072143554688, |
|
"logps/ref_response": -202.05001831054688, |
|
"logps/rejected": -496.70916748046875, |
|
"loss": 0.5643, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.564284086227417, |
|
"rewards/margins": 0.598731517791748, |
|
"rewards/rejected": -1.163015604019165, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3627616503391812e-07, |
|
"logits/chosen": -0.8958919644355774, |
|
"logits/rejected": -0.4801406264305115, |
|
"logps/chosen": -365.9770202636719, |
|
"logps/pi_response": -254.03115844726562, |
|
"logps/ref_response": -202.2326202392578, |
|
"logps/rejected": -469.5523986816406, |
|
"loss": 0.582, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.6682159900665283, |
|
"rewards/margins": 0.44698458909988403, |
|
"rewards/rejected": -1.1152006387710571, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8220596619089573e-07, |
|
"logits/chosen": -0.8831208944320679, |
|
"logits/rejected": -0.5018015503883362, |
|
"logps/chosen": -358.91448974609375, |
|
"logps/pi_response": -286.5412292480469, |
|
"logps/ref_response": -215.90811157226562, |
|
"logps/rejected": -526.8685302734375, |
|
"loss": 0.5697, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6926028728485107, |
|
"rewards/margins": 0.6388980150222778, |
|
"rewards/rejected": -1.331500768661499, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3139467229135998e-07, |
|
"logits/chosen": -0.8882027864456177, |
|
"logits/rejected": -0.3719862103462219, |
|
"logps/chosen": -382.29156494140625, |
|
"logps/pi_response": -279.39959716796875, |
|
"logps/ref_response": -194.23623657226562, |
|
"logps/rejected": -500.37554931640625, |
|
"loss": 0.5822, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8814951181411743, |
|
"rewards/margins": 0.4973115026950836, |
|
"rewards/rejected": -1.3788065910339355, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.628481651367875e-08, |
|
"logits/chosen": -0.7373358011245728, |
|
"logits/rejected": -0.3627234399318695, |
|
"logps/chosen": -384.5245666503906, |
|
"logps/pi_response": -285.951171875, |
|
"logps/ref_response": -195.06085205078125, |
|
"logps/rejected": -552.2709350585938, |
|
"loss": 0.5429, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.024389624595642, |
|
"rewards/margins": 0.5978730916976929, |
|
"rewards/rejected": -1.6222625970840454, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.904486005914027e-08, |
|
"logits/chosen": -0.8074722290039062, |
|
"logits/rejected": -0.4555937349796295, |
|
"logps/chosen": -363.5116271972656, |
|
"logps/pi_response": -276.3110046386719, |
|
"logps/ref_response": -198.39134216308594, |
|
"logps/rejected": -539.1277465820312, |
|
"loss": 0.5798, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7972265481948853, |
|
"rewards/margins": 0.6627088785171509, |
|
"rewards/rejected": -1.4599354267120361, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1464952759020856e-08, |
|
"logits/chosen": -0.7802283763885498, |
|
"logits/rejected": -0.35213881731033325, |
|
"logps/chosen": -357.5544128417969, |
|
"logps/pi_response": -271.56719970703125, |
|
"logps/ref_response": -197.29266357421875, |
|
"logps/rejected": -542.0772094726562, |
|
"loss": 0.5593, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7836874723434448, |
|
"rewards/margins": 0.6558796167373657, |
|
"rewards/rejected": -1.4395670890808105, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.8708793644441086e-09, |
|
"logits/chosen": -0.9855061769485474, |
|
"logits/rejected": -0.5764757990837097, |
|
"logps/chosen": -366.1041564941406, |
|
"logps/pi_response": -283.294921875, |
|
"logps/ref_response": -215.0994873046875, |
|
"logps/rejected": -516.2839965820312, |
|
"loss": 0.5699, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7308257818222046, |
|
"rewards/margins": 0.46794968843460083, |
|
"rewards/rejected": -1.1987755298614502, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 159, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5937920936248587, |
|
"train_runtime": 4171.3796, |
|
"train_samples_per_second": 4.885, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 159, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|