{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984301412872841, "eval_steps": 100, "global_step": 159, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.125e-07, "logits/chosen": -2.872462511062622, "logits/rejected": -2.8200550079345703, "logps/chosen": -279.5750732421875, "logps/rejected": -283.5914306640625, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.06, "learning_rate": 3.125e-06, "logits/chosen": -2.7532310485839844, "logits/rejected": -2.7174949645996094, "logps/chosen": -283.4659423828125, "logps/rejected": -245.05682373046875, "loss": 0.6929, "rewards/accuracies": 0.4652777910232544, "rewards/chosen": -0.00040846053161658347, "rewards/margins": 0.0003327548620291054, "rewards/rejected": -0.0007412154227495193, "step": 10 }, { "epoch": 0.13, "learning_rate": 4.9903533134293035e-06, "logits/chosen": -2.7988767623901367, "logits/rejected": -2.745978832244873, "logps/chosen": -275.3612365722656, "logps/rejected": -259.2863464355469, "loss": 0.6891, "rewards/accuracies": 0.6875, "rewards/chosen": 0.0042456588707864285, "rewards/margins": 0.009360184893012047, "rewards/rejected": -0.005114525556564331, "step": 20 }, { "epoch": 0.19, "learning_rate": 4.882681251368549e-06, "logits/chosen": -2.750326633453369, "logits/rejected": -2.6968815326690674, "logps/chosen": -287.40570068359375, "logps/rejected": -264.3677978515625, "loss": 0.6796, "rewards/accuracies": 0.653124988079071, "rewards/chosen": 0.019095653668045998, "rewards/margins": 0.02568042278289795, "rewards/rejected": -0.006584773771464825, "step": 30 }, { "epoch": 0.25, "learning_rate": 4.660472094042121e-06, "logits/chosen": -2.753675937652588, "logits/rejected": -2.7094359397888184, "logps/chosen": -269.2148742675781, "logps/rejected": -266.2108459472656, "loss": 0.6689, "rewards/accuracies": 0.625, "rewards/chosen": 0.008650553412735462, "rewards/margins": 0.03987262770533562, "rewards/rejected": -0.031222078949213028, "step": 40 }, { "epoch": 0.31, "learning_rate": 4.33440758555951e-06, "logits/chosen": -2.700374126434326, "logits/rejected": -2.640312910079956, "logps/chosen": -279.00579833984375, "logps/rejected": -280.64959716796875, "loss": 0.657, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -0.0027772835455834866, "rewards/margins": 0.06927188485860825, "rewards/rejected": -0.07204917818307877, "step": 50 }, { "epoch": 0.38, "learning_rate": 3.92016186682789e-06, "logits/chosen": -2.695119857788086, "logits/rejected": -2.624582529067993, "logps/chosen": -274.4857482910156, "logps/rejected": -262.78082275390625, "loss": 0.6506, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": 0.0005743972724303603, "rewards/margins": 0.09640298783779144, "rewards/rejected": -0.095828577876091, "step": 60 }, { "epoch": 0.44, "learning_rate": 3.437648009023905e-06, "logits/chosen": -2.645156145095825, "logits/rejected": -2.5854098796844482, "logps/chosen": -281.52117919921875, "logps/rejected": -287.34521484375, "loss": 0.6423, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.0025214666966348886, "rewards/margins": 0.1480122059583664, "rewards/rejected": -0.15053364634513855, "step": 70 }, { "epoch": 0.5, "learning_rate": 2.9100607788275547e-06, "logits/chosen": -2.600132703781128, "logits/rejected": -2.5598268508911133, "logps/chosen": -278.39520263671875, "logps/rejected": -274.490478515625, "loss": 0.6365, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -0.08650828897953033, "rewards/margins": 0.12828321754932404, "rewards/rejected": -0.21479150652885437, "step": 80 }, { "epoch": 0.57, "learning_rate": 2.3627616503391813e-06, "logits/chosen": -2.6346802711486816, "logits/rejected": -2.561859130859375, "logps/chosen": -267.5691223144531, "logps/rejected": -296.9002380371094, "loss": 0.625, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -0.10211894661188126, "rewards/margins": 0.14463460445404053, "rewards/rejected": -0.2467535436153412, "step": 90 }, { "epoch": 0.63, "learning_rate": 1.8220596619089576e-06, "logits/chosen": -2.628725528717041, "logits/rejected": -2.5589451789855957, "logps/chosen": -290.6697082519531, "logps/rejected": -277.4861755371094, "loss": 0.6186, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08957623690366745, "rewards/margins": 0.20158691704273224, "rewards/rejected": -0.2911631464958191, "step": 100 }, { "epoch": 0.63, "eval_logits/chosen": -2.6340699195861816, "eval_logits/rejected": -2.550675868988037, "eval_logps/chosen": -293.948974609375, "eval_logps/rejected": -284.51849365234375, "eval_loss": 0.627379298210144, "eval_rewards/accuracies": 0.6800000071525574, "eval_rewards/chosen": -0.09909208118915558, "eval_rewards/margins": 0.17093417048454285, "eval_rewards/rejected": -0.27002623677253723, "eval_runtime": 384.4479, "eval_samples_per_second": 5.202, "eval_steps_per_second": 0.65, "step": 100 }, { "epoch": 0.69, "learning_rate": 1.3139467229135999e-06, "logits/chosen": -2.6164746284484863, "logits/rejected": -2.556546688079834, "logps/chosen": -296.1142272949219, "logps/rejected": -295.5721740722656, "loss": 0.6274, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.0782129243016243, "rewards/margins": 0.19299447536468506, "rewards/rejected": -0.27120739221572876, "step": 110 }, { "epoch": 0.75, "learning_rate": 8.628481651367876e-07, "logits/chosen": -2.619702100753784, "logits/rejected": -2.5185346603393555, "logps/chosen": -305.4084167480469, "logps/rejected": -269.8453063964844, "loss": 0.6209, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11033356189727783, "rewards/margins": 0.20669062435626984, "rewards/rejected": -0.3170241713523865, "step": 120 }, { "epoch": 0.82, "learning_rate": 4.904486005914027e-07, "logits/chosen": -2.639065742492676, "logits/rejected": -2.5192837715148926, "logps/chosen": -312.64453125, "logps/rejected": -290.57025146484375, "loss": 0.6221, "rewards/accuracies": 0.6875, "rewards/chosen": -0.15254992246627808, "rewards/margins": 0.21097925305366516, "rewards/rejected": -0.36352917551994324, "step": 130 }, { "epoch": 0.88, "learning_rate": 2.1464952759020857e-07, "logits/chosen": -2.5287649631500244, "logits/rejected": -2.4844765663146973, "logps/chosen": -285.4928283691406, "logps/rejected": -300.5223693847656, "loss": 0.6143, "rewards/accuracies": 0.6781250238418579, "rewards/chosen": -0.14462201297283173, "rewards/margins": 0.2161392718553543, "rewards/rejected": -0.3607613146305084, "step": 140 }, { "epoch": 0.94, "learning_rate": 4.870879364444109e-08, "logits/chosen": -2.6168649196624756, "logits/rejected": -2.5214154720306396, "logps/chosen": -299.5126037597656, "logps/rejected": -298.0356750488281, "loss": 0.6145, "rewards/accuracies": 0.703125, "rewards/chosen": -0.1232418566942215, "rewards/margins": 0.22777679562568665, "rewards/rejected": -0.3510186970233917, "step": 150 }, { "epoch": 1.0, "step": 159, "total_flos": 0.0, "train_loss": 0.6436299775381508, "train_runtime": 7284.0023, "train_samples_per_second": 2.798, "train_steps_per_second": 0.022 } ], "logging_steps": 10, "max_steps": 159, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }