{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9968652037617555, "eval_steps": 500, "global_step": 159, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.125e-08, "logits/chosen": -2.843719482421875, "logits/rejected": -2.8718748092651367, "logps/chosen": -340.24505615234375, "logps/pi_response": -88.81813049316406, "logps/ref_response": -88.81813049316406, "logps/rejected": -141.60296630859375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.06, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -2.775027275085449, "logits/rejected": -2.7582144737243652, "logps/chosen": -225.69427490234375, "logps/pi_response": -73.32665252685547, "logps/ref_response": -73.0557632446289, "logps/rejected": -127.79769134521484, "loss": 0.6918, "rewards/accuracies": 0.5486111044883728, "rewards/chosen": 0.0009001567959785461, "rewards/margins": 0.0027927933260798454, "rewards/rejected": -0.0018926364136859775, "step": 10 }, { "epoch": 0.13, "learning_rate": 4.990353313429303e-07, "logits/chosen": -2.7578864097595215, "logits/rejected": -2.7449183464050293, "logps/chosen": -216.22109985351562, "logps/pi_response": -79.10755920410156, "logps/ref_response": -73.02125549316406, "logps/rejected": -117.88703918457031, "loss": 0.6655, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.005766990128904581, "rewards/margins": 0.042582910507917404, "rewards/rejected": -0.03681592270731926, "step": 20 }, { "epoch": 0.19, "learning_rate": 4.882681251368548e-07, "logits/chosen": -2.636462688446045, "logits/rejected": -2.6021816730499268, "logps/chosen": -240.2003936767578, "logps/pi_response": -100.06871032714844, "logps/ref_response": -68.24726867675781, "logps/rejected": -125.3204345703125, "loss": 0.6227, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08825792372226715, "rewards/margins": 0.159133642911911, "rewards/rejected": -0.24739158153533936, "step": 30 }, { "epoch": 0.25, "learning_rate": 4.6604720940421207e-07, "logits/chosen": -2.5370068550109863, "logits/rejected": -2.5128865242004395, "logps/chosen": -271.98651123046875, "logps/pi_response": -144.71682739257812, "logps/ref_response": -75.6551742553711, "logps/rejected": -181.0801544189453, "loss": 0.5839, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23095576465129852, "rewards/margins": 0.38431650400161743, "rewards/rejected": -0.6152722239494324, "step": 40 }, { "epoch": 0.31, "learning_rate": 4.3344075855595097e-07, "logits/chosen": -2.5748300552368164, "logits/rejected": -2.5561716556549072, "logps/chosen": -276.7567138671875, "logps/pi_response": -160.7132110595703, "logps/ref_response": -71.23826599121094, "logps/rejected": -182.82472229003906, "loss": 0.5987, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.3602275252342224, "rewards/margins": 0.41353344917297363, "rewards/rejected": -0.7737610340118408, "step": 50 }, { "epoch": 0.38, "learning_rate": 3.920161866827889e-07, "logits/chosen": -2.5654828548431396, "logits/rejected": -2.519744634628296, "logps/chosen": -221.7403106689453, "logps/pi_response": -161.64913940429688, "logps/ref_response": -67.74427795410156, "logps/rejected": -205.1297607421875, "loss": 0.5637, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.35707658529281616, "rewards/margins": 0.4391842782497406, "rewards/rejected": -0.7962608337402344, "step": 60 }, { "epoch": 0.44, "learning_rate": 3.4376480090239047e-07, "logits/chosen": -2.5978214740753174, "logits/rejected": -2.557990550994873, "logps/chosen": -270.705810546875, "logps/pi_response": -191.92599487304688, "logps/ref_response": -73.56436157226562, "logps/rejected": -205.6122589111328, "loss": 0.5327, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.4569978713989258, "rewards/margins": 0.58502596616745, "rewards/rejected": -1.0420238971710205, "step": 70 }, { "epoch": 0.5, "learning_rate": 2.910060778827554e-07, "logits/chosen": -2.6471352577209473, "logits/rejected": -2.6175568103790283, "logps/chosen": -273.21337890625, "logps/pi_response": -190.89492797851562, "logps/ref_response": -77.31620025634766, "logps/rejected": -209.2509307861328, "loss": 0.5247, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.4046536982059479, "rewards/margins": 0.6309649348258972, "rewards/rejected": -1.035618543624878, "step": 80 }, { "epoch": 0.56, "learning_rate": 2.3627616503391812e-07, "logits/chosen": -2.5935592651367188, "logits/rejected": -2.5323500633239746, "logps/chosen": -294.3185119628906, "logps/pi_response": -193.82952880859375, "logps/ref_response": -72.20295715332031, "logps/rejected": -214.9334716796875, "loss": 0.5214, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4483931064605713, "rewards/margins": 0.6660177707672119, "rewards/rejected": -1.1144107580184937, "step": 90 }, { "epoch": 0.63, "learning_rate": 1.8220596619089573e-07, "logits/chosen": -2.584444522857666, "logits/rejected": -2.538541078567505, "logps/chosen": -310.3904113769531, "logps/pi_response": -218.18429565429688, "logps/ref_response": -74.52151489257812, "logps/rejected": -236.4603271484375, "loss": 0.524, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.604050874710083, "rewards/margins": 0.6805473566055298, "rewards/rejected": -1.2845981121063232, "step": 100 }, { "epoch": 0.69, "learning_rate": 1.3139467229135998e-07, "logits/chosen": -2.508607864379883, "logits/rejected": -2.4556093215942383, "logps/chosen": -275.2364501953125, "logps/pi_response": -222.62771606445312, "logps/ref_response": -63.37713623046875, "logps/rejected": -252.931640625, "loss": 0.4936, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8243808746337891, "rewards/margins": 0.6452677845954895, "rewards/rejected": -1.4696485996246338, "step": 110 }, { "epoch": 0.75, "learning_rate": 8.628481651367875e-08, "logits/chosen": -2.523491621017456, "logits/rejected": -2.471564292907715, "logps/chosen": -280.9041748046875, "logps/pi_response": -221.3182373046875, "logps/ref_response": -66.3117446899414, "logps/rejected": -240.2959442138672, "loss": 0.483, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.5750901699066162, "rewards/margins": 0.953150749206543, "rewards/rejected": -1.5282410383224487, "step": 120 }, { "epoch": 0.82, "learning_rate": 4.904486005914027e-08, "logits/chosen": -2.496025800704956, "logits/rejected": -2.4560306072235107, "logps/chosen": -316.5722961425781, "logps/pi_response": -236.9508056640625, "logps/ref_response": -76.07049560546875, "logps/rejected": -261.31170654296875, "loss": 0.4892, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6082834601402283, "rewards/margins": 0.9130091667175293, "rewards/rejected": -1.5212925672531128, "step": 130 }, { "epoch": 0.88, "learning_rate": 2.1464952759020856e-08, "logits/chosen": -2.502040147781372, "logits/rejected": -2.4415221214294434, "logps/chosen": -303.0337219238281, "logps/pi_response": -231.24246215820312, "logps/ref_response": -78.19743347167969, "logps/rejected": -247.8349151611328, "loss": 0.4841, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.6394041776657104, "rewards/margins": 0.7987918257713318, "rewards/rejected": -1.4381959438323975, "step": 140 }, { "epoch": 0.94, "learning_rate": 4.8708793644441086e-09, "logits/chosen": -2.5210301876068115, "logits/rejected": -2.4584171772003174, "logps/chosen": -301.7514343261719, "logps/pi_response": -243.6031494140625, "logps/ref_response": -77.64109802246094, "logps/rejected": -281.96185302734375, "loss": 0.489, "rewards/accuracies": 0.75, "rewards/chosen": -0.6782695055007935, "rewards/margins": 0.8968443870544434, "rewards/rejected": -1.5751138925552368, "step": 150 }, { "epoch": 1.0, "step": 159, "total_flos": 0.0, "train_loss": 0.5467530106598476, "train_runtime": 4156.701, "train_samples_per_second": 4.902, "train_steps_per_second": 0.038 } ], "logging_steps": 10, "max_steps": 159, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }