{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9968652037617555, "eval_steps": 500, "global_step": 159, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.125e-08, "logits/chosen": -2.8596811294555664, "logits/rejected": -2.878772258758545, "logps/chosen": -377.73638916015625, "logps/rejected": -331.222412109375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.06, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -2.7957372665405273, "logits/rejected": -2.782663583755493, "logps/chosen": -276.7703857421875, "logps/rejected": -262.0076904296875, "loss": 0.6929, "rewards/accuracies": 0.5277777910232544, "rewards/chosen": 0.0007769857766106725, "rewards/margins": 0.0016400194726884365, "rewards/rejected": -0.0008630338124930859, "step": 10 }, { "epoch": 0.13, "learning_rate": 4.990353313429303e-07, "logits/chosen": -2.8224599361419678, "logits/rejected": -2.819226026535034, "logps/chosen": -257.95306396484375, "logps/rejected": -246.1923065185547, "loss": 0.688, "rewards/accuracies": 0.65625, "rewards/chosen": 0.005768328905105591, "rewards/margins": 0.012648087926208973, "rewards/rejected": -0.006879759021103382, "step": 20 }, { "epoch": 0.19, "learning_rate": 4.882681251368548e-07, "logits/chosen": -2.817078113555908, "logits/rejected": -2.7680933475494385, "logps/chosen": -256.79595947265625, "logps/rejected": -242.519775390625, "loss": 0.6718, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.023607144132256508, "rewards/margins": 0.046295683830976486, "rewards/rejected": -0.022688541561365128, "step": 30 }, { "epoch": 0.25, "learning_rate": 4.6604720940421207e-07, "logits/chosen": -2.7658379077911377, "logits/rejected": -2.779132127761841, "logps/chosen": -303.56890869140625, "logps/rejected": -275.5705261230469, "loss": 0.6489, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.03137209266424179, "rewards/margins": 0.10878098011016846, "rewards/rejected": -0.14015306532382965, "step": 40 }, { "epoch": 0.31, "learning_rate": 4.3344075855595097e-07, "logits/chosen": -2.7775702476501465, "logits/rejected": -2.7892866134643555, "logps/chosen": -298.8876953125, "logps/rejected": -303.22705078125, "loss": 0.6336, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12522175908088684, "rewards/margins": 0.1828564703464508, "rewards/rejected": -0.30807822942733765, "step": 50 }, { "epoch": 0.38, "learning_rate": 3.920161866827889e-07, "logits/chosen": -2.729459285736084, "logits/rejected": -2.708754539489746, "logps/chosen": -259.3099060058594, "logps/rejected": -277.4466857910156, "loss": 0.6141, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.30067360401153564, "rewards/margins": 0.1461033821105957, "rewards/rejected": -0.44677701592445374, "step": 60 }, { "epoch": 0.44, "learning_rate": 3.4376480090239047e-07, "logits/chosen": -2.742418050765991, "logits/rejected": -2.747220516204834, "logps/chosen": -293.5316467285156, "logps/rejected": -323.1183166503906, "loss": 0.5805, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.404310405254364, "rewards/margins": 0.3498535454273224, "rewards/rejected": -0.754163920879364, "step": 70 }, { "epoch": 0.5, "learning_rate": 2.910060778827554e-07, "logits/chosen": -2.7769148349761963, "logits/rejected": -2.749772071838379, "logps/chosen": -318.1655578613281, "logps/rejected": -307.8402404785156, "loss": 0.602, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4084464907646179, "rewards/margins": 0.3972111642360687, "rewards/rejected": -0.805657684803009, "step": 80 }, { "epoch": 0.56, "learning_rate": 2.3627616503391812e-07, "logits/chosen": -2.724620819091797, "logits/rejected": -2.711826801300049, "logps/chosen": -345.8966064453125, "logps/rejected": -358.5138244628906, "loss": 0.5744, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.48604559898376465, "rewards/margins": 0.4906246066093445, "rewards/rejected": -0.9766701459884644, "step": 90 }, { "epoch": 0.63, "learning_rate": 1.8220596619089573e-07, "logits/chosen": -2.7404863834381104, "logits/rejected": -2.6992406845092773, "logps/chosen": -330.0885314941406, "logps/rejected": -324.7499084472656, "loss": 0.5834, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.4633980393409729, "rewards/margins": 0.3974135220050812, "rewards/rejected": -0.860811710357666, "step": 100 }, { "epoch": 0.69, "learning_rate": 1.3139467229135998e-07, "logits/chosen": -2.694833755493164, "logits/rejected": -2.659644365310669, "logps/chosen": -270.2969970703125, "logps/rejected": -323.013916015625, "loss": 0.5582, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.448250949382782, "rewards/margins": 0.43962574005126953, "rewards/rejected": -0.8878766894340515, "step": 110 }, { "epoch": 0.75, "learning_rate": 8.628481651367875e-08, "logits/chosen": -2.7171640396118164, "logits/rejected": -2.7176883220672607, "logps/chosen": -309.8388671875, "logps/rejected": -334.4871520996094, "loss": 0.5727, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.4930656850337982, "rewards/margins": 0.5099568963050842, "rewards/rejected": -1.00302255153656, "step": 120 }, { "epoch": 0.82, "learning_rate": 4.904486005914027e-08, "logits/chosen": -2.6858134269714355, "logits/rejected": -2.667487859725952, "logps/chosen": -363.0616149902344, "logps/rejected": -362.6529846191406, "loss": 0.5814, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.7948101162910461, "rewards/margins": 0.3632834851741791, "rewards/rejected": -1.1580935716629028, "step": 130 }, { "epoch": 0.88, "learning_rate": 2.1464952759020856e-08, "logits/chosen": -2.694016695022583, "logits/rejected": -2.6449930667877197, "logps/chosen": -343.0268859863281, "logps/rejected": -364.78936767578125, "loss": 0.5468, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.7075488567352295, "rewards/margins": 0.5151935815811157, "rewards/rejected": -1.2227424383163452, "step": 140 }, { "epoch": 0.94, "learning_rate": 4.8708793644441086e-09, "logits/chosen": -2.6975722312927246, "logits/rejected": -2.6778318881988525, "logps/chosen": -323.04046630859375, "logps/rejected": -371.57696533203125, "loss": 0.5554, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.5652332305908203, "rewards/margins": 0.48478445410728455, "rewards/rejected": -1.0500177145004272, "step": 150 }, { "epoch": 1.0, "step": 159, "total_flos": 0.0, "train_loss": 0.6063382550605438, "train_runtime": 2648.5083, "train_samples_per_second": 7.694, "train_steps_per_second": 0.06 } ], "logging_steps": 10, "max_steps": 159, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }