{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.971563981042654, "eval_steps": 128, "global_step": 104, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.018957345971563982, "grad_norm": 66.10237426067714, "learning_rate": 4.545454545454545e-08, "logits/chosen": 117.53560638427734, "logits/rejected": 126.8960952758789, "logps/chosen": -335.40118408203125, "logps/rejected": -439.16552734375, "loss": 0.5, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.1895734597156398, "grad_norm": 65.8363279797723, "learning_rate": 4.545454545454545e-07, "logits/chosen": 135.0050811767578, "logits/rejected": 138.34999084472656, "logps/chosen": -396.04180908203125, "logps/rejected": -440.06195068359375, "loss": 0.4978, "rewards/accuracies": 0.4583333432674408, "rewards/chosen": -0.0022641660179942846, "rewards/margins": 0.03446006774902344, "rewards/rejected": -0.03672423213720322, "step": 10 }, { "epoch": 0.3791469194312796, "grad_norm": 30.020665129557617, "learning_rate": 4.885348141000122e-07, "logits/chosen": 123.14253234863281, "logits/rejected": 126.7535629272461, "logps/chosen": -354.2257995605469, "logps/rejected": -407.9169006347656, "loss": 0.4121, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.9093106985092163, "rewards/margins": 0.3055870234966278, "rewards/rejected": 0.6037237644195557, "step": 20 }, { "epoch": 0.5687203791469194, "grad_norm": 24.190229582222074, "learning_rate": 4.5025027361734613e-07, "logits/chosen": 145.62486267089844, "logits/rejected": 138.91897583007812, "logps/chosen": -376.835205078125, "logps/rejected": -432.81298828125, "loss": 0.3514, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.7815009355545044, "rewards/margins": 1.4220384359359741, "rewards/rejected": -0.640537440776825, "step": 30 }, { "epoch": 0.7582938388625592, "grad_norm": 24.559258380924415, "learning_rate": 3.893311157806091e-07, "logits/chosen": 134.8649139404297, "logits/rejected": 123.98270416259766, "logps/chosen": -324.2713317871094, "logps/rejected": -366.3738098144531, "loss": 0.3442, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 1.4961439371109009, "rewards/margins": 1.9210717678070068, "rewards/rejected": -0.42492780089378357, "step": 40 }, { "epoch": 0.9478672985781991, "grad_norm": 25.998652882986566, "learning_rate": 3.126631330646801e-07, "logits/chosen": 153.93051147460938, "logits/rejected": 157.34010314941406, "logps/chosen": -383.385009765625, "logps/rejected": -484.5369567871094, "loss": 0.3087, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": 1.7879406213760376, "rewards/margins": 2.1984703540802, "rewards/rejected": -0.4105294644832611, "step": 50 }, { "epoch": 1.1374407582938388, "grad_norm": 20.98032293493931, "learning_rate": 2.2891223348923882e-07, "logits/chosen": 145.45932006835938, "logits/rejected": 149.6053466796875, "logps/chosen": -359.0306396484375, "logps/rejected": -456.782470703125, "loss": 0.257, "rewards/accuracies": 0.768750011920929, "rewards/chosen": 1.9487521648406982, "rewards/margins": 2.969599723815918, "rewards/rejected": -1.0208473205566406, "step": 60 }, { "epoch": 1.3270142180094786, "grad_norm": 19.65205364182723, "learning_rate": 1.4754491880085317e-07, "logits/chosen": 140.3638458251953, "logits/rejected": 141.1572265625, "logps/chosen": -328.19830322265625, "logps/rejected": -429.80487060546875, "loss": 0.2359, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 1.9472763538360596, "rewards/margins": 2.761829137802124, "rewards/rejected": -0.8145527839660645, "step": 70 }, { "epoch": 1.5165876777251186, "grad_norm": 19.33483383354362, "learning_rate": 7.775827023107834e-08, "logits/chosen": 127.70585632324219, "logits/rejected": 143.24069213867188, "logps/chosen": -311.0325622558594, "logps/rejected": -434.9661560058594, "loss": 0.2084, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 2.130922794342041, "rewards/margins": 3.2173447608947754, "rewards/rejected": -1.0864222049713135, "step": 80 }, { "epoch": 1.7061611374407581, "grad_norm": 21.62644101836202, "learning_rate": 2.7440387297912122e-08, "logits/chosen": 128.0491180419922, "logits/rejected": 140.35018920898438, "logps/chosen": -341.66192626953125, "logps/rejected": -459.8998107910156, "loss": 0.2046, "rewards/accuracies": 0.84375, "rewards/chosen": 2.238457202911377, "rewards/margins": 3.3493576049804688, "rewards/rejected": -1.110900640487671, "step": 90 }, { "epoch": 1.8957345971563981, "grad_norm": 19.399744221303617, "learning_rate": 2.27878296044029e-09, "logits/chosen": 134.3742218017578, "logits/rejected": 134.44503784179688, "logps/chosen": -334.2799377441406, "logps/rejected": -428.43621826171875, "loss": 0.1949, "rewards/accuracies": 0.84375, "rewards/chosen": 2.384758472442627, "rewards/margins": 2.9683709144592285, "rewards/rejected": -0.5836124420166016, "step": 100 }, { "epoch": 1.971563981042654, "step": 104, "total_flos": 0.0, "train_loss": 0.29816230271871275, "train_runtime": 2165.9381, "train_samples_per_second": 6.233, "train_steps_per_second": 0.048 } ], "logging_steps": 10, "max_steps": 104, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }