{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 189, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.015873015873015872, "grad_norm": 15.546492141333783, "learning_rate": 2.6315789473684208e-08, "logits/chosen": -1.4921875, "logits/rejected": -1.3828125, "logps/chosen": -83.0, "logps/rejected": -108.5, "loss": 0.6914, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.15873015873015872, "grad_norm": 14.774196452895438, "learning_rate": 2.631578947368421e-07, "logits/chosen": -1.2734375, "logits/rejected": -1.1953125, "logps/chosen": -107.5, "logps/rejected": -90.0, "loss": 0.6918, "rewards/accuracies": 0.2222222238779068, "rewards/chosen": -0.0020751953125, "rewards/margins": -0.00139617919921875, "rewards/rejected": -0.00069427490234375, "step": 10 }, { "epoch": 0.31746031746031744, "grad_norm": 14.3389141291959, "learning_rate": 4.970588235294118e-07, "logits/chosen": -1.28125, "logits/rejected": -1.2578125, "logps/chosen": -87.0, "logps/rejected": -102.5, "loss": 0.6842, "rewards/accuracies": 0.512499988079071, "rewards/chosen": 0.0015411376953125, "rewards/margins": 0.021240234375, "rewards/rejected": -0.019775390625, "step": 20 }, { "epoch": 0.47619047619047616, "grad_norm": 14.661136683478352, "learning_rate": 4.676470588235294e-07, "logits/chosen": -1.3125, "logits/rejected": -1.21875, "logps/chosen": -94.5, "logps/rejected": -100.0, "loss": 0.6687, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.036865234375, "rewards/margins": 0.07470703125, "rewards/rejected": -0.037841796875, "step": 30 }, { "epoch": 0.6349206349206349, "grad_norm": 15.043075717803145, "learning_rate": 4.38235294117647e-07, "logits/chosen": -1.3359375, "logits/rejected": -1.4140625, "logps/chosen": -94.0, "logps/rejected": -112.5, "loss": 0.6159, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.06005859375, "rewards/margins": 0.1669921875, "rewards/rejected": -0.10693359375, "step": 40 }, { "epoch": 0.7936507936507936, "grad_norm": 20.286017694579517, "learning_rate": 4.0882352941176465e-07, "logits/chosen": -1.359375, "logits/rejected": -1.3515625, "logps/chosen": -103.0, "logps/rejected": -115.0, "loss": 0.6568, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.00506591796875, "rewards/margins": 0.2099609375, "rewards/rejected": -0.2158203125, "step": 50 }, { "epoch": 0.9523809523809523, "grad_norm": 15.604577240602536, "learning_rate": 3.7941176470588235e-07, "logits/chosen": -1.3984375, "logits/rejected": -1.421875, "logps/chosen": -98.0, "logps/rejected": -117.0, "loss": 0.6149, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0322265625, "rewards/margins": 0.1865234375, "rewards/rejected": -0.154296875, "step": 60 }, { "epoch": 1.0, "eval_logits/chosen": -1.3359375, "eval_logits/rejected": -1.3046875, "eval_logps/chosen": -91.0, "eval_logps/rejected": -111.5, "eval_loss": 0.6164844036102295, "eval_rewards/accuracies": 0.7142857313156128, "eval_rewards/chosen": 0.007598876953125, "eval_rewards/margins": 0.224609375, "eval_rewards/rejected": -0.2177734375, "eval_runtime": 12.239, "eval_samples_per_second": 16.341, "eval_steps_per_second": 0.572, "step": 63 }, { "epoch": 1.1111111111111112, "grad_norm": 13.062522185911641, "learning_rate": 3.5e-07, "logits/chosen": -1.375, "logits/rejected": -1.375, "logps/chosen": -93.0, "logps/rejected": -108.0, "loss": 0.5617, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.1533203125, "rewards/margins": 0.375, "rewards/rejected": -0.220703125, "step": 70 }, { "epoch": 1.2698412698412698, "grad_norm": 12.793133064546527, "learning_rate": 3.205882352941177e-07, "logits/chosen": -1.2421875, "logits/rejected": -1.359375, "logps/chosen": -82.5, "logps/rejected": -112.5, "loss": 0.5127, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.1455078125, "rewards/margins": 0.515625, "rewards/rejected": -0.37109375, "step": 80 }, { "epoch": 1.4285714285714286, "grad_norm": 13.067935081505256, "learning_rate": 2.911764705882353e-07, "logits/chosen": -1.484375, "logits/rejected": -1.3203125, "logps/chosen": -100.0, "logps/rejected": -98.0, "loss": 0.5069, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.283203125, "rewards/margins": 0.5390625, "rewards/rejected": -0.2578125, "step": 90 }, { "epoch": 1.5873015873015874, "grad_norm": 15.317610443936815, "learning_rate": 2.6176470588235295e-07, "logits/chosen": -1.3671875, "logits/rejected": -1.375, "logps/chosen": -90.0, "logps/rejected": -111.0, "loss": 0.4966, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.2353515625, "rewards/margins": 0.5546875, "rewards/rejected": -0.318359375, "step": 100 }, { "epoch": 1.746031746031746, "grad_norm": 12.447441760307086, "learning_rate": 2.323529411764706e-07, "logits/chosen": -1.4296875, "logits/rejected": -1.296875, "logps/chosen": -97.0, "logps/rejected": -115.0, "loss": 0.4934, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.1435546875, "rewards/margins": 0.56640625, "rewards/rejected": -0.423828125, "step": 110 }, { "epoch": 1.9047619047619047, "grad_norm": 13.19848987425151, "learning_rate": 2.0294117647058823e-07, "logits/chosen": -1.34375, "logits/rejected": -1.34375, "logps/chosen": -94.0, "logps/rejected": -117.5, "loss": 0.4805, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.10205078125, "rewards/margins": 0.60546875, "rewards/rejected": -0.50390625, "step": 120 }, { "epoch": 2.0, "eval_logits/chosen": -1.34375, "eval_logits/rejected": -1.328125, "eval_logps/chosen": -92.0, "eval_logps/rejected": -113.0, "eval_loss": 0.5889843702316284, "eval_rewards/accuracies": 0.6428571343421936, "eval_rewards/chosen": -0.0751953125, "eval_rewards/margins": 0.31640625, "eval_rewards/rejected": -0.392578125, "eval_runtime": 14.7924, "eval_samples_per_second": 13.52, "eval_steps_per_second": 0.473, "step": 126 }, { "epoch": 2.0634920634920633, "grad_norm": 10.98673276740895, "learning_rate": 1.7352941176470587e-07, "logits/chosen": -1.390625, "logits/rejected": -1.3984375, "logps/chosen": -101.0, "logps/rejected": -116.0, "loss": 0.4562, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.193359375, "rewards/margins": 0.67578125, "rewards/rejected": -0.482421875, "step": 130 }, { "epoch": 2.2222222222222223, "grad_norm": 9.416416295176981, "learning_rate": 1.441176470588235e-07, "logits/chosen": -1.4140625, "logits/rejected": -1.3828125, "logps/chosen": -100.5, "logps/rejected": -107.5, "loss": 0.4104, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.2099609375, "rewards/margins": 0.76171875, "rewards/rejected": -0.55078125, "step": 140 }, { "epoch": 2.380952380952381, "grad_norm": 11.858508121730686, "learning_rate": 1.1470588235294116e-07, "logits/chosen": -1.3671875, "logits/rejected": -1.3515625, "logps/chosen": -93.5, "logps/rejected": -113.5, "loss": 0.413, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.2177734375, "rewards/margins": 0.7265625, "rewards/rejected": -0.51171875, "step": 150 }, { "epoch": 2.5396825396825395, "grad_norm": 9.556697117699082, "learning_rate": 8.529411764705883e-08, "logits/chosen": -1.296875, "logits/rejected": -1.4296875, "logps/chosen": -90.5, "logps/rejected": -108.5, "loss": 0.4203, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.1337890625, "rewards/margins": 0.82421875, "rewards/rejected": -0.69140625, "step": 160 }, { "epoch": 2.6984126984126986, "grad_norm": 10.065550572065934, "learning_rate": 5.5882352941176474e-08, "logits/chosen": -1.3671875, "logits/rejected": -1.3671875, "logps/chosen": -98.5, "logps/rejected": -109.5, "loss": 0.4442, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.09814453125, "rewards/margins": 0.734375, "rewards/rejected": -0.63671875, "step": 170 }, { "epoch": 2.857142857142857, "grad_norm": 11.316830865980743, "learning_rate": 2.6470588235294116e-08, "logits/chosen": -1.3515625, "logits/rejected": -1.34375, "logps/chosen": -102.0, "logps/rejected": -104.5, "loss": 0.4264, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.10498046875, "rewards/margins": 0.6328125, "rewards/rejected": -0.52734375, "step": 180 }, { "epoch": 3.0, "eval_logits/chosen": -1.3515625, "eval_logits/rejected": -1.3359375, "eval_logps/chosen": -92.5, "eval_logps/rejected": -114.0, "eval_loss": 0.5811718702316284, "eval_rewards/accuracies": 0.6428571343421936, "eval_rewards/chosen": -0.1357421875, "eval_rewards/margins": 0.34765625, "eval_rewards/rejected": -0.484375, "eval_runtime": 14.7749, "eval_samples_per_second": 13.537, "eval_steps_per_second": 0.474, "step": 189 }, { "epoch": 3.0, "step": 189, "total_flos": 0.0, "train_loss": 0.5234840029761905, "train_runtime": 1680.5719, "train_samples_per_second": 3.57, "train_steps_per_second": 0.112 } ], "logging_steps": 10, "max_steps": 189, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }