{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.997867803837953, "eval_steps": 500, "global_step": 117, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.1666666666666667e-07, "logits/chosen": 0.20978523790836334, "logits/rejected": 0.2752217650413513, "logps/chosen": -404.9998474121094, "logps/rejected": -254.88076782226562, "loss": 0.3365, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.09, "learning_rate": 4.166666666666667e-06, "logits/chosen": 0.10752824693918228, "logits/rejected": 0.2080475240945816, "logps/chosen": -346.3302917480469, "logps/rejected": -298.17047119140625, "loss": 0.3407, "rewards/accuracies": 0.4513888955116272, "rewards/chosen": 0.00015709556464571506, "rewards/margins": 0.00016293837688863277, "rewards/rejected": -5.842794507771032e-06, "step": 10 }, { "epoch": 0.17, "learning_rate": 4.9287250957321685e-06, "logits/chosen": 0.07487143576145172, "logits/rejected": 0.22133192420005798, "logps/chosen": -339.75555419921875, "logps/rejected": -288.63885498046875, "loss": 0.3478, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0009735990315675735, "rewards/margins": 0.0009165561641566455, "rewards/rejected": 5.70427582715638e-05, "step": 20 }, { "epoch": 0.26, "learning_rate": 4.646121984004666e-06, "logits/chosen": 0.16956673562526703, "logits/rejected": 0.27035585045814514, "logps/chosen": -349.0343933105469, "logps/rejected": -280.947265625, "loss": 0.3333, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.0033079744316637516, "rewards/margins": 0.0031072809360921383, "rewards/rejected": 0.00020069361198693514, "step": 30 }, { "epoch": 0.34, "learning_rate": 4.172826515897146e-06, "logits/chosen": 0.06156591325998306, "logits/rejected": 0.23971056938171387, "logps/chosen": -358.6338195800781, "logps/rejected": -291.4596252441406, "loss": 0.3305, "rewards/accuracies": 0.6875, "rewards/chosen": 0.006019611842930317, "rewards/margins": 0.005839194171130657, "rewards/rejected": 0.00018041740986518562, "step": 40 }, { "epoch": 0.43, "learning_rate": 3.5508930707739143e-06, "logits/chosen": 0.17305973172187805, "logits/rejected": 0.33108973503112793, "logps/chosen": -334.7964172363281, "logps/rejected": -278.05926513671875, "loss": 0.3349, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": 0.009936058893799782, "rewards/margins": 0.015141323208808899, "rewards/rejected": -0.005205262452363968, "step": 50 }, { "epoch": 0.51, "learning_rate": 2.835583164544139e-06, "logits/chosen": 0.18460991978645325, "logits/rejected": 0.2829166054725647, "logps/chosen": -343.370849609375, "logps/rejected": -328.1906433105469, "loss": 0.3237, "rewards/accuracies": 0.71875, "rewards/chosen": 0.008376851677894592, "rewards/margins": 0.02010621875524521, "rewards/rejected": -0.011729367077350616, "step": 60 }, { "epoch": 0.6, "learning_rate": 2.090455221462156e-06, "logits/chosen": 0.1229587197303772, "logits/rejected": 0.28873729705810547, "logps/chosen": -368.4198303222656, "logps/rejected": -326.5229797363281, "loss": 0.3276, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.000949523295275867, "rewards/margins": 0.026355501264333725, "rewards/rejected": -0.02730502560734749, "step": 70 }, { "epoch": 0.68, "learning_rate": 1.3817171292109182e-06, "logits/chosen": 0.11349205672740936, "logits/rejected": 0.2876424789428711, "logps/chosen": -344.27825927734375, "logps/rejected": -312.2487487792969, "loss": 0.3122, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.007496008183807135, "rewards/margins": 0.03042692504823208, "rewards/rejected": -0.0379229299724102, "step": 80 }, { "epoch": 0.77, "learning_rate": 7.723433775328385e-07, "logits/chosen": 0.1455320417881012, "logits/rejected": 0.32055655121803284, "logps/chosen": -389.3392028808594, "logps/rejected": -343.0546569824219, "loss": 0.3056, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.01950433850288391, "rewards/margins": 0.045494794845581055, "rewards/rejected": -0.06499912589788437, "step": 90 }, { "epoch": 0.85, "learning_rate": 3.164794984571759e-07, "logits/chosen": 0.12944644689559937, "logits/rejected": 0.2912340462207794, "logps/chosen": -380.64886474609375, "logps/rejected": -371.8363037109375, "loss": 0.3046, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.028714578598737717, "rewards/margins": 0.048634570091962814, "rewards/rejected": -0.07734914869070053, "step": 100 }, { "epoch": 0.94, "learning_rate": 5.463099816548578e-08, "logits/chosen": 0.13937367498874664, "logits/rejected": 0.2031385451555252, "logps/chosen": -384.63287353515625, "logps/rejected": -380.8174743652344, "loss": 0.2819, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.033626072108745575, "rewards/margins": 0.050805091857910156, "rewards/rejected": -0.08443117141723633, "step": 110 }, { "epoch": 1.0, "step": 117, "total_flos": 0.0, "train_loss": 0.3177420940154638, "train_runtime": 3860.0223, "train_samples_per_second": 1.943, "train_steps_per_second": 0.03 } ], "logging_steps": 10, "max_steps": 117, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }