{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9921671018276762, "eval_steps": 100, "global_step": 95, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 5.000000000000001e-07, "logits/chosen": 0.8826487064361572, "logits/rejected": 0.921362042427063, "logps/chosen": -36.58121871948242, "logps/rejected": -54.902320861816406, "loss": 2500.0, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.1, "learning_rate": 5e-06, "logits/chosen": 0.8914464116096497, "logits/rejected": 0.8742997050285339, "logps/chosen": -87.8135986328125, "logps/rejected": -96.38023376464844, "loss": 2502.7986, "rewards/accuracies": 0.2152777761220932, "rewards/chosen": -0.00011578062549233437, "rewards/margins": -2.257831147289835e-05, "rewards/rejected": -9.320233948528767e-05, "step": 10 }, { "epoch": 0.21, "learning_rate": 4.83118057351089e-06, "logits/chosen": 0.7811827659606934, "logits/rejected": 0.8477146029472351, "logps/chosen": -91.7614517211914, "logps/rejected": -85.11201477050781, "loss": 2501.6135, "rewards/accuracies": 0.25, "rewards/chosen": -3.986436786362901e-05, "rewards/margins": -0.0002826174022629857, "rewards/rejected": 0.00024275311443489045, "step": 20 }, { "epoch": 0.31, "learning_rate": 4.3475222930516484e-06, "logits/chosen": 0.8648529052734375, "logits/rejected": 0.8481088876724243, "logps/chosen": -85.29056549072266, "logps/rejected": -78.01969146728516, "loss": 2497.0559, "rewards/accuracies": 0.2750000059604645, "rewards/chosen": 0.0002869653981178999, "rewards/margins": 0.00010402966290712357, "rewards/rejected": 0.00018293573521077633, "step": 30 }, { "epoch": 0.42, "learning_rate": 3.6143458894413463e-06, "logits/chosen": 0.7654550075531006, "logits/rejected": 0.8423868417739868, "logps/chosen": -122.76900482177734, "logps/rejected": -108.79447174072266, "loss": 2500.084, "rewards/accuracies": 0.28125, "rewards/chosen": -0.00036479695700109005, "rewards/margins": -0.0001517109922133386, "rewards/rejected": -0.0002130859502358362, "step": 40 }, { "epoch": 0.52, "learning_rate": 2.730670898658255e-06, "logits/chosen": 0.8405305743217468, "logits/rejected": 0.869471549987793, "logps/chosen": -65.78740692138672, "logps/rejected": -71.66648864746094, "loss": 2500.4, "rewards/accuracies": 0.21875, "rewards/chosen": -0.00032314873533323407, "rewards/margins": -0.00028933738940395415, "rewards/rejected": -3.381132773938589e-05, "step": 50 }, { "epoch": 0.63, "learning_rate": 1.8158425248197931e-06, "logits/chosen": 0.8164768218994141, "logits/rejected": 0.8862431645393372, "logps/chosen": -117.6532974243164, "logps/rejected": -110.94734191894531, "loss": 2499.3979, "rewards/accuracies": 0.26875001192092896, "rewards/chosen": -0.0002028214803431183, "rewards/margins": -4.244589217705652e-05, "rewards/rejected": -0.0001603755954420194, "step": 60 }, { "epoch": 0.73, "learning_rate": 9.934134090518593e-07, "logits/chosen": 0.8050645589828491, "logits/rejected": 0.8623224496841431, "logps/chosen": -82.68321990966797, "logps/rejected": -83.93122863769531, "loss": 2500.1014, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -0.00020013593893963844, "rewards/margins": 3.215682227164507e-05, "rewards/rejected": -0.00023229271755553782, "step": 70 }, { "epoch": 0.84, "learning_rate": 3.7445716067596506e-07, "logits/chosen": 0.76527339220047, "logits/rejected": 0.8293226957321167, "logps/chosen": -105.8263931274414, "logps/rejected": -104.97891998291016, "loss": 2496.7385, "rewards/accuracies": 0.2750000059604645, "rewards/chosen": -0.00012336719373706728, "rewards/margins": -3.703986237724166e-07, "rewards/rejected": -0.00012299678928684443, "step": 80 }, { "epoch": 0.94, "learning_rate": 4.256725079024554e-08, "logits/chosen": 0.8350859880447388, "logits/rejected": 0.855826199054718, "logps/chosen": -77.66065979003906, "logps/rejected": -90.00727081298828, "loss": 2495.8914, "rewards/accuracies": 0.3125, "rewards/chosen": 0.00030836346559226513, "rewards/margins": 0.0007544533582404256, "rewards/rejected": -0.00044608983444049954, "step": 90 }, { "epoch": 0.99, "step": 95, "total_flos": 0.0, "train_loss": 2499.3611225328946, "train_runtime": 1147.2033, "train_samples_per_second": 5.329, "train_steps_per_second": 0.083 } ], "logging_steps": 10, "max_steps": 95, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }