{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9874476987447699, "eval_steps": 500, "global_step": 59, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 8.333333333333333e-08, "logits/chosen": -2.809058427810669, "logits/rejected": -2.8124935626983643, "logps/chosen": -318.11346435546875, "logps/rejected": -229.77012634277344, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.17, "learning_rate": 4.930057285201027e-07, "logits/chosen": -2.7690165042877197, "logits/rejected": -2.7547106742858887, "logps/chosen": -277.6565246582031, "logps/rejected": -264.4493408203125, "loss": 0.6916, "rewards/accuracies": 0.4791666567325592, "rewards/chosen": 0.003391754813492298, "rewards/margins": 0.002254640683531761, "rewards/rejected": 0.0011371138971298933, "step": 10 }, { "epoch": 0.33, "learning_rate": 4.187457503795526e-07, "logits/chosen": -2.79248046875, "logits/rejected": -2.789917230606079, "logps/chosen": -264.5072326660156, "logps/rejected": -252.63845825195312, "loss": 0.6752, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.026929911226034164, "rewards/margins": 0.03922198340296745, "rewards/rejected": -0.012292074970901012, "step": 20 }, { "epoch": 0.5, "learning_rate": 2.8691164100062034e-07, "logits/chosen": -2.7963314056396484, "logits/rejected": -2.796976089477539, "logps/chosen": -298.3793640136719, "logps/rejected": -256.82110595703125, "loss": 0.6504, "rewards/accuracies": 0.6656249761581421, "rewards/chosen": -0.021613802760839462, "rewards/margins": 0.11929114162921906, "rewards/rejected": -0.14090493321418762, "step": 30 }, { "epoch": 0.67, "learning_rate": 1.4248369943086995e-07, "logits/chosen": -2.7643628120422363, "logits/rejected": -2.7510218620300293, "logps/chosen": -265.435302734375, "logps/rejected": -256.5634765625, "loss": 0.6363, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.1126113086938858, "rewards/margins": 0.15844564139842987, "rewards/rejected": -0.2710569500923157, "step": 40 }, { "epoch": 0.84, "learning_rate": 3.473909705816111e-08, "logits/chosen": -2.76975679397583, "logits/rejected": -2.7606043815612793, "logps/chosen": -280.33734130859375, "logps/rejected": -287.0810241699219, "loss": 0.6219, "rewards/accuracies": 0.640625, "rewards/chosen": -0.20978038012981415, "rewards/margins": 0.129803866147995, "rewards/rejected": -0.33958423137664795, "step": 50 }, { "epoch": 0.99, "step": 59, "total_flos": 0.0, "train_loss": 0.6501501697604939, "train_runtime": 1924.715, "train_samples_per_second": 7.94, "train_steps_per_second": 0.031 } ], "logging_steps": 10, "max_steps": 59, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }