{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9915966386554622, "eval_steps": 100, "global_step": 59, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 6.891863950600918, "learning_rate": 8.333333333333333e-08, "logits/chosen": -1.1214768886566162, "logits/rejected": -1.0666239261627197, "logps/chosen": -773.5914306640625, "logps/rejected": -765.6082763671875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.17, "grad_norm": 6.6410695205888075, "learning_rate": 4.930057285201027e-07, "logits/chosen": -1.2806458473205566, "logits/rejected": -0.9247087836265564, "logps/chosen": -503.0013122558594, "logps/rejected": -899.8956298828125, "loss": 0.6907, "rewards/accuracies": 0.5069444179534912, "rewards/chosen": -0.0002252649428555742, "rewards/margins": 0.004411212634295225, "rewards/rejected": -0.004636477679014206, "step": 10 }, { "epoch": 0.34, "grad_norm": 7.3762614729181015, "learning_rate": 4.187457503795526e-07, "logits/chosen": -1.2530821561813354, "logits/rejected": -0.9845311045646667, "logps/chosen": -543.0537109375, "logps/rejected": -933.8472900390625, "loss": 0.6527, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.019722815603017807, "rewards/margins": 0.08204329013824463, "rewards/rejected": -0.10176609456539154, "step": 20 }, { "epoch": 0.5, "grad_norm": 8.929333715687283, "learning_rate": 2.8691164100062034e-07, "logits/chosen": -1.1901506185531616, "logits/rejected": -0.9701334238052368, "logps/chosen": -547.3389892578125, "logps/rejected": -972.1591796875, "loss": 0.5279, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.08818452060222626, "rewards/margins": 0.44495606422424316, "rewards/rejected": -0.5331405997276306, "step": 30 }, { "epoch": 0.67, "grad_norm": 7.370675587057317, "learning_rate": 1.4248369943086995e-07, "logits/chosen": -1.121147632598877, "logits/rejected": -0.9864951372146606, "logps/chosen": -544.5201416015625, "logps/rejected": -1050.002197265625, "loss": 0.4145, "rewards/accuracies": 0.90625, "rewards/chosen": -0.19094732403755188, "rewards/margins": 0.9460043907165527, "rewards/rejected": -1.1369515657424927, "step": 40 }, { "epoch": 0.84, "grad_norm": 6.641314363386866, "learning_rate": 3.473909705816111e-08, "logits/chosen": -1.0754799842834473, "logits/rejected": -1.0215175151824951, "logps/chosen": -568.5784912109375, "logps/rejected": -1094.7982177734375, "loss": 0.3982, "rewards/accuracies": 0.875, "rewards/chosen": -0.3303142786026001, "rewards/margins": 1.3637864589691162, "rewards/rejected": -1.6941007375717163, "step": 50 }, { "epoch": 0.99, "step": 59, "total_flos": 0.0, "train_loss": 0.5082056158680027, "train_runtime": 904.9767, "train_samples_per_second": 4.188, "train_steps_per_second": 0.065 } ], "logging_steps": 10, "max_steps": 59, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }