{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.971563981042654, "eval_steps": 100, "global_step": 104, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.018957345971563982, "grad_norm": 384.97491646147324, "learning_rate": 4.545454545454545e-08, "logits/chosen": -11.400373458862305, "logits/rejected": -11.167098045349121, "logps/chosen": -1579.2471923828125, "logps/rejected": -1833.805419921875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.1895734597156398, "grad_norm": 285.06639638114245, "learning_rate": 4.545454545454545e-07, "logits/chosen": -14.52730941772461, "logits/rejected": -14.906502723693848, "logps/chosen": -1777.32421875, "logps/rejected": -1881.382568359375, "loss": 0.6985, "rewards/accuracies": 0.4236111044883728, "rewards/chosen": -0.15817444026470184, "rewards/margins": 0.02842862159013748, "rewards/rejected": -0.18660305440425873, "step": 10 }, { "epoch": 0.3791469194312796, "grad_norm": 534.5020309330747, "learning_rate": 4.885348141000122e-07, "logits/chosen": -21.795948028564453, "logits/rejected": -23.514450073242188, "logps/chosen": -1682.7659912109375, "logps/rejected": -1746.1217041015625, "loss": 0.7332, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.4811238646507263, "rewards/margins": 0.043054938316345215, "rewards/rejected": -0.5241788625717163, "step": 20 }, { "epoch": 0.5687203791469194, "grad_norm": 425.21287499734404, "learning_rate": 4.5025027361734613e-07, "logits/chosen": -7.340817451477051, "logits/rejected": -15.593961715698242, "logps/chosen": -1770.065673828125, "logps/rejected": -1880.151611328125, "loss": 0.8071, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.468301385641098, "rewards/margins": 0.7533053159713745, "rewards/rejected": -1.2216066122055054, "step": 30 }, { "epoch": 0.7582938388625592, "grad_norm": 712.3590680432743, "learning_rate": 3.893311157806091e-07, "logits/chosen": -14.5736665725708, "logits/rejected": -23.62198829650879, "logps/chosen": -1539.815185546875, "logps/rejected": -1462.7039794921875, "loss": 0.9612, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -4.467960357666016, "rewards/margins": -0.5169845819473267, "rewards/rejected": -3.950974941253662, "step": 40 }, { "epoch": 0.9478672985781991, "grad_norm": 881.241885579057, "learning_rate": 3.126631330646801e-07, "logits/chosen": -9.282499313354492, "logits/rejected": -10.507891654968262, "logps/chosen": -2434.360595703125, "logps/rejected": -2703.219970703125, "loss": 0.7979, "rewards/accuracies": 0.65625, "rewards/chosen": 0.584517240524292, "rewards/margins": 0.6922141313552856, "rewards/rejected": -0.1076967716217041, "step": 50 }, { "epoch": 1.1374407582938388, "grad_norm": 429.8468773274208, "learning_rate": 2.2891223348923882e-07, "logits/chosen": -6.557864189147949, "logits/rejected": -9.822066307067871, "logps/chosen": -1984.517578125, "logps/rejected": -1996.927490234375, "loss": 0.5975, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 1.916027307510376, "rewards/margins": 1.0445150136947632, "rewards/rejected": 0.871512234210968, "step": 60 }, { "epoch": 1.3270142180094786, "grad_norm": 175.85699006716078, "learning_rate": 1.4754491880085317e-07, "logits/chosen": -7.904175758361816, "logits/rejected": -13.800127983093262, "logps/chosen": -1871.2291259765625, "logps/rejected": -1999.8929443359375, "loss": 0.4226, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.032440781593322754, "rewards/margins": 1.4170843362808228, "rewards/rejected": -1.4495251178741455, "step": 70 }, { "epoch": 1.5165876777251186, "grad_norm": 231.31575551946057, "learning_rate": 7.775827023107834e-08, "logits/chosen": -12.700660705566406, "logits/rejected": -10.377889633178711, "logps/chosen": -1366.731689453125, "logps/rejected": -1466.466552734375, "loss": 0.3894, "rewards/accuracies": 0.856249988079071, "rewards/chosen": 0.11536725610494614, "rewards/margins": 1.3466222286224365, "rewards/rejected": -1.2312551736831665, "step": 80 }, { "epoch": 1.7061611374407581, "grad_norm": 230.19784681381057, "learning_rate": 2.7440387297912122e-08, "logits/chosen": -10.58217716217041, "logits/rejected": -11.754631996154785, "logps/chosen": -1986.6451416015625, "logps/rejected": -2165.16162109375, "loss": 0.3852, "rewards/accuracies": 0.90625, "rewards/chosen": -0.339664489030838, "rewards/margins": 2.0723910331726074, "rewards/rejected": -2.412055492401123, "step": 90 }, { "epoch": 1.8957345971563981, "grad_norm": 302.98036373926305, "learning_rate": 2.27878296044029e-09, "logits/chosen": -8.607019424438477, "logits/rejected": -15.033491134643555, "logps/chosen": -2083.50048828125, "logps/rejected": -2193.92431640625, "loss": 0.4114, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.0890653133392334, "rewards/margins": 1.4980413913726807, "rewards/rejected": -1.5871065855026245, "step": 100 }, { "epoch": 1.8957345971563981, "eval_logits/chosen": -18.000696182250977, "eval_logits/rejected": -25.16254234313965, "eval_logps/chosen": -1530.4515380859375, "eval_logps/rejected": -1648.5675048828125, "eval_loss": 0.8002049326896667, "eval_rewards/accuracies": 0.7604166865348816, "eval_rewards/chosen": -0.46603381633758545, "eval_rewards/margins": 0.8467853665351868, "eval_rewards/rejected": -1.3128191232681274, "eval_runtime": 36.1276, "eval_samples_per_second": 20.76, "eval_steps_per_second": 0.664, "step": 100 }, { "epoch": 1.971563981042654, "step": 104, "total_flos": 0.0, "train_loss": 0.6178501087885636, "train_runtime": 1142.0913, "train_samples_per_second": 11.82, "train_steps_per_second": 0.091 } ], "logging_steps": 10, "max_steps": 104, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }