{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 177, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19, "learning_rate": 1.3333333333333333e-05, "logits/chosen": -54.362911224365234, "logits/rejected": -50.76668930053711, "logps/chosen": -1319.19677734375, "logps/rejected": -33.71879577636719, "loss": 0.6892, "rewards/accuracies": 0.40909090638160706, "rewards/chosen": 0.008085771463811398, "rewards/margins": 0.007936443202197552, "rewards/rejected": 0.0001493280433351174, "step": 11 }, { "epoch": 0.37, "learning_rate": 2.9811320754716983e-05, "logits/chosen": -54.190120697021484, "logits/rejected": -52.07868957519531, "logps/chosen": -1108.5838623046875, "logps/rejected": -34.123477935791016, "loss": 0.6329, "rewards/accuracies": 0.8636363744735718, "rewards/chosen": 0.12481586635112762, "rewards/margins": 0.1290697455406189, "rewards/rejected": -0.004253899212926626, "step": 22 }, { "epoch": 0.56, "learning_rate": 2.7924528301886794e-05, "logits/chosen": -55.231224060058594, "logits/rejected": -52.04852294921875, "logps/chosen": -879.3148803710938, "logps/rejected": -34.018951416015625, "loss": 0.5471, "rewards/accuracies": 1.0, "rewards/chosen": 0.3259323835372925, "rewards/margins": 0.3459864854812622, "rewards/rejected": -0.020054107531905174, "step": 33 }, { "epoch": 0.75, "learning_rate": 2.5849056603773585e-05, "logits/chosen": -54.642879486083984, "logits/rejected": -50.72136306762695, "logps/chosen": -1260.130615234375, "logps/rejected": -32.81765365600586, "loss": 0.3731, "rewards/accuracies": 1.0, "rewards/chosen": 0.9432396292686462, "rewards/margins": 0.9746879935264587, "rewards/rejected": -0.03144851326942444, "step": 44 }, { "epoch": 0.93, "learning_rate": 2.377358490566038e-05, "logits/chosen": -54.97840881347656, "logits/rejected": -52.43299865722656, "logps/chosen": -1015.5433959960938, "logps/rejected": -34.17496109008789, "loss": 0.3539, "rewards/accuracies": 1.0, "rewards/chosen": 1.0532668828964233, "rewards/margins": 1.1037672758102417, "rewards/rejected": -0.0505005344748497, "step": 55 }, { "epoch": 1.12, "learning_rate": 2.169811320754717e-05, "logits/chosen": -55.37051773071289, "logits/rejected": -52.23636245727539, "logps/chosen": -1029.4456787109375, "logps/rejected": -34.25499725341797, "loss": 0.2602, "rewards/accuracies": 1.0, "rewards/chosen": 1.5273464918136597, "rewards/margins": 1.6002427339553833, "rewards/rejected": -0.07289613038301468, "step": 66 }, { "epoch": 1.31, "learning_rate": 1.9622641509433963e-05, "logits/chosen": -55.24615478515625, "logits/rejected": -51.99899673461914, "logps/chosen": -1143.59716796875, "logps/rejected": -35.575958251953125, "loss": 0.2115, "rewards/accuracies": 1.0, "rewards/chosen": 2.0695841312408447, "rewards/margins": 2.1615474224090576, "rewards/rejected": -0.09196347743272781, "step": 77 }, { "epoch": 1.49, "learning_rate": 1.7547169811320753e-05, "logits/chosen": -55.43460464477539, "logits/rejected": -50.45402908325195, "logps/chosen": -1283.4525146484375, "logps/rejected": -34.708351135253906, "loss": 0.1256, "rewards/accuracies": 1.0, "rewards/chosen": 2.9994964599609375, "rewards/margins": 3.1131112575531006, "rewards/rejected": -0.11361455917358398, "step": 88 }, { "epoch": 1.68, "learning_rate": 1.5471698113207547e-05, "logits/chosen": -55.35545349121094, "logits/rejected": -52.1571044921875, "logps/chosen": -875.93359375, "logps/rejected": -33.44974136352539, "loss": 0.1917, "rewards/accuracies": 1.0, "rewards/chosen": 2.1976325511932373, "rewards/margins": 2.317676544189453, "rewards/rejected": -0.12004398554563522, "step": 99 }, { "epoch": 1.86, "learning_rate": 1.339622641509434e-05, "logits/chosen": -55.0767936706543, "logits/rejected": -51.604347229003906, "logps/chosen": -1365.019775390625, "logps/rejected": -35.252044677734375, "loss": 0.1243, "rewards/accuracies": 1.0, "rewards/chosen": 3.4024481773376465, "rewards/margins": 3.554830312728882, "rewards/rejected": -0.15238191187381744, "step": 110 }, { "epoch": 2.05, "learning_rate": 1.1320754716981132e-05, "logits/chosen": -56.40107727050781, "logits/rejected": -54.048580169677734, "logps/chosen": -858.4847412109375, "logps/rejected": -34.9173698425293, "loss": 0.1969, "rewards/accuracies": 1.0, "rewards/chosen": 2.170722484588623, "rewards/margins": 2.324540138244629, "rewards/rejected": -0.15381723642349243, "step": 121 }, { "epoch": 2.24, "learning_rate": 9.245283018867924e-06, "logits/chosen": -55.104923248291016, "logits/rejected": -52.74159240722656, "logps/chosen": -809.7835693359375, "logps/rejected": -35.847755432128906, "loss": 0.1303, "rewards/accuracies": 1.0, "rewards/chosen": 2.388211727142334, "rewards/margins": 2.551792860031128, "rewards/rejected": -0.16358119249343872, "step": 132 }, { "epoch": 2.42, "learning_rate": 7.169811320754717e-06, "logits/chosen": -55.185218811035156, "logits/rejected": -52.399497985839844, "logps/chosen": -959.4953002929688, "logps/rejected": -34.56352615356445, "loss": 0.1291, "rewards/accuracies": 1.0, "rewards/chosen": 2.7163705825805664, "rewards/margins": 2.8906729221343994, "rewards/rejected": -0.1743021458387375, "step": 143 }, { "epoch": 2.61, "learning_rate": 5.094339622641509e-06, "logits/chosen": -53.87266540527344, "logits/rejected": -51.906978607177734, "logps/chosen": -1213.17529296875, "logps/rejected": -34.25074768066406, "loss": 0.069, "rewards/accuracies": 1.0, "rewards/chosen": 3.6375014781951904, "rewards/margins": 3.8169896602630615, "rewards/rejected": -0.17948788404464722, "step": 154 }, { "epoch": 2.8, "learning_rate": 3.018867924528302e-06, "logits/chosen": -55.073856353759766, "logits/rejected": -50.98554611206055, "logps/chosen": -1360.9793701171875, "logps/rejected": -36.670528411865234, "loss": 0.1152, "rewards/accuracies": 1.0, "rewards/chosen": 3.9220426082611084, "rewards/margins": 4.106159687042236, "rewards/rejected": -0.18411725759506226, "step": 165 }, { "epoch": 2.98, "learning_rate": 9.433962264150943e-07, "logits/chosen": -55.68719482421875, "logits/rejected": -54.916015625, "logps/chosen": -787.702880859375, "logps/rejected": -35.69668960571289, "loss": 0.159, "rewards/accuracies": 1.0, "rewards/chosen": 2.3595094680786133, "rewards/margins": 2.553260564804077, "rewards/rejected": -0.19375087320804596, "step": 176 } ], "logging_steps": 11, "max_steps": 177, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }