{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 207, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19, "learning_rate": 1.5714285714285715e-05, "logits/chosen": -1.823734998703003, "logits/rejected": -1.96222984790802, "logps/chosen": -984.5184936523438, "logps/rejected": -29.066242218017578, "loss": 0.7963, "rewards/accuracies": 0.6538461446762085, "rewards/chosen": 0.2930363118648529, "rewards/margins": 0.29948848485946655, "rewards/rejected": -0.00645211897790432, "step": 13 }, { "epoch": 0.38, "learning_rate": 2.9516129032258067e-05, "logits/chosen": -1.7756704092025757, "logits/rejected": -1.890375018119812, "logps/chosen": -1008.671630859375, "logps/rejected": -30.05452537536621, "loss": 0.5675, "rewards/accuracies": 0.6538461446762085, "rewards/chosen": 0.6647549867630005, "rewards/margins": 0.7006121277809143, "rewards/rejected": -0.03585716709494591, "step": 26 }, { "epoch": 0.57, "learning_rate": 2.7419354838709678e-05, "logits/chosen": -1.837444543838501, "logits/rejected": -1.8552197217941284, "logps/chosen": -1084.7537841796875, "logps/rejected": -30.599714279174805, "loss": 0.4647, "rewards/accuracies": 0.7692307829856873, "rewards/chosen": 0.9935499429702759, "rewards/margins": 1.160874366760254, "rewards/rejected": -0.16732460260391235, "step": 39 }, { "epoch": 0.75, "learning_rate": 2.532258064516129e-05, "logits/chosen": -1.745394229888916, "logits/rejected": -1.8828259706497192, "logps/chosen": -1103.2149658203125, "logps/rejected": -32.83525085449219, "loss": 0.2193, "rewards/accuracies": 0.8846153616905212, "rewards/chosen": 2.401637077331543, "rewards/margins": 2.809011697769165, "rewards/rejected": -0.4073745608329773, "step": 52 }, { "epoch": 0.94, "learning_rate": 2.3225806451612902e-05, "logits/chosen": -1.6953773498535156, "logits/rejected": -2.03174090385437, "logps/chosen": -1086.2177734375, "logps/rejected": -34.20427703857422, "loss": 0.1084, "rewards/accuracies": 1.0, "rewards/chosen": 2.713472843170166, "rewards/margins": 3.305988073348999, "rewards/rejected": -0.5925151109695435, "step": 65 }, { "epoch": 1.13, "learning_rate": 2.1129032258064516e-05, "logits/chosen": -1.7250920534133911, "logits/rejected": -1.886851191520691, "logps/chosen": -902.3397827148438, "logps/rejected": -36.79640579223633, "loss": 0.0972, "rewards/accuracies": 1.0, "rewards/chosen": 2.591458797454834, "rewards/margins": 3.4001564979553223, "rewards/rejected": -0.8086973428726196, "step": 78 }, { "epoch": 1.32, "learning_rate": 1.903225806451613e-05, "logits/chosen": -1.6640688180923462, "logits/rejected": -1.9599171876907349, "logps/chosen": -1034.3873291015625, "logps/rejected": -38.65880584716797, "loss": 0.0428, "rewards/accuracies": 1.0, "rewards/chosen": 3.461390495300293, "rewards/margins": 4.427910327911377, "rewards/rejected": -0.9665195941925049, "step": 91 }, { "epoch": 1.51, "learning_rate": 1.6935483870967744e-05, "logits/chosen": -1.6940295696258545, "logits/rejected": -1.9844238758087158, "logps/chosen": -1174.732666015625, "logps/rejected": -41.705257415771484, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": 3.3263399600982666, "rewards/margins": 4.630356788635254, "rewards/rejected": -1.3040169477462769, "step": 104 }, { "epoch": 1.7, "learning_rate": 1.4838709677419355e-05, "logits/chosen": -1.6856719255447388, "logits/rejected": -1.8793022632598877, "logps/chosen": -1046.614990234375, "logps/rejected": -43.94160842895508, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": 3.614682912826538, "rewards/margins": 5.117927074432373, "rewards/rejected": -1.5032439231872559, "step": 117 }, { "epoch": 1.88, "learning_rate": 1.274193548387097e-05, "logits/chosen": -1.7377840280532837, "logits/rejected": -1.8570376634597778, "logps/chosen": -1106.663330078125, "logps/rejected": -47.238887786865234, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": 3.449557304382324, "rewards/margins": 5.24083137512207, "rewards/rejected": -1.7912741899490356, "step": 130 }, { "epoch": 2.07, "learning_rate": 1.0645161290322582e-05, "logits/chosen": -1.7412984371185303, "logits/rejected": -1.9490795135498047, "logps/chosen": -1117.510009765625, "logps/rejected": -47.68777084350586, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": 3.141476631164551, "rewards/margins": 5.044860363006592, "rewards/rejected": -1.903383731842041, "step": 143 }, { "epoch": 2.26, "learning_rate": 8.548387096774194e-06, "logits/chosen": -1.6916511058807373, "logits/rejected": -1.9522241353988647, "logps/chosen": -1196.82861328125, "logps/rejected": -49.64944076538086, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": 3.8758084774017334, "rewards/margins": 5.988270282745361, "rewards/rejected": -2.1124606132507324, "step": 156 }, { "epoch": 2.45, "learning_rate": 6.451612903225806e-06, "logits/chosen": -1.760750651359558, "logits/rejected": -1.925395131111145, "logps/chosen": -701.3285522460938, "logps/rejected": -50.479835510253906, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": 2.707566261291504, "rewards/margins": 4.906687259674072, "rewards/rejected": -2.1991212368011475, "step": 169 }, { "epoch": 2.64, "learning_rate": 4.35483870967742e-06, "logits/chosen": -1.6971558332443237, "logits/rejected": -1.9400659799575806, "logps/chosen": -959.2064208984375, "logps/rejected": -51.388999938964844, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": 3.8825502395629883, "rewards/margins": 6.139618873596191, "rewards/rejected": -2.2570688724517822, "step": 182 }, { "epoch": 2.83, "learning_rate": 2.2580645161290324e-06, "logits/chosen": -1.7328979969024658, "logits/rejected": -2.0168874263763428, "logps/chosen": -926.796875, "logps/rejected": -52.40264129638672, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 3.548464059829712, "rewards/margins": 5.864409446716309, "rewards/rejected": -2.3159451484680176, "step": 195 } ], "logging_steps": 13, "max_steps": 207, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }