{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.984, "eval_steps": 100, "global_step": 124, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 3.846153846153847e-07, "logits/chosen": 0.33759647607803345, "logits/rejected": 0.28577902913093567, "logps/chosen": -217.51107788085938, "logps/rejected": -154.6298065185547, "loss": 0.0102, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.16, "learning_rate": 3.846153846153847e-06, "logits/chosen": 0.23491337895393372, "logits/rejected": 0.185601606965065, "logps/chosen": -189.5899200439453, "logps/rejected": -162.4447784423828, "loss": 0.0106, "rewards/accuracies": 0.3333333432674408, "rewards/chosen": -0.0008325728122144938, "rewards/margins": -0.0011730334954336286, "rewards/rejected": 0.0003404606832191348, "step": 10 }, { "epoch": 0.32, "learning_rate": 4.951096619903317e-06, "logits/chosen": 0.11001434177160263, "logits/rejected": 0.06706250458955765, "logps/chosen": -170.3766632080078, "logps/rejected": -145.70187377929688, "loss": 0.0102, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": 0.0003622249641921371, "rewards/margins": 0.0007274674135260284, "rewards/rejected": -0.000365242303814739, "step": 20 }, { "epoch": 0.48, "learning_rate": 4.716164218065246e-06, "logits/chosen": 0.15306313335895538, "logits/rejected": 0.14907678961753845, "logps/chosen": -184.689453125, "logps/rejected": -146.67767333984375, "loss": 0.01, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": 0.0019506055396050215, "rewards/margins": 0.001728715025819838, "rewards/rejected": 0.0002218906593043357, "step": 30 }, { "epoch": 0.64, "learning_rate": 4.3048902348863116e-06, "logits/chosen": 0.11253954470157623, "logits/rejected": 0.2042957991361618, "logps/chosen": -182.73019409179688, "logps/rejected": -147.92068481445312, "loss": 0.0104, "rewards/accuracies": 0.3375000059604645, "rewards/chosen": 0.0024931146763265133, "rewards/margins": -0.0002410466258879751, "rewards/rejected": 0.002734161214902997, "step": 40 }, { "epoch": 0.8, "learning_rate": 3.7500000000000005e-06, "logits/chosen": 0.26959556341171265, "logits/rejected": 0.17199485003948212, "logps/chosen": -196.38694763183594, "logps/rejected": -161.71762084960938, "loss": 0.0102, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": 0.00301692564971745, "rewards/margins": 0.0006210061255842447, "rewards/rejected": 0.0023959192913025618, "step": 50 }, { "epoch": 0.96, "learning_rate": 3.0956464785579125e-06, "logits/chosen": 0.19740340113639832, "logits/rejected": 0.1762491911649704, "logps/chosen": -182.07997131347656, "logps/rejected": -150.88436889648438, "loss": 0.0105, "rewards/accuracies": 0.35624998807907104, "rewards/chosen": -0.00115151796489954, "rewards/margins": -0.0006008323980495334, "rewards/rejected": -0.0005506856250576675, "step": 60 }, { "epoch": 1.12, "learning_rate": 2.39389699200963e-06, "logits/chosen": 0.11103002727031708, "logits/rejected": 0.17598359286785126, "logps/chosen": -203.237548828125, "logps/rejected": -173.33811950683594, "loss": 0.0104, "rewards/accuracies": 0.35624998807907104, "rewards/chosen": 0.001459635910578072, "rewards/margins": -0.0005474772187881172, "rewards/rejected": 0.0020071130711585283, "step": 70 }, { "epoch": 1.28, "learning_rate": 1.700590188571887e-06, "logits/chosen": 0.13080066442489624, "logits/rejected": 0.154659703373909, "logps/chosen": -193.30853271484375, "logps/rejected": -161.02102661132812, "loss": 0.0102, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.0017246768111363053, "rewards/margins": 0.0007789047667756677, "rewards/rejected": 0.0009457715786993504, "step": 80 }, { "epoch": 1.44, "learning_rate": 1.0708929268538034e-06, "logits/chosen": 0.10300946235656738, "logits/rejected": 0.11627539247274399, "logps/chosen": -184.31507873535156, "logps/rejected": -151.22093200683594, "loss": 0.0101, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": 0.0029378323815762997, "rewards/margins": 0.0010877925669774413, "rewards/rejected": 0.0018500399310141802, "step": 90 }, { "epoch": 1.6, "learning_rate": 5.549106142039018e-07, "logits/chosen": 0.17798453569412231, "logits/rejected": 0.1302061527967453, "logps/chosen": -176.29644775390625, "logps/rejected": -149.06594848632812, "loss": 0.0105, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": 0.0017444228287786245, "rewards/margins": -0.0010295301908627152, "rewards/rejected": 0.0027739531360566616, "step": 100 }, { "epoch": 1.6, "eval_logits/chosen": -0.002386125735938549, "eval_logits/rejected": 0.09546130150556564, "eval_logps/chosen": -306.2984924316406, "eval_logps/rejected": -278.5430908203125, "eval_loss": 0.010826661251485348, "eval_rewards/accuracies": 0.4945000112056732, "eval_rewards/chosen": 0.0006116966251283884, "eval_rewards/margins": -0.00013227059389464557, "eval_rewards/rejected": 0.0007439671317115426, "eval_runtime": 433.1922, "eval_samples_per_second": 4.617, "eval_steps_per_second": 1.154, "step": 100 }, { "epoch": 1.76, "learning_rate": 1.937002879188285e-07, "logits/chosen": 0.13854524493217468, "logits/rejected": 0.09755973517894745, "logps/chosen": -194.58871459960938, "logps/rejected": -159.81277465820312, "loss": 0.0105, "rewards/accuracies": 0.33125001192092896, "rewards/chosen": 0.0010774679249152541, "rewards/margins": -0.0008498074603267014, "rewards/rejected": 0.0019272754434496164, "step": 110 }, { "epoch": 1.92, "learning_rate": 1.6003680950742728e-08, "logits/chosen": 0.18497225642204285, "logits/rejected": 0.25526371598243713, "logps/chosen": -182.73492431640625, "logps/rejected": -146.8502197265625, "loss": 0.0103, "rewards/accuracies": 0.3812499940395355, "rewards/chosen": 0.0014311736449599266, "rewards/margins": 0.00013411189138423651, "rewards/rejected": 0.001297061680816114, "step": 120 }, { "epoch": 1.98, "step": 124, "total_flos": 0.0, "train_loss": 0.010321829197627882, "train_runtime": 1406.1718, "train_samples_per_second": 1.422, "train_steps_per_second": 0.088 } ], "logging_steps": 10, "max_steps": 124, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }