{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.984, "eval_steps": 100, "global_step": 124, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 3.846153846153847e-07, "logits/chosen": 0.4176342189311981, "logits/rejected": 0.3709704279899597, "logps/chosen": -211.92245483398438, "logps/rejected": -192.94674682617188, "loss": 0.0102, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.16, "learning_rate": 3.846153846153847e-06, "logits/chosen": 0.11221448332071304, "logits/rejected": 0.2316199392080307, "logps/chosen": -163.23516845703125, "logps/rejected": -129.18174743652344, "loss": 0.0101, "rewards/accuracies": 0.4097222089767456, "rewards/chosen": 0.002231667749583721, "rewards/margins": 0.0012476635165512562, "rewards/rejected": 0.000984004233032465, "step": 10 }, { "epoch": 0.32, "learning_rate": 4.951096619903317e-06, "logits/chosen": 0.11361704021692276, "logits/rejected": 0.05081893131136894, "logps/chosen": -177.82867431640625, "logps/rejected": -143.34634399414062, "loss": 0.0103, "rewards/accuracies": 0.40625, "rewards/chosen": -0.0008938458049669862, "rewards/margins": 0.00015756110951770097, "rewards/rejected": -0.0010514067253097892, "step": 20 }, { "epoch": 0.48, "learning_rate": 4.716164218065246e-06, "logits/chosen": 0.1756063550710678, "logits/rejected": 0.23870661854743958, "logps/chosen": -182.95594787597656, "logps/rejected": -149.51669311523438, "loss": 0.0104, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": 0.0036168531514704227, "rewards/margins": 8.136676478898153e-05, "rewards/rejected": 0.0035354860592633486, "step": 30 }, { "epoch": 0.64, "learning_rate": 4.3048902348863116e-06, "logits/chosen": 0.23890891671180725, "logits/rejected": 0.15510627627372742, "logps/chosen": -198.42312622070312, "logps/rejected": -165.66856384277344, "loss": 0.0103, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": -0.000657144351862371, "rewards/margins": 0.0006801211275160313, "rewards/rejected": -0.0013372651301324368, "step": 40 }, { "epoch": 0.8, "learning_rate": 3.7500000000000005e-06, "logits/chosen": 0.0918453186750412, "logits/rejected": 0.22310352325439453, "logps/chosen": -167.31259155273438, "logps/rejected": -152.1429443359375, "loss": 0.0103, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0004377079603727907, "rewards/margins": 0.00018744784756563604, "rewards/rejected": -0.0006251559825614095, "step": 50 }, { "epoch": 0.96, "learning_rate": 3.0956464785579125e-06, "logits/chosen": 0.17539598047733307, "logits/rejected": 0.15246877074241638, "logps/chosen": -172.65084838867188, "logps/rejected": -149.18576049804688, "loss": 0.0102, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": 0.00030969156068749726, "rewards/margins": 0.0009550409158691764, "rewards/rejected": -0.0006453495589084923, "step": 60 }, { "epoch": 1.12, "learning_rate": 2.39389699200963e-06, "logits/chosen": 0.2547686994075775, "logits/rejected": 0.22828814387321472, "logps/chosen": -180.34805297851562, "logps/rejected": -161.528076171875, "loss": 0.0105, "rewards/accuracies": 0.375, "rewards/chosen": -0.0031430914532393217, "rewards/margins": -0.0006838366389274597, "rewards/rejected": -0.002459254814311862, "step": 70 }, { "epoch": 1.28, "learning_rate": 1.700590188571887e-06, "logits/chosen": 0.24805791676044464, "logits/rejected": 0.14473767578601837, "logps/chosen": -168.27664184570312, "logps/rejected": -145.95330810546875, "loss": 0.0107, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -0.001294322544708848, "rewards/margins": -0.0014985360903665423, "rewards/rejected": 0.00020421366207301617, "step": 80 }, { "epoch": 1.44, "learning_rate": 1.0708929268538034e-06, "logits/chosen": 0.21019116044044495, "logits/rejected": 0.0877661406993866, "logps/chosen": -181.98239135742188, "logps/rejected": -144.48709106445312, "loss": 0.0103, "rewards/accuracies": 0.375, "rewards/chosen": -0.0010133858304470778, "rewards/margins": 0.00020684293122030795, "rewards/rejected": -0.0012202286161482334, "step": 90 }, { "epoch": 1.6, "learning_rate": 5.549106142039018e-07, "logits/chosen": 0.17486083507537842, "logits/rejected": 0.10187114775180817, "logps/chosen": -171.99929809570312, "logps/rejected": -156.14585876464844, "loss": 0.0104, "rewards/accuracies": 0.3812499940395355, "rewards/chosen": -0.00023761784541420639, "rewards/margins": -0.00010095858306158334, "rewards/rejected": -0.00013665916048921645, "step": 100 }, { "epoch": 1.6, "eval_logits/chosen": -0.010001667775213718, "eval_logits/rejected": 0.08886812627315521, "eval_logps/chosen": -306.4288635253906, "eval_logps/rejected": -278.7423095703125, "eval_loss": 0.01069465558975935, "eval_rewards/accuracies": 0.5145000219345093, "eval_rewards/chosen": -0.00046658667270094156, "eval_rewards/margins": 0.00048252198030240834, "eval_rewards/rejected": -0.0009491087403148413, "eval_runtime": 710.6081, "eval_samples_per_second": 2.814, "eval_steps_per_second": 0.704, "step": 100 }, { "epoch": 1.76, "learning_rate": 1.937002879188285e-07, "logits/chosen": 0.17259174585342407, "logits/rejected": 0.22864675521850586, "logps/chosen": -194.3363494873047, "logps/rejected": -159.630126953125, "loss": 0.0099, "rewards/accuracies": 0.4375, "rewards/chosen": -0.00021430714696180075, "rewards/margins": 0.0019922677893191576, "rewards/rejected": -0.002206574659794569, "step": 110 }, { "epoch": 1.92, "learning_rate": 1.6003680950742728e-08, "logits/chosen": 0.14167846739292145, "logits/rejected": 0.09132746607065201, "logps/chosen": -186.17977905273438, "logps/rejected": -150.71856689453125, "loss": 0.0102, "rewards/accuracies": 0.46875, "rewards/chosen": -0.0018472172087058425, "rewards/margins": 0.0006628064438700676, "rewards/rejected": -0.002510023768991232, "step": 120 }, { "epoch": 1.98, "step": 124, "total_flos": 0.0, "train_loss": 0.010306676939850854, "train_runtime": 2255.3236, "train_samples_per_second": 0.887, "train_steps_per_second": 0.055 } ], "logging_steps": 10, "max_steps": 124, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }