| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9945, |
| "eval_steps": 500, |
| "global_step": 153, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 8.605272045068777, |
| "learning_rate": 3.125e-08, |
| "logits/chosen": -2.8784992694854736, |
| "logits/rejected": -2.8769874572753906, |
| "logps/chosen": -263.9749755859375, |
| "logps/pi_response": -246.19029235839844, |
| "logps/ref_response": -246.19029235839844, |
| "logps/rejected": -308.2843322753906, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.07, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 8.688961504116353, |
| "learning_rate": 3.1249999999999997e-07, |
| "logits/chosen": -2.936194896697998, |
| "logits/rejected": -2.808932304382324, |
| "logps/chosen": -315.6687927246094, |
| "logps/pi_response": -209.20472717285156, |
| "logps/ref_response": -209.1347198486328, |
| "logps/rejected": -260.7985534667969, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.4829059839248657, |
| "rewards/chosen": 0.00021380360703915358, |
| "rewards/margins": 0.0008379952632822096, |
| "rewards/rejected": -0.0006241916562430561, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.13, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 8.833821950128216, |
| "learning_rate": 4.989490450759331e-07, |
| "logits/chosen": -2.852677583694458, |
| "logits/rejected": -2.8317201137542725, |
| "logps/chosen": -274.80267333984375, |
| "logps/pi_response": -189.35801696777344, |
| "logps/ref_response": -187.89822387695312, |
| "logps/rejected": -261.1772766113281, |
| "loss": 0.6876, |
| "rewards/accuracies": 0.6846153736114502, |
| "rewards/chosen": -0.006662360858172178, |
| "rewards/margins": 0.012640128843486309, |
| "rewards/rejected": -0.0193024892359972, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 9.337463091325768, |
| "learning_rate": 4.872270441827174e-07, |
| "logits/chosen": -2.8088669776916504, |
| "logits/rejected": -2.791938304901123, |
| "logps/chosen": -275.8798828125, |
| "logps/pi_response": -215.20196533203125, |
| "logps/ref_response": -213.5146484375, |
| "logps/rejected": -255.837890625, |
| "loss": 0.6691, |
| "rewards/accuracies": 0.7153846025466919, |
| "rewards/chosen": 0.021489957347512245, |
| "rewards/margins": 0.061311714351177216, |
| "rewards/rejected": -0.03982176259160042, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.26, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 12.278601194362231, |
| "learning_rate": 4.6308512113530063e-07, |
| "logits/chosen": -2.7728219032287598, |
| "logits/rejected": -2.690376043319702, |
| "logps/chosen": -278.7479553222656, |
| "logps/pi_response": -190.5654296875, |
| "logps/ref_response": -177.33053588867188, |
| "logps/rejected": -246.11264038085938, |
| "loss": 0.6443, |
| "rewards/accuracies": 0.6730769276618958, |
| "rewards/chosen": -0.05301598832011223, |
| "rewards/margins": 0.10135015100240707, |
| "rewards/rejected": -0.154366135597229, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.33, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 15.472310357744927, |
| "learning_rate": 4.277872161641681e-07, |
| "logits/chosen": -2.792513608932495, |
| "logits/rejected": -2.7258100509643555, |
| "logps/chosen": -291.08642578125, |
| "logps/pi_response": -217.5293426513672, |
| "logps/ref_response": -194.07823181152344, |
| "logps/rejected": -272.1592102050781, |
| "loss": 0.619, |
| "rewards/accuracies": 0.6692307591438293, |
| "rewards/chosen": -0.05969160422682762, |
| "rewards/margins": 0.2206883430480957, |
| "rewards/rejected": -0.2803799510002136, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.39, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 12.018657879137255, |
| "learning_rate": 3.8318133624280046e-07, |
| "logits/chosen": -2.7461166381835938, |
| "logits/rejected": -2.6338207721710205, |
| "logps/chosen": -268.39324951171875, |
| "logps/pi_response": -218.18861389160156, |
| "logps/ref_response": -193.3256072998047, |
| "logps/rejected": -277.92572021484375, |
| "loss": 0.611, |
| "rewards/accuracies": 0.7153846025466919, |
| "rewards/chosen": -0.13407574594020844, |
| "rewards/margins": 0.21902315318584442, |
| "rewards/rejected": -0.35309889912605286, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.46, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 12.586943156361984, |
| "learning_rate": 3.316028034595861e-07, |
| "logits/chosen": -2.702092170715332, |
| "logits/rejected": -2.648845672607422, |
| "logps/chosen": -275.9683532714844, |
| "logps/pi_response": -199.25994873046875, |
| "logps/ref_response": -183.3825225830078, |
| "logps/rejected": -281.8118896484375, |
| "loss": 0.6125, |
| "rewards/accuracies": 0.6692307591438293, |
| "rewards/chosen": -0.07608187198638916, |
| "rewards/margins": 0.22089019417762756, |
| "rewards/rejected": -0.2969720661640167, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.52, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 16.37677218967305, |
| "learning_rate": 2.7575199021178855e-07, |
| "logits/chosen": -2.654991388320923, |
| "logits/rejected": -2.581737756729126, |
| "logps/chosen": -314.16900634765625, |
| "logps/pi_response": -250.20211791992188, |
| "logps/ref_response": -203.31488037109375, |
| "logps/rejected": -308.69287109375, |
| "loss": 0.5967, |
| "rewards/accuracies": 0.6730769276618958, |
| "rewards/chosen": -0.329426109790802, |
| "rewards/margins": 0.27185821533203125, |
| "rewards/rejected": -0.6012843251228333, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.58, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 22.869287847796812, |
| "learning_rate": 2.1855294234408068e-07, |
| "logits/chosen": -2.4663641452789307, |
| "logits/rejected": -2.1920886039733887, |
| "logps/chosen": -374.7882385253906, |
| "logps/pi_response": -300.48028564453125, |
| "logps/ref_response": -229.24087524414062, |
| "logps/rejected": -370.0035400390625, |
| "loss": 0.573, |
| "rewards/accuracies": 0.7153846025466919, |
| "rewards/chosen": -0.42087164521217346, |
| "rewards/margins": 0.46370625495910645, |
| "rewards/rejected": -0.8845779299736023, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.65, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 22.10929439369714, |
| "learning_rate": 1.6300029195778453e-07, |
| "logits/chosen": -2.2815823554992676, |
| "logits/rejected": -1.9420466423034668, |
| "logps/chosen": -328.23388671875, |
| "logps/pi_response": -285.6993408203125, |
| "logps/ref_response": -202.154541015625, |
| "logps/rejected": -346.80718994140625, |
| "loss": 0.5648, |
| "rewards/accuracies": 0.6653845906257629, |
| "rewards/chosen": -0.5251672863960266, |
| "rewards/margins": 0.47274622321128845, |
| "rewards/rejected": -0.9979135394096375, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.71, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 20.95321740793465, |
| "learning_rate": 1.1200247470632392e-07, |
| "logits/chosen": -2.211641311645508, |
| "logits/rejected": -1.855459451675415, |
| "logps/chosen": -360.8876953125, |
| "logps/pi_response": -303.0977783203125, |
| "logps/ref_response": -215.0885009765625, |
| "logps/rejected": -370.98193359375, |
| "loss": 0.563, |
| "rewards/accuracies": 0.7423076629638672, |
| "rewards/chosen": -0.47680747509002686, |
| "rewards/margins": 0.5583351850509644, |
| "rewards/rejected": -1.0351426601409912, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.78, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 21.53917118957897, |
| "learning_rate": 6.822945986946385e-08, |
| "logits/chosen": -1.8491864204406738, |
| "logits/rejected": -1.6956101655960083, |
| "logps/chosen": -344.0650939941406, |
| "logps/pi_response": -307.6352844238281, |
| "logps/ref_response": -204.07801818847656, |
| "logps/rejected": -390.5289001464844, |
| "loss": 0.5501, |
| "rewards/accuracies": 0.6884615421295166, |
| "rewards/chosen": -0.6317132711410522, |
| "rewards/margins": 0.5379453301429749, |
| "rewards/rejected": -1.1696586608886719, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.84, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 27.579199953433918, |
| "learning_rate": 3.397296523427806e-08, |
| "logits/chosen": -1.9508247375488281, |
| "logits/rejected": -1.6159588098526, |
| "logps/chosen": -333.64599609375, |
| "logps/pi_response": -301.04547119140625, |
| "logps/ref_response": -194.1094207763672, |
| "logps/rejected": -385.6200256347656, |
| "loss": 0.5332, |
| "rewards/accuracies": 0.7038461565971375, |
| "rewards/chosen": -0.6621810793876648, |
| "rewards/margins": 0.6021292805671692, |
| "rewards/rejected": -1.264310359954834, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.91, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 26.751074987142594, |
| "learning_rate": 1.1026475173977978e-08, |
| "logits/chosen": -1.9854283332824707, |
| "logits/rejected": -1.720418930053711, |
| "logps/chosen": -328.14459228515625, |
| "logps/pi_response": -306.83367919921875, |
| "logps/ref_response": -197.67745971679688, |
| "logps/rejected": -386.437255859375, |
| "loss": 0.5515, |
| "rewards/accuracies": 0.7038461565971375, |
| "rewards/chosen": -0.6694343686103821, |
| "rewards/margins": 0.5615480542182922, |
| "rewards/rejected": -1.2309825420379639, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.97, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 27.667206944684594, |
| "learning_rate": 5.913435276374834e-10, |
| "logits/chosen": -1.9676626920700073, |
| "logits/rejected": -1.6368684768676758, |
| "logps/chosen": -339.4317321777344, |
| "logps/pi_response": -303.0911560058594, |
| "logps/ref_response": -192.59991455078125, |
| "logps/rejected": -375.9079895019531, |
| "loss": 0.5599, |
| "rewards/accuracies": 0.7192307710647583, |
| "rewards/chosen": -0.6414641737937927, |
| "rewards/margins": 0.5702866315841675, |
| "rewards/rejected": -1.2117507457733154, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.99, |
| "step": 153, |
| "total_flos": 0.0, |
| "train_loss": 0.5998621676482406, |
| "train_runtime": 41019.2972, |
| "train_samples_per_second": 0.488, |
| "train_steps_per_second": 0.004 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 153, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|