{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.971563981042654, "eval_steps": 500, "global_step": 104, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.018957345971563982, "grad_norm": 135.46242416026297, "learning_rate": 4.545454545454545e-08, "logits/chosen": 203.78909301757812, "logits/rejected": 182.07696533203125, "logps/chosen": -434.955322265625, "logps/rejected": -429.50384521484375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.1895734597156398, "grad_norm": 153.21007203142685, "learning_rate": 4.545454545454545e-07, "logits/chosen": 172.51792907714844, "logits/rejected": 181.91905212402344, "logps/chosen": -379.5159912109375, "logps/rejected": -448.1414794921875, "loss": 0.7124, "rewards/accuracies": 0.4861111044883728, "rewards/chosen": 0.027371780946850777, "rewards/margins": 0.008838895708322525, "rewards/rejected": 0.01853288896381855, "step": 10 }, { "epoch": 0.3791469194312796, "grad_norm": 142.08069477633126, "learning_rate": 4.885348141000122e-07, "logits/chosen": 173.6682891845703, "logits/rejected": 175.75106811523438, "logps/chosen": -381.7967834472656, "logps/rejected": -430.2057189941406, "loss": 0.6304, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.19567528367042542, "rewards/margins": 0.36299628019332886, "rewards/rejected": -0.16732101142406464, "step": 20 }, { "epoch": 0.5687203791469194, "grad_norm": 120.91669313906786, "learning_rate": 4.5025027361734613e-07, "logits/chosen": 164.82431030273438, "logits/rejected": 173.40679931640625, "logps/chosen": -365.9583740234375, "logps/rejected": -445.6947326660156, "loss": 0.5761, "rewards/accuracies": 0.75, "rewards/chosen": -1.0274367332458496, "rewards/margins": 1.1871185302734375, "rewards/rejected": -2.214555263519287, "step": 30 }, { "epoch": 0.7582938388625592, "grad_norm": 99.23172159300925, "learning_rate": 3.893311157806091e-07, "logits/chosen": 170.08607482910156, "logits/rejected": 162.03958129882812, "logps/chosen": -414.80340576171875, "logps/rejected": -454.820556640625, "loss": 0.5641, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.624403715133667, "rewards/margins": 0.9876155853271484, "rewards/rejected": -3.6120193004608154, "step": 40 }, { "epoch": 0.9478672985781991, "grad_norm": 107.79387973262907, "learning_rate": 3.126631330646801e-07, "logits/chosen": 177.14224243164062, "logits/rejected": 174.7544708251953, "logps/chosen": -459.03759765625, "logps/rejected": -512.6439208984375, "loss": 0.5158, "rewards/accuracies": 0.78125, "rewards/chosen": -3.4753570556640625, "rewards/margins": 1.3291194438934326, "rewards/rejected": -4.804476737976074, "step": 50 }, { "epoch": 1.1374407582938388, "grad_norm": 53.02406081573369, "learning_rate": 2.2891223348923882e-07, "logits/chosen": 167.0361328125, "logits/rejected": 171.70101928710938, "logps/chosen": -414.3773498535156, "logps/rejected": -524.4793701171875, "loss": 0.3097, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -3.200485944747925, "rewards/margins": 2.440056085586548, "rewards/rejected": -5.6405415534973145, "step": 60 }, { "epoch": 1.3270142180094786, "grad_norm": 62.10149124492704, "learning_rate": 1.4754491880085317e-07, "logits/chosen": 157.87100219726562, "logits/rejected": 165.49331665039062, "logps/chosen": -453.851318359375, "logps/rejected": -509.77960205078125, "loss": 0.2085, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -3.4425768852233887, "rewards/margins": 2.912635326385498, "rewards/rejected": -6.355212211608887, "step": 70 }, { "epoch": 1.5165876777251186, "grad_norm": 92.48557214337542, "learning_rate": 7.775827023107834e-08, "logits/chosen": 155.17611694335938, "logits/rejected": 170.6410369873047, "logps/chosen": -473.2515563964844, "logps/rejected": -549.7650146484375, "loss": 0.187, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -3.7577788829803467, "rewards/margins": 2.7696285247802734, "rewards/rejected": -6.527407646179199, "step": 80 }, { "epoch": 1.7061611374407581, "grad_norm": 50.507688635772325, "learning_rate": 2.7440387297912122e-08, "logits/chosen": 160.5852508544922, "logits/rejected": 169.7069854736328, "logps/chosen": -444.99725341796875, "logps/rejected": -585.0924072265625, "loss": 0.1675, "rewards/accuracies": 0.96875, "rewards/chosen": -3.7263991832733154, "rewards/margins": 3.2523887157440186, "rewards/rejected": -6.97878885269165, "step": 90 }, { "epoch": 1.8957345971563981, "grad_norm": 44.901315947621455, "learning_rate": 2.27878296044029e-09, "logits/chosen": 160.45559692382812, "logits/rejected": 161.6788330078125, "logps/chosen": -444.6620178222656, "logps/rejected": -523.9886474609375, "loss": 0.1723, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -3.935753583908081, "rewards/margins": 3.035583734512329, "rewards/rejected": -6.971337795257568, "step": 100 }, { "epoch": 1.971563981042654, "step": 104, "total_flos": 0.0, "train_loss": 0.39618923515081406, "train_runtime": 6217.6844, "train_samples_per_second": 2.171, "train_steps_per_second": 0.017 } ], "logging_steps": 10, "max_steps": 104, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }