{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 80, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 6.25e-08, "logits/chosen": -2.902447462081909, "logits/rejected": -2.93850040435791, "logps/chosen": -331.92425537109375, "logps/rejected": -304.7728576660156, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.12, "learning_rate": 4.990486745229364e-07, "logits/chosen": -2.8264474868774414, "logits/rejected": -2.8004088401794434, "logps/chosen": -250.434326171875, "logps/rejected": -216.94482421875, "loss": 0.6926, "rewards/accuracies": 0.4652777910232544, "rewards/chosen": 0.000873287848662585, "rewards/margins": 0.0005172663950361311, "rewards/rejected": 0.0003560214536264539, "step": 10 }, { "epoch": 0.25, "learning_rate": 4.6650635094610966e-07, "logits/chosen": -2.761777639389038, "logits/rejected": -2.7672297954559326, "logps/chosen": -256.48187255859375, "logps/rejected": -276.37890625, "loss": 0.6831, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.015453550033271313, "rewards/margins": 0.014861134812235832, "rewards/rejected": 0.0005924167344346642, "step": 20 }, { "epoch": 0.38, "learning_rate": 3.933941090877615e-07, "logits/chosen": -2.8019919395446777, "logits/rejected": -2.7836480140686035, "logps/chosen": -257.0506286621094, "logps/rejected": -244.07858276367188, "loss": 0.6633, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.004371149465441704, "rewards/margins": 0.06962588429450989, "rewards/rejected": -0.06525473296642303, "step": 30 }, { "epoch": 0.5, "learning_rate": 2.934120444167326e-07, "logits/chosen": -2.7478058338165283, "logits/rejected": -2.731013298034668, "logps/chosen": -291.93682861328125, "logps/rejected": -290.897705078125, "loss": 0.6459, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.08649159967899323, "rewards/margins": 0.1326875388622284, "rewards/rejected": -0.21917912364006042, "step": 40 }, { "epoch": 0.62, "learning_rate": 1.8529523872436977e-07, "logits/chosen": -2.7630627155303955, "logits/rejected": -2.769901752471924, "logps/chosen": -275.48516845703125, "logps/rejected": -269.76690673828125, "loss": 0.6259, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17858798801898956, "rewards/margins": 0.11792335659265518, "rewards/rejected": -0.29651135206222534, "step": 50 }, { "epoch": 0.75, "learning_rate": 8.930309757836516e-08, "logits/chosen": -2.7746453285217285, "logits/rejected": -2.717956304550171, "logps/chosen": -277.69140625, "logps/rejected": -282.4396667480469, "loss": 0.6146, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.17534565925598145, "rewards/margins": 0.22178736329078674, "rewards/rejected": -0.3971330225467682, "step": 60 }, { "epoch": 0.88, "learning_rate": 2.3423053240837514e-08, "logits/chosen": -2.7869958877563477, "logits/rejected": -2.776400327682495, "logps/chosen": -290.5310974121094, "logps/rejected": -295.3239440917969, "loss": 0.6132, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.25528621673583984, "rewards/margins": 0.1374310851097107, "rewards/rejected": -0.39271730184555054, "step": 70 }, { "epoch": 1.0, "learning_rate": 0.0, "logits/chosen": -2.7256760597229004, "logits/rejected": -2.682258367538452, "logps/chosen": -292.8467712402344, "logps/rejected": -316.6199951171875, "loss": 0.6074, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.29217854142189026, "rewards/margins": 0.14723847806453705, "rewards/rejected": -0.4394169747829437, "step": 80 }, { "epoch": 1.0, "step": 80, "total_flos": 0.0, "train_loss": 0.6432609260082245, "train_runtime": 1321.6714, "train_samples_per_second": 7.709, "train_steps_per_second": 0.061 } ], "logging_steps": 10, "max_steps": 80, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }