{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 53, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "debug/losses": 0.17192834615707397, "debug/policy_weights": 0.24804016947746277, "debug/raw_losses": 0.6931471824645996, "epoch": 0.018867924528301886, "grad_norm": 3.0791833143219045, "learning_rate": 8.333333333333333e-08, "logits/chosen": -2.855412006378174, "logits/rejected": -2.8797199726104736, "logps/chosen": -320.43853759765625, "logps/rejected": -340.07073974609375, "loss": 0.2116, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "debug/losses": 0.20367620885372162, "debug/policy_weights": 0.2948996424674988, "debug/raw_losses": 0.6906173229217529, "epoch": 0.18867924528301888, "grad_norm": 3.2025034006962603, "learning_rate": 4.911172937635942e-07, "logits/chosen": -2.8658909797668457, "logits/rejected": -2.8917548656463623, "logps/chosen": -305.7406005859375, "logps/rejected": -332.2490234375, "loss": 0.2033, "rewards/accuracies": 0.4791666567325592, "rewards/chosen": 0.0008342999499291182, "rewards/margins": 0.005153011996299028, "rewards/rejected": -0.004318712279200554, "step": 10 }, { "debug/losses": 0.20357565581798553, "debug/policy_weights": 0.30120497941970825, "debug/raw_losses": 0.675395131111145, "epoch": 0.37735849056603776, "grad_norm": 3.097721440067098, "learning_rate": 3.982949361823388e-07, "logits/chosen": -2.855691432952881, "logits/rejected": -2.8575425148010254, "logps/chosen": -324.3226623535156, "logps/rejected": -331.01007080078125, "loss": 0.2068, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.0029448498971760273, "rewards/margins": 0.03993762657046318, "rewards/rejected": -0.03699277713894844, "step": 20 }, { "debug/losses": 0.20828036963939667, "debug/policy_weights": 0.3203299641609192, "debug/raw_losses": 0.6491612195968628, "epoch": 0.5660377358490566, "grad_norm": 3.2758930059808353, "learning_rate": 2.416462557480814e-07, "logits/chosen": -2.839444398880005, "logits/rejected": -2.842738628387451, "logps/chosen": -298.2297668457031, "logps/rejected": -313.391845703125, "loss": 0.2007, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.014156119897961617, "rewards/margins": 0.11547034978866577, "rewards/rejected": -0.12962646782398224, "step": 30 }, { "debug/losses": 0.18270191550254822, "debug/policy_weights": 0.30252760648727417, "debug/raw_losses": 0.584720253944397, "epoch": 0.7547169811320755, "grad_norm": 2.7163808332522805, "learning_rate": 8.859303711029939e-08, "logits/chosen": -2.842470645904541, "logits/rejected": -2.8394596576690674, "logps/chosen": -290.64208984375, "logps/rejected": -320.837890625, "loss": 0.1963, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.011087085120379925, "rewards/margins": 0.30588188767433167, "rewards/rejected": -0.31696897745132446, "step": 40 }, { "debug/losses": 0.23133957386016846, "debug/policy_weights": 0.3536807894706726, "debug/raw_losses": 0.6469973921775818, "epoch": 0.9433962264150944, "grad_norm": 3.004599614335292, "learning_rate": 5.009573740853313e-09, "logits/chosen": -2.8772711753845215, "logits/rejected": -2.864253520965576, "logps/chosen": -282.04022216796875, "logps/rejected": -316.0839538574219, "loss": 0.1939, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.027098428457975388, "rewards/margins": 0.1554117500782013, "rewards/rejected": -0.1825101673603058, "step": 50 }, { "epoch": 1.0, "step": 53, "total_flos": 0.0, "train_loss": 0.20092295110225677, "train_runtime": 384.8987, "train_samples_per_second": 17.537, "train_steps_per_second": 0.138 } ], "logging_steps": 10, "max_steps": 53, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }