{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 53, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "debug/losses": 0.34206920862197876, "debug/policy_weights": 0.49350154399871826, "debug/raw_losses": 0.6931471824645996, "epoch": 0.018867924528301886, "grad_norm": 5.360033875918955, "learning_rate": 8.333333333333333e-08, "logits/chosen": -2.855412006378174, "logits/rejected": -2.8797199726104736, "logps/chosen": -320.43853759765625, "logps/rejected": -340.07073974609375, "loss": 0.378, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "debug/losses": 0.374397873878479, "debug/policy_weights": 0.5419037342071533, "debug/raw_losses": 0.6909083127975464, "epoch": 0.18867924528301888, "grad_norm": 5.481970932548877, "learning_rate": 4.911172937635942e-07, "logits/chosen": -2.8661186695098877, "logits/rejected": -2.892002820968628, "logps/chosen": -305.7351379394531, "logps/rejected": -332.1855773925781, "loss": 0.3734, "rewards/accuracies": 0.4861111044883728, "rewards/chosen": 0.000888873531948775, "rewards/margins": 0.004573077894747257, "rewards/rejected": -0.0036842040717601776, "step": 10 }, { "debug/losses": 0.36864763498306274, "debug/policy_weights": 0.5463515520095825, "debug/raw_losses": 0.6742688417434692, "epoch": 0.37735849056603776, "grad_norm": 5.223305949320831, "learning_rate": 3.982949361823388e-07, "logits/chosen": -2.8624260425567627, "logits/rejected": -2.864138126373291, "logps/chosen": -323.93145751953125, "logps/rejected": -330.8647155761719, "loss": 0.3687, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.006857290863990784, "rewards/margins": 0.042396366596221924, "rewards/rejected": -0.03553907200694084, "step": 20 }, { "debug/losses": 0.36431893706321716, "debug/policy_weights": 0.5633269548416138, "debug/raw_losses": 0.6449006199836731, "epoch": 0.5660377358490566, "grad_norm": 5.588271480922223, "learning_rate": 2.416462557480814e-07, "logits/chosen": -2.85429048538208, "logits/rejected": -2.857250452041626, "logps/chosen": -296.940673828125, "logps/rejected": -313.1925354003906, "loss": 0.3529, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.0012655016034841537, "rewards/margins": 0.12636741995811462, "rewards/rejected": -0.12763293087482452, "step": 30 }, { "debug/losses": 0.3164761960506439, "debug/policy_weights": 0.5361936688423157, "debug/raw_losses": 0.5776438117027283, "epoch": 0.7547169811320755, "grad_norm": 4.853338929616513, "learning_rate": 8.859303711029939e-08, "logits/chosen": -2.862122058868408, "logits/rejected": -2.85917329788208, "logps/chosen": -290.1681823730469, "logps/rejected": -323.2647705078125, "loss": 0.3411, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.006347997579723597, "rewards/margins": 0.3348899781703949, "rewards/rejected": -0.34123796224594116, "step": 40 }, { "debug/losses": 0.3770141899585724, "debug/policy_weights": 0.5816048979759216, "debug/raw_losses": 0.6441487073898315, "epoch": 0.9433962264150944, "grad_norm": 5.24061929616419, "learning_rate": 5.009573740853313e-09, "logits/chosen": -2.899784564971924, "logits/rejected": -2.886505603790283, "logps/chosen": -282.5003662109375, "logps/rejected": -317.9324645996094, "loss": 0.329, "rewards/accuracies": 0.65625, "rewards/chosen": -0.03169974684715271, "rewards/margins": 0.16929562389850616, "rewards/rejected": -0.20099535584449768, "step": 50 }, { "epoch": 1.0, "step": 53, "total_flos": 0.0, "train_loss": 0.35314782722940985, "train_runtime": 383.8735, "train_samples_per_second": 17.584, "train_steps_per_second": 0.138 } ], "logging_steps": 10, "max_steps": 53, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }