|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 53, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"debug/losses": 0.34206920862197876, |
|
"debug/policy_weights": 0.49350154399871826, |
|
"debug/raw_losses": 0.6931471824645996, |
|
"epoch": 0.018867924528301886, |
|
"grad_norm": 5.360033875918955, |
|
"learning_rate": 8.333333333333333e-08, |
|
"logits/chosen": -2.855412006378174, |
|
"logits/rejected": -2.8797199726104736, |
|
"logps/chosen": -320.43853759765625, |
|
"logps/rejected": -340.07073974609375, |
|
"loss": 0.378, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"debug/losses": 0.374397873878479, |
|
"debug/policy_weights": 0.5419037342071533, |
|
"debug/raw_losses": 0.6909083127975464, |
|
"epoch": 0.18867924528301888, |
|
"grad_norm": 5.481970932548877, |
|
"learning_rate": 4.911172937635942e-07, |
|
"logits/chosen": -2.8661186695098877, |
|
"logits/rejected": -2.892002820968628, |
|
"logps/chosen": -305.7351379394531, |
|
"logps/rejected": -332.1855773925781, |
|
"loss": 0.3734, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": 0.000888873531948775, |
|
"rewards/margins": 0.004573077894747257, |
|
"rewards/rejected": -0.0036842040717601776, |
|
"step": 10 |
|
}, |
|
{ |
|
"debug/losses": 0.36864763498306274, |
|
"debug/policy_weights": 0.5463515520095825, |
|
"debug/raw_losses": 0.6742688417434692, |
|
"epoch": 0.37735849056603776, |
|
"grad_norm": 5.223305949320831, |
|
"learning_rate": 3.982949361823388e-07, |
|
"logits/chosen": -2.8624260425567627, |
|
"logits/rejected": -2.864138126373291, |
|
"logps/chosen": -323.93145751953125, |
|
"logps/rejected": -330.8647155761719, |
|
"loss": 0.3687, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.006857290863990784, |
|
"rewards/margins": 0.042396366596221924, |
|
"rewards/rejected": -0.03553907200694084, |
|
"step": 20 |
|
}, |
|
{ |
|
"debug/losses": 0.36431893706321716, |
|
"debug/policy_weights": 0.5633269548416138, |
|
"debug/raw_losses": 0.6449006199836731, |
|
"epoch": 0.5660377358490566, |
|
"grad_norm": 5.588271480922223, |
|
"learning_rate": 2.416462557480814e-07, |
|
"logits/chosen": -2.85429048538208, |
|
"logits/rejected": -2.857250452041626, |
|
"logps/chosen": -296.940673828125, |
|
"logps/rejected": -313.1925354003906, |
|
"loss": 0.3529, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.0012655016034841537, |
|
"rewards/margins": 0.12636741995811462, |
|
"rewards/rejected": -0.12763293087482452, |
|
"step": 30 |
|
}, |
|
{ |
|
"debug/losses": 0.3164761960506439, |
|
"debug/policy_weights": 0.5361936688423157, |
|
"debug/raw_losses": 0.5776438117027283, |
|
"epoch": 0.7547169811320755, |
|
"grad_norm": 4.853338929616513, |
|
"learning_rate": 8.859303711029939e-08, |
|
"logits/chosen": -2.862122058868408, |
|
"logits/rejected": -2.85917329788208, |
|
"logps/chosen": -290.1681823730469, |
|
"logps/rejected": -323.2647705078125, |
|
"loss": 0.3411, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.006347997579723597, |
|
"rewards/margins": 0.3348899781703949, |
|
"rewards/rejected": -0.34123796224594116, |
|
"step": 40 |
|
}, |
|
{ |
|
"debug/losses": 0.3770141899585724, |
|
"debug/policy_weights": 0.5816048979759216, |
|
"debug/raw_losses": 0.6441487073898315, |
|
"epoch": 0.9433962264150944, |
|
"grad_norm": 5.24061929616419, |
|
"learning_rate": 5.009573740853313e-09, |
|
"logits/chosen": -2.899784564971924, |
|
"logits/rejected": -2.886505603790283, |
|
"logps/chosen": -282.5003662109375, |
|
"logps/rejected": -317.9324645996094, |
|
"loss": 0.329, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.03169974684715271, |
|
"rewards/margins": 0.16929562389850616, |
|
"rewards/rejected": -0.20099535584449768, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 53, |
|
"total_flos": 0.0, |
|
"train_loss": 0.35314782722940985, |
|
"train_runtime": 383.8735, |
|
"train_samples_per_second": 17.584, |
|
"train_steps_per_second": 0.138 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 53, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|