|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 53, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"debug/losses": 0.342240571975708, |
|
"debug/policy_weights": 0.4937487840652466, |
|
"debug/raw_losses": 0.6931471824645996, |
|
"epoch": 0.02, |
|
"learning_rate": 8.333333333333333e-08, |
|
"logits/chosen": -2.8462421894073486, |
|
"logits/rejected": -2.8283610343933105, |
|
"logps/chosen": -274.7393798828125, |
|
"logps/rejected": -204.42575073242188, |
|
"loss": 0.3624, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"debug/losses": 0.3810771703720093, |
|
"debug/policy_weights": 0.5504893660545349, |
|
"debug/raw_losses": 0.6920116543769836, |
|
"epoch": 0.19, |
|
"learning_rate": 4.911172937635942e-07, |
|
"logits/chosen": -2.852349281311035, |
|
"logits/rejected": -2.83735990524292, |
|
"logps/chosen": -306.01458740234375, |
|
"logps/rejected": -295.93804931640625, |
|
"loss": 0.3763, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": 0.0012468346394598484, |
|
"rewards/margins": 0.0023373025469481945, |
|
"rewards/rejected": -0.0010904677910730243, |
|
"step": 10 |
|
}, |
|
{ |
|
"debug/losses": 0.3667130768299103, |
|
"debug/policy_weights": 0.5492504835128784, |
|
"debug/raw_losses": 0.6667538285255432, |
|
"epoch": 0.38, |
|
"learning_rate": 3.982949361823388e-07, |
|
"logits/chosen": -2.8518126010894775, |
|
"logits/rejected": -2.872077226638794, |
|
"logps/chosen": -296.2869567871094, |
|
"logps/rejected": -332.9769592285156, |
|
"loss": 0.374, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.017367612570524216, |
|
"rewards/margins": 0.05825704336166382, |
|
"rewards/rejected": -0.04088941961526871, |
|
"step": 20 |
|
}, |
|
{ |
|
"debug/losses": 0.35409680008888245, |
|
"debug/policy_weights": 0.5632873177528381, |
|
"debug/raw_losses": 0.6232098340988159, |
|
"epoch": 0.57, |
|
"learning_rate": 2.416462557480814e-07, |
|
"logits/chosen": -2.824850559234619, |
|
"logits/rejected": -2.8103976249694824, |
|
"logps/chosen": -312.3518981933594, |
|
"logps/rejected": -323.0265197753906, |
|
"loss": 0.3551, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.014037144370377064, |
|
"rewards/margins": 0.1827748715877533, |
|
"rewards/rejected": -0.1687377393245697, |
|
"step": 30 |
|
}, |
|
{ |
|
"debug/losses": 0.34752941131591797, |
|
"debug/policy_weights": 0.550245463848114, |
|
"debug/raw_losses": 0.6313939690589905, |
|
"epoch": 0.75, |
|
"learning_rate": 8.859303711029939e-08, |
|
"logits/chosen": -2.79345965385437, |
|
"logits/rejected": -2.797208547592163, |
|
"logps/chosen": -275.5638122558594, |
|
"logps/rejected": -348.8089294433594, |
|
"loss": 0.3454, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.0044286223128438, |
|
"rewards/margins": 0.20287349820137024, |
|
"rewards/rejected": -0.20730212330818176, |
|
"step": 40 |
|
}, |
|
{ |
|
"debug/losses": 0.3295789361000061, |
|
"debug/policy_weights": 0.5445331335067749, |
|
"debug/raw_losses": 0.5927887558937073, |
|
"epoch": 0.94, |
|
"learning_rate": 5.009573740853313e-09, |
|
"logits/chosen": -2.829876184463501, |
|
"logits/rejected": -2.8419814109802246, |
|
"logps/chosen": -307.4209899902344, |
|
"logps/rejected": -348.12298583984375, |
|
"loss": 0.3378, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0008583018789067864, |
|
"rewards/margins": 0.31627127528190613, |
|
"rewards/rejected": -0.3154129683971405, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 53, |
|
"total_flos": 0.0, |
|
"train_loss": 0.3564033879424041, |
|
"train_runtime": 425.1442, |
|
"train_samples_per_second": 15.877, |
|
"train_steps_per_second": 0.125 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 53, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|