|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 18, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.0885691336788925, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -1.1367864608764648, |
|
"logits/rejected": -0.8539448976516724, |
|
"logps/chosen": -199.5379180908203, |
|
"logps/rejected": -728.93701171875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 5.003858987570864, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -1.1000946760177612, |
|
"logits/rejected": -0.859523355960846, |
|
"logps/chosen": -194.80165100097656, |
|
"logps/rejected": -695.8270263671875, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.5972222089767456, |
|
"rewards/chosen": -9.916844282997772e-05, |
|
"rewards/margins": 0.006284303963184357, |
|
"rewards/rejected": -0.006383472587913275, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 18, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6850088967217339, |
|
"train_runtime": 241.6299, |
|
"train_samples_per_second": 4.735, |
|
"train_steps_per_second": 0.074 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 18, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|