|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9874476987447699, |
|
"eval_steps": 500, |
|
"global_step": 59, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.6666666666666664e-08, |
|
"logits/chosen": -2.3145697116851807, |
|
"logits/rejected": -2.255990743637085, |
|
"logps/chosen": -240.6372833251953, |
|
"logps/pi_response": -133.796875, |
|
"logps/ref_response": -133.796875, |
|
"logps/rejected": -520.8372192382812, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.860114570402053e-08, |
|
"logits/chosen": -2.33054780960083, |
|
"logits/rejected": -2.218600273132324, |
|
"logps/chosen": -279.32952880859375, |
|
"logps/pi_response": -142.03379821777344, |
|
"logps/ref_response": -141.8440399169922, |
|
"logps/rejected": -580.2430419921875, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.5763888955116272, |
|
"rewards/chosen": -0.008015867322683334, |
|
"rewards/margins": 0.018631011247634888, |
|
"rewards/rejected": -0.026646876707673073, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.374915007591053e-08, |
|
"logits/chosen": -2.273662567138672, |
|
"logits/rejected": -2.187835931777954, |
|
"logps/chosen": -318.18304443359375, |
|
"logps/pi_response": -160.6199951171875, |
|
"logps/ref_response": -153.63629150390625, |
|
"logps/rejected": -610.4090576171875, |
|
"loss": 0.6098, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.20712228119373322, |
|
"rewards/margins": 0.21442191302776337, |
|
"rewards/rejected": -0.4215441644191742, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.738232820012406e-08, |
|
"logits/chosen": -2.2064340114593506, |
|
"logits/rejected": -2.1401524543762207, |
|
"logps/chosen": -325.96112060546875, |
|
"logps/pi_response": -152.35244750976562, |
|
"logps/ref_response": -142.01828002929688, |
|
"logps/rejected": -649.8997802734375, |
|
"loss": 0.5528, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3760668635368347, |
|
"rewards/margins": 0.47578781843185425, |
|
"rewards/rejected": -0.8518548011779785, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.8496739886173992e-08, |
|
"logits/chosen": -2.1908040046691895, |
|
"logits/rejected": -2.1057353019714355, |
|
"logps/chosen": -340.5452880859375, |
|
"logps/pi_response": -159.50143432617188, |
|
"logps/ref_response": -146.47113037109375, |
|
"logps/rejected": -705.52978515625, |
|
"loss": 0.507, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.5094891786575317, |
|
"rewards/margins": 0.7861413955688477, |
|
"rewards/rejected": -1.295630693435669, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.947819411632222e-09, |
|
"logits/chosen": -2.184225082397461, |
|
"logits/rejected": -2.1084225177764893, |
|
"logps/chosen": -371.94671630859375, |
|
"logps/pi_response": -157.12008666992188, |
|
"logps/ref_response": -142.58538818359375, |
|
"logps/rejected": -764.1049194335938, |
|
"loss": 0.4925, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.6093698143959045, |
|
"rewards/margins": 0.9578849077224731, |
|
"rewards/rejected": -1.5672547817230225, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"step": 59, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5555570570089049, |
|
"train_runtime": 3425.432, |
|
"train_samples_per_second": 4.462, |
|
"train_steps_per_second": 0.017 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 59, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|