|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 122, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 43.23719376156736, |
|
"learning_rate": 3.846153846153846e-08, |
|
"logits/chosen": -3.6897170543670654, |
|
"logits/rejected": -3.519662618637085, |
|
"logps/chosen": -584.1221923828125, |
|
"logps/rejected": -1429.938720703125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 36.75962682478825, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"logits/chosen": -3.6686697006225586, |
|
"logits/rejected": -3.5728933811187744, |
|
"logps/chosen": -948.7052001953125, |
|
"logps/rejected": -1359.1160888671875, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.5902777910232544, |
|
"rewards/chosen": 0.0016961859073489904, |
|
"rewards/margins": 0.011211401782929897, |
|
"rewards/rejected": -0.009515216574072838, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 26.68557894354543, |
|
"learning_rate": 4.949291683053768e-07, |
|
"logits/chosen": -3.7271945476531982, |
|
"logits/rejected": -3.6335723400115967, |
|
"logps/chosen": -889.2982177734375, |
|
"logps/rejected": -1387.8297119140625, |
|
"loss": 0.5822, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.022394303232431412, |
|
"rewards/margins": 0.24209070205688477, |
|
"rewards/rejected": -0.21969637274742126, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 24.2663500160121, |
|
"learning_rate": 4.70586371748506e-07, |
|
"logits/chosen": -3.849905490875244, |
|
"logits/rejected": -3.791762590408325, |
|
"logps/chosen": -940.0631103515625, |
|
"logps/rejected": -1561.598876953125, |
|
"loss": 0.3698, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.10165198147296906, |
|
"rewards/margins": 1.3781466484069824, |
|
"rewards/rejected": -1.4797985553741455, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 20.782644086151006, |
|
"learning_rate": 4.280458575653296e-07, |
|
"logits/chosen": -4.025510311126709, |
|
"logits/rejected": -3.9784233570098877, |
|
"logps/chosen": -968.7717895507812, |
|
"logps/rejected": -1640.0823974609375, |
|
"loss": 0.2677, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5273348093032837, |
|
"rewards/margins": 2.6067256927490234, |
|
"rewards/rejected": -3.1340603828430176, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 18.40887250285513, |
|
"learning_rate": 3.7081709127108767e-07, |
|
"logits/chosen": -4.091545104980469, |
|
"logits/rejected": -4.069024085998535, |
|
"logps/chosen": -968.97900390625, |
|
"logps/rejected": -1849.377197265625, |
|
"loss": 0.1851, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.7245203256607056, |
|
"rewards/margins": 3.8392529487609863, |
|
"rewards/rejected": -4.5637736320495605, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 30.82289433036107, |
|
"learning_rate": 3.0362127536287636e-07, |
|
"logits/chosen": -4.057796001434326, |
|
"logits/rejected": -4.068426132202148, |
|
"logps/chosen": -992.41796875, |
|
"logps/rejected": -1906.330810546875, |
|
"loss": 0.1831, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.9383566975593567, |
|
"rewards/margins": 4.514256477355957, |
|
"rewards/rejected": -5.452613353729248, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 14.214509503993076, |
|
"learning_rate": 2.3200186419770823e-07, |
|
"logits/chosen": -4.036534309387207, |
|
"logits/rejected": -4.083151817321777, |
|
"logps/chosen": -1086.2425537109375, |
|
"logps/rejected": -1876.317138671875, |
|
"loss": 0.1335, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.1720283031463623, |
|
"rewards/margins": 4.419107437133789, |
|
"rewards/rejected": -5.591135501861572, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 16.47757248085571, |
|
"learning_rate": 1.6186724554503237e-07, |
|
"logits/chosen": -4.064330101013184, |
|
"logits/rejected": -4.05181360244751, |
|
"logps/chosen": -1022.8089599609375, |
|
"logps/rejected": -2001.3753662109375, |
|
"loss": 0.1218, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.9989339113235474, |
|
"rewards/margins": 5.62686014175415, |
|
"rewards/rejected": -6.625794410705566, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 18.42221667739246, |
|
"learning_rate": 9.900331622138063e-08, |
|
"logits/chosen": -4.04154109954834, |
|
"logits/rejected": -4.0591230392456055, |
|
"logps/chosen": -946.9400634765625, |
|
"logps/rejected": -2018.881591796875, |
|
"loss": 0.1367, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.8327393531799316, |
|
"rewards/margins": 5.7661213874816895, |
|
"rewards/rejected": -6.598860263824463, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 27.496315989869185, |
|
"learning_rate": 4.859616286322094e-08, |
|
"logits/chosen": -4.036691188812256, |
|
"logits/rejected": -4.036020755767822, |
|
"logps/chosen": -1030.328369140625, |
|
"logps/rejected": -2058.629638671875, |
|
"loss": 0.1183, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -1.1028516292572021, |
|
"rewards/margins": 5.824145317077637, |
|
"rewards/rejected": -6.92699670791626, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_logits/chosen": -5.204982280731201, |
|
"eval_logits/rejected": -4.0475640296936035, |
|
"eval_logps/chosen": -89.41363525390625, |
|
"eval_logps/rejected": -486.17486572265625, |
|
"eval_loss": 0.4731297492980957, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -0.1248578280210495, |
|
"eval_rewards/margins": 0.583656907081604, |
|
"eval_rewards/rejected": -0.7085147500038147, |
|
"eval_runtime": 5.5516, |
|
"eval_samples_per_second": 0.721, |
|
"eval_steps_per_second": 0.18, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 16.740929534156027, |
|
"learning_rate": 1.4804225250339281e-08, |
|
"logits/chosen": -4.010983467102051, |
|
"logits/rejected": -4.0438127517700195, |
|
"logps/chosen": -967.0818481445312, |
|
"logps/rejected": -1875.3775634765625, |
|
"loss": 0.1294, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.9154699444770813, |
|
"rewards/margins": 4.917794704437256, |
|
"rewards/rejected": -5.8332648277282715, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 17.270638468872217, |
|
"learning_rate": 4.152374292708538e-10, |
|
"logits/chosen": -4.0482587814331055, |
|
"logits/rejected": -4.022861480712891, |
|
"logps/chosen": -1003.4119262695312, |
|
"logps/rejected": -2065.379638671875, |
|
"loss": 0.1025, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.9699057340621948, |
|
"rewards/margins": 5.419854164123535, |
|
"rewards/rejected": -6.3897600173950195, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 122, |
|
"total_flos": 0.0, |
|
"train_loss": 0.24888706256131657, |
|
"train_runtime": 2705.2939, |
|
"train_samples_per_second": 2.886, |
|
"train_steps_per_second": 0.045 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 122, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|