|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 185, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 171.98892218238854, |
|
"learning_rate": 2.6315789473684208e-08, |
|
"logits/chosen": -0.1266070306301117, |
|
"logits/rejected": 0.7204304933547974, |
|
"logps/chosen": -319.01666259765625, |
|
"logps/rejected": -252.47039794921875, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 158.2614639136714, |
|
"learning_rate": 2.631578947368421e-07, |
|
"logits/chosen": -0.3861861824989319, |
|
"logits/rejected": 0.33749374747276306, |
|
"logps/chosen": -266.4891052246094, |
|
"logps/rejected": -224.11000061035156, |
|
"loss": 0.6758, |
|
"rewards/accuracies": 0.5520833134651184, |
|
"rewards/chosen": -0.03102089650928974, |
|
"rewards/margins": 0.034922875463962555, |
|
"rewards/rejected": -0.06594377011060715, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 104.41587535161224, |
|
"learning_rate": 4.999552306674344e-07, |
|
"logits/chosen": -0.24374540150165558, |
|
"logits/rejected": 0.8117060661315918, |
|
"logps/chosen": -289.02911376953125, |
|
"logps/rejected": -250.653564453125, |
|
"loss": 0.478, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.4349571764469147, |
|
"rewards/margins": 1.263426661491394, |
|
"rewards/rejected": -1.6983836889266968, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 98.86884631406178, |
|
"learning_rate": 4.946022852363932e-07, |
|
"logits/chosen": -0.2871348261833191, |
|
"logits/rejected": 0.6740838289260864, |
|
"logps/chosen": -281.1429748535156, |
|
"logps/rejected": -271.7496032714844, |
|
"loss": 0.4067, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6509501934051514, |
|
"rewards/margins": 2.906687021255493, |
|
"rewards/rejected": -4.5576372146606445, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 95.68866413164287, |
|
"learning_rate": 4.805146507594034e-07, |
|
"logits/chosen": -0.5090769529342651, |
|
"logits/rejected": 0.5341213345527649, |
|
"logps/chosen": -283.4405517578125, |
|
"logps/rejected": -268.97686767578125, |
|
"loss": 0.371, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.4618725776672363, |
|
"rewards/margins": 3.340365171432495, |
|
"rewards/rejected": -5.802238464355469, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 103.23233983894589, |
|
"learning_rate": 4.581953932909403e-07, |
|
"logits/chosen": -0.4626421332359314, |
|
"logits/rejected": 0.5320831537246704, |
|
"logps/chosen": -313.1284484863281, |
|
"logps/rejected": -299.7115173339844, |
|
"loss": 0.335, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.947516441345215, |
|
"rewards/margins": 3.4062907695770264, |
|
"rewards/rejected": -6.353806495666504, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 91.31338474042249, |
|
"learning_rate": 4.284415281717847e-07, |
|
"logits/chosen": -0.3830726444721222, |
|
"logits/rejected": 0.7034914493560791, |
|
"logps/chosen": -302.44549560546875, |
|
"logps/rejected": -295.2908020019531, |
|
"loss": 0.2941, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -2.427272081375122, |
|
"rewards/margins": 3.3501389026641846, |
|
"rewards/rejected": -5.777410507202148, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 108.21771645007362, |
|
"learning_rate": 3.923155588020165e-07, |
|
"logits/chosen": -0.050761766731739044, |
|
"logits/rejected": 1.1738256216049194, |
|
"logps/chosen": -279.0822448730469, |
|
"logps/rejected": -271.3674011230469, |
|
"loss": 0.3118, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -2.9542946815490723, |
|
"rewards/margins": 3.31215238571167, |
|
"rewards/rejected": -6.266446590423584, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 110.48344039822193, |
|
"learning_rate": 3.511075348989692e-07, |
|
"logits/chosen": -0.02379416488111019, |
|
"logits/rejected": 0.9985305666923523, |
|
"logps/chosen": -291.3994140625, |
|
"logps/rejected": -278.81207275390625, |
|
"loss": 0.3145, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.9188703298568726, |
|
"rewards/margins": 3.239673614501953, |
|
"rewards/rejected": -5.158544063568115, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 94.44668679211257, |
|
"learning_rate": 3.062889851306735e-07, |
|
"logits/chosen": 0.15241345763206482, |
|
"logits/rejected": 1.204730749130249, |
|
"logps/chosen": -285.8970947265625, |
|
"logps/rejected": -274.2763671875, |
|
"loss": 0.3256, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -2.3829667568206787, |
|
"rewards/margins": 3.3176727294921875, |
|
"rewards/rejected": -5.700639724731445, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 72.25131318661623, |
|
"learning_rate": 2.594603691794176e-07, |
|
"logits/chosen": 0.017316246405243874, |
|
"logits/rejected": 1.112657070159912, |
|
"logps/chosen": -291.90631103515625, |
|
"logps/rejected": -277.5140686035156, |
|
"loss": 0.2907, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -2.7051641941070557, |
|
"rewards/margins": 3.0991756916046143, |
|
"rewards/rejected": -5.804339408874512, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 106.17479973453032, |
|
"learning_rate": 2.1229392570965654e-07, |
|
"logits/chosen": 0.5481065511703491, |
|
"logits/rejected": 1.4057379961013794, |
|
"logps/chosen": -290.0019226074219, |
|
"logps/rejected": -288.4178161621094, |
|
"loss": 0.2795, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -3.1080322265625, |
|
"rewards/margins": 3.114968776702881, |
|
"rewards/rejected": -6.223001003265381, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 88.25415485320248, |
|
"learning_rate": 1.6647395712565254e-07, |
|
"logits/chosen": 0.10530638694763184, |
|
"logits/rejected": 1.3136330842971802, |
|
"logps/chosen": -303.7025451660156, |
|
"logps/rejected": -291.4312438964844, |
|
"loss": 0.3024, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -2.87852144241333, |
|
"rewards/margins": 3.439791440963745, |
|
"rewards/rejected": -6.318312644958496, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 81.41509800140894, |
|
"learning_rate": 1.2363668353585485e-07, |
|
"logits/chosen": 0.025721266865730286, |
|
"logits/rejected": 1.1706856489181519, |
|
"logps/chosen": -291.2774963378906, |
|
"logps/rejected": -280.7757873535156, |
|
"loss": 0.2712, |
|
"rewards/accuracies": 0.903124988079071, |
|
"rewards/chosen": -2.8241302967071533, |
|
"rewards/margins": 3.6137948036193848, |
|
"rewards/rejected": -6.437924385070801, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 76.56961564493653, |
|
"learning_rate": 8.53118137245516e-08, |
|
"logits/chosen": 0.24798288941383362, |
|
"logits/rejected": 1.3128881454467773, |
|
"logps/chosen": -298.71783447265625, |
|
"logps/rejected": -297.16790771484375, |
|
"loss": 0.2607, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -2.95615553855896, |
|
"rewards/margins": 3.7294158935546875, |
|
"rewards/rejected": -6.685571193695068, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 101.60579173655283, |
|
"learning_rate": 5.2867919617408553e-08, |
|
"logits/chosen": 0.16610342264175415, |
|
"logits/rejected": 1.297738790512085, |
|
"logps/chosen": -296.17230224609375, |
|
"logps/rejected": -285.56707763671875, |
|
"loss": 0.2777, |
|
"rewards/accuracies": 0.903124988079071, |
|
"rewards/chosen": -2.7571194171905518, |
|
"rewards/margins": 3.536668062210083, |
|
"rewards/rejected": -6.293786525726318, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 92.310593955402, |
|
"learning_rate": 2.7463564905650853e-08, |
|
"logits/chosen": 0.06046704202890396, |
|
"logits/rejected": 1.0854153633117676, |
|
"logps/chosen": -297.1445007324219, |
|
"logps/rejected": -291.33868408203125, |
|
"loss": 0.2684, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -2.6816515922546387, |
|
"rewards/margins": 3.552661418914795, |
|
"rewards/rejected": -6.234313011169434, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 83.71834684366553, |
|
"learning_rate": 1.0005933014019307e-08, |
|
"logits/chosen": 0.15604642033576965, |
|
"logits/rejected": 1.338841199874878, |
|
"logps/chosen": -298.0588684082031, |
|
"logps/rejected": -293.54638671875, |
|
"loss": 0.2745, |
|
"rewards/accuracies": 0.903124988079071, |
|
"rewards/chosen": -3.0692405700683594, |
|
"rewards/margins": 3.527927875518799, |
|
"rewards/rejected": -6.59716796875, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 78.86616344216218, |
|
"learning_rate": 1.1184317978602808e-09, |
|
"logits/chosen": -0.07575028389692307, |
|
"logits/rejected": 1.0216057300567627, |
|
"logps/chosen": -288.5888366699219, |
|
"logps/rejected": -287.2474670410156, |
|
"loss": 0.3031, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -2.831172466278076, |
|
"rewards/margins": 3.852785587310791, |
|
"rewards/rejected": -6.683958530426025, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 185, |
|
"total_flos": 0.0, |
|
"train_loss": 0.33391942269093283, |
|
"train_runtime": 5319.9853, |
|
"train_samples_per_second": 8.891, |
|
"train_steps_per_second": 0.035 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 185, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|