|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.8368200836820083, |
|
"eval_steps": 500, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008368200836820083, |
|
"grad_norm": 9.75007616796795, |
|
"learning_rate": 4.166666666666666e-08, |
|
"logits/chosen": -2.761155366897583, |
|
"logits/rejected": -2.7110397815704346, |
|
"logps/chosen": -188.2120819091797, |
|
"logps/rejected": -227.97329711914062, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.08368200836820083, |
|
"grad_norm": 8.857631148542579, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.752246856689453, |
|
"logits/rejected": -2.7512197494506836, |
|
"logps/chosen": -286.0818786621094, |
|
"logps/rejected": -270.5452880859375, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.4791666567325592, |
|
"rewards/chosen": 0.0016119987703859806, |
|
"rewards/margins": 0.0004296954721212387, |
|
"rewards/rejected": 0.001182303298264742, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.16736401673640167, |
|
"grad_norm": 8.648350893662677, |
|
"learning_rate": 4.931352528237397e-07, |
|
"logits/chosen": -2.816281795501709, |
|
"logits/rejected": -2.7909739017486572, |
|
"logps/chosen": -281.4281311035156, |
|
"logps/rejected": -259.23931884765625, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.02481575682759285, |
|
"rewards/margins": 0.019779205322265625, |
|
"rewards/rejected": 0.00503655057400465, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2510460251046025, |
|
"grad_norm": 9.462527888383683, |
|
"learning_rate": 4.658920803689553e-07, |
|
"logits/chosen": -2.7952637672424316, |
|
"logits/rejected": -2.7442078590393066, |
|
"logps/chosen": -272.9776306152344, |
|
"logps/rejected": -227.1405792236328, |
|
"loss": 0.6647, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.021379830315709114, |
|
"rewards/margins": 0.0722198635339737, |
|
"rewards/rejected": -0.05084002763032913, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33472803347280333, |
|
"grad_norm": 10.802670702282992, |
|
"learning_rate": 4.201712553872657e-07, |
|
"logits/chosen": -2.808018445968628, |
|
"logits/rejected": -2.7901508808135986, |
|
"logps/chosen": -269.8119812011719, |
|
"logps/rejected": -276.64312744140625, |
|
"loss": 0.6451, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.11026054620742798, |
|
"rewards/margins": 0.09342513978481293, |
|
"rewards/rejected": -0.2036857157945633, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.41841004184100417, |
|
"grad_norm": 14.0383641925761, |
|
"learning_rate": 3.598859066780754e-07, |
|
"logits/chosen": -2.801910877227783, |
|
"logits/rejected": -2.7805628776550293, |
|
"logps/chosen": -308.90960693359375, |
|
"logps/rejected": -296.39837646484375, |
|
"loss": 0.6148, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.1705964356660843, |
|
"rewards/margins": 0.27841097116470337, |
|
"rewards/rejected": -0.44900742173194885, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.502092050209205, |
|
"grad_norm": 17.441329707174045, |
|
"learning_rate": 2.9019570347986706e-07, |
|
"logits/chosen": -2.8074169158935547, |
|
"logits/rejected": -2.7729382514953613, |
|
"logps/chosen": -314.9671325683594, |
|
"logps/rejected": -307.9980773925781, |
|
"loss": 0.6091, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3278293013572693, |
|
"rewards/margins": 0.2875938415527344, |
|
"rewards/rejected": -0.6154230833053589, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5857740585774058, |
|
"grad_norm": 13.990328041846443, |
|
"learning_rate": 2.1706525253979534e-07, |
|
"logits/chosen": -2.808964252471924, |
|
"logits/rejected": -2.7739078998565674, |
|
"logps/chosen": -318.0167541503906, |
|
"logps/rejected": -318.5818786621094, |
|
"loss": 0.6029, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.400399774312973, |
|
"rewards/margins": 0.22247812151908875, |
|
"rewards/rejected": -0.6228778958320618, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6694560669456067, |
|
"grad_norm": 17.12917679172734, |
|
"learning_rate": 1.4675360263490295e-07, |
|
"logits/chosen": -2.7654800415039062, |
|
"logits/rejected": -2.7729649543762207, |
|
"logps/chosen": -259.6241149902344, |
|
"logps/rejected": -305.7326354980469, |
|
"loss": 0.6021, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.29664868116378784, |
|
"rewards/margins": 0.25704866647720337, |
|
"rewards/rejected": -0.5536972880363464, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.7531380753138075, |
|
"grad_norm": 19.058433635316273, |
|
"learning_rate": 8.527854855097224e-08, |
|
"logits/chosen": -2.727175712585449, |
|
"logits/rejected": -2.7393229007720947, |
|
"logps/chosen": -297.5341491699219, |
|
"logps/rejected": -314.9481201171875, |
|
"loss": 0.5793, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.36374929547309875, |
|
"rewards/margins": 0.3506723940372467, |
|
"rewards/rejected": -0.7144217491149902, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.8368200836820083, |
|
"grad_norm": 15.477637607113444, |
|
"learning_rate": 3.790158337517127e-08, |
|
"logits/chosen": -2.7408394813537598, |
|
"logits/rejected": -2.671611785888672, |
|
"logps/chosen": -293.8873596191406, |
|
"logps/rejected": -300.93572998046875, |
|
"loss": 0.5918, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.41290682554244995, |
|
"rewards/margins": 0.2459905445575714, |
|
"rewards/rejected": -0.6588973999023438, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8368200836820083, |
|
"step": 100, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0, |
|
"train_runtime": 11.541, |
|
"train_samples_per_second": 1324.231, |
|
"train_steps_per_second": 5.112 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 59, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|