|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 100, |
|
"global_step": 312, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0032, |
|
"grad_norm": 647945.4912541932, |
|
"learning_rate": 1.5625e-08, |
|
"logits/chosen": -0.34773391485214233, |
|
"logits/rejected": -0.6075438261032104, |
|
"logps/chosen": -72.6761474609375, |
|
"logps/rejected": -90.11207580566406, |
|
"loss": 128855.9062, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.032, |
|
"grad_norm": 973324.1712020065, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -0.5611530542373657, |
|
"logits/rejected": -0.5887401103973389, |
|
"logps/chosen": -80.2381591796875, |
|
"logps/rejected": -83.50374603271484, |
|
"loss": 124005.5694, |
|
"rewards/accuracies": 0.4166666567325592, |
|
"rewards/chosen": -0.0019423539051786065, |
|
"rewards/margins": 5.1506802265066653e-05, |
|
"rewards/rejected": -0.0019938608165830374, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.064, |
|
"grad_norm": 619327.407060219, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -0.6772833466529846, |
|
"logits/rejected": -0.6759974360466003, |
|
"logps/chosen": -103.69559478759766, |
|
"logps/rejected": -107.43603515625, |
|
"loss": 124210.2125, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.004284867085516453, |
|
"rewards/margins": -3.467009082669392e-05, |
|
"rewards/rejected": -0.004250196740031242, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.096, |
|
"grad_norm": 698173.4505162692, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -0.7464536428451538, |
|
"logits/rejected": -0.7253994345664978, |
|
"logps/chosen": -90.76727294921875, |
|
"logps/rejected": -93.79044342041016, |
|
"loss": 126548.2375, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.003106231102719903, |
|
"rewards/margins": -0.0005979427369311452, |
|
"rewards/rejected": -0.0025082884822040796, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 637174.9970357245, |
|
"learning_rate": 4.857142857142857e-07, |
|
"logits/chosen": -0.7085025906562805, |
|
"logits/rejected": -0.7023540139198303, |
|
"logps/chosen": -87.2509765625, |
|
"logps/rejected": -88.0642318725586, |
|
"loss": 124747.6875, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.007863897830247879, |
|
"rewards/margins": -0.0013397409347817302, |
|
"rewards/rejected": -0.006524157710373402, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 759040.4009588562, |
|
"learning_rate": 4.6785714285714283e-07, |
|
"logits/chosen": -0.5708094835281372, |
|
"logits/rejected": -0.55577552318573, |
|
"logps/chosen": -99.05384826660156, |
|
"logps/rejected": -96.9248046875, |
|
"loss": 127056.3875, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.011573193594813347, |
|
"rewards/margins": -0.0007376443827524781, |
|
"rewards/rejected": -0.010835548862814903, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.192, |
|
"grad_norm": 818448.4874125579, |
|
"learning_rate": 4.5e-07, |
|
"logits/chosen": -0.5234788060188293, |
|
"logits/rejected": -0.5684272646903992, |
|
"logps/chosen": -84.0132064819336, |
|
"logps/rejected": -89.70082092285156, |
|
"loss": 124101.0125, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.011890527792274952, |
|
"rewards/margins": 0.0017182690789923072, |
|
"rewards/rejected": -0.013608796522021294, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.224, |
|
"grad_norm": 764315.259548912, |
|
"learning_rate": 4.3214285714285713e-07, |
|
"logits/chosen": -0.672571063041687, |
|
"logits/rejected": -0.6554594039916992, |
|
"logps/chosen": -102.6801986694336, |
|
"logps/rejected": -114.0815658569336, |
|
"loss": 125767.8, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.013903990387916565, |
|
"rewards/margins": 0.0018995633581653237, |
|
"rewards/rejected": -0.01580355316400528, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.256, |
|
"grad_norm": 792832.7721251897, |
|
"learning_rate": 4.142857142857143e-07, |
|
"logits/chosen": -0.6233155131340027, |
|
"logits/rejected": -0.6050644516944885, |
|
"logps/chosen": -89.83741760253906, |
|
"logps/rejected": -96.45980072021484, |
|
"loss": 126646.1, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.011440077796578407, |
|
"rewards/margins": -0.0004714619426522404, |
|
"rewards/rejected": -0.010968615300953388, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.288, |
|
"grad_norm": 810791.4710150602, |
|
"learning_rate": 3.9642857142857137e-07, |
|
"logits/chosen": -0.5288355946540833, |
|
"logits/rejected": -0.507430911064148, |
|
"logps/chosen": -77.9104232788086, |
|
"logps/rejected": -74.20404052734375, |
|
"loss": 126600.7625, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.005952201783657074, |
|
"rewards/margins": -0.001560600707307458, |
|
"rewards/rejected": -0.004391600843518972, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 612814.6572972395, |
|
"learning_rate": 3.785714285714285e-07, |
|
"logits/chosen": -0.6446259617805481, |
|
"logits/rejected": -0.6776315569877625, |
|
"logps/chosen": -92.22976684570312, |
|
"logps/rejected": -100.54733276367188, |
|
"loss": 124326.1, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.011474112048745155, |
|
"rewards/margins": 0.002196565503254533, |
|
"rewards/rejected": -0.013670678250491619, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.352, |
|
"grad_norm": 769940.7880329042, |
|
"learning_rate": 3.607142857142857e-07, |
|
"logits/chosen": -0.5441879630088806, |
|
"logits/rejected": -0.5395065546035767, |
|
"logps/chosen": -64.47439575195312, |
|
"logps/rejected": -78.48651123046875, |
|
"loss": 127264.1375, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.008155420422554016, |
|
"rewards/margins": 0.005157289560884237, |
|
"rewards/rejected": -0.013312709517776966, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 781127.2959197527, |
|
"learning_rate": 3.4285714285714286e-07, |
|
"logits/chosen": -0.7074313759803772, |
|
"logits/rejected": -0.6893147230148315, |
|
"logps/chosen": -99.30326843261719, |
|
"logps/rejected": -100.26654815673828, |
|
"loss": 126373.0, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.006027103401720524, |
|
"rewards/margins": -0.0006245746044442058, |
|
"rewards/rejected": -0.005402528680860996, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.416, |
|
"grad_norm": 942915.0070681617, |
|
"learning_rate": 3.25e-07, |
|
"logits/chosen": -0.5311844348907471, |
|
"logits/rejected": -0.5678432583808899, |
|
"logps/chosen": -89.84095001220703, |
|
"logps/rejected": -95.73307800292969, |
|
"loss": 126546.9625, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.005261361598968506, |
|
"rewards/margins": -0.00025905706570483744, |
|
"rewards/rejected": -0.00500230398029089, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.448, |
|
"grad_norm": 802161.2678528542, |
|
"learning_rate": 3.0714285714285716e-07, |
|
"logits/chosen": -0.6184743642807007, |
|
"logits/rejected": -0.6451131701469421, |
|
"logps/chosen": -109.21659088134766, |
|
"logps/rejected": -114.1061019897461, |
|
"loss": 125730.125, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.011759540066123009, |
|
"rewards/margins": 0.0014495229115709662, |
|
"rewards/rejected": -0.01320906262844801, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 866428.7327389624, |
|
"learning_rate": 2.892857142857143e-07, |
|
"logits/chosen": -0.6030551195144653, |
|
"logits/rejected": -0.5557407140731812, |
|
"logps/chosen": -82.86506652832031, |
|
"logps/rejected": -85.31071472167969, |
|
"loss": 125425.025, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.010830635204911232, |
|
"rewards/margins": -6.357554957503453e-05, |
|
"rewards/rejected": -0.010767060332000256, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.512, |
|
"grad_norm": 743330.5276750317, |
|
"learning_rate": 2.714285714285714e-07, |
|
"logits/chosen": -0.5015612840652466, |
|
"logits/rejected": -0.5147450566291809, |
|
"logps/chosen": -82.76224517822266, |
|
"logps/rejected": -91.91256713867188, |
|
"loss": 124215.3, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.010083668865263462, |
|
"rewards/margins": 0.0024900883436203003, |
|
"rewards/rejected": -0.012573758140206337, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.544, |
|
"grad_norm": 863614.5495224567, |
|
"learning_rate": 2.5357142857142855e-07, |
|
"logits/chosen": -0.5797610878944397, |
|
"logits/rejected": -0.5199266672134399, |
|
"logps/chosen": -94.99356842041016, |
|
"logps/rejected": -96.22293090820312, |
|
"loss": 127004.7, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.012711484916508198, |
|
"rewards/margins": 0.004797719419002533, |
|
"rewards/rejected": -0.017509203404188156, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.576, |
|
"grad_norm": 831681.0077569862, |
|
"learning_rate": 2.357142857142857e-07, |
|
"logits/chosen": -0.6032494902610779, |
|
"logits/rejected": -0.579995334148407, |
|
"logps/chosen": -104.5300521850586, |
|
"logps/rejected": -108.78277587890625, |
|
"loss": 125979.4375, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.009229556657373905, |
|
"rewards/margins": 0.004828121047466993, |
|
"rewards/rejected": -0.014057678170502186, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.608, |
|
"grad_norm": 780274.1467706825, |
|
"learning_rate": 2.1785714285714284e-07, |
|
"logits/chosen": -0.7121313810348511, |
|
"logits/rejected": -0.667202353477478, |
|
"logps/chosen": -115.69401550292969, |
|
"logps/rejected": -110.82621765136719, |
|
"loss": 124809.7, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.012815780937671661, |
|
"rewards/margins": -0.0001598205417394638, |
|
"rewards/rejected": -0.012655961327254772, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 774598.0171325745, |
|
"learning_rate": 2e-07, |
|
"logits/chosen": -0.612346351146698, |
|
"logits/rejected": -0.6116153001785278, |
|
"logps/chosen": -91.24519348144531, |
|
"logps/rejected": -97.00153350830078, |
|
"loss": 123650.5375, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.01422748900949955, |
|
"rewards/margins": 0.0018453721422702074, |
|
"rewards/rejected": -0.01607285998761654, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.672, |
|
"grad_norm": 1137683.0365726806, |
|
"learning_rate": 1.8214285714285714e-07, |
|
"logits/chosen": -0.6241598725318909, |
|
"logits/rejected": -0.6161590814590454, |
|
"logps/chosen": -82.91732788085938, |
|
"logps/rejected": -92.75973510742188, |
|
"loss": 125116.0125, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.01631699874997139, |
|
"rewards/margins": 0.002573491772636771, |
|
"rewards/rejected": -0.018890492618083954, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.704, |
|
"grad_norm": 921161.3498685773, |
|
"learning_rate": 1.6428571428571429e-07, |
|
"logits/chosen": -0.6814984083175659, |
|
"logits/rejected": -0.6642488241195679, |
|
"logps/chosen": -134.07284545898438, |
|
"logps/rejected": -134.7923126220703, |
|
"loss": 125720.675, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.01326170563697815, |
|
"rewards/margins": 0.0025010218378156424, |
|
"rewards/rejected": -0.015762727707624435, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.736, |
|
"grad_norm": 813896.4945325998, |
|
"learning_rate": 1.4642857142857143e-07, |
|
"logits/chosen": -0.5411783456802368, |
|
"logits/rejected": -0.5778718590736389, |
|
"logps/chosen": -104.65946197509766, |
|
"logps/rejected": -107.73319244384766, |
|
"loss": 125973.8125, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.012594607658684254, |
|
"rewards/margins": 0.0011098148534074426, |
|
"rewards/rejected": -0.013704421930015087, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.768, |
|
"grad_norm": 1031122.2282012746, |
|
"learning_rate": 1.2857142857142855e-07, |
|
"logits/chosen": -0.6678429841995239, |
|
"logits/rejected": -0.6291283369064331, |
|
"logps/chosen": -104.91682434082031, |
|
"logps/rejected": -111.02679443359375, |
|
"loss": 126001.475, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.008200669661164284, |
|
"rewards/margins": 0.0015530238160863519, |
|
"rewards/rejected": -0.009753693826496601, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 858633.8039080129, |
|
"learning_rate": 1.107142857142857e-07, |
|
"logits/chosen": -0.6295119524002075, |
|
"logits/rejected": -0.6167672872543335, |
|
"logps/chosen": -123.36985778808594, |
|
"logps/rejected": -133.19418334960938, |
|
"loss": 126223.65, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.011947548016905785, |
|
"rewards/margins": 0.006852240767329931, |
|
"rewards/rejected": -0.018799791112542152, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.832, |
|
"grad_norm": 951847.1640935472, |
|
"learning_rate": 9.285714285714286e-08, |
|
"logits/chosen": -0.6834455728530884, |
|
"logits/rejected": -0.7226243615150452, |
|
"logps/chosen": -86.39234924316406, |
|
"logps/rejected": -95.36772155761719, |
|
"loss": 124640.2, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.01843985728919506, |
|
"rewards/margins": 0.003491448936983943, |
|
"rewards/rejected": -0.021931307390332222, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.864, |
|
"grad_norm": 816825.5268517752, |
|
"learning_rate": 7.5e-08, |
|
"logits/chosen": -0.6084921956062317, |
|
"logits/rejected": -0.606655478477478, |
|
"logps/chosen": -95.06122589111328, |
|
"logps/rejected": -100.9395523071289, |
|
"loss": 126797.975, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.013821562752127647, |
|
"rewards/margins": 0.0025993138551712036, |
|
"rewards/rejected": -0.01642087660729885, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.896, |
|
"grad_norm": 823903.2164322428, |
|
"learning_rate": 5.714285714285714e-08, |
|
"logits/chosen": -0.7316595315933228, |
|
"logits/rejected": -0.7817249298095703, |
|
"logps/chosen": -97.38008880615234, |
|
"logps/rejected": -122.05289459228516, |
|
"loss": 122803.6375, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.012123498134315014, |
|
"rewards/margins": 0.006116434000432491, |
|
"rewards/rejected": -0.018239933997392654, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.928, |
|
"grad_norm": 1213103.129361221, |
|
"learning_rate": 3.9285714285714285e-08, |
|
"logits/chosen": -0.7132126092910767, |
|
"logits/rejected": -0.7211403846740723, |
|
"logps/chosen": -115.4140853881836, |
|
"logps/rejected": -124.9251480102539, |
|
"loss": 125220.8875, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.008542357943952084, |
|
"rewards/margins": 0.007235427852720022, |
|
"rewards/rejected": -0.01577778533101082, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 826125.8509083999, |
|
"learning_rate": 2.142857142857143e-08, |
|
"logits/chosen": -0.4794866144657135, |
|
"logits/rejected": -0.48627161979675293, |
|
"logps/chosen": -106.44710540771484, |
|
"logps/rejected": -113.4127197265625, |
|
"loss": 124190.425, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.016785580664873123, |
|
"rewards/margins": 0.002466305159032345, |
|
"rewards/rejected": -0.019251886755228043, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.992, |
|
"grad_norm": 853168.6471782625, |
|
"learning_rate": 3.571428571428571e-09, |
|
"logits/chosen": -0.6391203999519348, |
|
"logits/rejected": -0.6226745843887329, |
|
"logps/chosen": -105.24736022949219, |
|
"logps/rejected": -109.426025390625, |
|
"loss": 122976.65, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.01043027639389038, |
|
"rewards/margins": 0.003538835793733597, |
|
"rewards/rejected": -0.013969110324978828, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.9984, |
|
"step": 312, |
|
"total_flos": 0.0, |
|
"train_loss": 125356.69771634616, |
|
"train_runtime": 2759.785, |
|
"train_samples_per_second": 7.245, |
|
"train_steps_per_second": 0.113 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 312, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|