|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 100, |
|
"global_step": 312, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0032, |
|
"grad_norm": 1009.1310262338153, |
|
"learning_rate": 1.5625e-08, |
|
"logits/chosen": -0.48170238733291626, |
|
"logits/rejected": -0.5971822738647461, |
|
"logps/chosen": -29.012826919555664, |
|
"logps/rejected": -37.434471130371094, |
|
"loss": 49.0548, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.032, |
|
"grad_norm": 969.0027881269435, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -0.43042537569999695, |
|
"logits/rejected": -0.4711245000362396, |
|
"logps/chosen": -32.84762191772461, |
|
"logps/rejected": -34.32879638671875, |
|
"loss": 38.7199, |
|
"rewards/accuracies": 0.2777777910232544, |
|
"rewards/chosen": 5.492389027494937e-05, |
|
"rewards/margins": -5.384198811952956e-05, |
|
"rewards/rejected": 0.00010876586748054251, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.064, |
|
"grad_norm": 993.3615142178909, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -0.5954495072364807, |
|
"logits/rejected": -0.618130087852478, |
|
"logps/chosen": -48.733314514160156, |
|
"logps/rejected": -51.568695068359375, |
|
"loss": 41.7023, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.0010887670796364546, |
|
"rewards/margins": -3.9788112189853564e-05, |
|
"rewards/rejected": 0.0011285552754998207, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.096, |
|
"grad_norm": 964.1608085815409, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -0.6574007272720337, |
|
"logits/rejected": -0.7291825413703918, |
|
"logps/chosen": -39.434776306152344, |
|
"logps/rejected": -42.335716247558594, |
|
"loss": 43.7887, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.001887084566988051, |
|
"rewards/margins": 2.423059231659863e-05, |
|
"rewards/rejected": 0.0018628539983183146, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 1181.1281208132718, |
|
"learning_rate": 4.857142857142857e-07, |
|
"logits/chosen": -0.6512608528137207, |
|
"logits/rejected": -0.6266627311706543, |
|
"logps/chosen": -37.72194290161133, |
|
"logps/rejected": -40.52619171142578, |
|
"loss": 40.7141, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0024465853348374367, |
|
"rewards/margins": 0.00038118622615002096, |
|
"rewards/rejected": 0.0020653989631682634, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1123.3502304939157, |
|
"learning_rate": 4.6785714285714283e-07, |
|
"logits/chosen": -0.5247907042503357, |
|
"logits/rejected": -0.5476827621459961, |
|
"logps/chosen": -37.13096237182617, |
|
"logps/rejected": -36.993751525878906, |
|
"loss": 41.8391, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.0022364568430930376, |
|
"rewards/margins": 8.963019354268909e-05, |
|
"rewards/rejected": 0.0021468265913426876, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.192, |
|
"grad_norm": 1085.2090697086192, |
|
"learning_rate": 4.5e-07, |
|
"logits/chosen": -0.43748584389686584, |
|
"logits/rejected": -0.4264857769012451, |
|
"logps/chosen": -42.964988708496094, |
|
"logps/rejected": -37.55246353149414, |
|
"loss": 41.2449, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.001739932456985116, |
|
"rewards/margins": -0.00011503996938699856, |
|
"rewards/rejected": 0.0018549725646153092, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.224, |
|
"grad_norm": 1015.4302150816914, |
|
"learning_rate": 4.3214285714285713e-07, |
|
"logits/chosen": -0.5420335531234741, |
|
"logits/rejected": -0.5390623211860657, |
|
"logps/chosen": -39.6472282409668, |
|
"logps/rejected": -40.896602630615234, |
|
"loss": 42.2903, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": 0.0017996892565861344, |
|
"rewards/margins": -0.0006139087490737438, |
|
"rewards/rejected": 0.0024135981220752, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.256, |
|
"grad_norm": 1112.9628164769215, |
|
"learning_rate": 4.142857142857143e-07, |
|
"logits/chosen": -0.5414769053459167, |
|
"logits/rejected": -0.6392091512680054, |
|
"logps/chosen": -41.40280532836914, |
|
"logps/rejected": -38.21042251586914, |
|
"loss": 37.9115, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.002596390200778842, |
|
"rewards/margins": 0.000514088838826865, |
|
"rewards/rejected": 0.002082301303744316, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.288, |
|
"grad_norm": 1132.9412773578845, |
|
"learning_rate": 3.9642857142857137e-07, |
|
"logits/chosen": -0.5120124816894531, |
|
"logits/rejected": -0.46202293038368225, |
|
"logps/chosen": -30.624813079833984, |
|
"logps/rejected": -29.584774017333984, |
|
"loss": 41.9608, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0020510884933173656, |
|
"rewards/margins": -0.00010951957665383816, |
|
"rewards/rejected": 0.00216060783714056, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1065.2844763715325, |
|
"learning_rate": 3.785714285714285e-07, |
|
"logits/chosen": -0.6081634759902954, |
|
"logits/rejected": -0.5888960957527161, |
|
"logps/chosen": -34.334285736083984, |
|
"logps/rejected": -33.35486602783203, |
|
"loss": 39.9711, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0022316346876323223, |
|
"rewards/margins": 0.0007666076417081058, |
|
"rewards/rejected": 0.0014650272205471992, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.352, |
|
"grad_norm": 1072.4046311311965, |
|
"learning_rate": 3.607142857142857e-07, |
|
"logits/chosen": -0.4411347806453705, |
|
"logits/rejected": -0.4419333040714264, |
|
"logps/chosen": -24.29958152770996, |
|
"logps/rejected": -25.08634376525879, |
|
"loss": 38.3559, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.00189125572796911, |
|
"rewards/margins": 0.00026331303524784744, |
|
"rewards/rejected": 0.001627942780032754, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 1108.2228357679985, |
|
"learning_rate": 3.4285714285714286e-07, |
|
"logits/chosen": -0.5807032585144043, |
|
"logits/rejected": -0.6294857859611511, |
|
"logps/chosen": -39.8912353515625, |
|
"logps/rejected": -42.09502029418945, |
|
"loss": 38.3583, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0031242019031196833, |
|
"rewards/margins": 0.00099054048769176, |
|
"rewards/rejected": 0.002133661415427923, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.416, |
|
"grad_norm": 1149.96614138426, |
|
"learning_rate": 3.25e-07, |
|
"logits/chosen": -0.5135449767112732, |
|
"logits/rejected": -0.4764982759952545, |
|
"logps/chosen": -39.335472106933594, |
|
"logps/rejected": -40.464088439941406, |
|
"loss": 41.8146, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0020922988187521696, |
|
"rewards/margins": 0.00032900949008762836, |
|
"rewards/rejected": 0.0017632890958338976, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.448, |
|
"grad_norm": 1137.1577747838844, |
|
"learning_rate": 3.0714285714285716e-07, |
|
"logits/chosen": -0.6154284477233887, |
|
"logits/rejected": -0.5684093236923218, |
|
"logps/chosen": -43.35015106201172, |
|
"logps/rejected": -45.817138671875, |
|
"loss": 43.4174, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0029308353550732136, |
|
"rewards/margins": 0.00020970618061255664, |
|
"rewards/rejected": 0.0027211292181164026, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1045.1524947853006, |
|
"learning_rate": 2.892857142857143e-07, |
|
"logits/chosen": -0.49414142966270447, |
|
"logits/rejected": -0.5363041758537292, |
|
"logps/chosen": -25.930377960205078, |
|
"logps/rejected": -29.026256561279297, |
|
"loss": 40.5417, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.0020943940617144108, |
|
"rewards/margins": -0.0005832756869494915, |
|
"rewards/rejected": 0.0026776697486639023, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.512, |
|
"grad_norm": 1137.7571627874984, |
|
"learning_rate": 2.714285714285714e-07, |
|
"logits/chosen": -0.45031625032424927, |
|
"logits/rejected": -0.4036481976509094, |
|
"logps/chosen": -35.83759307861328, |
|
"logps/rejected": -33.2865104675293, |
|
"loss": 41.7872, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.0025531931314617395, |
|
"rewards/margins": -0.00011138639092678204, |
|
"rewards/rejected": 0.0026645795442163944, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.544, |
|
"grad_norm": 1178.055387368166, |
|
"learning_rate": 2.5357142857142855e-07, |
|
"logits/chosen": -0.5122729539871216, |
|
"logits/rejected": -0.49646639823913574, |
|
"logps/chosen": -39.3989143371582, |
|
"logps/rejected": -42.247840881347656, |
|
"loss": 41.7091, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.002737321425229311, |
|
"rewards/margins": 0.000858407118357718, |
|
"rewards/rejected": 0.0018789141904562712, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.576, |
|
"grad_norm": 1222.0538303638675, |
|
"learning_rate": 2.357142857142857e-07, |
|
"logits/chosen": -0.5412944555282593, |
|
"logits/rejected": -0.5432535409927368, |
|
"logps/chosen": -41.7281494140625, |
|
"logps/rejected": -41.42877197265625, |
|
"loss": 45.593, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.0030562058091163635, |
|
"rewards/margins": 0.0002739687915891409, |
|
"rewards/rejected": 0.0027822370175272226, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.608, |
|
"grad_norm": 1059.5993351433744, |
|
"learning_rate": 2.1785714285714284e-07, |
|
"logits/chosen": -0.5831159353256226, |
|
"logits/rejected": -0.5751662254333496, |
|
"logps/chosen": -46.93434143066406, |
|
"logps/rejected": -43.748443603515625, |
|
"loss": 43.2402, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": 0.002087333472445607, |
|
"rewards/margins": 3.515209391480312e-05, |
|
"rewards/rejected": 0.0020521811675280333, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1167.572795808609, |
|
"learning_rate": 2e-07, |
|
"logits/chosen": -0.48871344327926636, |
|
"logits/rejected": -0.5342381000518799, |
|
"logps/chosen": -31.179672241210938, |
|
"logps/rejected": -35.725318908691406, |
|
"loss": 40.539, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0033684056252241135, |
|
"rewards/margins": 0.0005909694591537118, |
|
"rewards/rejected": 0.002777435816824436, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.672, |
|
"grad_norm": 1051.702249531706, |
|
"learning_rate": 1.8214285714285714e-07, |
|
"logits/chosen": -0.5266011357307434, |
|
"logits/rejected": -0.49189430475234985, |
|
"logps/chosen": -30.086624145507812, |
|
"logps/rejected": -30.298954010009766, |
|
"loss": 40.312, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0023514502681791782, |
|
"rewards/margins": 0.00040291310870088637, |
|
"rewards/rejected": 0.0019485373049974442, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.704, |
|
"grad_norm": 1132.3748943763287, |
|
"learning_rate": 1.6428571428571429e-07, |
|
"logits/chosen": -0.6477980017662048, |
|
"logits/rejected": -0.5915540456771851, |
|
"logps/chosen": -52.536956787109375, |
|
"logps/rejected": -53.607810974121094, |
|
"loss": 42.3887, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.00340586481615901, |
|
"rewards/margins": 0.0009445315226912498, |
|
"rewards/rejected": 0.0024613335262984037, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.736, |
|
"grad_norm": 1128.7389141378083, |
|
"learning_rate": 1.4642857142857143e-07, |
|
"logits/chosen": -0.47689008712768555, |
|
"logits/rejected": -0.5440131425857544, |
|
"logps/chosen": -34.16132736206055, |
|
"logps/rejected": -37.00074005126953, |
|
"loss": 38.9005, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.0027422527782619, |
|
"rewards/margins": -0.0002558851265348494, |
|
"rewards/rejected": 0.0029981378465890884, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.768, |
|
"grad_norm": 1083.3819182532363, |
|
"learning_rate": 1.2857142857142855e-07, |
|
"logits/chosen": -0.6017645597457886, |
|
"logits/rejected": -0.6154614686965942, |
|
"logps/chosen": -60.46274948120117, |
|
"logps/rejected": -53.796775817871094, |
|
"loss": 39.6015, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.003490505740046501, |
|
"rewards/margins": 0.0010509835556149483, |
|
"rewards/rejected": 0.002439522184431553, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1158.4783936167528, |
|
"learning_rate": 1.107142857142857e-07, |
|
"logits/chosen": -0.5455101132392883, |
|
"logits/rejected": -0.5882077217102051, |
|
"logps/chosen": -51.72278594970703, |
|
"logps/rejected": -55.974029541015625, |
|
"loss": 40.6188, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.002927373396232724, |
|
"rewards/margins": 0.00027384894201532006, |
|
"rewards/rejected": 0.0026535247452557087, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.832, |
|
"grad_norm": 1119.1973257793477, |
|
"learning_rate": 9.285714285714286e-08, |
|
"logits/chosen": -0.5563905239105225, |
|
"logits/rejected": -0.5701907873153687, |
|
"logps/chosen": -36.81728744506836, |
|
"logps/rejected": -40.96973419189453, |
|
"loss": 36.8782, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0024603630881756544, |
|
"rewards/margins": -0.000383538194000721, |
|
"rewards/rejected": 0.002843901515007019, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.864, |
|
"grad_norm": 1139.2702962270098, |
|
"learning_rate": 7.5e-08, |
|
"logits/chosen": -0.5481973886489868, |
|
"logits/rejected": -0.5287628173828125, |
|
"logps/chosen": -36.652503967285156, |
|
"logps/rejected": -36.31899642944336, |
|
"loss": 40.0096, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.002305429894477129, |
|
"rewards/margins": 0.0007444783695973456, |
|
"rewards/rejected": 0.0015609515830874443, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.896, |
|
"grad_norm": 1203.095436788835, |
|
"learning_rate": 5.714285714285714e-08, |
|
"logits/chosen": -0.7058078050613403, |
|
"logits/rejected": -0.6220946311950684, |
|
"logps/chosen": -41.582035064697266, |
|
"logps/rejected": -36.45862579345703, |
|
"loss": 42.6208, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0033497163094580173, |
|
"rewards/margins": 0.0008428205619566143, |
|
"rewards/rejected": 0.0025068954564630985, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.928, |
|
"grad_norm": 1237.1888295476801, |
|
"learning_rate": 3.9285714285714285e-08, |
|
"logits/chosen": -0.6831991076469421, |
|
"logits/rejected": -0.6509202122688293, |
|
"logps/chosen": -54.39546585083008, |
|
"logps/rejected": -51.07060623168945, |
|
"loss": 39.5452, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.0032668504863977432, |
|
"rewards/margins": -0.00010241140989819542, |
|
"rewards/rejected": 0.0033692617435008287, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1122.9875396625666, |
|
"learning_rate": 2.142857142857143e-08, |
|
"logits/chosen": -0.42987775802612305, |
|
"logits/rejected": -0.5015246868133545, |
|
"logps/chosen": -43.15519714355469, |
|
"logps/rejected": -45.89078140258789, |
|
"loss": 40.6293, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.0028726663440465927, |
|
"rewards/margins": 0.00013752638187725097, |
|
"rewards/rejected": 0.002735140034928918, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.992, |
|
"grad_norm": 1133.8011206775125, |
|
"learning_rate": 3.571428571428571e-09, |
|
"logits/chosen": -0.6172088980674744, |
|
"logits/rejected": -0.6021891832351685, |
|
"logps/chosen": -42.21741485595703, |
|
"logps/rejected": -46.169761657714844, |
|
"loss": 39.9958, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.003974457737058401, |
|
"rewards/margins": 0.0007881852798163891, |
|
"rewards/rejected": 0.0031862719915807247, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.9984, |
|
"step": 312, |
|
"total_flos": 0.0, |
|
"train_loss": 40.90790059016301, |
|
"train_runtime": 2754.0963, |
|
"train_samples_per_second": 7.26, |
|
"train_steps_per_second": 0.113 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 312, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|