|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984301412872841, |
|
"eval_steps": 100, |
|
"global_step": 477, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0020931449502878076, |
|
"grad_norm": 4.875121866371553, |
|
"learning_rate": 4.166666666666666e-09, |
|
"logits/chosen": -2.238138437271118, |
|
"logits/rejected": -2.554456949234009, |
|
"logps/chosen": -443.7523193359375, |
|
"logps/rejected": -491.8927001953125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.020931449502878074, |
|
"grad_norm": 5.553929970393955, |
|
"learning_rate": 4.166666666666667e-08, |
|
"logits/chosen": -2.4126930236816406, |
|
"logits/rejected": -2.5005030632019043, |
|
"logps/chosen": -418.43328857421875, |
|
"logps/rejected": -405.0360107421875, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0017023859545588493, |
|
"rewards/margins": 0.00048581857117824256, |
|
"rewards/rejected": 0.0012165673542767763, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04186289900575615, |
|
"grad_norm": 4.513029874801273, |
|
"learning_rate": 8.333333333333334e-08, |
|
"logits/chosen": -2.208683490753174, |
|
"logits/rejected": -2.485910415649414, |
|
"logps/chosen": -428.45208740234375, |
|
"logps/rejected": -408.13763427734375, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.0008482746779918671, |
|
"rewards/margins": -0.00037219192017801106, |
|
"rewards/rejected": 0.0012204666854813695, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06279434850863422, |
|
"grad_norm": 4.637552468831084, |
|
"learning_rate": 1.25e-07, |
|
"logits/chosen": -2.224863290786743, |
|
"logits/rejected": -2.4407901763916016, |
|
"logps/chosen": -398.6038818359375, |
|
"logps/rejected": -367.05999755859375, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.0041518621146678925, |
|
"rewards/margins": 0.0011339159682393074, |
|
"rewards/rejected": -0.005285778548568487, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0837257980115123, |
|
"grad_norm": 4.657136939144448, |
|
"learning_rate": 1.6666666666666668e-07, |
|
"logits/chosen": -2.3235936164855957, |
|
"logits/rejected": -2.4915928840637207, |
|
"logps/chosen": -372.97442626953125, |
|
"logps/rejected": -390.05841064453125, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.019573217257857323, |
|
"rewards/margins": 0.007190874312072992, |
|
"rewards/rejected": -0.026764091104269028, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10465724751439037, |
|
"grad_norm": 4.947790369246717, |
|
"learning_rate": 1.9998927475076103e-07, |
|
"logits/chosen": -2.1541531085968018, |
|
"logits/rejected": -2.355862855911255, |
|
"logps/chosen": -408.7329406738281, |
|
"logps/rejected": -406.50347900390625, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.04146841913461685, |
|
"rewards/margins": 0.02013658545911312, |
|
"rewards/rejected": -0.061604999005794525, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12558869701726844, |
|
"grad_norm": 6.135445605235113, |
|
"learning_rate": 1.9961413253717213e-07, |
|
"logits/chosen": -2.120229482650757, |
|
"logits/rejected": -2.287370204925537, |
|
"logps/chosen": -376.740234375, |
|
"logps/rejected": -386.8778381347656, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.08536554872989655, |
|
"rewards/margins": 0.03690432757139206, |
|
"rewards/rejected": -0.12226986885070801, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14652014652014653, |
|
"grad_norm": 5.2300665585071835, |
|
"learning_rate": 1.9870502626379125e-07, |
|
"logits/chosen": -2.208547830581665, |
|
"logits/rejected": -2.316659927368164, |
|
"logps/chosen": -425.2916564941406, |
|
"logps/rejected": -429.31463623046875, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.14128030836582184, |
|
"rewards/margins": 0.05471445247530937, |
|
"rewards/rejected": -0.1959947645664215, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1674515960230246, |
|
"grad_norm": 6.361729619349137, |
|
"learning_rate": 1.9726682903510838e-07, |
|
"logits/chosen": -1.8886642456054688, |
|
"logits/rejected": -2.2390127182006836, |
|
"logps/chosen": -470.6441955566406, |
|
"logps/rejected": -419.4126892089844, |
|
"loss": 0.6583, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2689892053604126, |
|
"rewards/margins": 0.07578183710575104, |
|
"rewards/rejected": -0.34477105736732483, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18838304552590268, |
|
"grad_norm": 7.250967252041406, |
|
"learning_rate": 1.9530725005474194e-07, |
|
"logits/chosen": -2.3355867862701416, |
|
"logits/rejected": -2.404792070388794, |
|
"logps/chosen": -411.76806640625, |
|
"logps/rejected": -441.7333068847656, |
|
"loss": 0.6355, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.42172950506210327, |
|
"rewards/margins": 0.12971071898937225, |
|
"rewards/rejected": -0.5514402985572815, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20931449502878074, |
|
"grad_norm": 7.1454110672964335, |
|
"learning_rate": 1.9283679330160724e-07, |
|
"logits/chosen": -2.2639448642730713, |
|
"logits/rejected": -2.5537800788879395, |
|
"logps/chosen": -477.0587463378906, |
|
"logps/rejected": -489.705810546875, |
|
"loss": 0.6351, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.6349204778671265, |
|
"rewards/margins": 0.18245458602905273, |
|
"rewards/rejected": -0.8173751831054688, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.20931449502878074, |
|
"eval_logits/chosen": -2.2922377586364746, |
|
"eval_logits/rejected": -2.4565351009368896, |
|
"eval_logps/chosen": -472.2982177734375, |
|
"eval_logps/rejected": -487.7696533203125, |
|
"eval_loss": 0.6359348893165588, |
|
"eval_rewards/accuracies": 0.6746031641960144, |
|
"eval_rewards/chosen": -0.675361156463623, |
|
"eval_rewards/margins": 0.2425757199525833, |
|
"eval_rewards/rejected": -0.9179368615150452, |
|
"eval_runtime": 88.9262, |
|
"eval_samples_per_second": 22.491, |
|
"eval_steps_per_second": 0.708, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2302459445316588, |
|
"grad_norm": 9.360622478279684, |
|
"learning_rate": 1.898687012251826e-07, |
|
"logits/chosen": -2.217447280883789, |
|
"logits/rejected": -2.3863320350646973, |
|
"logps/chosen": -481.96990966796875, |
|
"logps/rejected": -499.48345947265625, |
|
"loss": 0.6311, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.7452836036682129, |
|
"rewards/margins": 0.209157794713974, |
|
"rewards/rejected": -0.9544414281845093, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25117739403453687, |
|
"grad_norm": 7.953755036427896, |
|
"learning_rate": 1.8641888376168482e-07, |
|
"logits/chosen": -2.2092318534851074, |
|
"logits/rejected": -2.2929816246032715, |
|
"logps/chosen": -454.405517578125, |
|
"logps/rejected": -497.1351623535156, |
|
"loss": 0.6209, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.7448408007621765, |
|
"rewards/margins": 0.29463425278663635, |
|
"rewards/rejected": -1.0394752025604248, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.272108843537415, |
|
"grad_norm": 8.821105331401093, |
|
"learning_rate": 1.8250583305165094e-07, |
|
"logits/chosen": -2.2061495780944824, |
|
"logits/rejected": -2.3711869716644287, |
|
"logps/chosen": -472.7056579589844, |
|
"logps/rejected": -487.33880615234375, |
|
"loss": 0.6204, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.7287603616714478, |
|
"rewards/margins": 0.2083979845046997, |
|
"rewards/rejected": -0.9371584057807922, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29304029304029305, |
|
"grad_norm": 9.167325969849378, |
|
"learning_rate": 1.78150524316067e-07, |
|
"logits/chosen": -2.2468433380126953, |
|
"logits/rejected": -2.466036319732666, |
|
"logps/chosen": -501.697021484375, |
|
"logps/rejected": -497.5772399902344, |
|
"loss": 0.6195, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7497612237930298, |
|
"rewards/margins": 0.30543631315231323, |
|
"rewards/rejected": -1.0551974773406982, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"grad_norm": 10.828055616866019, |
|
"learning_rate": 1.7337630342238038e-07, |
|
"logits/chosen": -2.163837432861328, |
|
"logits/rejected": -2.328864574432373, |
|
"logps/chosen": -474.3462829589844, |
|
"logps/rejected": -480.0904846191406, |
|
"loss": 0.621, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7979869246482849, |
|
"rewards/margins": 0.22926858067512512, |
|
"rewards/rejected": -1.0272555351257324, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3349031920460492, |
|
"grad_norm": 9.907119624068729, |
|
"learning_rate": 1.682087617430782e-07, |
|
"logits/chosen": -2.1256282329559326, |
|
"logits/rejected": -2.4207208156585693, |
|
"logps/chosen": -476.00933837890625, |
|
"logps/rejected": -491.25799560546875, |
|
"loss": 0.6148, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8471584320068359, |
|
"rewards/margins": 0.2906045913696289, |
|
"rewards/rejected": -1.1377630233764648, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.35583464154892724, |
|
"grad_norm": 10.130673374633192, |
|
"learning_rate": 1.6267559897763025e-07, |
|
"logits/chosen": -2.240748405456543, |
|
"logits/rejected": -2.3730461597442627, |
|
"logps/chosen": -466.5884704589844, |
|
"logps/rejected": -470.2240295410156, |
|
"loss": 0.6136, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.8948806524276733, |
|
"rewards/margins": 0.24292059242725372, |
|
"rewards/rejected": -1.137801170349121, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.37676609105180536, |
|
"grad_norm": 12.664244024162585, |
|
"learning_rate": 1.5680647467311557e-07, |
|
"logits/chosen": -2.3886361122131348, |
|
"logits/rejected": -2.48551344871521, |
|
"logps/chosen": -466.68115234375, |
|
"logps/rejected": -481.260498046875, |
|
"loss": 0.589, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8065212965011597, |
|
"rewards/margins": 0.28530603647232056, |
|
"rewards/rejected": -1.091827392578125, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3976975405546834, |
|
"grad_norm": 15.413041204374277, |
|
"learning_rate": 1.506328492394303e-07, |
|
"logits/chosen": -2.425926685333252, |
|
"logits/rejected": -2.436657190322876, |
|
"logps/chosen": -480.2686462402344, |
|
"logps/rejected": -514.1541137695312, |
|
"loss": 0.6247, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.0268957614898682, |
|
"rewards/margins": 0.26106053590774536, |
|
"rewards/rejected": -1.2879562377929688, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4186289900575615, |
|
"grad_norm": 16.30024056431674, |
|
"learning_rate": 1.4418781531128634e-07, |
|
"logits/chosen": -2.3286993503570557, |
|
"logits/rejected": -2.387202024459839, |
|
"logps/chosen": -454.547119140625, |
|
"logps/rejected": -511.773681640625, |
|
"loss": 0.6101, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.8713696599006653, |
|
"rewards/margins": 0.2568342685699463, |
|
"rewards/rejected": -1.1282037496566772, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4186289900575615, |
|
"eval_logits/chosen": -2.293304443359375, |
|
"eval_logits/rejected": -2.447746753692627, |
|
"eval_logps/chosen": -484.72442626953125, |
|
"eval_logps/rejected": -515.6393432617188, |
|
"eval_loss": 0.5989560484886169, |
|
"eval_rewards/accuracies": 0.7142857313156128, |
|
"eval_rewards/chosen": -0.7996230125427246, |
|
"eval_rewards/margins": 0.39701077342033386, |
|
"eval_rewards/rejected": -1.1966338157653809, |
|
"eval_runtime": 88.7991, |
|
"eval_samples_per_second": 22.523, |
|
"eval_steps_per_second": 0.709, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 12.590959189684769, |
|
"learning_rate": 1.375059203609562e-07, |
|
"logits/chosen": -2.251105785369873, |
|
"logits/rejected": -2.49545955657959, |
|
"logps/chosen": -514.7989501953125, |
|
"logps/rejected": -508.8777770996094, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9383622407913208, |
|
"rewards/margins": 0.3089975416660309, |
|
"rewards/rejected": -1.2473597526550293, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4604918890633176, |
|
"grad_norm": 32.27211919256004, |
|
"learning_rate": 1.306229815126159e-07, |
|
"logits/chosen": -2.374002456665039, |
|
"logits/rejected": -2.5104002952575684, |
|
"logps/chosen": -453.17889404296875, |
|
"logps/rejected": -502.31829833984375, |
|
"loss": 0.5905, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.0016330480575562, |
|
"rewards/margins": 0.3531147539615631, |
|
"rewards/rejected": -1.3547478914260864, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48142333856619574, |
|
"grad_norm": 11.074374701972996, |
|
"learning_rate": 1.2357589355094274e-07, |
|
"logits/chosen": -2.240893602371216, |
|
"logits/rejected": -2.4365756511688232, |
|
"logps/chosen": -464.9483947753906, |
|
"logps/rejected": -497.55950927734375, |
|
"loss": 0.6032, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8673335909843445, |
|
"rewards/margins": 0.4288042187690735, |
|
"rewards/rejected": -1.2961379289627075, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5023547880690737, |
|
"grad_norm": 13.608161796310325, |
|
"learning_rate": 1.1640243115310217e-07, |
|
"logits/chosen": -2.263231039047241, |
|
"logits/rejected": -2.374429225921631, |
|
"logps/chosen": -483.5979919433594, |
|
"logps/rejected": -511.7247009277344, |
|
"loss": 0.5829, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8133866190910339, |
|
"rewards/margins": 0.35704511404037476, |
|
"rewards/rejected": -1.1704318523406982, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5232862375719518, |
|
"grad_norm": 14.904992006409358, |
|
"learning_rate": 1.0914104640422679e-07, |
|
"logits/chosen": -2.312152862548828, |
|
"logits/rejected": -2.504575490951538, |
|
"logps/chosen": -487.4195861816406, |
|
"logps/rejected": -509.62213134765625, |
|
"loss": 0.5914, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9289507865905762, |
|
"rewards/margins": 0.4651150703430176, |
|
"rewards/rejected": -1.3940656185150146, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54421768707483, |
|
"grad_norm": 32.859126344847056, |
|
"learning_rate": 1.0183066268176774e-07, |
|
"logits/chosen": -2.452216863632202, |
|
"logits/rejected": -2.5787224769592285, |
|
"logps/chosen": -454.101318359375, |
|
"logps/rejected": -491.07708740234375, |
|
"loss": 0.5958, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.8231406211853027, |
|
"rewards/margins": 0.37211668491363525, |
|
"rewards/rejected": -1.1952574253082275, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.565149136577708, |
|
"grad_norm": 16.410575278967542, |
|
"learning_rate": 9.451046601356724e-08, |
|
"logits/chosen": -2.4211385250091553, |
|
"logits/rejected": -2.5718777179718018, |
|
"logps/chosen": -482.42889404296875, |
|
"logps/rejected": -517.08447265625, |
|
"loss": 0.5968, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.7412260174751282, |
|
"rewards/margins": 0.46059027314186096, |
|
"rewards/rejected": -1.201816439628601, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5860805860805861, |
|
"grad_norm": 14.64481409505789, |
|
"learning_rate": 8.721969502803953e-08, |
|
"logits/chosen": -2.414080858230591, |
|
"logits/rejected": -2.641306161880493, |
|
"logps/chosen": -471.8504943847656, |
|
"logps/rejected": -492.3824157714844, |
|
"loss": 0.6088, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9498642086982727, |
|
"rewards/margins": 0.3709770143032074, |
|
"rewards/rejected": -1.3208411931991577, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6070120355834642, |
|
"grad_norm": 21.87484189841818, |
|
"learning_rate": 7.999743062239557e-08, |
|
"logits/chosen": -2.5216970443725586, |
|
"logits/rejected": -2.5266430377960205, |
|
"logps/chosen": -452.1351623535156, |
|
"logps/rejected": -507.50408935546875, |
|
"loss": 0.5975, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.9256707429885864, |
|
"rewards/margins": 0.38739797472953796, |
|
"rewards/rejected": -1.3130687475204468, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"grad_norm": 13.23460942074812, |
|
"learning_rate": 7.28823864763583e-08, |
|
"logits/chosen": -2.3628604412078857, |
|
"logits/rejected": -2.5071964263916016, |
|
"logps/chosen": -530.2737426757812, |
|
"logps/rejected": -534.9356689453125, |
|
"loss": 0.5738, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.9033306241035461, |
|
"rewards/margins": 0.409872442483902, |
|
"rewards/rejected": -1.313202977180481, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"eval_logits/chosen": -2.3505780696868896, |
|
"eval_logits/rejected": -2.500311851501465, |
|
"eval_logps/chosen": -511.9820861816406, |
|
"eval_logps/rejected": -562.04541015625, |
|
"eval_loss": 0.5819065570831299, |
|
"eval_rewards/accuracies": 0.7142857313156128, |
|
"eval_rewards/chosen": -1.0721999406814575, |
|
"eval_rewards/margins": 0.5884942412376404, |
|
"eval_rewards/rejected": -1.6606942415237427, |
|
"eval_runtime": 88.8035, |
|
"eval_samples_per_second": 22.522, |
|
"eval_steps_per_second": 0.709, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6488749345892203, |
|
"grad_norm": 23.240653261962176, |
|
"learning_rate": 6.591270153428288e-08, |
|
"logits/chosen": -2.3066353797912598, |
|
"logits/rejected": -2.4188685417175293, |
|
"logps/chosen": -530.1605224609375, |
|
"logps/rejected": -555.5882568359375, |
|
"loss": 0.5816, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0851608514785767, |
|
"rewards/margins": 0.6294665932655334, |
|
"rewards/rejected": -1.7146275043487549, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6698063840920984, |
|
"grad_norm": 12.35925417664361, |
|
"learning_rate": 5.912573556804452e-08, |
|
"logits/chosen": -2.4511845111846924, |
|
"logits/rejected": -2.5960700511932373, |
|
"logps/chosen": -462.8910217285156, |
|
"logps/rejected": -492.77459716796875, |
|
"loss": 0.5721, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9141901135444641, |
|
"rewards/margins": 0.49542441964149475, |
|
"rewards/rejected": -1.4096145629882812, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6907378335949764, |
|
"grad_norm": 19.635922794228048, |
|
"learning_rate": 5.255786891654399e-08, |
|
"logits/chosen": -2.2881722450256348, |
|
"logits/rejected": -2.3245983123779297, |
|
"logps/chosen": -490.61956787109375, |
|
"logps/rejected": -528.5936279296875, |
|
"loss": 0.5831, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0118191242218018, |
|
"rewards/margins": 0.3562072217464447, |
|
"rewards/rejected": -1.3680263757705688, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7116692830978545, |
|
"grad_norm": 34.0341920873177, |
|
"learning_rate": 4.624430747529102e-08, |
|
"logits/chosen": -2.2541534900665283, |
|
"logits/rejected": -2.3677923679351807, |
|
"logps/chosen": -520.711181640625, |
|
"logps/rejected": -555.8665771484375, |
|
"loss": 0.5771, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1455854177474976, |
|
"rewards/margins": 0.44834479689598083, |
|
"rewards/rejected": -1.5939301252365112, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7326007326007326, |
|
"grad_norm": 20.184086200131315, |
|
"learning_rate": 4.0218893981385925e-08, |
|
"logits/chosen": -2.336240291595459, |
|
"logits/rejected": -2.5228190422058105, |
|
"logps/chosen": -490.032470703125, |
|
"logps/rejected": -514.3966064453125, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1221544742584229, |
|
"rewards/margins": 0.41546517610549927, |
|
"rewards/rejected": -1.5376195907592773, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7535321821036107, |
|
"grad_norm": 14.840705395348046, |
|
"learning_rate": 3.45139266054715e-08, |
|
"logits/chosen": -2.3588707447052, |
|
"logits/rejected": -2.5286855697631836, |
|
"logps/chosen": -525.8394775390625, |
|
"logps/rejected": -543.2139892578125, |
|
"loss": 0.5961, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9700316190719604, |
|
"rewards/margins": 0.42892080545425415, |
|
"rewards/rejected": -1.3989523649215698, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7744636316064888, |
|
"grad_norm": 12.56992511385935, |
|
"learning_rate": 2.9159985823062993e-08, |
|
"logits/chosen": -2.4362387657165527, |
|
"logits/rejected": -2.588212251663208, |
|
"logps/chosen": -469.63018798828125, |
|
"logps/rejected": -491.34185791015625, |
|
"loss": 0.5787, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9046362638473511, |
|
"rewards/margins": 0.42833614349365234, |
|
"rewards/rejected": -1.332972526550293, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7953950811093669, |
|
"grad_norm": 14.216122099186137, |
|
"learning_rate": 2.4185770493280577e-08, |
|
"logits/chosen": -2.4785826206207275, |
|
"logits/rejected": -2.5475876331329346, |
|
"logps/chosen": -463.3335876464844, |
|
"logps/rejected": -562.8516235351562, |
|
"loss": 0.5816, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.0568846464157104, |
|
"rewards/margins": 0.6403349041938782, |
|
"rewards/rejected": -1.6972196102142334, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 17.166403382209694, |
|
"learning_rate": 1.9617944023656108e-08, |
|
"logits/chosen": -2.3412299156188965, |
|
"logits/rejected": -2.431159257888794, |
|
"logps/chosen": -569.6896362304688, |
|
"logps/rejected": -604.4752197265625, |
|
"loss": 0.5647, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.135259985923767, |
|
"rewards/margins": 0.5612015724182129, |
|
"rewards/rejected": -1.6964616775512695, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.837257980115123, |
|
"grad_norm": 25.5326876410102, |
|
"learning_rate": 1.5480991445620538e-08, |
|
"logits/chosen": -2.438910961151123, |
|
"logits/rejected": -2.621582269668579, |
|
"logps/chosen": -477.71551513671875, |
|
"logps/rejected": -516.8345336914062, |
|
"loss": 0.5808, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0198707580566406, |
|
"rewards/margins": 0.485908567905426, |
|
"rewards/rejected": -1.5057791471481323, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.837257980115123, |
|
"eval_logits/chosen": -2.4454309940338135, |
|
"eval_logits/rejected": -2.60603404045105, |
|
"eval_logps/chosen": -509.0269470214844, |
|
"eval_logps/rejected": -557.9309692382812, |
|
"eval_loss": 0.5776250958442688, |
|
"eval_rewards/accuracies": 0.7063491940498352, |
|
"eval_rewards/chosen": -1.042648196220398, |
|
"eval_rewards/margins": 0.5769018530845642, |
|
"eval_rewards/rejected": -1.619550108909607, |
|
"eval_runtime": 88.8844, |
|
"eval_samples_per_second": 22.501, |
|
"eval_steps_per_second": 0.709, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.858189429618001, |
|
"grad_norm": 12.2363803809367, |
|
"learning_rate": 1.1797088166794e-08, |
|
"logits/chosen": -2.327822208404541, |
|
"logits/rejected": -2.539658308029175, |
|
"logps/chosen": -523.35693359375, |
|
"logps/rejected": -556.1873168945312, |
|
"loss": 0.5837, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0230482816696167, |
|
"rewards/margins": 0.5963117480278015, |
|
"rewards/rejected": -1.6193599700927734, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 17.1630701293683, |
|
"learning_rate": 8.585981103608341e-09, |
|
"logits/chosen": -2.3502843379974365, |
|
"logits/rejected": -2.5074477195739746, |
|
"logps/chosen": -481.4237365722656, |
|
"logps/rejected": -559.5806884765625, |
|
"loss": 0.567, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.0329768657684326, |
|
"rewards/margins": 0.5681900978088379, |
|
"rewards/rejected": -1.6011669635772705, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9000523286237572, |
|
"grad_norm": 16.184790708379772, |
|
"learning_rate": 5.864882831430273e-09, |
|
"logits/chosen": -2.352280378341675, |
|
"logits/rejected": -2.436026096343994, |
|
"logps/chosen": -513.5238647460938, |
|
"logps/rejected": -551.8958129882812, |
|
"loss": 0.5755, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0582252740859985, |
|
"rewards/margins": 0.5332168340682983, |
|
"rewards/rejected": -1.5914418697357178, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9209837781266352, |
|
"grad_norm": 17.526839475687186, |
|
"learning_rate": 3.6483793195745682e-09, |
|
"logits/chosen": -2.3311455249786377, |
|
"logits/rejected": -2.440988063812256, |
|
"logps/chosen": -482.4281311035156, |
|
"logps/rejected": -498.60345458984375, |
|
"loss": 0.5787, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0323375463485718, |
|
"rewards/margins": 0.4054194390773773, |
|
"rewards/rejected": -1.4377570152282715, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9419152276295133, |
|
"grad_norm": 14.705602904039639, |
|
"learning_rate": 1.9483517457776433e-09, |
|
"logits/chosen": -2.2350025177001953, |
|
"logits/rejected": -2.3830924034118652, |
|
"logps/chosen": -490.513427734375, |
|
"logps/rejected": -551.2727661132812, |
|
"loss": 0.579, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0369895696640015, |
|
"rewards/margins": 0.5606644153594971, |
|
"rewards/rejected": -1.597654104232788, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9628466771323915, |
|
"grad_norm": 15.228089724513376, |
|
"learning_rate": 7.739128092312918e-10, |
|
"logits/chosen": -2.281054973602295, |
|
"logits/rejected": -2.4768524169921875, |
|
"logps/chosen": -496.84814453125, |
|
"logps/rejected": -510.46258544921875, |
|
"loss": 0.579, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.0984748601913452, |
|
"rewards/margins": 0.47915878891944885, |
|
"rewards/rejected": -1.5776336193084717, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9837781266352695, |
|
"grad_norm": 17.607957497609636, |
|
"learning_rate": 1.313578835593465e-10, |
|
"logits/chosen": -2.3311634063720703, |
|
"logits/rejected": -2.4415996074676514, |
|
"logps/chosen": -519.3492431640625, |
|
"logps/rejected": -541.9041137695312, |
|
"loss": 0.5694, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.0364539623260498, |
|
"rewards/margins": 0.33034905791282654, |
|
"rewards/rejected": -1.3668031692504883, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9984301412872841, |
|
"step": 477, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6095632167232361, |
|
"train_runtime": 6900.3625, |
|
"train_samples_per_second": 8.86, |
|
"train_steps_per_second": 0.069 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 477, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|