{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 478, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0416666666666666e-08, "logits/chosen": -2.847970962524414, "logits/rejected": -2.79160213470459, "logps/chosen": -284.9612731933594, "logps/rejected": -276.45928955078125, "loss": 0.2884, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.02, "learning_rate": 1.0416666666666667e-07, "logits/chosen": -2.7547335624694824, "logits/rejected": -2.752797842025757, "logps/chosen": -249.9191131591797, "logps/rejected": -223.05352783203125, "loss": 0.2732, "rewards/accuracies": 0.4444444477558136, "rewards/chosen": 0.000286663620499894, "rewards/margins": 0.0003425275208428502, "rewards/rejected": -5.586385304923169e-05, "step": 10 }, { "epoch": 0.04, "learning_rate": 2.0833333333333333e-07, "logits/chosen": -2.7447333335876465, "logits/rejected": -2.745217800140381, "logps/chosen": -257.42864990234375, "logps/rejected": -247.4891357421875, "loss": 0.2745, "rewards/accuracies": 0.543749988079071, "rewards/chosen": 0.00026875577168539166, "rewards/margins": 0.0008533511427231133, "rewards/rejected": -0.0005845952546223998, "step": 20 }, { "epoch": 0.06, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -2.800112247467041, "logits/rejected": -2.7527897357940674, "logps/chosen": -300.57513427734375, "logps/rejected": -261.90386962890625, "loss": 0.2762, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 1.9117258489131927e-05, "rewards/margins": 0.008139841258525848, "rewards/rejected": -0.008120724000036716, "step": 30 }, { "epoch": 0.08, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -2.760014057159424, "logits/rejected": -2.7479090690612793, "logps/chosen": -256.8675537109375, "logps/rejected": -274.7937316894531, "loss": 0.278, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.004336931277066469, "rewards/margins": 0.02276746928691864, "rewards/rejected": -0.02710440196096897, "step": 40 }, { "epoch": 0.1, "learning_rate": 4.999733114418725e-07, "logits/chosen": -2.756197929382324, "logits/rejected": -2.728389024734497, "logps/chosen": -285.9724426269531, "logps/rejected": -257.05694580078125, "loss": 0.2674, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.03893030807375908, "rewards/margins": 0.054606568068265915, "rewards/rejected": -0.09353688359260559, "step": 50 }, { "epoch": 0.13, "learning_rate": 4.990398100856366e-07, "logits/chosen": -2.714569091796875, "logits/rejected": -2.70277738571167, "logps/chosen": -287.0633850097656, "logps/rejected": -260.37890625, "loss": 0.2469, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.11178895086050034, "rewards/margins": 0.09152142703533173, "rewards/rejected": -0.20331040024757385, "step": 60 }, { "epoch": 0.15, "learning_rate": 4.967775735898179e-07, "logits/chosen": -2.756913900375366, "logits/rejected": -2.7149264812469482, "logps/chosen": -284.56011962890625, "logps/rejected": -262.7689514160156, "loss": 0.2154, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1742471307516098, "rewards/margins": 0.13403555750846863, "rewards/rejected": -0.30828267335891724, "step": 70 }, { "epoch": 0.17, "learning_rate": 4.931986719649298e-07, "logits/chosen": -2.7609376907348633, "logits/rejected": -2.734510660171509, "logps/chosen": -284.21551513671875, "logps/rejected": -317.23236083984375, "loss": 0.1847, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.23862528800964355, "rewards/margins": 0.21072354912757874, "rewards/rejected": -0.4493487775325775, "step": 80 }, { "epoch": 0.19, "learning_rate": 4.883222001996351e-07, "logits/chosen": -2.7887558937072754, "logits/rejected": -2.7636470794677734, "logps/chosen": -316.7803039550781, "logps/rejected": -324.9473571777344, "loss": 0.1585, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.4141347408294678, "rewards/margins": 0.3233526349067688, "rewards/rejected": -0.7374873757362366, "step": 90 }, { "epoch": 0.21, "learning_rate": 4.821741763807186e-07, "logits/chosen": -2.7823832035064697, "logits/rejected": -2.7509586811065674, "logps/chosen": -330.6769104003906, "logps/rejected": -338.5577087402344, "loss": 0.1528, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4171191155910492, "rewards/margins": 0.3463096618652344, "rewards/rejected": -0.7634287476539612, "step": 100 }, { "epoch": 0.21, "eval_logits/chosen": -2.7335572242736816, "eval_logits/rejected": -2.718585968017578, "eval_logps/chosen": -307.4990234375, "eval_logps/rejected": -349.410400390625, "eval_loss": 0.14441701769828796, "eval_rewards/accuracies": 0.703125, "eval_rewards/chosen": -0.5045937895774841, "eval_rewards/margins": 0.4159778654575348, "eval_rewards/rejected": -0.9205717444419861, "eval_runtime": 53.6196, "eval_samples_per_second": 37.3, "eval_steps_per_second": 0.597, "step": 100 }, { "epoch": 0.23, "learning_rate": 4.747874028753375e-07, "logits/chosen": -2.674656629562378, "logits/rejected": -2.6687161922454834, "logps/chosen": -337.2121276855469, "logps/rejected": -396.91595458984375, "loss": 0.12, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7294884920120239, "rewards/margins": 0.5125582814216614, "rewards/rejected": -1.24204683303833, "step": 110 }, { "epoch": 0.25, "learning_rate": 4.662012913161997e-07, "logits/chosen": -2.6504039764404297, "logits/rejected": -2.6141586303710938, "logps/chosen": -368.1474914550781, "logps/rejected": -387.9979553222656, "loss": 0.1101, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0060771703720093, "rewards/margins": 0.4001084864139557, "rewards/rejected": -1.406185507774353, "step": 120 }, { "epoch": 0.27, "learning_rate": 4.5646165232345103e-07, "logits/chosen": -2.6267447471618652, "logits/rejected": -2.594045400619507, "logps/chosen": -375.8540954589844, "logps/rejected": -363.2262268066406, "loss": 0.1178, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.8372260928153992, "rewards/margins": 0.3926037847995758, "rewards/rejected": -1.2298297882080078, "step": 130 }, { "epoch": 0.29, "learning_rate": 4.456204510851956e-07, "logits/chosen": -2.5409464836120605, "logits/rejected": -2.5215885639190674, "logps/chosen": -319.5188293457031, "logps/rejected": -338.7505798339844, "loss": 0.1225, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.7115974426269531, "rewards/margins": 0.47829103469848633, "rewards/rejected": -1.1898884773254395, "step": 140 }, { "epoch": 0.31, "learning_rate": 4.337355301007335e-07, "logits/chosen": -2.5749340057373047, "logits/rejected": -2.5617096424102783, "logps/chosen": -327.0196228027344, "logps/rejected": -341.3411560058594, "loss": 0.1327, "rewards/accuracies": 0.71875, "rewards/chosen": -0.6178635954856873, "rewards/margins": 0.39589935541152954, "rewards/rejected": -1.0137629508972168, "step": 150 }, { "epoch": 0.33, "learning_rate": 4.2087030056579986e-07, "logits/chosen": -2.564208507537842, "logits/rejected": -2.5471348762512207, "logps/chosen": -320.0544738769531, "logps/rejected": -338.40899658203125, "loss": 0.125, "rewards/accuracies": 0.71875, "rewards/chosen": -0.5907465219497681, "rewards/margins": 0.47800785303115845, "rewards/rejected": -1.0687544345855713, "step": 160 }, { "epoch": 0.36, "learning_rate": 4.070934040463998e-07, "logits/chosen": -2.5135045051574707, "logits/rejected": -2.4910852909088135, "logps/chosen": -342.8435974121094, "logps/rejected": -369.6528625488281, "loss": 0.1216, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.7418714165687561, "rewards/margins": 0.44837790727615356, "rewards/rejected": -1.1902493238449097, "step": 170 }, { "epoch": 0.38, "learning_rate": 3.9247834624635404e-07, "logits/chosen": -2.5766987800598145, "logits/rejected": -2.5346732139587402, "logps/chosen": -350.6063537597656, "logps/rejected": -388.07867431640625, "loss": 0.112, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7415702939033508, "rewards/margins": 0.6597684621810913, "rewards/rejected": -1.4013385772705078, "step": 180 }, { "epoch": 0.4, "learning_rate": 3.7710310482256523e-07, "logits/chosen": -2.475404977798462, "logits/rejected": -2.454536199569702, "logps/chosen": -358.01678466796875, "logps/rejected": -389.5230712890625, "loss": 0.1116, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.0080442428588867, "rewards/margins": 0.5189759135246277, "rewards/rejected": -1.5270202159881592, "step": 190 }, { "epoch": 0.42, "learning_rate": 3.610497133404795e-07, "logits/chosen": -2.582571506500244, "logits/rejected": -2.5270559787750244, "logps/chosen": -408.98175048828125, "logps/rejected": -428.68133544921875, "loss": 0.1108, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.7440523505210876, "rewards/margins": 0.7437406778335571, "rewards/rejected": -1.4877930879592896, "step": 200 }, { "epoch": 0.42, "eval_logits/chosen": -2.541661024093628, "eval_logits/rejected": -2.5261168479919434, "eval_logps/chosen": -334.8226318359375, "eval_logps/rejected": -400.8387451171875, "eval_loss": 0.1121269091963768, "eval_rewards/accuracies": 0.7421875, "eval_rewards/chosen": -0.7778301239013672, "eval_rewards/margins": 0.657024621963501, "eval_rewards/rejected": -1.4348547458648682, "eval_runtime": 53.592, "eval_samples_per_second": 37.319, "eval_steps_per_second": 0.597, "step": 200 }, { "epoch": 0.44, "learning_rate": 3.4440382358952115e-07, "logits/chosen": -2.515012741088867, "logits/rejected": -2.471090316772461, "logps/chosen": -360.82781982421875, "logps/rejected": -387.8971252441406, "loss": 0.108, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.9362117648124695, "rewards/margins": 0.5384734869003296, "rewards/rejected": -1.4746851921081543, "step": 210 }, { "epoch": 0.46, "learning_rate": 3.272542485937368e-07, "logits/chosen": -2.482541561126709, "logits/rejected": -2.4680473804473877, "logps/chosen": -393.7977600097656, "logps/rejected": -409.53424072265625, "loss": 0.1045, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.9923732876777649, "rewards/margins": 0.6926249265670776, "rewards/rejected": -1.6849981546401978, "step": 220 }, { "epoch": 0.48, "learning_rate": 3.096924887558854e-07, "logits/chosen": -2.487530469894409, "logits/rejected": -2.484644651412964, "logps/chosen": -383.8633117675781, "logps/rejected": -448.24578857421875, "loss": 0.1028, "rewards/accuracies": 0.71875, "rewards/chosen": -1.0205323696136475, "rewards/margins": 0.5532661080360413, "rewards/rejected": -1.573798418045044, "step": 230 }, { "epoch": 0.5, "learning_rate": 2.9181224366319943e-07, "logits/chosen": -2.478008270263672, "logits/rejected": -2.449632167816162, "logps/chosen": -384.19049072265625, "logps/rejected": -452.62060546875, "loss": 0.0937, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3048760890960693, "rewards/margins": 0.6360560655593872, "rewards/rejected": -1.9409319162368774, "step": 240 }, { "epoch": 0.52, "learning_rate": 2.7370891215954565e-07, "logits/chosen": -2.4623560905456543, "logits/rejected": -2.4116673469543457, "logps/chosen": -400.11376953125, "logps/rejected": -440.81707763671875, "loss": 0.0877, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.4118852615356445, "rewards/margins": 0.6420382261276245, "rewards/rejected": -2.0539233684539795, "step": 250 }, { "epoch": 0.54, "learning_rate": 2.55479083351317e-07, "logits/chosen": -2.453117847442627, "logits/rejected": -2.4495043754577637, "logps/chosen": -387.19244384765625, "logps/rejected": -417.88995361328125, "loss": 0.102, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1858361959457397, "rewards/margins": 0.508370578289032, "rewards/rejected": -1.6942065954208374, "step": 260 }, { "epoch": 0.56, "learning_rate": 2.3722002126275822e-07, "logits/chosen": -2.412879467010498, "logits/rejected": -2.405669927597046, "logps/chosen": -398.76544189453125, "logps/rejected": -445.95269775390625, "loss": 0.0953, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.2391201257705688, "rewards/margins": 0.6647966504096985, "rewards/rejected": -1.903916597366333, "step": 270 }, { "epoch": 0.59, "learning_rate": 2.19029145890313e-07, "logits/chosen": -2.4127285480499268, "logits/rejected": -2.3809783458709717, "logps/chosen": -374.5039978027344, "logps/rejected": -421.10302734375, "loss": 0.1009, "rewards/accuracies": 0.75, "rewards/chosen": -1.330984354019165, "rewards/margins": 0.5779751539230347, "rewards/rejected": -1.9089596271514893, "step": 280 }, { "epoch": 0.61, "learning_rate": 2.0100351342479216e-07, "logits/chosen": -2.393611192703247, "logits/rejected": -2.351775884628296, "logps/chosen": -355.9366455078125, "logps/rejected": -404.01226806640625, "loss": 0.099, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0932543277740479, "rewards/margins": 0.6044414639472961, "rewards/rejected": -1.6976957321166992, "step": 290 }, { "epoch": 0.63, "learning_rate": 1.8323929841460178e-07, "logits/chosen": -2.402987241744995, "logits/rejected": -2.370898723602295, "logps/chosen": -377.071044921875, "logps/rejected": -432.1875915527344, "loss": 0.0931, "rewards/accuracies": 0.6875, "rewards/chosen": -1.2080243825912476, "rewards/margins": 0.5403395891189575, "rewards/rejected": -1.7483640909194946, "step": 300 }, { "epoch": 0.63, "eval_logits/chosen": -2.422478199005127, "eval_logits/rejected": -2.404529571533203, "eval_logps/chosen": -374.3614196777344, "eval_logps/rejected": -448.11669921875, "eval_loss": 0.09510383009910583, "eval_rewards/accuracies": 0.734375, "eval_rewards/chosen": -1.1732177734375, "eval_rewards/margins": 0.7344164848327637, "eval_rewards/rejected": -1.9076342582702637, "eval_runtime": 53.5886, "eval_samples_per_second": 37.321, "eval_steps_per_second": 0.597, "step": 300 }, { "epoch": 0.65, "learning_rate": 1.6583128063291573e-07, "logits/chosen": -2.335116147994995, "logits/rejected": -2.2753539085388184, "logps/chosen": -394.2746887207031, "logps/rejected": -439.45281982421875, "loss": 0.0955, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.2109578847885132, "rewards/margins": 0.7118672132492065, "rewards/rejected": -1.9228250980377197, "step": 310 }, { "epoch": 0.67, "learning_rate": 1.488723393865766e-07, "logits/chosen": -2.3460986614227295, "logits/rejected": -2.32939076423645, "logps/chosen": -431.77569580078125, "logps/rejected": -426.1026916503906, "loss": 0.0865, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.353933572769165, "rewards/margins": 0.5530282258987427, "rewards/rejected": -1.9069616794586182, "step": 320 }, { "epoch": 0.69, "learning_rate": 1.3245295796480788e-07, "logits/chosen": -2.3651063442230225, "logits/rejected": -2.3300068378448486, "logps/chosen": -386.1956481933594, "logps/rejected": -425.67822265625, "loss": 0.0829, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3398241996765137, "rewards/margins": 0.5806491374969482, "rewards/rejected": -1.920473337173462, "step": 330 }, { "epoch": 0.71, "learning_rate": 1.1666074087171627e-07, "logits/chosen": -2.309349775314331, "logits/rejected": -2.2918732166290283, "logps/chosen": -387.6492919921875, "logps/rejected": -447.10223388671875, "loss": 0.0903, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3640596866607666, "rewards/margins": 0.6877338290214539, "rewards/rejected": -2.0517935752868652, "step": 340 }, { "epoch": 0.73, "learning_rate": 1.0157994641835734e-07, "logits/chosen": -2.3288137912750244, "logits/rejected": -2.299982786178589, "logps/chosen": -362.3104553222656, "logps/rejected": -447.4925842285156, "loss": 0.0848, "rewards/accuracies": 0.71875, "rewards/chosen": -1.3636023998260498, "rewards/margins": 0.7043517827987671, "rewards/rejected": -2.0679543018341064, "step": 350 }, { "epoch": 0.75, "learning_rate": 8.729103716819111e-08, "logits/chosen": -2.31933856010437, "logits/rejected": -2.2764596939086914, "logps/chosen": -400.70001220703125, "logps/rejected": -446.9363708496094, "loss": 0.0828, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.3343183994293213, "rewards/margins": 0.7416595816612244, "rewards/rejected": -2.0759778022766113, "step": 360 }, { "epoch": 0.77, "learning_rate": 7.387025063449081e-08, "logits/chosen": -2.2944209575653076, "logits/rejected": -2.294306993484497, "logps/chosen": -382.673583984375, "logps/rejected": -459.6895446777344, "loss": 0.0852, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.2742609977722168, "rewards/margins": 0.7799550890922546, "rewards/rejected": -2.054215908050537, "step": 370 }, { "epoch": 0.79, "learning_rate": 6.138919252022435e-08, "logits/chosen": -2.3369266986846924, "logits/rejected": -2.32401967048645, "logps/chosen": -400.67498779296875, "logps/rejected": -433.8631286621094, "loss": 0.0845, "rewards/accuracies": 0.75, "rewards/chosen": -1.3484565019607544, "rewards/margins": 0.6219080686569214, "rewards/rejected": -1.9703645706176758, "step": 380 }, { "epoch": 0.82, "learning_rate": 4.991445467064689e-08, "logits/chosen": -2.32033109664917, "logits/rejected": -2.3084654808044434, "logps/chosen": -381.6254577636719, "logps/rejected": -448.0281677246094, "loss": 0.0797, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.244260549545288, "rewards/margins": 0.8163622617721558, "rewards/rejected": -2.0606229305267334, "step": 390 }, { "epoch": 0.84, "learning_rate": 3.9507259776993954e-08, "logits/chosen": -2.353458881378174, "logits/rejected": -2.3455700874328613, "logps/chosen": -439.0157165527344, "logps/rejected": -477.5210876464844, "loss": 0.0932, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3885571956634521, "rewards/margins": 0.6575387120246887, "rewards/rejected": -2.046095609664917, "step": 400 }, { "epoch": 0.84, "eval_logits/chosen": -2.373129367828369, "eval_logits/rejected": -2.355799913406372, "eval_logps/chosen": -389.6565246582031, "eval_logps/rejected": -470.91412353515625, "eval_loss": 0.0877470150589943, "eval_rewards/accuracies": 0.74609375, "eval_rewards/chosen": -1.3261686563491821, "eval_rewards/margins": 0.8094395399093628, "eval_rewards/rejected": -2.135608434677124, "eval_runtime": 53.5712, "eval_samples_per_second": 37.333, "eval_steps_per_second": 0.597, "step": 400 }, { "epoch": 0.86, "learning_rate": 3.022313472693447e-08, "logits/chosen": -2.3416953086853027, "logits/rejected": -2.3408725261688232, "logps/chosen": -391.66741943359375, "logps/rejected": -419.4129943847656, "loss": 0.0849, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -1.369780421257019, "rewards/margins": 0.49969473481178284, "rewards/rejected": -1.8694753646850586, "step": 410 }, { "epoch": 0.88, "learning_rate": 2.2111614344599684e-08, "logits/chosen": -2.3918662071228027, "logits/rejected": -2.3504040241241455, "logps/chosen": -404.1669006347656, "logps/rejected": -459.5892028808594, "loss": 0.0838, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.264682412147522, "rewards/margins": 0.8345810174942017, "rewards/rejected": -2.0992634296417236, "step": 420 }, { "epoch": 0.9, "learning_rate": 1.521597710086439e-08, "logits/chosen": -2.3932044506073, "logits/rejected": -2.3674368858337402, "logps/chosen": -400.2489318847656, "logps/rejected": -435.07525634765625, "loss": 0.0917, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.2529577016830444, "rewards/margins": 0.6412702202796936, "rewards/rejected": -1.8942277431488037, "step": 430 }, { "epoch": 0.92, "learning_rate": 9.57301420397924e-09, "logits/chosen": -2.325007200241089, "logits/rejected": -2.3261613845825195, "logps/chosen": -412.94427490234375, "logps/rejected": -436.2431640625, "loss": 0.0885, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.3467872142791748, "rewards/margins": 0.6128811240196228, "rewards/rejected": -1.9596681594848633, "step": 440 }, { "epoch": 0.94, "learning_rate": 5.212833302556258e-09, "logits/chosen": -2.339844226837158, "logits/rejected": -2.3176491260528564, "logps/chosen": -408.6121520996094, "logps/rejected": -473.6128845214844, "loss": 0.0854, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3199455738067627, "rewards/margins": 0.7675041556358337, "rewards/rejected": -2.087449789047241, "step": 450 }, { "epoch": 0.96, "learning_rate": 2.158697848236607e-09, "logits/chosen": -2.3149573802948, "logits/rejected": -2.3090662956237793, "logps/chosen": -390.1296081542969, "logps/rejected": -438.67218017578125, "loss": 0.0818, "rewards/accuracies": 0.71875, "rewards/chosen": -1.2482762336730957, "rewards/margins": 0.6689838171005249, "rewards/rejected": -1.917259931564331, "step": 460 }, { "epoch": 0.98, "learning_rate": 4.269029751107489e-10, "logits/chosen": -2.3617947101593018, "logits/rejected": -2.3339943885803223, "logps/chosen": -408.900390625, "logps/rejected": -437.16357421875, "loss": 0.0866, "rewards/accuracies": 0.75, "rewards/chosen": -1.2303766012191772, "rewards/margins": 0.7840299606323242, "rewards/rejected": -2.014406681060791, "step": 470 }, { "epoch": 1.0, "step": 478, "total_flos": 0.0, "train_loss": 0.12649250616588353, "train_runtime": 3963.8799, "train_samples_per_second": 15.423, "train_steps_per_second": 0.121 } ], "logging_steps": 10, "max_steps": 478, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 0.0, "trial_name": null, "trial_params": null }