{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.998766954377312, "eval_steps": 1000, "global_step": 405, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.2195121951219512e-08, "logits/chosen": -2.8695335388183594, "logits/rejected": -2.8522377014160156, "logps/chosen": -537.80126953125, "logps/rejected": -108.91968536376953, "loss": 0.5601, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.02, "learning_rate": 1.219512195121951e-07, "logits/chosen": -2.8006718158721924, "logits/rejected": -2.7512741088867188, "logps/chosen": -339.0959167480469, "logps/rejected": -113.41566467285156, "loss": 0.5529, "rewards/accuracies": 0.5763888955116272, "rewards/chosen": 0.0014224686892703176, "rewards/margins": 0.0021842769347131252, "rewards/rejected": -0.0007618081872351468, "step": 10 }, { "epoch": 0.05, "learning_rate": 2.439024390243902e-07, "logits/chosen": -2.815359592437744, "logits/rejected": -2.8071651458740234, "logps/chosen": -435.11260986328125, "logps/rejected": -116.19319152832031, "loss": 0.5366, "rewards/accuracies": 0.793749988079071, "rewards/chosen": 0.022735606878995895, "rewards/margins": 0.04114392399787903, "rewards/rejected": -0.018408317118883133, "step": 20 }, { "epoch": 0.07, "learning_rate": 3.6585365853658536e-07, "logits/chosen": -2.7216827869415283, "logits/rejected": -2.68468976020813, "logps/chosen": -437.6546325683594, "logps/rejected": -142.09629821777344, "loss": 0.4702, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.08811721950769424, "rewards/margins": 0.21381433308124542, "rewards/rejected": -0.12569710612297058, "step": 30 }, { "epoch": 0.1, "learning_rate": 4.878048780487804e-07, "logits/chosen": -2.589413642883301, "logits/rejected": -2.5702805519104004, "logps/chosen": -415.1499938964844, "logps/rejected": -170.38934326171875, "loss": 0.3716, "rewards/accuracies": 0.793749988079071, "rewards/chosen": 0.013831913471221924, "rewards/margins": 0.4765930771827698, "rewards/rejected": -0.4627610743045807, "step": 40 }, { "epoch": 0.12, "learning_rate": 4.992461696250783e-07, "logits/chosen": -2.469589948654175, "logits/rejected": -2.456850528717041, "logps/chosen": -421.22100830078125, "logps/rejected": -211.1681671142578, "loss": 0.281, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12107028812170029, "rewards/margins": 0.793075680732727, "rewards/rejected": -0.9141460657119751, "step": 50 }, { "epoch": 0.15, "learning_rate": 4.966461721767899e-07, "logits/chosen": -2.380545139312744, "logits/rejected": -2.347712755203247, "logps/chosen": -379.7464599609375, "logps/rejected": -211.6461944580078, "loss": 0.1977, "rewards/accuracies": 0.75, "rewards/chosen": -0.34397271275520325, "rewards/margins": 0.8727855682373047, "rewards/rejected": -1.216758370399475, "step": 60 }, { "epoch": 0.17, "learning_rate": 4.922100518015975e-07, "logits/chosen": -2.382647752761841, "logits/rejected": -2.3510990142822266, "logps/chosen": -435.24627685546875, "logps/rejected": -257.90399169921875, "loss": 0.1791, "rewards/accuracies": 0.75, "rewards/chosen": -0.3540270924568176, "rewards/margins": 1.2106399536132812, "rewards/rejected": -1.5646671056747437, "step": 70 }, { "epoch": 0.2, "learning_rate": 4.859708325770919e-07, "logits/chosen": -2.32224178314209, "logits/rejected": -2.277831554412842, "logps/chosen": -448.172607421875, "logps/rejected": -297.2752685546875, "loss": 0.1592, "rewards/accuracies": 0.84375, "rewards/chosen": -0.37227025628089905, "rewards/margins": 1.5082250833511353, "rewards/rejected": -1.880495309829712, "step": 80 }, { "epoch": 0.22, "learning_rate": 4.779749614980225e-07, "logits/chosen": -2.3661742210388184, "logits/rejected": -2.33076810836792, "logps/chosen": -458.77081298828125, "logps/rejected": -333.3035888671875, "loss": 0.1222, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.7345383763313293, "rewards/margins": 1.5108360052108765, "rewards/rejected": -2.2453744411468506, "step": 90 }, { "epoch": 0.25, "learning_rate": 4.682819627081427e-07, "logits/chosen": -2.335977554321289, "logits/rejected": -2.281729221343994, "logps/chosen": -503.5619201660156, "logps/rejected": -364.14862060546875, "loss": 0.1166, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.8016496896743774, "rewards/margins": 1.7226619720458984, "rewards/rejected": -2.5243115425109863, "step": 100 }, { "epoch": 0.27, "learning_rate": 4.569639943810477e-07, "logits/chosen": -2.3253397941589355, "logits/rejected": -2.2732253074645996, "logps/chosen": -540.4392700195312, "logps/rejected": -381.0623474121094, "loss": 0.0958, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.9782747030258179, "rewards/margins": 1.7351051568984985, "rewards/rejected": -2.7133796215057373, "step": 110 }, { "epoch": 0.3, "learning_rate": 4.4410531154874543e-07, "logits/chosen": -2.319655418395996, "logits/rejected": -2.269864082336426, "logps/chosen": -528.8589477539062, "logps/rejected": -398.9247741699219, "loss": 0.0964, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.9485586881637573, "rewards/margins": 1.865126609802246, "rewards/rejected": -2.813685178756714, "step": 120 }, { "epoch": 0.32, "learning_rate": 4.298016388768561e-07, "logits/chosen": -2.3149333000183105, "logits/rejected": -2.247347116470337, "logps/chosen": -502.3334045410156, "logps/rejected": -388.2889404296875, "loss": 0.1271, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.7547552585601807, "rewards/margins": 1.8656883239746094, "rewards/rejected": -2.620443344116211, "step": 130 }, { "epoch": 0.35, "learning_rate": 4.1415945805573005e-07, "logits/chosen": -2.3083794116973877, "logits/rejected": -2.263607978820801, "logps/chosen": -507.48797607421875, "logps/rejected": -399.94720458984375, "loss": 0.1077, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.6250351071357727, "rewards/margins": 2.218956470489502, "rewards/rejected": -2.843991756439209, "step": 140 }, { "epoch": 0.37, "learning_rate": 3.972952151123984e-07, "logits/chosen": -2.271951198577881, "logits/rejected": -2.212960720062256, "logps/chosen": -497.3548889160156, "logps/rejected": -402.63250732421875, "loss": 0.1228, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.729441225528717, "rewards/margins": 2.022250175476074, "rewards/rejected": -2.7516913414001465, "step": 150 }, { "epoch": 0.39, "learning_rate": 3.793344535444142e-07, "logits/chosen": -2.3041064739227295, "logits/rejected": -2.241098642349243, "logps/chosen": -538.1851806640625, "logps/rejected": -396.97491455078125, "loss": 0.108, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.7181452512741089, "rewards/margins": 2.0927627086639404, "rewards/rejected": -2.810908079147339, "step": 160 }, { "epoch": 0.42, "learning_rate": 3.604108797288461e-07, "logits/chosen": -2.2430710792541504, "logits/rejected": -2.186084270477295, "logps/chosen": -503.18536376953125, "logps/rejected": -424.78863525390625, "loss": 0.0967, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.9911813735961914, "rewards/margins": 2.0638327598571777, "rewards/rejected": -3.055014133453369, "step": 170 }, { "epoch": 0.44, "learning_rate": 3.40665367563858e-07, "logits/chosen": -2.2562966346740723, "logits/rejected": -2.17518949508667, "logps/chosen": -452.6295471191406, "logps/rejected": -332.67901611328125, "loss": 0.1233, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.7905207276344299, "rewards/margins": 1.5971567630767822, "rewards/rejected": -2.3876776695251465, "step": 180 }, { "epoch": 0.47, "learning_rate": 3.202449097526798e-07, "logits/chosen": -2.253117084503174, "logits/rejected": -2.1730282306671143, "logps/chosen": -486.3101501464844, "logps/rejected": -395.2755432128906, "loss": 0.1155, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9943073987960815, "rewards/margins": 1.831038475036621, "rewards/rejected": -2.825345993041992, "step": 190 }, { "epoch": 0.49, "learning_rate": 2.993015235369905e-07, "logits/chosen": -2.2392661571502686, "logits/rejected": -2.1586246490478516, "logps/chosen": -517.7527465820312, "logps/rejected": -401.18096923828125, "loss": 0.1106, "rewards/accuracies": 0.875, "rewards/chosen": -0.8338342905044556, "rewards/margins": 1.99163818359375, "rewards/rejected": -2.825472354888916, "step": 200 }, { "epoch": 0.52, "learning_rate": 2.7799111902582693e-07, "logits/chosen": -2.195733070373535, "logits/rejected": -2.118263006210327, "logps/chosen": -480.4593200683594, "logps/rejected": -390.95550537109375, "loss": 0.1078, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9628806114196777, "rewards/margins": 1.8329284191131592, "rewards/rejected": -2.795809268951416, "step": 210 }, { "epoch": 0.54, "learning_rate": 2.564723385445869e-07, "logits/chosen": -2.1850428581237793, "logits/rejected": -2.105459213256836, "logps/chosen": -511.00762939453125, "logps/rejected": -415.4864807128906, "loss": 0.1227, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.7178577184677124, "rewards/margins": 2.1069018840789795, "rewards/rejected": -2.8247594833374023, "step": 220 }, { "epoch": 0.57, "learning_rate": 2.3490537564442845e-07, "logits/chosen": -2.2125039100646973, "logits/rejected": -2.1424784660339355, "logps/chosen": -482.50213623046875, "logps/rejected": -388.6355285644531, "loss": 0.1171, "rewards/accuracies": 0.8125, "rewards/chosen": -0.7609508037567139, "rewards/margins": 1.9426014423370361, "rewards/rejected": -2.703552722930908, "step": 230 }, { "epoch": 0.59, "learning_rate": 2.1345078256378801e-07, "logits/chosen": -2.201641798019409, "logits/rejected": -2.111548900604248, "logps/chosen": -484.5047912597656, "logps/rejected": -418.87945556640625, "loss": 0.0928, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.7977638244628906, "rewards/margins": 2.190457344055176, "rewards/rejected": -2.9882209300994873, "step": 240 }, { "epoch": 0.62, "learning_rate": 1.9226827501969865e-07, "logits/chosen": -2.181918144226074, "logits/rejected": -2.0827794075012207, "logps/chosen": -518.0274658203125, "logps/rejected": -448.84912109375, "loss": 0.1207, "rewards/accuracies": 0.875, "rewards/chosen": -0.8979538083076477, "rewards/margins": 2.4133543968200684, "rewards/rejected": -3.3113083839416504, "step": 250 }, { "epoch": 0.64, "learning_rate": 1.715155432264775e-07, "logits/chosen": -2.1671414375305176, "logits/rejected": -2.077252149581909, "logps/chosen": -475.8624572753906, "logps/rejected": -393.2193298339844, "loss": 0.1295, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.9034333229064941, "rewards/margins": 1.9351081848144531, "rewards/rejected": -2.8385415077209473, "step": 260 }, { "epoch": 0.67, "learning_rate": 1.51347077992983e-07, "logits/chosen": -2.1908605098724365, "logits/rejected": -2.0970406532287598, "logps/chosen": -487.83563232421875, "logps/rejected": -409.66546630859375, "loss": 0.1009, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.9220987558364868, "rewards/margins": 1.9496616125106812, "rewards/rejected": -2.871760845184326, "step": 270 }, { "epoch": 0.69, "learning_rate": 1.3191302063739906e-07, "logits/chosen": -2.190361261367798, "logits/rejected": -2.0984115600585938, "logps/chosen": -508.9253845214844, "logps/rejected": -422.1849060058594, "loss": 0.0899, "rewards/accuracies": 0.8125, "rewards/chosen": -1.112410545349121, "rewards/margins": 1.9581248760223389, "rewards/rejected": -3.070535659790039, "step": 280 }, { "epoch": 0.72, "learning_rate": 1.1335804528119475e-07, "logits/chosen": -2.1762964725494385, "logits/rejected": -2.0631372928619385, "logps/chosen": -554.19580078125, "logps/rejected": -427.8614196777344, "loss": 0.084, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0812236070632935, "rewards/margins": 2.109238862991333, "rewards/rejected": -3.190462827682495, "step": 290 }, { "epoch": 0.74, "learning_rate": 9.582028184286423e-08, "logits/chosen": -2.1851372718811035, "logits/rejected": -2.1166481971740723, "logps/chosen": -458.4752502441406, "logps/rejected": -388.1811828613281, "loss": 0.1002, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9944161176681519, "rewards/margins": 1.8174244165420532, "rewards/rejected": -2.811840534210205, "step": 300 }, { "epoch": 0.76, "learning_rate": 7.943028774907065e-08, "logits/chosen": -2.173356533050537, "logits/rejected": -2.102245330810547, "logps/chosen": -476.6107482910156, "logps/rejected": -382.61737060546875, "loss": 0.1083, "rewards/accuracies": 0.8125, "rewards/chosen": -0.709411084651947, "rewards/margins": 2.04428768157959, "rewards/rejected": -2.7536988258361816, "step": 310 }, { "epoch": 0.79, "learning_rate": 6.431007601814637e-08, "logits/chosen": -2.2670390605926514, "logits/rejected": -2.171151638031006, "logps/chosen": -525.9238891601562, "logps/rejected": -428.8578186035156, "loss": 0.0967, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9797876477241516, "rewards/margins": 2.1045024394989014, "rewards/rejected": -3.084290027618408, "step": 320 }, { "epoch": 0.81, "learning_rate": 5.0572206951246e-08, "logits/chosen": -2.2066216468811035, "logits/rejected": -2.0978851318359375, "logps/chosen": -530.3206787109375, "logps/rejected": -422.1839904785156, "loss": 0.0962, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.7779099941253662, "rewards/margins": 2.2902636528015137, "rewards/rejected": -3.068173885345459, "step": 330 }, { "epoch": 0.84, "learning_rate": 3.831895019292897e-08, "logits/chosen": -2.2389538288116455, "logits/rejected": -2.1464695930480957, "logps/chosen": -548.3888549804688, "logps/rejected": -455.033935546875, "loss": 0.0911, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.8341192007064819, "rewards/margins": 2.394991874694824, "rewards/rejected": -3.2291111946105957, "step": 340 }, { "epoch": 0.86, "learning_rate": 2.764152339909756e-08, "logits/chosen": -2.2050280570983887, "logits/rejected": -2.1107256412506104, "logps/chosen": -484.3163146972656, "logps/rejected": -392.2890319824219, "loss": 0.1003, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9081208109855652, "rewards/margins": 1.9725834131240845, "rewards/rejected": -2.880704402923584, "step": 350 }, { "epoch": 0.89, "learning_rate": 1.861941317991664e-08, "logits/chosen": -2.1940646171569824, "logits/rejected": -2.1099205017089844, "logps/chosen": -481.4151306152344, "logps/rejected": -388.9750061035156, "loss": 0.1004, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.9678090810775757, "rewards/margins": 1.6934540271759033, "rewards/rejected": -2.6612629890441895, "step": 360 }, { "epoch": 0.91, "learning_rate": 1.13197833728636e-08, "logits/chosen": -2.192645788192749, "logits/rejected": -2.105945110321045, "logps/chosen": -499.65277099609375, "logps/rejected": -396.80560302734375, "loss": 0.101, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.8237529993057251, "rewards/margins": 2.0019845962524414, "rewards/rejected": -2.825737714767456, "step": 370 }, { "epoch": 0.94, "learning_rate": 5.79697505093521e-09, "logits/chosen": -2.1727986335754395, "logits/rejected": -2.0996174812316895, "logps/chosen": -452.10504150390625, "logps/rejected": -356.10546875, "loss": 0.0947, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6749576330184937, "rewards/margins": 1.8276859521865845, "rewards/rejected": -2.5026438236236572, "step": 380 }, { "epoch": 0.96, "learning_rate": 2.092101988131256e-09, "logits/chosen": -2.247488498687744, "logits/rejected": -2.1600608825683594, "logps/chosen": -512.2217407226562, "logps/rejected": -402.2645568847656, "loss": 0.0923, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.7571254968643188, "rewards/margins": 2.047520399093628, "rewards/rejected": -2.8046462535858154, "step": 390 }, { "epoch": 0.99, "learning_rate": 2.327445937151673e-10, "logits/chosen": -2.178170680999756, "logits/rejected": -2.1037449836730957, "logps/chosen": -498.6328125, "logps/rejected": -398.8753967285156, "loss": 0.11, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.8009634017944336, "rewards/margins": 1.9531538486480713, "rewards/rejected": -2.754117250442505, "step": 400 }, { "epoch": 1.0, "step": 405, "total_flos": 0.0, "train_loss": 0.15393988585766452, "train_runtime": 3212.9353, "train_samples_per_second": 16.152, "train_steps_per_second": 0.126 } ], "logging_steps": 10, "max_steps": 405, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 0.0, "trial_name": null, "trial_params": null }