|
{ |
|
"best_metric": 0.003762652399018407, |
|
"best_model_checkpoint": "./Zephyr/28-03-24-Weni-WeniGPT-QA-Zephyr-7B-4.0.1-KTO_WeniGPT Experiment using KTO trainer with no collator, Zephyr model and no system prompt.-2_max_steps-786_batch_32_2024-03-28_ppid_9/checkpoint-400", |
|
"epoch": 5.293005671077505, |
|
"eval_steps": 50, |
|
"global_step": 700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.94673752784729, |
|
"kl": 0.3060356676578522, |
|
"learning_rate": 0.0001666666666666667, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.0194, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.799897313117981, |
|
"kl": 0.08012839406728745, |
|
"learning_rate": 0.00019580052493438322, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.204, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -120.33751678466797, |
|
"eval_logps/rejected": -405.6329345703125, |
|
"eval_loss": 0.02746938355267048, |
|
"eval_rewards/chosen": 5.632839202880859, |
|
"eval_rewards/margins": 25.979337692260742, |
|
"eval_rewards/rejected": -20.346500396728516, |
|
"eval_runtime": 215.3361, |
|
"eval_samples_per_second": 2.322, |
|
"eval_steps_per_second": 0.58, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.1745920479297638, |
|
"kl": 0.20441873371601105, |
|
"learning_rate": 0.0001905511811023622, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0962, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.8645330667495728, |
|
"kl": 0.0, |
|
"learning_rate": 0.00018556430446194227, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0881, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.13575485348701477, |
|
"kl": 0.0, |
|
"learning_rate": 0.00018031496062992125, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.073, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -118.76765441894531, |
|
"eval_logps/rejected": -394.83203125, |
|
"eval_loss": 0.014343788847327232, |
|
"eval_rewards/chosen": 5.789826393127441, |
|
"eval_rewards/margins": 25.05623435974121, |
|
"eval_rewards/rejected": -19.266408920288086, |
|
"eval_runtime": 215.2593, |
|
"eval_samples_per_second": 2.323, |
|
"eval_steps_per_second": 0.581, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.04445016011595726, |
|
"kl": 0.25088077783584595, |
|
"learning_rate": 0.0001750656167979003, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0312, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.08600552380084991, |
|
"kl": 0.0, |
|
"learning_rate": 0.00016981627296587927, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0553, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -118.54534149169922, |
|
"eval_logps/rejected": -501.9826354980469, |
|
"eval_loss": 0.022491294890642166, |
|
"eval_rewards/chosen": 5.812057018280029, |
|
"eval_rewards/margins": 35.79352569580078, |
|
"eval_rewards/rejected": -29.981468200683594, |
|
"eval_runtime": 215.2877, |
|
"eval_samples_per_second": 2.322, |
|
"eval_steps_per_second": 0.581, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 1.7538838386535645, |
|
"kl": 0.08890169113874435, |
|
"learning_rate": 0.00016456692913385828, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0322, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 0.1595699042081833, |
|
"kl": 0.0, |
|
"learning_rate": 0.00015931758530183726, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0108, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 1.2758983373641968, |
|
"kl": 0.011286890134215355, |
|
"learning_rate": 0.0001540682414698163, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0232, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -112.15116882324219, |
|
"eval_logps/rejected": -478.0784912109375, |
|
"eval_loss": 0.0047710007056593895, |
|
"eval_rewards/chosen": 6.451474189758301, |
|
"eval_rewards/margins": 34.04253005981445, |
|
"eval_rewards/rejected": -27.591054916381836, |
|
"eval_runtime": 215.2869, |
|
"eval_samples_per_second": 2.322, |
|
"eval_steps_per_second": 0.581, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 0.04578682407736778, |
|
"kl": 0.08650239557027817, |
|
"learning_rate": 0.00014881889763779528, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0095, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 4.036252498626709, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001435695538057743, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0519, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -111.8521728515625, |
|
"eval_logps/rejected": -507.0782470703125, |
|
"eval_loss": 0.008064903318881989, |
|
"eval_rewards/chosen": 6.481375217437744, |
|
"eval_rewards/margins": 36.97240447998047, |
|
"eval_rewards/rejected": -30.491031646728516, |
|
"eval_runtime": 215.2836, |
|
"eval_samples_per_second": 2.323, |
|
"eval_steps_per_second": 0.581, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 0.027836302295327187, |
|
"kl": 0.10306696593761444, |
|
"learning_rate": 0.00013832020997375327, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0103, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 0.5005787014961243, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001330708661417323, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0185, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 0.010965784080326557, |
|
"kl": 0.008135443553328514, |
|
"learning_rate": 0.0001278215223097113, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0095, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -112.58515930175781, |
|
"eval_logps/rejected": -541.0062866210938, |
|
"eval_loss": 0.0153852179646492, |
|
"eval_rewards/chosen": 6.408076286315918, |
|
"eval_rewards/margins": 40.29190444946289, |
|
"eval_rewards/rejected": -33.88383102416992, |
|
"eval_runtime": 215.4103, |
|
"eval_samples_per_second": 2.321, |
|
"eval_steps_per_second": 0.58, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 0.03853553906083107, |
|
"kl": 0.09816194325685501, |
|
"learning_rate": 0.0001225721784776903, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0074, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 0.019064167514443398, |
|
"kl": 0.0, |
|
"learning_rate": 0.00011732283464566928, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0098, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -110.7034683227539, |
|
"eval_logps/rejected": -614.9013671875, |
|
"eval_loss": 0.005230295471847057, |
|
"eval_rewards/chosen": 6.596245288848877, |
|
"eval_rewards/margins": 47.869590759277344, |
|
"eval_rewards/rejected": -41.27334213256836, |
|
"eval_runtime": 215.26, |
|
"eval_samples_per_second": 2.323, |
|
"eval_steps_per_second": 0.581, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"grad_norm": 0.011980261653661728, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001120734908136483, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0147, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 0.02171475999057293, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001068241469816273, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0066, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 0.013751395046710968, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001015748031496063, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0038, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -111.44074249267578, |
|
"eval_logps/rejected": -539.215576171875, |
|
"eval_loss": 0.003762652399018407, |
|
"eval_rewards/chosen": 6.522518157958984, |
|
"eval_rewards/margins": 40.22727966308594, |
|
"eval_rewards/rejected": -33.70476531982422, |
|
"eval_runtime": 215.2203, |
|
"eval_samples_per_second": 2.323, |
|
"eval_steps_per_second": 0.581, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"grad_norm": 0.01895367167890072, |
|
"kl": 0.11727146804332733, |
|
"learning_rate": 9.63254593175853e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.003, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"grad_norm": 0.024156125262379646, |
|
"kl": 0.0, |
|
"learning_rate": 9.10761154855643e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0068, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -113.2169418334961, |
|
"eval_logps/rejected": -635.6953735351562, |
|
"eval_loss": 0.007997407577931881, |
|
"eval_rewards/chosen": 6.34489631652832, |
|
"eval_rewards/margins": 49.6976432800293, |
|
"eval_rewards/rejected": -43.35274887084961, |
|
"eval_runtime": 215.2157, |
|
"eval_samples_per_second": 2.323, |
|
"eval_steps_per_second": 0.581, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"grad_norm": 0.011542567051947117, |
|
"kl": 0.0, |
|
"learning_rate": 8.582677165354331e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0049, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"grad_norm": 0.004607899580150843, |
|
"kl": 0.0, |
|
"learning_rate": 8.05774278215223e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0044, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"grad_norm": 0.01574736088514328, |
|
"kl": 0.0, |
|
"learning_rate": 7.532808398950132e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0037, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -111.0267562866211, |
|
"eval_logps/rejected": -647.2003784179688, |
|
"eval_loss": 0.007149172946810722, |
|
"eval_rewards/chosen": 6.56391716003418, |
|
"eval_rewards/margins": 51.067161560058594, |
|
"eval_rewards/rejected": -44.50325012207031, |
|
"eval_runtime": 215.232, |
|
"eval_samples_per_second": 2.323, |
|
"eval_steps_per_second": 0.581, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"grad_norm": 0.006896671839058399, |
|
"kl": 0.0, |
|
"learning_rate": 7.007874015748031e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0128, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.011443168856203556, |
|
"kl": 0.0, |
|
"learning_rate": 6.482939632545932e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0032, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -110.33329010009766, |
|
"eval_logps/rejected": -497.2630615234375, |
|
"eval_loss": 0.008454231545329094, |
|
"eval_rewards/chosen": 6.633263111114502, |
|
"eval_rewards/margins": 36.14277648925781, |
|
"eval_rewards/rejected": -29.50951385498047, |
|
"eval_runtime": 215.2369, |
|
"eval_samples_per_second": 2.323, |
|
"eval_steps_per_second": 0.581, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"grad_norm": 0.010104779154062271, |
|
"kl": 0.17969775199890137, |
|
"learning_rate": 5.958005249343832e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0361, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"grad_norm": 0.006650259718298912, |
|
"kl": 0.0, |
|
"learning_rate": 5.433070866141733e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.005, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"grad_norm": 0.007144566159695387, |
|
"kl": 0.0, |
|
"learning_rate": 4.908136482939633e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0029, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -111.09209442138672, |
|
"eval_logps/rejected": -623.0257568359375, |
|
"eval_loss": 0.0048020947724580765, |
|
"eval_rewards/chosen": 6.557381629943848, |
|
"eval_rewards/margins": 48.643157958984375, |
|
"eval_rewards/rejected": -42.08577346801758, |
|
"eval_runtime": 215.4697, |
|
"eval_samples_per_second": 2.321, |
|
"eval_steps_per_second": 0.58, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"grad_norm": 0.019000574946403503, |
|
"kl": 0.0, |
|
"learning_rate": 4.383202099737533e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0036, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"grad_norm": 0.005893173161894083, |
|
"kl": 0.0, |
|
"learning_rate": 3.858267716535433e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0028, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -110.00257873535156, |
|
"eval_logps/rejected": -615.8130493164062, |
|
"eval_loss": 0.0040972670540213585, |
|
"eval_rewards/chosen": 6.6663336753845215, |
|
"eval_rewards/margins": 48.030853271484375, |
|
"eval_rewards/rejected": -41.36451721191406, |
|
"eval_runtime": 215.5947, |
|
"eval_samples_per_second": 2.319, |
|
"eval_steps_per_second": 0.58, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"grad_norm": 0.007841067388653755, |
|
"kl": 0.0, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0024, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"grad_norm": 0.0067055970430374146, |
|
"kl": 0.0, |
|
"learning_rate": 2.8083989501312334e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0028, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"grad_norm": 0.008701217360794544, |
|
"kl": 0.10406114161014557, |
|
"learning_rate": 2.283464566929134e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0032, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -109.8930892944336, |
|
"eval_logps/rejected": -614.48583984375, |
|
"eval_loss": 0.003961743786931038, |
|
"eval_rewards/chosen": 6.67728328704834, |
|
"eval_rewards/margins": 47.9090690612793, |
|
"eval_rewards/rejected": -41.231788635253906, |
|
"eval_runtime": 215.5304, |
|
"eval_samples_per_second": 2.32, |
|
"eval_steps_per_second": 0.58, |
|
"step": 700 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 786, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|