|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.968, |
|
"eval_steps": 100, |
|
"global_step": 248, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"logits/chosen": 0.02732202410697937, |
|
"logits/rejected": 0.16736462712287903, |
|
"logps/chosen": -204.44515991210938, |
|
"logps/rejected": -186.30474853515625, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": 0.11495557427406311, |
|
"logits/rejected": 0.14849303662776947, |
|
"logps/chosen": -174.2774658203125, |
|
"logps/rejected": -139.304443359375, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.3958333432674408, |
|
"rewards/chosen": 0.0010303932940587401, |
|
"rewards/margins": 0.0013937298208475113, |
|
"rewards/rejected": -0.0003633367014117539, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": 0.19859905540943146, |
|
"logits/rejected": 0.2755558490753174, |
|
"logps/chosen": -186.06753540039062, |
|
"logps/rejected": -150.23538208007812, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.0006829313351772726, |
|
"rewards/margins": 0.0010883348295465112, |
|
"rewards/rejected": -0.0017712658736854792, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.993800445762451e-06, |
|
"logits/chosen": 0.10206829011440277, |
|
"logits/rejected": 0.09731761366128922, |
|
"logps/chosen": -189.70846557617188, |
|
"logps/rejected": -176.63827514648438, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": 0.0010909180855378509, |
|
"rewards/margins": 0.0010104707907885313, |
|
"rewards/rejected": 8.044719288591295e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.944388344834205e-06, |
|
"logits/chosen": 0.21991512179374695, |
|
"logits/rejected": 0.13409800827503204, |
|
"logps/chosen": -178.78292846679688, |
|
"logps/rejected": -151.7918243408203, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.0007118875510059297, |
|
"rewards/margins": -0.000794673920609057, |
|
"rewards/rejected": 8.278638415504247e-05, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.8465431931347904e-06, |
|
"logits/chosen": 0.10882525146007538, |
|
"logits/rejected": 0.16875343024730682, |
|
"logps/chosen": -185.3433074951172, |
|
"logps/rejected": -174.74209594726562, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": 0.0009307868895120919, |
|
"rewards/margins": 0.000729514576960355, |
|
"rewards/rejected": 0.00020127242896705866, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.702203692102539e-06, |
|
"logits/chosen": 0.1939707249403, |
|
"logits/rejected": 0.18511822819709778, |
|
"logps/chosen": -194.64193725585938, |
|
"logps/rejected": -159.30172729492188, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": 0.0016695528756827116, |
|
"rewards/margins": 8.004475239431486e-05, |
|
"rewards/rejected": 0.0015895080287009478, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.514229781074239e-06, |
|
"logits/chosen": 0.1889864206314087, |
|
"logits/rejected": 0.1525781750679016, |
|
"logps/chosen": -196.69769287109375, |
|
"logps/rejected": -172.0983428955078, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": 0.0013059942284598947, |
|
"rewards/margins": -0.0003628497361205518, |
|
"rewards/rejected": 0.0016688440227881074, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.286345970517195e-06, |
|
"logits/chosen": 0.1526193767786026, |
|
"logits/rejected": 0.13564926385879517, |
|
"logps/chosen": -176.2852020263672, |
|
"logps/rejected": -149.50759887695312, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": 0.0020406683906912804, |
|
"rewards/margins": 0.0011222332250326872, |
|
"rewards/rejected": 0.0009184351074509323, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.023067544670082e-06, |
|
"logits/chosen": 0.19515976309776306, |
|
"logits/rejected": 0.16219770908355713, |
|
"logps/chosen": -183.90406799316406, |
|
"logps/rejected": -157.0895538330078, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.001846942352131009, |
|
"rewards/margins": -0.0017863952089101076, |
|
"rewards/rejected": -6.0547237808350474e-05, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.7296110958116845e-06, |
|
"logits/chosen": 0.16685011982917786, |
|
"logits/rejected": 0.05337408185005188, |
|
"logps/chosen": -177.84762573242188, |
|
"logps/rejected": -152.65464782714844, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.0007909245905466378, |
|
"rewards/margins": 0.0006749060703441501, |
|
"rewards/rejected": -0.001465830602683127, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_logits/chosen": 0.0015224728267639875, |
|
"eval_logits/rejected": 0.09820695966482162, |
|
"eval_logps/chosen": -306.2193603515625, |
|
"eval_logps/rejected": -278.5382080078125, |
|
"eval_loss": 0.001858623931184411, |
|
"eval_rewards/accuracies": 0.5149999856948853, |
|
"eval_rewards/chosen": 0.0015068423235788941, |
|
"eval_rewards/margins": 0.0008032902260310948, |
|
"eval_rewards/rejected": 0.0007035521557554603, |
|
"eval_runtime": 420.6562, |
|
"eval_samples_per_second": 4.754, |
|
"eval_steps_per_second": 1.189, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.4117911628292944e-06, |
|
"logits/chosen": 0.22267238795757294, |
|
"logits/rejected": 0.19200441241264343, |
|
"logps/chosen": -205.3648223876953, |
|
"logps/rejected": -172.17208862304688, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.002072205301374197, |
|
"rewards/margins": -0.0004376435244921595, |
|
"rewards/rejected": -0.0016345620388165116, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.075905022087675e-06, |
|
"logits/chosen": 0.1819857358932495, |
|
"logits/rejected": 0.23743709921836853, |
|
"logps/chosen": -184.61007690429688, |
|
"logps/rejected": -160.8583221435547, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 0.0009784279391169548, |
|
"rewards/margins": 0.0018532350659370422, |
|
"rewards/rejected": -0.000874807417858392, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2.728607913349464e-06, |
|
"logits/chosen": 0.09743748605251312, |
|
"logits/rejected": 0.11662141233682632, |
|
"logps/chosen": -176.29867553710938, |
|
"logps/rejected": -140.44961547851562, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0014981284039095044, |
|
"rewards/margins": 0.0015437586698681116, |
|
"rewards/rejected": -4.563046604744159e-05, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.376781173017589e-06, |
|
"logits/chosen": 0.03450363129377365, |
|
"logits/rejected": 0.09166844189167023, |
|
"logps/chosen": -191.46543884277344, |
|
"logps/rejected": -154.39138793945312, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.001418759347870946, |
|
"rewards/margins": 0.0005404851399362087, |
|
"rewards/rejected": 0.0008782741497270763, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.0273958875043877e-06, |
|
"logits/chosen": 0.15574321150779724, |
|
"logits/rejected": 0.15428531169891357, |
|
"logps/chosen": -179.232421875, |
|
"logps/rejected": -154.8232421875, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.0011932613560929894, |
|
"rewards/margins": 0.0013866318622604012, |
|
"rewards/rejected": -0.00019337031699251384, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.6873747682962393e-06, |
|
"logits/chosen": 0.2517469525337219, |
|
"logits/rejected": 0.1588711440563202, |
|
"logps/chosen": -193.44821166992188, |
|
"logps/rejected": -170.86611938476562, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.00030686514219269156, |
|
"rewards/margins": 0.0002994650858454406, |
|
"rewards/rejected": -0.0006063304608687758, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.363454985517803e-06, |
|
"logits/chosen": 0.20987768471240997, |
|
"logits/rejected": 0.08343996852636337, |
|
"logps/chosen": -187.78610229492188, |
|
"logps/rejected": -167.81800842285156, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.002795418258756399, |
|
"rewards/margins": 0.0018930940423160791, |
|
"rewards/rejected": 0.0009023241582326591, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.062054677808238e-06, |
|
"logits/chosen": 0.23503074049949646, |
|
"logits/rejected": 0.2113850861787796, |
|
"logps/chosen": -186.79861450195312, |
|
"logps/rejected": -162.50196838378906, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": 0.0017884777626022696, |
|
"rewards/margins": 0.0021424146834760904, |
|
"rewards/rejected": -0.0003539369790814817, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 7.891457834794711e-07, |
|
"logits/chosen": 0.138333261013031, |
|
"logits/rejected": 0.21794256567955017, |
|
"logps/chosen": -179.44418334960938, |
|
"logps/rejected": -158.67454528808594, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 0.0017921695252880454, |
|
"rewards/margins": 0.0020220079459249973, |
|
"rewards/rejected": -0.00022983844974078238, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 5.501357126768117e-07, |
|
"logits/chosen": 0.2148284912109375, |
|
"logits/rejected": 0.1433248072862625, |
|
"logps/chosen": -186.4739532470703, |
|
"logps/rejected": -167.48558044433594, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": 0.002506103366613388, |
|
"rewards/margins": 0.0016806632047519088, |
|
"rewards/rejected": 0.0008254402200691402, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_logits/chosen": -0.00685009965673089, |
|
"eval_logits/rejected": 0.09018866717815399, |
|
"eval_logps/chosen": -306.28704833984375, |
|
"eval_logps/rejected": -278.6018981933594, |
|
"eval_loss": 0.0019364985637366772, |
|
"eval_rewards/accuracies": 0.5195000171661377, |
|
"eval_rewards/chosen": 0.0008296637679450214, |
|
"eval_rewards/margins": 0.0007632386405020952, |
|
"eval_rewards/rejected": 6.642500375164673e-05, |
|
"eval_runtime": 420.9095, |
|
"eval_samples_per_second": 4.752, |
|
"eval_steps_per_second": 1.188, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 3.4976020508682345e-07, |
|
"logits/chosen": 0.10354860126972198, |
|
"logits/rejected": 0.1740628331899643, |
|
"logps/chosen": -187.9175262451172, |
|
"logps/rejected": -160.59580993652344, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.0003708422009367496, |
|
"rewards/margins": 0.0009719420922920108, |
|
"rewards/rejected": -0.0006010999786667526, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 1.9198949610721273e-07, |
|
"logits/chosen": 0.14361225068569183, |
|
"logits/rejected": 0.09037239849567413, |
|
"logps/chosen": -183.87716674804688, |
|
"logps/rejected": -157.98300170898438, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.0009205196984112263, |
|
"rewards/margins": 0.0023204255849123, |
|
"rewards/rejected": -0.0013999061193317175, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 7.994965069994143e-08, |
|
"logits/chosen": 0.20537514984607697, |
|
"logits/rejected": 0.18879783153533936, |
|
"logps/chosen": -193.15943908691406, |
|
"logps/rejected": -162.2894744873047, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.00020981582929380238, |
|
"rewards/margins": 0.0006767899030819535, |
|
"rewards/rejected": -0.0008866057032719254, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 1.5860623616664183e-08, |
|
"logits/chosen": 0.13774822652339935, |
|
"logits/rejected": 0.09208400547504425, |
|
"logps/chosen": -189.96868896484375, |
|
"logps/rejected": -159.98782348632812, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -6.72071473672986e-06, |
|
"rewards/margins": -0.0008294621366076171, |
|
"rewards/rejected": 0.0008227415382862091, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"step": 248, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0014411601897013643, |
|
"train_runtime": 2755.6406, |
|
"train_samples_per_second": 1.452, |
|
"train_steps_per_second": 0.09 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 248, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|