|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 100, |
|
"global_step": 312, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.5625e-08, |
|
"logits/chosen": -3.110421895980835, |
|
"logits/rejected": -3.134347915649414, |
|
"logps/chosen": -514.6908569335938, |
|
"logps/rejected": -579.9437255859375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -3.101275682449341, |
|
"logits/rejected": -3.0809450149536133, |
|
"logps/chosen": -515.6033935546875, |
|
"logps/rejected": -643.0913696289062, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5069444179534912, |
|
"rewards/chosen": -0.003898666240274906, |
|
"rewards/margins": 0.00284082000143826, |
|
"rewards/rejected": -0.006739485543221235, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -3.13663387298584, |
|
"logits/rejected": -3.1290111541748047, |
|
"logps/chosen": -521.3823852539062, |
|
"logps/rejected": -703.0851440429688, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.14891520142555237, |
|
"rewards/margins": 0.05075854808092117, |
|
"rewards/rejected": -0.19967375695705414, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -3.032599687576294, |
|
"logits/rejected": -3.0084524154663086, |
|
"logps/chosen": -572.4000854492188, |
|
"logps/rejected": -851.7752075195312, |
|
"loss": 0.5831, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.057348370552063, |
|
"rewards/margins": 0.3227284550666809, |
|
"rewards/rejected": -1.3800770044326782, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.989935734988097e-07, |
|
"logits/chosen": -3.5056405067443848, |
|
"logits/rejected": -3.5222296714782715, |
|
"logps/chosen": -622.5767822265625, |
|
"logps/rejected": -858.5099487304688, |
|
"loss": 0.5609, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.6478599309921265, |
|
"rewards/margins": 0.43607082962989807, |
|
"rewards/rejected": -2.083930730819702, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.949188496058089e-07, |
|
"logits/chosen": -3.6517977714538574, |
|
"logits/rejected": -3.6317756175994873, |
|
"logps/chosen": -670.474609375, |
|
"logps/rejected": -942.6395263671875, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0249228477478027, |
|
"rewards/margins": 0.6633843779563904, |
|
"rewards/rejected": -2.688307762145996, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.877641290737883e-07, |
|
"logits/chosen": -3.650251865386963, |
|
"logits/rejected": -3.607196807861328, |
|
"logps/chosen": -704.299072265625, |
|
"logps/rejected": -1045.16259765625, |
|
"loss": 0.3914, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.3376059532165527, |
|
"rewards/margins": 0.9999262690544128, |
|
"rewards/rejected": -3.3375325202941895, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.776193866647039e-07, |
|
"logits/chosen": -3.519620895385742, |
|
"logits/rejected": -3.5314018726348877, |
|
"logps/chosen": -829.9962768554688, |
|
"logps/rejected": -1192.3013916015625, |
|
"loss": 0.3559, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.2941513061523438, |
|
"rewards/margins": 1.215731143951416, |
|
"rewards/rejected": -4.50988245010376, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.646121984004665e-07, |
|
"logits/chosen": -3.396796464920044, |
|
"logits/rejected": -3.372447967529297, |
|
"logps/chosen": -777.1376342773438, |
|
"logps/rejected": -1117.571533203125, |
|
"loss": 0.373, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.0304834842681885, |
|
"rewards/margins": 1.2057898044586182, |
|
"rewards/rejected": -4.236273288726807, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.489061372204452e-07, |
|
"logits/chosen": -3.3173828125, |
|
"logits/rejected": -3.3279190063476562, |
|
"logps/chosen": -800.7711181640625, |
|
"logps/rejected": -1121.0828857421875, |
|
"loss": 0.37, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.910794496536255, |
|
"rewards/margins": 1.0732452869415283, |
|
"rewards/rejected": -3.9840400218963623, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3069871595684787e-07, |
|
"logits/chosen": -3.256371259689331, |
|
"logits/rejected": -3.238799571990967, |
|
"logps/chosen": -774.020263671875, |
|
"logps/rejected": -1094.2913818359375, |
|
"loss": 0.3678, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.9788336753845215, |
|
"rewards/margins": 1.1674504280090332, |
|
"rewards/rejected": -4.146284103393555, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_logits/chosen": -3.2330105304718018, |
|
"eval_logits/rejected": -3.2211368083953857, |
|
"eval_logps/chosen": -894.1544799804688, |
|
"eval_logps/rejected": -930.3583984375, |
|
"eval_loss": 0.8166332244873047, |
|
"eval_rewards/accuracies": 0.5703125, |
|
"eval_rewards/chosen": -3.5115277767181396, |
|
"eval_rewards/margins": 0.11863362044095993, |
|
"eval_rewards/rejected": -3.6301612854003906, |
|
"eval_runtime": 133.793, |
|
"eval_samples_per_second": 7.474, |
|
"eval_steps_per_second": 0.239, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.10218903496256e-07, |
|
"logits/chosen": -3.049738645553589, |
|
"logits/rejected": -3.0541279315948486, |
|
"logps/chosen": -837.93798828125, |
|
"logps/rejected": -1120.489501953125, |
|
"loss": 0.3897, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.197683572769165, |
|
"rewards/margins": 0.9859063029289246, |
|
"rewards/rejected": -4.183589935302734, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.877242453630256e-07, |
|
"logits/chosen": -2.862804889678955, |
|
"logits/rejected": -2.847139835357666, |
|
"logps/chosen": -793.1903076171875, |
|
"logps/rejected": -1099.2779541015625, |
|
"loss": 0.3824, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -2.867206573486328, |
|
"rewards/margins": 1.1447758674621582, |
|
"rewards/rejected": -4.011982440948486, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.634976249348867e-07, |
|
"logits/chosen": -2.801413059234619, |
|
"logits/rejected": -2.810880184173584, |
|
"logps/chosen": -794.3284912109375, |
|
"logps/rejected": -1098.575927734375, |
|
"loss": 0.3865, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.982292652130127, |
|
"rewards/margins": 1.1373599767684937, |
|
"rewards/rejected": -4.11965274810791, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.378437060203357e-07, |
|
"logits/chosen": -2.725770950317383, |
|
"logits/rejected": -2.6947712898254395, |
|
"logps/chosen": -855.1282958984375, |
|
"logps/rejected": -1178.1373291015625, |
|
"loss": 0.3907, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.1478772163391113, |
|
"rewards/margins": 1.088375449180603, |
|
"rewards/rejected": -4.236252784729004, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.110851015344735e-07, |
|
"logits/chosen": -2.901252269744873, |
|
"logits/rejected": -2.8886735439300537, |
|
"logps/chosen": -843.8787841796875, |
|
"logps/rejected": -1142.3704833984375, |
|
"loss": 0.3919, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.437390089035034, |
|
"rewards/margins": 1.1703314781188965, |
|
"rewards/rejected": -4.607722282409668, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8355831645441387e-07, |
|
"logits/chosen": -2.8201375007629395, |
|
"logits/rejected": -2.809413433074951, |
|
"logps/chosen": -826.0286865234375, |
|
"logps/rejected": -1086.612060546875, |
|
"loss": 0.4576, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.1525731086730957, |
|
"rewards/margins": 0.9918072819709778, |
|
"rewards/rejected": -4.144380569458008, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5560951607395126e-07, |
|
"logits/chosen": -2.9544239044189453, |
|
"logits/rejected": -2.957245349884033, |
|
"logps/chosen": -777.9389038085938, |
|
"logps/rejected": -1005.6591796875, |
|
"loss": 0.4832, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.6697211265563965, |
|
"rewards/margins": 0.7971886396408081, |
|
"rewards/rejected": -3.466909885406494, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2759017277414164e-07, |
|
"logits/chosen": -2.8567004203796387, |
|
"logits/rejected": -2.8372042179107666, |
|
"logps/chosen": -779.9255981445312, |
|
"logps/rejected": -981.6893310546875, |
|
"loss": 0.4884, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.700547456741333, |
|
"rewards/margins": 0.7202944755554199, |
|
"rewards/rejected": -3.420841932296753, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.998526460541818e-07, |
|
"logits/chosen": -2.821296215057373, |
|
"logits/rejected": -2.7948215007781982, |
|
"logps/chosen": -824.6710205078125, |
|
"logps/rejected": -1023.0182495117188, |
|
"loss": 0.5075, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.867779493331909, |
|
"rewards/margins": 0.6687132120132446, |
|
"rewards/rejected": -3.5364928245544434, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626315e-07, |
|
"logits/chosen": -2.842245101928711, |
|
"logits/rejected": -2.822925329208374, |
|
"logps/chosen": -775.6517333984375, |
|
"logps/rejected": -931.916015625, |
|
"loss": 0.5115, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.6696395874023438, |
|
"rewards/margins": 0.5579996705055237, |
|
"rewards/rejected": -3.2276394367218018, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_logits/chosen": -2.8897347450256348, |
|
"eval_logits/rejected": -2.870739221572876, |
|
"eval_logps/chosen": -840.2244262695312, |
|
"eval_logps/rejected": -875.9758911132812, |
|
"eval_loss": 0.7557607293128967, |
|
"eval_rewards/accuracies": 0.578125, |
|
"eval_rewards/chosen": -2.9722273349761963, |
|
"eval_rewards/margins": 0.11410895735025406, |
|
"eval_rewards/rejected": -3.086336135864258, |
|
"eval_runtime": 133.8945, |
|
"eval_samples_per_second": 7.469, |
|
"eval_steps_per_second": 0.239, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4661037375836987e-07, |
|
"logits/chosen": -2.8397040367126465, |
|
"logits/rejected": -2.827742338180542, |
|
"logps/chosen": -825.7728271484375, |
|
"logps/rejected": -1029.4368896484375, |
|
"loss": 0.5171, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.8868887424468994, |
|
"rewards/margins": 0.7782662510871887, |
|
"rewards/rejected": -3.6651549339294434, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2177518064852348e-07, |
|
"logits/chosen": -2.826509475708008, |
|
"logits/rejected": -2.8322367668151855, |
|
"logps/chosen": -816.0640869140625, |
|
"logps/rejected": -998.5843505859375, |
|
"loss": 0.5314, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.879359006881714, |
|
"rewards/margins": 0.658044695854187, |
|
"rewards/rejected": -3.5374042987823486, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.855248903979505e-08, |
|
"logits/chosen": -2.9330477714538574, |
|
"logits/rejected": -2.914445161819458, |
|
"logps/chosen": -791.6444702148438, |
|
"logps/rejected": -906.9136962890625, |
|
"loss": 0.5657, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.653576612472534, |
|
"rewards/margins": 0.42590421438217163, |
|
"rewards/rejected": -3.0794806480407715, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.723433775328384e-08, |
|
"logits/chosen": -2.9414100646972656, |
|
"logits/rejected": -2.9438042640686035, |
|
"logps/chosen": -771.1376953125, |
|
"logps/rejected": -921.2554931640625, |
|
"loss": 0.5599, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.4653267860412598, |
|
"rewards/margins": 0.5729144811630249, |
|
"rewards/rejected": -3.038240909576416, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.808881491049722e-08, |
|
"logits/chosen": -2.931709051132202, |
|
"logits/rejected": -2.920923948287964, |
|
"logps/chosen": -756.0911865234375, |
|
"logps/rejected": -916.9544677734375, |
|
"loss": 0.5833, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.339541435241699, |
|
"rewards/margins": 0.598496675491333, |
|
"rewards/rejected": -2.9380381107330322, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1356686569674335e-08, |
|
"logits/chosen": -2.885463237762451, |
|
"logits/rejected": -2.881591558456421, |
|
"logps/chosen": -859.6702270507812, |
|
"logps/rejected": -984.1055908203125, |
|
"loss": 0.5944, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.5945677757263184, |
|
"rewards/margins": 0.44614577293395996, |
|
"rewards/rejected": -3.0407137870788574, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.724836895290805e-08, |
|
"logits/chosen": -2.936840772628784, |
|
"logits/rejected": -2.935763359069824, |
|
"logps/chosen": -759.699951171875, |
|
"logps/rejected": -853.43603515625, |
|
"loss": 0.5844, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.392477035522461, |
|
"rewards/margins": 0.3930489718914032, |
|
"rewards/rejected": -2.7855257987976074, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5941282340065697e-08, |
|
"logits/chosen": -2.9058921337127686, |
|
"logits/rejected": -2.907921314239502, |
|
"logps/chosen": -818.0262451171875, |
|
"logps/rejected": -934.5661010742188, |
|
"loss": 0.5967, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.4692749977111816, |
|
"rewards/margins": 0.43090057373046875, |
|
"rewards/rejected": -2.9001753330230713, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.577619905828281e-09, |
|
"logits/chosen": -2.9472568035125732, |
|
"logits/rejected": -2.9564738273620605, |
|
"logps/chosen": -775.2022705078125, |
|
"logps/rejected": -874.6551513671875, |
|
"loss": 0.6383, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.3951363563537598, |
|
"rewards/margins": 0.4553650915622711, |
|
"rewards/rejected": -2.850501537322998, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.2625595580163247e-09, |
|
"logits/chosen": -2.9190421104431152, |
|
"logits/rejected": -2.8831028938293457, |
|
"logps/chosen": -839.2587890625, |
|
"logps/rejected": -975.5764770507812, |
|
"loss": 0.6156, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.542646884918213, |
|
"rewards/margins": 0.5185142755508423, |
|
"rewards/rejected": -3.0611610412597656, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_logits/chosen": -3.023275852203369, |
|
"eval_logits/rejected": -3.008624315261841, |
|
"eval_logps/chosen": -781.6083374023438, |
|
"eval_logps/rejected": -817.5390014648438, |
|
"eval_loss": 0.7196429967880249, |
|
"eval_rewards/accuracies": 0.5703125, |
|
"eval_rewards/chosen": -2.386066436767578, |
|
"eval_rewards/margins": 0.11590027809143066, |
|
"eval_rewards/rejected": -2.501966714859009, |
|
"eval_runtime": 133.3992, |
|
"eval_samples_per_second": 7.496, |
|
"eval_steps_per_second": 0.24, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.294126437336733e-11, |
|
"logits/chosen": -2.9566540718078613, |
|
"logits/rejected": -2.9173121452331543, |
|
"logps/chosen": -796.7486572265625, |
|
"logps/rejected": -895.9386596679688, |
|
"loss": 0.6191, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.3850722312927246, |
|
"rewards/margins": 0.37639713287353516, |
|
"rewards/rejected": -2.7614693641662598, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 312, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5059080181213526, |
|
"train_runtime": 5445.7158, |
|
"train_samples_per_second": 3.672, |
|
"train_steps_per_second": 0.057 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 312, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|