|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9974424552429667, |
|
"eval_steps": 500, |
|
"global_step": 195, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1310.024749740419, |
|
"learning_rate": 2.5e-08, |
|
"logits/chosen": -5.0504608154296875, |
|
"logits/rejected": -5.35328483581543, |
|
"logps/chosen": -242.7239990234375, |
|
"logps/rejected": -185.90835571289062, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1343.8700325036616, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -4.959235191345215, |
|
"logits/rejected": -5.051504135131836, |
|
"logps/chosen": -226.43630981445312, |
|
"logps/rejected": -216.47547912597656, |
|
"loss": 0.7205, |
|
"rewards/accuracies": 0.4479166567325592, |
|
"rewards/chosen": 0.07974544167518616, |
|
"rewards/margins": 0.013408761471509933, |
|
"rewards/rejected": 0.06633666902780533, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1443.7667771719773, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -4.906929969787598, |
|
"logits/rejected": -5.0118937492370605, |
|
"logps/chosen": -240.65188598632812, |
|
"logps/rejected": -220.84378051757812, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.7429171204566956, |
|
"rewards/margins": 1.1278517246246338, |
|
"rewards/rejected": -0.38493460416793823, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1641.6770420153719, |
|
"learning_rate": 4.959823971496574e-07, |
|
"logits/chosen": -4.913812637329102, |
|
"logits/rejected": -5.012935638427734, |
|
"logps/chosen": -238.8269805908203, |
|
"logps/rejected": -228.05404663085938, |
|
"loss": 0.8116, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": 1.8061437606811523, |
|
"rewards/margins": 4.523256301879883, |
|
"rewards/rejected": -2.7171127796173096, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1382.4291689510926, |
|
"learning_rate": 4.840587176599343e-07, |
|
"logits/chosen": -4.964416980743408, |
|
"logits/rejected": -5.0027852058410645, |
|
"logps/chosen": -249.1742706298828, |
|
"logps/rejected": -235.87576293945312, |
|
"loss": 0.9983, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": 1.3685696125030518, |
|
"rewards/margins": 4.053561210632324, |
|
"rewards/rejected": -2.6849913597106934, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1428.1508779981239, |
|
"learning_rate": 4.646121984004665e-07, |
|
"logits/chosen": -4.990395545959473, |
|
"logits/rejected": -5.134562015533447, |
|
"logps/chosen": -251.7528076171875, |
|
"logps/rejected": -226.17306518554688, |
|
"loss": 0.9987, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": 2.2698659896850586, |
|
"rewards/margins": 5.616934299468994, |
|
"rewards/rejected": -3.3470687866210938, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1429.7364912941882, |
|
"learning_rate": 4.3826786650090273e-07, |
|
"logits/chosen": -5.023388385772705, |
|
"logits/rejected": -5.144254684448242, |
|
"logps/chosen": -250.6563720703125, |
|
"logps/rejected": -241.12484741210938, |
|
"loss": 0.993, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": 1.217611312866211, |
|
"rewards/margins": 6.1895647048950195, |
|
"rewards/rejected": -4.97195291519165, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1385.9054301583744, |
|
"learning_rate": 4.058724504646834e-07, |
|
"logits/chosen": -4.992190361022949, |
|
"logits/rejected": -5.075345039367676, |
|
"logps/chosen": -256.97406005859375, |
|
"logps/rejected": -242.94003295898438, |
|
"loss": 1.1539, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 2.1734097003936768, |
|
"rewards/margins": 5.453003883361816, |
|
"rewards/rejected": -3.2795944213867188, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1267.3737422156325, |
|
"learning_rate": 3.6846716561824967e-07, |
|
"logits/chosen": -5.066686630249023, |
|
"logits/rejected": -5.165375709533691, |
|
"logps/chosen": -246.781982421875, |
|
"logps/rejected": -232.3020477294922, |
|
"loss": 1.1127, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 2.182149887084961, |
|
"rewards/margins": 6.110042095184326, |
|
"rewards/rejected": -3.927891492843628, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1414.9882610729042, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logits/chosen": -5.056512355804443, |
|
"logits/rejected": -5.19997501373291, |
|
"logps/chosen": -236.23886108398438, |
|
"logps/rejected": -219.4969940185547, |
|
"loss": 1.1651, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 2.3071811199188232, |
|
"rewards/margins": 4.593169212341309, |
|
"rewards/rejected": -2.2859878540039062, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1730.7459110414102, |
|
"learning_rate": 2.8355831645441387e-07, |
|
"logits/chosen": -5.051321506500244, |
|
"logits/rejected": -5.197503089904785, |
|
"logps/chosen": -245.94680786132812, |
|
"logps/rejected": -224.7979278564453, |
|
"loss": 1.1049, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 2.0447471141815186, |
|
"rewards/margins": 3.989384412765503, |
|
"rewards/rejected": -1.9446370601654053, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1376.721155787266, |
|
"learning_rate": 2.3878379241237134e-07, |
|
"logits/chosen": -5.05279541015625, |
|
"logits/rejected": -5.2380499839782715, |
|
"logps/chosen": -231.46408081054688, |
|
"logps/rejected": -221.2686309814453, |
|
"loss": 1.0653, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 2.9433412551879883, |
|
"rewards/margins": 7.433489799499512, |
|
"rewards/rejected": -4.490148544311523, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1298.5481767381427, |
|
"learning_rate": 1.9436976651092142e-07, |
|
"logits/chosen": -4.989577293395996, |
|
"logits/rejected": -5.143449306488037, |
|
"logps/chosen": -250.3534698486328, |
|
"logps/rejected": -237.04074096679688, |
|
"loss": 1.0694, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": 2.3243861198425293, |
|
"rewards/margins": 8.470600128173828, |
|
"rewards/rejected": -6.146214485168457, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1456.9702892975145, |
|
"learning_rate": 1.517437420865191e-07, |
|
"logits/chosen": -5.036610126495361, |
|
"logits/rejected": -5.181552886962891, |
|
"logps/chosen": -234.2519073486328, |
|
"logps/rejected": -226.05050659179688, |
|
"loss": 1.1374, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 2.612969160079956, |
|
"rewards/margins": 6.129396915435791, |
|
"rewards/rejected": -3.516427516937256, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1414.11944634508, |
|
"learning_rate": 1.1227575463697439e-07, |
|
"logits/chosen": -5.011117458343506, |
|
"logits/rejected": -5.0677995681762695, |
|
"logps/chosen": -246.2405242919922, |
|
"logps/rejected": -240.97647094726562, |
|
"loss": 1.0012, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 2.1312901973724365, |
|
"rewards/margins": 6.49268102645874, |
|
"rewards/rejected": -4.361390590667725, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1391.6252979817953, |
|
"learning_rate": 7.723433775328384e-08, |
|
"logits/chosen": -5.031737327575684, |
|
"logits/rejected": -5.141982078552246, |
|
"logps/chosen": -247.31640625, |
|
"logps/rejected": -245.01284790039062, |
|
"loss": 1.0468, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 3.413778781890869, |
|
"rewards/margins": 8.60617446899414, |
|
"rewards/rejected": -5.19239616394043, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1305.4800329449993, |
|
"learning_rate": 4.774575140626316e-08, |
|
"logits/chosen": -4.959289073944092, |
|
"logits/rejected": -5.040767192840576, |
|
"logps/chosen": -253.7027587890625, |
|
"logps/rejected": -250.91659545898438, |
|
"loss": 0.9992, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": 3.046278476715088, |
|
"rewards/margins": 8.344175338745117, |
|
"rewards/rejected": -5.297896862030029, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1228.1104796269808, |
|
"learning_rate": 2.475778302439524e-08, |
|
"logits/chosen": -5.096159934997559, |
|
"logits/rejected": -5.178959369659424, |
|
"logps/chosen": -251.2628631591797, |
|
"logps/rejected": -233.06857299804688, |
|
"loss": 1.0057, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": 2.824694871902466, |
|
"rewards/margins": 6.200740814208984, |
|
"rewards/rejected": -3.3760459423065186, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1348.827014256151, |
|
"learning_rate": 9.009284826036689e-09, |
|
"logits/chosen": -4.995651721954346, |
|
"logits/rejected": -5.102165222167969, |
|
"logps/chosen": -237.61990356445312, |
|
"logps/rejected": -232.7886962890625, |
|
"loss": 0.9321, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": 2.423119068145752, |
|
"rewards/margins": 4.8792009353637695, |
|
"rewards/rejected": -2.4560813903808594, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1117.1672982866971, |
|
"learning_rate": 1.0064265011902328e-09, |
|
"logits/chosen": -5.071808815002441, |
|
"logits/rejected": -5.110179901123047, |
|
"logps/chosen": -236.14224243164062, |
|
"logps/rejected": -233.5693359375, |
|
"loss": 0.9891, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 1.8652112483978271, |
|
"rewards/margins": 5.820201873779297, |
|
"rewards/rejected": -3.9549899101257324, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 195, |
|
"total_flos": 0.0, |
|
"train_loss": 0.9975380127246564, |
|
"train_runtime": 5482.1546, |
|
"train_samples_per_second": 9.12, |
|
"train_steps_per_second": 0.036 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 195, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|