{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9992429977289932,
  "eval_steps": 100,
  "global_step": 165,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 2.9411764705882356e-07,
      "logits/chosen": -2.5782699584960938,
      "logits/rejected": -2.4518439769744873,
      "logps/chosen": -312.26776123046875,
      "logps/rejected": -214.7339324951172,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "learning_rate": 2.9411764705882355e-06,
      "logits/chosen": -2.643641233444214,
      "logits/rejected": -2.5239078998565674,
      "logps/chosen": -266.10455322265625,
      "logps/rejected": -183.74024963378906,
      "loss": 0.6918,
      "rewards/accuracies": 0.5763888955116272,
      "rewards/chosen": 0.004169768653810024,
      "rewards/margins": 0.003206828376278281,
      "rewards/rejected": 0.0009629399282857776,
      "step": 10
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.994932636402032e-06,
      "logits/chosen": -2.627925395965576,
      "logits/rejected": -2.516096830368042,
      "logps/chosen": -251.03671264648438,
      "logps/rejected": -191.53500366210938,
      "loss": 0.6733,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 0.07321886718273163,
      "rewards/margins": 0.03704284131526947,
      "rewards/rejected": 0.03617602214217186,
      "step": 20
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.905416503522124e-06,
      "logits/chosen": -2.591784715652466,
      "logits/rejected": -2.4847412109375,
      "logps/chosen": -269.05462646484375,
      "logps/rejected": -215.66268920898438,
      "loss": 0.6351,
      "rewards/accuracies": 0.746874988079071,
      "rewards/chosen": 0.00681548286229372,
      "rewards/margins": 0.12864618003368378,
      "rewards/rejected": -0.12183071672916412,
      "step": 30
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.707922373336524e-06,
      "logits/chosen": -2.4711194038391113,
      "logits/rejected": -2.359039783477783,
      "logps/chosen": -262.1103820800781,
      "logps/rejected": -221.37106323242188,
      "loss": 0.615,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.05128273367881775,
      "rewards/margins": 0.1722000390291214,
      "rewards/rejected": -0.22348275780677795,
      "step": 40
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.411315662967732e-06,
      "logits/chosen": -2.4395766258239746,
      "logits/rejected": -2.291720390319824,
      "logps/chosen": -260.3160705566406,
      "logps/rejected": -219.9590301513672,
      "loss": 0.5814,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": 0.013967704959213734,
      "rewards/margins": 0.27147507667541504,
      "rewards/rejected": -0.2575073838233948,
      "step": 50
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.028910905897229e-06,
      "logits/chosen": -2.4264254570007324,
      "logits/rejected": -2.2081563472747803,
      "logps/chosen": -284.1495666503906,
      "logps/rejected": -232.05477905273438,
      "loss": 0.5703,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.018397916108369827,
      "rewards/margins": 0.331583172082901,
      "rewards/rejected": -0.3499810993671417,
      "step": 60
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.577874068920446e-06,
      "logits/chosen": -2.405364751815796,
      "logits/rejected": -2.20412278175354,
      "logps/chosen": -281.3593444824219,
      "logps/rejected": -230.1668701171875,
      "loss": 0.5628,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -0.03580819442868233,
      "rewards/margins": 0.31651827692985535,
      "rewards/rejected": -0.3523264527320862,
      "step": 70
    },
    {
      "epoch": 0.48,
      "learning_rate": 3.0784519801008546e-06,
      "logits/chosen": -2.336822271347046,
      "logits/rejected": -2.152445077896118,
      "logps/chosen": -271.4667053222656,
      "logps/rejected": -227.0992889404297,
      "loss": 0.5528,
      "rewards/accuracies": 0.7593749761581421,
      "rewards/chosen": -0.05491885542869568,
      "rewards/margins": 0.3223936855792999,
      "rewards/rejected": -0.3773125112056732,
      "step": 80
    },
    {
      "epoch": 0.55,
      "learning_rate": 2.553063458334059e-06,
      "logits/chosen": -2.3475964069366455,
      "logits/rejected": -2.181095600128174,
      "logps/chosen": -268.07257080078125,
      "logps/rejected": -234.201904296875,
      "loss": 0.5639,
      "rewards/accuracies": 0.8031250238418579,
      "rewards/chosen": -0.08924304693937302,
      "rewards/margins": 0.3508160710334778,
      "rewards/rejected": -0.440059095621109,
      "step": 90
    },
    {
      "epoch": 0.61,
      "learning_rate": 2.025292943281429e-06,
      "logits/chosen": -2.2798125743865967,
      "logits/rejected": -2.2160840034484863,
      "logps/chosen": -270.85076904296875,
      "logps/rejected": -248.674072265625,
      "loss": 0.548,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -0.071611687541008,
      "rewards/margins": 0.3644859194755554,
      "rewards/rejected": -0.4360976219177246,
      "step": 100
    },
    {
      "epoch": 0.61,
      "eval_logits/chosen": -2.2954230308532715,
      "eval_logits/rejected": -2.1948401927948,
      "eval_logps/chosen": -296.5362548828125,
      "eval_logps/rejected": -282.4073791503906,
      "eval_loss": 0.6396384239196777,
      "eval_rewards/accuracies": 0.6259999871253967,
      "eval_rewards/chosen": -0.033822380006313324,
      "eval_rewards/margins": 0.1209036335349083,
      "eval_rewards/rejected": -0.1547260284423828,
      "eval_runtime": 383.8924,
      "eval_samples_per_second": 5.21,
      "eval_steps_per_second": 0.651,
      "step": 100
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.5188318011445907e-06,
      "logits/chosen": -2.293454170227051,
      "logits/rejected": -2.161669969558716,
      "logps/chosen": -272.22222900390625,
      "logps/rejected": -258.5532531738281,
      "loss": 0.5477,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -0.13500520586967468,
      "rewards/margins": 0.3878163695335388,
      "rewards/rejected": -0.5228215456008911,
      "step": 110
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.0564148305586296e-06,
      "logits/chosen": -2.346884250640869,
      "logits/rejected": -2.1674342155456543,
      "logps/chosen": -285.71630859375,
      "logps/rejected": -253.7765655517578,
      "loss": 0.5415,
      "rewards/accuracies": 0.778124988079071,
      "rewards/chosen": -0.0934138298034668,
      "rewards/margins": 0.37636226415634155,
      "rewards/rejected": -0.4697761535644531,
      "step": 120
    },
    {
      "epoch": 0.79,
      "learning_rate": 6.587997083462197e-07,
      "logits/chosen": -2.2985992431640625,
      "logits/rejected": -2.160468339920044,
      "logps/chosen": -288.93536376953125,
      "logps/rejected": -263.5223693847656,
      "loss": 0.534,
      "rewards/accuracies": 0.815625011920929,
      "rewards/chosen": -0.1033419594168663,
      "rewards/margins": 0.399946391582489,
      "rewards/rejected": -0.5032883882522583,
      "step": 130
    },
    {
      "epoch": 0.85,
      "learning_rate": 3.438351873250492e-07,
      "logits/chosen": -2.281365156173706,
      "logits/rejected": -2.1153156757354736,
      "logps/chosen": -278.9851989746094,
      "logps/rejected": -236.8888702392578,
      "loss": 0.5358,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.09803396463394165,
      "rewards/margins": 0.41163355112075806,
      "rewards/rejected": -0.5096675157546997,
      "step": 140
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.2565987432367032e-07,
      "logits/chosen": -2.314439058303833,
      "logits/rejected": -2.1131763458251953,
      "logps/chosen": -279.57867431640625,
      "logps/rejected": -245.7858123779297,
      "loss": 0.5348,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -0.0749698355793953,
      "rewards/margins": 0.4277007579803467,
      "rewards/rejected": -0.5026706457138062,
      "step": 150
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.4067554877743861e-08,
      "logits/chosen": -2.238752603530884,
      "logits/rejected": -2.113945960998535,
      "logps/chosen": -260.85333251953125,
      "logps/rejected": -233.0952606201172,
      "loss": 0.5351,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.06594842672348022,
      "rewards/margins": 0.4099964201450348,
      "rewards/rejected": -0.4759448170661926,
      "step": 160
    },
    {
      "epoch": 1.0,
      "step": 165,
      "total_flos": 0.0,
      "train_loss": 0.5745967612122045,
      "train_runtime": 7254.1779,
      "train_samples_per_second": 2.913,
      "train_steps_per_second": 0.023
    }
  ],
  "logging_steps": 10,
  "max_steps": 165,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}