|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9968652037617555, |
|
"eval_steps": 500, |
|
"global_step": 159, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -1.8967804908752441, |
|
"logits/rejected": -1.9837573766708374, |
|
"logps/chosen": -526.0550537109375, |
|
"logps/rejected": -649.3060302734375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -1.896860122680664, |
|
"logits/rejected": -1.8309091329574585, |
|
"logps/chosen": -482.1170349121094, |
|
"logps/rejected": -534.401123046875, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": -0.026020465418696404, |
|
"rewards/margins": 0.004157062619924545, |
|
"rewards/rejected": -0.030177529901266098, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990353313429303e-07, |
|
"logits/chosen": -1.636963129043579, |
|
"logits/rejected": -1.5652328729629517, |
|
"logps/chosen": -518.24560546875, |
|
"logps/rejected": -692.5880126953125, |
|
"loss": 0.6268, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7620652914047241, |
|
"rewards/margins": 0.2627033591270447, |
|
"rewards/rejected": -1.024768590927124, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.882681251368548e-07, |
|
"logits/chosen": -0.6123544573783875, |
|
"logits/rejected": -0.5012580752372742, |
|
"logps/chosen": -642.9691162109375, |
|
"logps/rejected": -757.7621459960938, |
|
"loss": 0.6561, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.682318091392517, |
|
"rewards/margins": 0.44499659538269043, |
|
"rewards/rejected": -2.127314567565918, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6604720940421207e-07, |
|
"logits/chosen": -0.06902433931827545, |
|
"logits/rejected": 0.01894857920706272, |
|
"logps/chosen": -600.1622314453125, |
|
"logps/rejected": -754.4539794921875, |
|
"loss": 0.6058, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.2441052198410034, |
|
"rewards/margins": 0.34002283215522766, |
|
"rewards/rejected": -1.5841280221939087, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3344075855595097e-07, |
|
"logits/chosen": -0.14471003413200378, |
|
"logits/rejected": 0.360783189535141, |
|
"logps/chosen": -613.9149780273438, |
|
"logps/rejected": -760.083740234375, |
|
"loss": 0.5816, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1845529079437256, |
|
"rewards/margins": 0.4947661757469177, |
|
"rewards/rejected": -1.6793190240859985, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.920161866827889e-07, |
|
"logits/chosen": 0.5329059362411499, |
|
"logits/rejected": 0.6899431943893433, |
|
"logps/chosen": -675.7152099609375, |
|
"logps/rejected": -822.51025390625, |
|
"loss": 0.5791, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.8414958715438843, |
|
"rewards/margins": 0.6188977956771851, |
|
"rewards/rejected": -2.4603934288024902, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4376480090239047e-07, |
|
"logits/chosen": 0.21469660103321075, |
|
"logits/rejected": 0.6365100145339966, |
|
"logps/chosen": -613.69921875, |
|
"logps/rejected": -791.498046875, |
|
"loss": 0.5683, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.4487526416778564, |
|
"rewards/margins": 0.7974711656570435, |
|
"rewards/rejected": -2.2462239265441895, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.910060778827554e-07, |
|
"logits/chosen": 0.050884656608104706, |
|
"logits/rejected": 0.49703994393348694, |
|
"logps/chosen": -631.5872192382812, |
|
"logps/rejected": -790.6751708984375, |
|
"loss": 0.5308, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3860604763031006, |
|
"rewards/margins": 0.5967694520950317, |
|
"rewards/rejected": -1.9828300476074219, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3627616503391812e-07, |
|
"logits/chosen": 0.4010600447654724, |
|
"logits/rejected": 0.7713971138000488, |
|
"logps/chosen": -658.26513671875, |
|
"logps/rejected": -814.2492065429688, |
|
"loss": 0.5641, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.5723729133605957, |
|
"rewards/margins": 0.7238161563873291, |
|
"rewards/rejected": -2.296189308166504, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8220596619089573e-07, |
|
"logits/chosen": 0.13108967244625092, |
|
"logits/rejected": 0.49605846405029297, |
|
"logps/chosen": -634.7674560546875, |
|
"logps/rejected": -771.0775146484375, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.3438690900802612, |
|
"rewards/margins": 0.6646407246589661, |
|
"rewards/rejected": -2.008509874343872, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3139467229135998e-07, |
|
"logits/chosen": 0.21024885773658752, |
|
"logits/rejected": 0.666106641292572, |
|
"logps/chosen": -694.7786254882812, |
|
"logps/rejected": -792.2867431640625, |
|
"loss": 0.5662, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.5787041187286377, |
|
"rewards/margins": 0.609796404838562, |
|
"rewards/rejected": -2.18850040435791, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.628481651367875e-08, |
|
"logits/chosen": 0.2621000409126282, |
|
"logits/rejected": 0.6290411949157715, |
|
"logps/chosen": -641.9815673828125, |
|
"logps/rejected": -823.8590087890625, |
|
"loss": 0.531, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.6526798009872437, |
|
"rewards/margins": 0.5855975151062012, |
|
"rewards/rejected": -2.2382771968841553, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.904486005914027e-08, |
|
"logits/chosen": 0.3799557089805603, |
|
"logits/rejected": 0.6229809522628784, |
|
"logps/chosen": -664.0032348632812, |
|
"logps/rejected": -802.6885986328125, |
|
"loss": 0.5458, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.6382343769073486, |
|
"rewards/margins": 0.5937812924385071, |
|
"rewards/rejected": -2.232015609741211, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1464952759020856e-08, |
|
"logits/chosen": 0.492957204580307, |
|
"logits/rejected": 0.9118458032608032, |
|
"logps/chosen": -631.5418090820312, |
|
"logps/rejected": -813.458251953125, |
|
"loss": 0.5444, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.540797472000122, |
|
"rewards/margins": 0.7339878082275391, |
|
"rewards/rejected": -2.274785280227661, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.8708793644441086e-09, |
|
"logits/chosen": -0.018977392464876175, |
|
"logits/rejected": 0.5174835920333862, |
|
"logps/chosen": -643.7967529296875, |
|
"logps/rejected": -808.516357421875, |
|
"loss": 0.5419, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.3859628438949585, |
|
"rewards/margins": 0.8056131601333618, |
|
"rewards/rejected": -2.1915760040283203, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 159, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5745523470752644, |
|
"train_runtime": 2655.032, |
|
"train_samples_per_second": 7.675, |
|
"train_steps_per_second": 0.06 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 159, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|