autotrain-gemma / checkpoint-396 /trainer_state.json
mjmanashti's picture
Upload folder using huggingface_hub
c1429da verified
raw
history blame contribute delete
No virus
8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 396,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"grad_norm": 0.0009370408370159566,
"learning_rate": 1.95e-05,
"logits/chosen": -22.68578338623047,
"logits/rejected": -22.825130462646484,
"logps/chosen": -78.43010711669922,
"logps/rejected": -102.63253784179688,
"loss": 0.1807,
"rewards/accuracies": 0.9230769276618958,
"rewards/chosen": 2.8142247200012207,
"rewards/margins": 4.811609268188477,
"rewards/rejected": -1.997384786605835,
"step": 26
},
{
"epoch": 0.39,
"grad_norm": 2.192274041590281e-05,
"learning_rate": 2.8988764044943823e-05,
"logits/chosen": -23.15672492980957,
"logits/rejected": -23.271657943725586,
"logps/chosen": -43.96305465698242,
"logps/rejected": -155.8493194580078,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.304109573364258,
"rewards/margins": 13.57724380493164,
"rewards/rejected": -7.273132801055908,
"step": 52
},
{
"epoch": 0.59,
"grad_norm": 1.914922904688865e-05,
"learning_rate": 2.6797752808988762e-05,
"logits/chosen": -23.230398178100586,
"logits/rejected": -23.34272575378418,
"logps/chosen": -42.47319030761719,
"logps/rejected": -166.07025146484375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.426936149597168,
"rewards/margins": 14.743646621704102,
"rewards/rejected": -8.316710472106934,
"step": 78
},
{
"epoch": 0.79,
"grad_norm": 1.923706804518588e-05,
"learning_rate": 2.4606741573033708e-05,
"logits/chosen": -23.297359466552734,
"logits/rejected": -23.406126022338867,
"logps/chosen": -42.298927307128906,
"logps/rejected": -167.82479858398438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.4410505294799805,
"rewards/margins": 14.930893898010254,
"rewards/rejected": -8.48984432220459,
"step": 104
},
{
"epoch": 0.98,
"grad_norm": 0.00013795163249596953,
"learning_rate": 2.2415730337078654e-05,
"logits/chosen": -23.36400032043457,
"logits/rejected": -23.47435760498047,
"logps/chosen": -42.36582565307617,
"logps/rejected": -167.81008911132812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.42643404006958,
"rewards/margins": 14.924281120300293,
"rewards/rejected": -8.497847557067871,
"step": 130
},
{
"epoch": 1.18,
"grad_norm": 1.9170562154613435e-05,
"learning_rate": 2.0224719101123596e-05,
"logits/chosen": -23.275028228759766,
"logits/rejected": -23.385255813598633,
"logps/chosen": -42.21509552001953,
"logps/rejected": -167.64584350585938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.436044692993164,
"rewards/margins": 14.92562198638916,
"rewards/rejected": -8.48957633972168,
"step": 156
},
{
"epoch": 1.38,
"grad_norm": 1.7988468243856914e-05,
"learning_rate": 1.803370786516854e-05,
"logits/chosen": -23.300710678100586,
"logits/rejected": -23.410503387451172,
"logps/chosen": -42.11836624145508,
"logps/rejected": -169.15122985839844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.481032371520996,
"rewards/margins": 15.097738265991211,
"rewards/rejected": -8.616707801818848,
"step": 182
},
{
"epoch": 1.58,
"grad_norm": 2.1618798200506717e-05,
"learning_rate": 1.5842696629213484e-05,
"logits/chosen": -23.332603454589844,
"logits/rejected": -23.443927764892578,
"logps/chosen": -42.54871368408203,
"logps/rejected": -167.39431762695312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.385127067565918,
"rewards/margins": 14.849527359008789,
"rewards/rejected": -8.464401245117188,
"step": 208
},
{
"epoch": 1.77,
"grad_norm": 1.6424854038632475e-05,
"learning_rate": 1.3651685393258428e-05,
"logits/chosen": -23.301807403564453,
"logits/rejected": -23.41258430480957,
"logps/chosen": -42.13775634765625,
"logps/rejected": -168.62362670898438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.500583171844482,
"rewards/margins": 15.051546096801758,
"rewards/rejected": -8.550962448120117,
"step": 234
},
{
"epoch": 1.97,
"grad_norm": 1.6196921933442354e-05,
"learning_rate": 1.146067415730337e-05,
"logits/chosen": -23.335044860839844,
"logits/rejected": -23.44767951965332,
"logps/chosen": -42.26608657836914,
"logps/rejected": -167.9331817626953,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.42073917388916,
"rewards/margins": 14.927839279174805,
"rewards/rejected": -8.507099151611328,
"step": 260
},
{
"epoch": 2.17,
"grad_norm": 1.7393831512890756e-05,
"learning_rate": 9.269662921348314e-06,
"logits/chosen": -23.317138671875,
"logits/rejected": -23.424781799316406,
"logps/chosen": -41.94506072998047,
"logps/rejected": -168.30055236816406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.4749298095703125,
"rewards/margins": 15.034900665283203,
"rewards/rejected": -8.55997085571289,
"step": 286
},
{
"epoch": 2.36,
"grad_norm": 1.7613503587199375e-05,
"learning_rate": 7.078651685393258e-06,
"logits/chosen": -23.292570114135742,
"logits/rejected": -23.40188217163086,
"logps/chosen": -42.209102630615234,
"logps/rejected": -169.81747436523438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.467267990112305,
"rewards/margins": 15.145313262939453,
"rewards/rejected": -8.678045272827148,
"step": 312
},
{
"epoch": 2.56,
"grad_norm": 0.0001227569446200505,
"learning_rate": 4.8876404494382024e-06,
"logits/chosen": -23.324228286743164,
"logits/rejected": -23.43165397644043,
"logps/chosen": -41.959617614746094,
"logps/rejected": -169.5789337158203,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.512591361999512,
"rewards/margins": 15.166027069091797,
"rewards/rejected": -8.653436660766602,
"step": 338
},
{
"epoch": 2.76,
"grad_norm": 1.6931946447584778e-05,
"learning_rate": 2.696629213483146e-06,
"logits/chosen": -23.30262565612793,
"logits/rejected": -23.414592742919922,
"logps/chosen": -42.351627349853516,
"logps/rejected": -168.41354370117188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.420273780822754,
"rewards/margins": 14.962029457092285,
"rewards/rejected": -8.541756629943848,
"step": 364
},
{
"epoch": 2.95,
"grad_norm": 1.576123213453684e-05,
"learning_rate": 5.056179775280899e-07,
"logits/chosen": -23.35226058959961,
"logits/rejected": -23.466854095458984,
"logps/chosen": -42.21808624267578,
"logps/rejected": -168.39918518066406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.449341297149658,
"rewards/margins": 15.006133079528809,
"rewards/rejected": -8.556791305541992,
"step": 390
}
],
"logging_steps": 26,
"max_steps": 396,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}