|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 500, |
|
"global_step": 156, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0064, |
|
"grad_norm": 1341.8773394764246, |
|
"learning_rate": 3.125e-09, |
|
"logits/chosen": -3.9499800205230713, |
|
"logits/rejected": -4.237819194793701, |
|
"logps/chosen": -300.693115234375, |
|
"logps/rejected": -249.96307373046875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.064, |
|
"grad_norm": 1342.2810836893796, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -4.129705905914307, |
|
"logits/rejected": -4.352028846740723, |
|
"logps/chosen": -351.5079650878906, |
|
"logps/rejected": -308.8138427734375, |
|
"loss": 0.7326, |
|
"rewards/accuracies": 0.3680555522441864, |
|
"rewards/chosen": -0.04078766331076622, |
|
"rewards/margins": -0.11378024518489838, |
|
"rewards/rejected": 0.07299260050058365, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 1252.3965895279962, |
|
"learning_rate": 4.9899357349880975e-08, |
|
"logits/chosen": -4.194980144500732, |
|
"logits/rejected": -4.382790565490723, |
|
"logps/chosen": -334.9039001464844, |
|
"logps/rejected": -293.8416748046875, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.22410114109516144, |
|
"rewards/margins": 0.11712154000997543, |
|
"rewards/rejected": 0.10697959363460541, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.192, |
|
"grad_norm": 904.3776918610464, |
|
"learning_rate": 4.877641290737884e-08, |
|
"logits/chosen": -4.230466365814209, |
|
"logits/rejected": -4.363996505737305, |
|
"logps/chosen": -327.71453857421875, |
|
"logps/rejected": -295.3287658691406, |
|
"loss": 0.5498, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.9708820581436157, |
|
"rewards/margins": 0.5084127187728882, |
|
"rewards/rejected": 0.46246927976608276, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.256, |
|
"grad_norm": 894.6327423356746, |
|
"learning_rate": 4.646121984004665e-08, |
|
"logits/chosen": -4.1493096351623535, |
|
"logits/rejected": -4.351648807525635, |
|
"logps/chosen": -330.09368896484375, |
|
"logps/rejected": -288.2974853515625, |
|
"loss": 0.4125, |
|
"rewards/accuracies": 0.8218749761581421, |
|
"rewards/chosen": 1.9414455890655518, |
|
"rewards/margins": 1.1434320211410522, |
|
"rewards/rejected": 0.7980135083198547, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 706.4309708182283, |
|
"learning_rate": 4.3069871595684784e-08, |
|
"logits/chosen": -4.244365215301514, |
|
"logits/rejected": -4.423664093017578, |
|
"logps/chosen": -329.6412353515625, |
|
"logps/rejected": -291.22528076171875, |
|
"loss": 0.3694, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": 2.6057987213134766, |
|
"rewards/margins": 1.537340521812439, |
|
"rewards/rejected": 1.068458080291748, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 679.6447682422123, |
|
"learning_rate": 3.8772424536302564e-08, |
|
"logits/chosen": -4.262530326843262, |
|
"logits/rejected": -4.4340620040893555, |
|
"logps/chosen": -320.7197570800781, |
|
"logps/rejected": -291.15264892578125, |
|
"loss": 0.3459, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": 3.022132158279419, |
|
"rewards/margins": 1.8344866037368774, |
|
"rewards/rejected": 1.187645673751831, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.448, |
|
"grad_norm": 600.9568341116722, |
|
"learning_rate": 3.378437060203357e-08, |
|
"logits/chosen": -4.188047885894775, |
|
"logits/rejected": -4.377224445343018, |
|
"logps/chosen": -320.23345947265625, |
|
"logps/rejected": -288.5027770996094, |
|
"loss": 0.3189, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 3.3037331104278564, |
|
"rewards/margins": 2.1254096031188965, |
|
"rewards/rejected": 1.1783230304718018, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.512, |
|
"grad_norm": 654.7049863576665, |
|
"learning_rate": 2.8355831645441387e-08, |
|
"logits/chosen": -4.0522565841674805, |
|
"logits/rejected": -4.341280937194824, |
|
"logps/chosen": -345.8344421386719, |
|
"logps/rejected": -307.4328918457031, |
|
"loss": 0.3105, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 3.7246456146240234, |
|
"rewards/margins": 2.5337729454040527, |
|
"rewards/rejected": 1.1908724308013916, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.576, |
|
"grad_norm": 638.1282144295093, |
|
"learning_rate": 2.2759017277414164e-08, |
|
"logits/chosen": -4.180428504943848, |
|
"logits/rejected": -4.390549659729004, |
|
"logps/chosen": -332.82275390625, |
|
"logps/rejected": -295.1810607910156, |
|
"loss": 0.3099, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 3.2552542686462402, |
|
"rewards/margins": 2.3172354698181152, |
|
"rewards/rejected": 0.9380186796188354, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 680.3285346474286, |
|
"learning_rate": 1.7274575140626317e-08, |
|
"logits/chosen": -4.167009353637695, |
|
"logits/rejected": -4.386021614074707, |
|
"logps/chosen": -330.049560546875, |
|
"logps/rejected": -285.8011169433594, |
|
"loss": 0.3123, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/chosen": 3.6218514442443848, |
|
"rewards/margins": 2.723836898803711, |
|
"rewards/rejected": 0.8980148434638977, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.704, |
|
"grad_norm": 616.2712616857408, |
|
"learning_rate": 1.217751806485235e-08, |
|
"logits/chosen": -4.145500183105469, |
|
"logits/rejected": -4.386542320251465, |
|
"logps/chosen": -311.7583923339844, |
|
"logps/rejected": -276.3233947753906, |
|
"loss": 0.3022, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/chosen": 3.584909439086914, |
|
"rewards/margins": 2.6118006706237793, |
|
"rewards/rejected": 0.9731090664863586, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.768, |
|
"grad_norm": 649.1888991009114, |
|
"learning_rate": 7.723433775328384e-09, |
|
"logits/chosen": -4.141805171966553, |
|
"logits/rejected": -4.35054874420166, |
|
"logps/chosen": -325.5559997558594, |
|
"logps/rejected": -280.5980529785156, |
|
"loss": 0.3033, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": 3.6838138103485107, |
|
"rewards/margins": 2.6417319774627686, |
|
"rewards/rejected": 1.0420820713043213, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.832, |
|
"grad_norm": 747.4298760038148, |
|
"learning_rate": 4.135668656967433e-09, |
|
"logits/chosen": -4.228358268737793, |
|
"logits/rejected": -4.38976526260376, |
|
"logps/chosen": -331.02642822265625, |
|
"logps/rejected": -286.7439880371094, |
|
"loss": 0.3064, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 3.7264277935028076, |
|
"rewards/margins": 2.6530587673187256, |
|
"rewards/rejected": 1.073369026184082, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.896, |
|
"grad_norm": 697.5841535989922, |
|
"learning_rate": 1.5941282340065698e-09, |
|
"logits/chosen": -4.18213415145874, |
|
"logits/rejected": -4.3970947265625, |
|
"logps/chosen": -332.56500244140625, |
|
"logps/rejected": -303.63543701171875, |
|
"loss": 0.3069, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 3.5617058277130127, |
|
"rewards/margins": 2.6050186157226562, |
|
"rewards/rejected": 0.9566874504089355, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 567.1610784183449, |
|
"learning_rate": 2.262559558016325e-10, |
|
"logits/chosen": -4.118973731994629, |
|
"logits/rejected": -4.348026752471924, |
|
"logps/chosen": -339.0107116699219, |
|
"logps/rejected": -295.09564208984375, |
|
"loss": 0.3078, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": 3.7477049827575684, |
|
"rewards/margins": 2.61022686958313, |
|
"rewards/rejected": 1.1374781131744385, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9984, |
|
"step": 156, |
|
"total_flos": 0.0, |
|
"train_loss": 0.3884877807054764, |
|
"train_runtime": 4677.6403, |
|
"train_samples_per_second": 8.539, |
|
"train_steps_per_second": 0.033 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 156, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|