|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.89247311827957, |
|
"eval_steps": 500, |
|
"global_step": 460, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.21505376344086022, |
|
"grad_norm": null,
|
"learning_rate": 0.00019976687691905393, |
|
"loss": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.43010752688172044, |
|
"grad_norm": null,
|
"learning_rate": 0.00019906859460363307, |
|
"loss": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.6451612903225806, |
|
"grad_norm": null,
|
"learning_rate": 0.00019790840876823232, |
|
"loss": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.8602150537634409, |
|
"grad_norm": null,
|
"learning_rate": 0.00019629172873477995, |
|
"loss": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.075268817204301, |
|
"grad_norm": null,
|
"learning_rate": 0.00019422609221188207, |
|
"loss": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.2903225806451613, |
|
"grad_norm": null,
|
"learning_rate": 0.00019172113015054532, |
|
"loss": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.5053763440860215, |
|
"grad_norm": null,
|
"learning_rate": 0.0001887885218402375, |
|
"loss": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.7204301075268817, |
|
"grad_norm": null,
|
"learning_rate": 0.00018544194045464886, |
|
"loss": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.935483870967742, |
|
"grad_norm": null,
|
"learning_rate": 0.0001816969893010442, |
|
"loss": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.150537634408602, |
|
"grad_norm": null,
|
"learning_rate": 0.000177571129070442, |
|
"loss": 0.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.3655913978494625, |
|
"grad_norm": null,
|
"learning_rate": 0.00017308359642781242, |
|
"loss": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.5806451612903225, |
|
"grad_norm": null,
|
"learning_rate": 0.00016825531432186543, |
|
"loss": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.795698924731183, |
|
"grad_norm": null,
|
"learning_rate": 0.00016310879443260528, |
|
"loss": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.010752688172043, |
|
"grad_norm": null,
|
"learning_rate": 0.00015766803221148673, |
|
"loss": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.225806451612903, |
|
"grad_norm": null,
|
"learning_rate": 0.00015195839500354335, |
|
"loss": 0.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.4408602150537635, |
|
"grad_norm": null,
|
"learning_rate": 0.00014600650377311522, |
|
"loss": 0.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.6559139784946235, |
|
"grad_norm": null,
|
"learning_rate": 0.00013984010898462416, |
|
"loss": 0.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.870967741935484, |
|
"grad_norm": null,
|
"learning_rate": 0.00013348796121709862, |
|
"loss": 0.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.086021505376344, |
|
"grad_norm": null,
|
"learning_rate": 0.00012697967711570242, |
|
"loss": 0.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.301075268817204, |
|
"grad_norm": null,
|
"learning_rate": 0.0001203456013052634, |
|
"loss": 0.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.516129032258064, |
|
"grad_norm": null,
|
"learning_rate": 0.00011361666490962468, |
|
"loss": 0.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 4.731182795698925, |
|
"grad_norm": null,
|
"learning_rate": 0.0001068242413364671, |
|
"loss": 0.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.946236559139785, |
|
"grad_norm": null,
|
"learning_rate": 0.0001, |
|
"loss": 0.0, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 5.161290322580645, |
|
"grad_norm": null,
|
"learning_rate": 9.317575866353292e-05, |
|
"loss": 0.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 5.376344086021505, |
|
"grad_norm": null,
|
"learning_rate": 8.638333509037536e-05, |
|
"loss": 0.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 5.591397849462366, |
|
"grad_norm": null,
|
"learning_rate": 7.965439869473664e-05, |
|
"loss": 0.0, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 5.806451612903226, |
|
"grad_norm": null,
|
"learning_rate": 7.302032288429756e-05, |
|
"loss": 0.0, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 6.021505376344086, |
|
"grad_norm": null,
|
"learning_rate": 6.651203878290139e-05, |
|
"loss": 0.0, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 6.236559139784946, |
|
"grad_norm": null,
|
"learning_rate": 6.015989101537586e-05, |
|
"loss": 0.0, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 6.451612903225806, |
|
"grad_norm": null,
|
"learning_rate": 5.399349622688479e-05, |
|
"loss": 0.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": null,
|
"learning_rate": 4.804160499645667e-05, |
|
"loss": 0.0, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 6.881720430107527, |
|
"grad_norm": null,
|
"learning_rate": 4.2331967788513295e-05, |
|
"loss": 0.0, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 7.096774193548387, |
|
"grad_norm": null,
|
"learning_rate": 3.689120556739475e-05, |
|
"loss": 0.0, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 7.311827956989247, |
|
"grad_norm": null,
|
"learning_rate": 3.174468567813461e-05, |
|
"loss": 0.0, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 7.526881720430108, |
|
"grad_norm": null,
|
"learning_rate": 2.691640357218759e-05, |
|
"loss": 0.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 7.741935483870968, |
|
"grad_norm": null,
|
"learning_rate": 2.242887092955801e-05, |
|
"loss": 0.0, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 7.956989247311828, |
|
"grad_norm": null,
|
"learning_rate": 1.8303010698955804e-05, |
|
"loss": 0.0, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 8.172043010752688, |
|
"grad_norm": null,
|
"learning_rate": 1.4558059545351143e-05, |
|
"loss": 0.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 8.387096774193548, |
|
"grad_norm": null,
|
"learning_rate": 1.1211478159762478e-05, |
|
"loss": 0.0, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 8.602150537634408, |
|
"grad_norm": null,
|
"learning_rate": 8.278869849454718e-06, |
|
"loss": 0.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.817204301075268, |
|
"grad_norm": null,
|
"learning_rate": 5.77390778811796e-06, |
|
"loss": 0.0, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 9.03225806451613, |
|
"grad_norm": null,
|
"learning_rate": 3.7082712652200867e-06, |
|
"loss": 0.0, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 9.24731182795699, |
|
"grad_norm": null,
|
"learning_rate": 2.091591231767709e-06, |
|
"loss": 0.0, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 9.46236559139785, |
|
"grad_norm": null,
|
"learning_rate": 9.314053963669245e-07, |
|
"loss": 0.0, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 9.67741935483871, |
|
"grad_norm": null,
|
"learning_rate": 2.3312308094607382e-07, |
|
"loss": 0.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 9.89247311827957, |
|
"grad_norm": null,
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 9.89247311827957, |
|
"step": 460, |
|
"total_flos": 2.262683294367744e+16, |
|
"train_loss": 0.0, |
|
"train_runtime": 377.8672, |
|
"train_samples_per_second": 4.922, |
|
"train_steps_per_second": 1.217 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 460, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 2.262683294367744e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|