|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 426,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.176056338028169,
      "grad_norm": 1.0253130197525024,
      "learning_rate": 0.00019958367684748586,
      "loss": 1.8153,
      "step": 25
    },
    {
      "epoch": 0.352112676056338,
      "grad_norm": 0.5390976071357727,
      "learning_rate": 0.00019606536598722435,
      "loss": 1.384,
      "step": 50
    },
    {
      "epoch": 0.528169014084507,
      "grad_norm": 0.6293356418609619,
      "learning_rate": 0.0001890833789866129,
      "loss": 1.222,
      "step": 75
    },
    {
      "epoch": 0.704225352112676,
      "grad_norm": 0.7000795602798462,
      "learning_rate": 0.00017888945424832895,
      "loss": 1.1705,
      "step": 100
    },
    {
      "epoch": 0.8802816901408451,
      "grad_norm": 0.5979147553443909,
      "learning_rate": 0.00016585113790650388,
      "loss": 1.1829,
      "step": 125
    },
    {
      "epoch": 1.056338028169014,
      "grad_norm": 0.6060128211975098,
      "learning_rate": 0.00015043853180022836,
      "loss": 1.0767,
      "step": 150
    },
    {
      "epoch": 1.232394366197183,
      "grad_norm": 0.7619791030883789,
      "learning_rate": 0.00013320734375908607,
      "loss": 1.0177,
      "step": 175
    },
    {
      "epoch": 1.408450704225352,
      "grad_norm": 0.8311352133750916,
      "learning_rate": 0.00011477885132961679,
      "loss": 1.0361,
      "step": 200
    },
    {
      "epoch": 1.584507042253521,
      "grad_norm": 0.7331663370132446,
      "learning_rate": 9.581750135876277e-05,
      "loss": 0.9322,
      "step": 225
    },
    {
      "epoch": 1.76056338028169,
      "grad_norm": 0.5762674808502197,
      "learning_rate": 7.700695309049767e-05,
      "loss": 0.792,
      "step": 250
    },
    {
      "epoch": 1.936619718309859,
      "grad_norm": 0.7466037273406982,
      "learning_rate": 5.902542855160642e-05,
      "loss": 0.837,
      "step": 275
    },
    {
      "epoch": 2.112676056338028,
      "grad_norm": 0.7813904285430908,
      "learning_rate": 4.252125897855932e-05,
      "loss": 0.8631,
      "step": 300
    },
    {
      "epoch": 2.288732394366197,
      "grad_norm": 0.6281647086143494,
      "learning_rate": 2.8089508969081e-05,
      "loss": 0.7121,
      "step": 325
    },
    {
      "epoch": 2.464788732394366,
      "grad_norm": 1.0807723999023438,
      "learning_rate": 1.625052118420889e-05,
      "loss": 0.7519,
      "step": 350
    },
    {
      "epoch": 2.640845070422535,
      "grad_norm": 0.8650486469268799,
      "learning_rate": 7.431155180401705e-06,
      "loss": 0.8256,
      "step": 375
    },
    {
      "epoch": 2.816901408450704,
      "grad_norm": 0.860803484916687,
      "learning_rate": 1.9493968132951458e-06,
      "loss": 0.7998,
      "step": 400
    },
    {
      "epoch": 2.992957746478873,
      "grad_norm": 0.8234962224960327,
      "learning_rate": 2.893127359282488e-09,
      "loss": 0.683,
      "step": 425
    }
  ],
  "logging_steps": 25,
  "max_steps": 426,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 0,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7981829121835008.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}
|
|