fedovtt's picture
Training in progress, step 15, checkpoint
51297a8 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.010256410256410256,
"eval_steps": 2,
"global_step": 15,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0006837606837606838,
"grad_norm": 0.01459360308945179,
"learning_rate": 2e-05,
"loss": 10.3793,
"step": 1
},
{
"epoch": 0.0006837606837606838,
"eval_loss": 10.37917423248291,
"eval_runtime": 3.5837,
"eval_samples_per_second": 171.888,
"eval_steps_per_second": 85.944,
"step": 1
},
{
"epoch": 0.0013675213675213675,
"grad_norm": 0.013831038028001785,
"learning_rate": 4e-05,
"loss": 10.3799,
"step": 2
},
{
"epoch": 0.0013675213675213675,
"eval_loss": 10.379171371459961,
"eval_runtime": 3.5514,
"eval_samples_per_second": 173.452,
"eval_steps_per_second": 86.726,
"step": 2
},
{
"epoch": 0.0020512820512820513,
"grad_norm": 0.014491274021565914,
"learning_rate": 6e-05,
"loss": 10.3789,
"step": 3
},
{
"epoch": 0.002735042735042735,
"grad_norm": 0.01442716270685196,
"learning_rate": 8e-05,
"loss": 10.3815,
"step": 4
},
{
"epoch": 0.002735042735042735,
"eval_loss": 10.379145622253418,
"eval_runtime": 3.5666,
"eval_samples_per_second": 172.711,
"eval_steps_per_second": 86.356,
"step": 4
},
{
"epoch": 0.003418803418803419,
"grad_norm": 0.01321357674896717,
"learning_rate": 0.0001,
"loss": 10.3781,
"step": 5
},
{
"epoch": 0.0041025641025641026,
"grad_norm": 0.012160197831690311,
"learning_rate": 9.755282581475769e-05,
"loss": 10.3803,
"step": 6
},
{
"epoch": 0.0041025641025641026,
"eval_loss": 10.379097938537598,
"eval_runtime": 3.6086,
"eval_samples_per_second": 170.703,
"eval_steps_per_second": 85.352,
"step": 6
},
{
"epoch": 0.004786324786324786,
"grad_norm": 0.01298796571791172,
"learning_rate": 9.045084971874738e-05,
"loss": 10.3767,
"step": 7
},
{
"epoch": 0.00547008547008547,
"grad_norm": 0.012850708328187466,
"learning_rate": 7.938926261462366e-05,
"loss": 10.3795,
"step": 8
},
{
"epoch": 0.00547008547008547,
"eval_loss": 10.379048347473145,
"eval_runtime": 3.5895,
"eval_samples_per_second": 171.61,
"eval_steps_per_second": 85.805,
"step": 8
},
{
"epoch": 0.006153846153846154,
"grad_norm": 0.013943369500339031,
"learning_rate": 6.545084971874738e-05,
"loss": 10.3819,
"step": 9
},
{
"epoch": 0.006837606837606838,
"grad_norm": 0.017278488725423813,
"learning_rate": 5e-05,
"loss": 10.3765,
"step": 10
},
{
"epoch": 0.006837606837606838,
"eval_loss": 10.379008293151855,
"eval_runtime": 3.566,
"eval_samples_per_second": 172.744,
"eval_steps_per_second": 86.372,
"step": 10
},
{
"epoch": 0.007521367521367521,
"grad_norm": 0.01218665111809969,
"learning_rate": 3.4549150281252636e-05,
"loss": 10.3796,
"step": 11
},
{
"epoch": 0.008205128205128205,
"grad_norm": 0.014596718363463879,
"learning_rate": 2.061073738537635e-05,
"loss": 10.3781,
"step": 12
},
{
"epoch": 0.008205128205128205,
"eval_loss": 10.378983497619629,
"eval_runtime": 3.7412,
"eval_samples_per_second": 164.654,
"eval_steps_per_second": 82.327,
"step": 12
},
{
"epoch": 0.008888888888888889,
"grad_norm": 0.016098329797387123,
"learning_rate": 9.549150281252633e-06,
"loss": 10.3813,
"step": 13
},
{
"epoch": 0.009572649572649573,
"grad_norm": 0.0119888074696064,
"learning_rate": 2.4471741852423237e-06,
"loss": 10.3783,
"step": 14
},
{
"epoch": 0.009572649572649573,
"eval_loss": 10.378974914550781,
"eval_runtime": 3.5778,
"eval_samples_per_second": 172.173,
"eval_steps_per_second": 86.086,
"step": 14
},
{
"epoch": 0.010256410256410256,
"grad_norm": 0.015922540798783302,
"learning_rate": 0.0,
"loss": 10.3789,
"step": 15
}
],
"logging_steps": 1,
"max_steps": 15,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 5,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 775854489600.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}