File size: 1,999 Bytes
dda2c5c 5210ab6 dda2c5c 5210ab6 dda2c5c 5210ab6 dda2c5c 5210ab6 dda2c5c 5210ab6 dda2c5c 5210ab6 dda2c5c 5210ab6 dda2c5c 5210ab6 dda2c5c 5210ab6 dda2c5c 5210ab6 dda2c5c 5210ab6 dda2c5c 5210ab6 dda2c5c 5210ab6 dda2c5c 5210ab6 dda2c5c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.5555555555555554,
"eval_steps": 500,
"global_step": 12,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5925925925925926,
"grad_norm": 1.1158123016357422,
"learning_rate": 0.0001666666666666667,
"loss": 9.4829,
"step": 2
},
{
"epoch": 1.1851851851851851,
"grad_norm": 1.368739366531372,
"learning_rate": 0.00013333333333333334,
"loss": 9.144,
"step": 4
},
{
"epoch": 1.7777777777777777,
"grad_norm": 2.1182327270507812,
"learning_rate": 0.0001,
"loss": 8.7124,
"step": 6
},
{
"epoch": 2.3703703703703702,
"grad_norm": 2.3530497550964355,
"learning_rate": 6.666666666666667e-05,
"loss": 8.2535,
"step": 8
},
{
"epoch": 2.962962962962963,
"grad_norm": 1.8051578998565674,
"learning_rate": 3.3333333333333335e-05,
"loss": 7.8896,
"step": 10
},
{
"epoch": 3.5555555555555554,
"grad_norm": 1.4535489082336426,
"learning_rate": 0.0,
"loss": 7.7924,
"step": 12
},
{
"epoch": 3.5555555555555554,
"step": 12,
"total_flos": 57778513872936.0,
"train_loss": 8.54578431447347,
"train_runtime": 54.5704,
"train_samples_per_second": 3.958,
"train_steps_per_second": 0.22
}
],
"logging_steps": 2,
"max_steps": 12,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 57778513872936.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|