File size: 1,697 Bytes
db7f9c2 dacbbef db7f9c2 dacbbef db7f9c2 dacbbef db7f9c2 dacbbef db7f9c2 dacbbef db7f9c2 dacbbef db7f9c2 dacbbef db7f9c2 dacbbef db7f9c2 dacbbef db7f9c2 dacbbef db7f9c2 dacbbef db7f9c2 dacbbef db7f9c2 dacbbef db7f9c2 dacbbef db7f9c2 dacbbef db7f9c2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.0,
"eval_steps": 500,
"global_step": 1650,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9090909090909091,
"grad_norm": 0.3847486674785614,
"learning_rate": 0.0002,
"loss": 0.774,
"step": 250
},
{
"epoch": 1.8181818181818183,
"grad_norm": 0.48815852403640747,
"learning_rate": 0.0002,
"loss": 0.3868,
"step": 500
},
{
"epoch": 2.7272727272727275,
"grad_norm": 0.574570894241333,
"learning_rate": 0.0002,
"loss": 0.2431,
"step": 750
},
{
"epoch": 3.6363636363636362,
"grad_norm": 0.3991849720478058,
"learning_rate": 0.0002,
"loss": 0.1852,
"step": 1000
},
{
"epoch": 4.545454545454545,
"grad_norm": 0.34638234972953796,
"learning_rate": 0.0002,
"loss": 0.1582,
"step": 1250
},
{
"epoch": 5.454545454545454,
"grad_norm": 0.32800230383872986,
"learning_rate": 0.0002,
"loss": 0.1419,
"step": 1500
}
],
"logging_steps": 250,
"max_steps": 1650,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.028449026965504e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|