{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.6740289330482483,
      "learning_rate": 0.0002,
      "loss": 0.8442,
      "step": 250
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.722176194190979,
      "learning_rate": 0.0002,
      "loss": 0.421,
      "step": 500
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 0.679882287979126,
      "learning_rate": 0.0002,
      "loss": 0.2707,
      "step": 750
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 0.9423966407775879,
      "learning_rate": 0.0002,
      "loss": 0.2051,
      "step": 1000
    },
    {
      "epoch": 4.545454545454545,
      "grad_norm": 0.7787116765975952,
      "learning_rate": 0.0002,
      "loss": 0.1783,
      "step": 1250
    },
    {
      "epoch": 5.454545454545454,
      "grad_norm": 0.499831885099411,
      "learning_rate": 0.0002,
      "loss": 0.1577,
      "step": 1500
    }
  ],
  "logging_steps": 250,
  "max_steps": 1650,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.324891664420864e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}