File size: 1,560 Bytes
5250b95 bf97c39 5250b95 bf97c39 5250b95 240c2ee a4ce4c7 f7507b0 f2a9eda 240c2ee a4ce4c7 f7507b0 f2a9eda 053f8fb a4ce4c7 f7507b0 f2a9eda 053f8fb a4ce4c7 f7507b0 f2a9eda 053f8fb bf97c39 f7507b0 bf97c39 f7507b0 bf97c39 f7507b0 bf97c39 f7507b0 bf97c39 5250b95 f7507b0 5250b95 240c2ee bf97c39 5250b95 bf97c39 5250b95 a4ce4c7 5250b95 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 1000,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.5,
"learning_rate": 4.425e-07,
"loss": 9.0808,
"step": 25
},
{
"epoch": 5.0,
"learning_rate": 3.7999999999999996e-07,
"loss": 8.0507,
"step": 50
},
{
"epoch": 7.5,
"learning_rate": 3.175e-07,
"loss": 7.5948,
"step": 75
},
{
"epoch": 10.0,
"learning_rate": 2.55e-07,
"loss": 7.2855,
"step": 100
},
{
"epoch": 12.5,
"learning_rate": 1.9249999999999998e-07,
"loss": 7.1565,
"step": 125
},
{
"epoch": 15.0,
"learning_rate": 1.3e-07,
"loss": 7.0459,
"step": 150
},
{
"epoch": 17.5,
"learning_rate": 6.75e-08,
"loss": 7.0015,
"step": 175
},
{
"epoch": 20.0,
"learning_rate": 5e-09,
"loss": 6.9769,
"step": 200
},
{
"epoch": 20.0,
"step": 200,
"total_flos": 0.0,
"train_loss": 7.524070739746094,
"train_runtime": 153.2004,
"train_samples_per_second": 1.305,
"train_steps_per_second": 1.305
}
],
"logging_steps": 25,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
|