File size: 2,378 Bytes
48de83f 9a012c2 48de83f 9a012c2 552831d d1ea928 552831d 48de83f 9a012c2 552831d d1ea928 552831d 48de83f 9a012c2 552831d d1ea928 552831d 48de83f 9a012c2 552831d d1ea928 552831d 48de83f 9a012c2 552831d d1ea928 552831d 48de83f 9a012c2 552831d d1ea928 552831d 48de83f 6303895 9a012c2 552831d d1ea928 552831d 6303895 9a012c2 552831d d1ea928 552831d 6303895 48de83f 9a012c2 552831d 48de83f 9a012c2 48de83f 552831d 48de83f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 100,
"global_step": 84,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.47619047619047616,
"grad_norm": 6.1757893562316895,
"learning_rate": 0.0001785714285714286,
"loss": 0.5829,
"step": 10
},
{
"epoch": 0.9523809523809523,
"grad_norm": 3.337554931640625,
"learning_rate": 0.00015476190476190478,
"loss": 0.7423,
"step": 20
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.07775535434484482,
"learning_rate": 0.00013095238095238096,
"loss": 0.0198,
"step": 30
},
{
"epoch": 1.9047619047619047,
"grad_norm": 0.0923432782292366,
"learning_rate": 0.00010714285714285715,
"loss": 0.2508,
"step": 40
},
{
"epoch": 2.380952380952381,
"grad_norm": 0.010310073383152485,
"learning_rate": 8.333333333333334e-05,
"loss": 0.0006,
"step": 50
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.022837914526462555,
"learning_rate": 5.9523809523809524e-05,
"loss": 0.0011,
"step": 60
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.0032586820889264345,
"learning_rate": 3.571428571428572e-05,
"loss": 0.0002,
"step": 70
},
{
"epoch": 3.8095238095238093,
"grad_norm": 0.00261130603030324,
"learning_rate": 1.1904761904761905e-05,
"loss": 0.0001,
"step": 80
},
{
"epoch": 4.0,
"step": 84,
"total_flos": 2.5417372593586176e+16,
"train_loss": 0.1902264037302562,
"train_runtime": 30.4122,
"train_samples_per_second": 10.785,
"train_steps_per_second": 2.762
}
],
"logging_steps": 10,
"max_steps": 84,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.5417372593586176e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|