{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.34910157322883606,
      "learning_rate": 0.0002,
      "loss": 0.7752,
      "step": 250
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.5253486037254333,
      "learning_rate": 0.0002,
      "loss": 0.3892,
      "step": 500
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 0.6807448267936707,
      "learning_rate": 0.0002,
      "loss": 0.2486,
      "step": 750
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 0.5795254111289978,
      "learning_rate": 0.0002,
      "loss": 0.188,
      "step": 1000
    },
    {
      "epoch": 4.545454545454545,
      "grad_norm": 0.4031554162502289,
      "learning_rate": 0.0002,
      "loss": 0.1597,
      "step": 1250
    },
    {
      "epoch": 5.454545454545454,
      "grad_norm": 0.35988086462020874,
      "learning_rate": 0.0002,
      "loss": 0.1421,
      "step": 1500
    }
  ],
  "logging_steps": 250,
  "max_steps": 1650,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.028449026965504e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}