File size: 2,659 Bytes
1b707db 7b9f354 1b707db 7b9f354 1b707db 7b9f354 1b707db 7b9f354 1b707db 7b9f354 1b707db 7b9f354 1b707db 7b9f354 1b707db 7b9f354 1b707db 7b9f354 1b707db 7b9f354 1b707db 7b9f354 1b707db 7b9f354 1b707db 7b9f354 1b707db 7b9f354 1b707db 7b9f354 1b707db |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
{
"best_metric": 0.48496538400650024,
"best_model_checkpoint": "mikhail-panzo/fil_b32_le5_s4000/checkpoint-500",
"epoch": 44.44444444444444,
"eval_steps": 500,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 4.444444444444445,
"grad_norm": 4.288165092468262,
"learning_rate": 2.5000000000000004e-07,
"loss": 0.7989,
"step": 50
},
{
"epoch": 8.88888888888889,
"grad_norm": 2.4493463039398193,
"learning_rate": 5.000000000000001e-07,
"loss": 0.7701,
"step": 100
},
{
"epoch": 13.333333333333334,
"grad_norm": 1.3798907995224,
"learning_rate": 7.5e-07,
"loss": 0.7374,
"step": 150
},
{
"epoch": 17.77777777777778,
"grad_norm": 2.6173877716064453,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.7101,
"step": 200
},
{
"epoch": 22.22222222222222,
"grad_norm": 1.3756701946258545,
"learning_rate": 1.25e-06,
"loss": 0.6887,
"step": 250
},
{
"epoch": 26.666666666666668,
"grad_norm": 1.0456843376159668,
"learning_rate": 1.5e-06,
"loss": 0.672,
"step": 300
},
{
"epoch": 31.11111111111111,
"grad_norm": 0.9518398642539978,
"learning_rate": 1.75e-06,
"loss": 0.6583,
"step": 350
},
{
"epoch": 35.55555555555556,
"grad_norm": 1.305978775024414,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.6258,
"step": 400
},
{
"epoch": 40.0,
"grad_norm": 0.8857623338699341,
"learning_rate": 2.25e-06,
"loss": 0.5835,
"step": 450
},
{
"epoch": 44.44444444444444,
"grad_norm": 1.052182912826538,
"learning_rate": 2.5e-06,
"loss": 0.5539,
"step": 500
},
{
"epoch": 44.44444444444444,
"eval_loss": 0.48496538400650024,
"eval_runtime": 9.4574,
"eval_samples_per_second": 16.812,
"eval_steps_per_second": 2.115,
"step": 500
}
],
"logging_steps": 50,
"max_steps": 4000,
"num_input_tokens_seen": 0,
"num_train_epochs": 364,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.3753846024576416e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
|