File size: 2,324 Bytes
9e5bc07 8f1c71f 9e5bc07 a718b54 9e5bc07 8f1c71f a718b54 8f1c71f 9e5bc07 a718b54 9e5bc07 a718b54 8c429e8 a718b54 8c429e8 a718b54 8c429e8 a718b54 8c429e8 8f1c71f a718b54 8c429e8 a718b54 8c429e8 8f1c71f a718b54 84c2159 a718b54 f18fc82 8f1c71f a718b54 f18fc82 8f1c71f a718b54 9e5bc07 8f1c71f a718b54 9e5bc07 8f1c71f 9e5bc07 a718b54 8f1c71f 9e5bc07 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 84,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.36,
"learning_rate": 7.996491320395433e-06,
"loss": 459.6986,
"step": 10
},
{
"epoch": 0.71,
"learning_rate": 7.582847040957651e-06,
"loss": 429.225,
"step": 20
},
{
"epoch": 1.0,
"eval_f1": 0.0,
"eval_loss": 329.4008483886719,
"eval_runtime": 2.9528,
"eval_samples_per_second": 59.604,
"eval_steps_per_second": 1.016,
"step": 28
},
{
"epoch": 1.07,
"learning_rate": 6.549695958994759e-06,
"loss": 361.3969,
"step": 30
},
{
"epoch": 1.43,
"learning_rate": 5.0756792824610625e-06,
"loss": 347.9836,
"step": 40
},
{
"epoch": 1.79,
"learning_rate": 3.415667885750354e-06,
"loss": 312.0525,
"step": 50
},
{
"epoch": 2.0,
"eval_f1": 0.0,
"eval_loss": 266.168701171875,
"eval_runtime": 2.5812,
"eval_samples_per_second": 68.187,
"eval_steps_per_second": 1.162,
"step": 56
},
{
"epoch": 2.14,
"learning_rate": 1.8566928200840125e-06,
"loss": 324.8967,
"step": 60
},
{
"epoch": 2.5,
"learning_rate": 6.683150371596022e-07,
"loss": 286.3452,
"step": 70
},
{
"epoch": 2.86,
"learning_rate": 5.601585171798051e-08,
"loss": 280.638,
"step": 80
},
{
"epoch": 3.0,
"eval_f1": 0.0,
"eval_loss": 253.1309051513672,
"eval_runtime": 2.6388,
"eval_samples_per_second": 66.696,
"eval_steps_per_second": 1.137,
"step": 84
},
{
"epoch": 3.0,
"step": 84,
"total_flos": 2697390194688.0,
"train_loss": 348.7352382114955,
"train_runtime": 184.625,
"train_samples_per_second": 14.251,
"train_steps_per_second": 0.455
}
],
"logging_steps": 10,
"max_steps": 84,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 2697390194688.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|