File size: 4,926 Bytes
0b47bca |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
{
"best_metric": 0.02586853690445423,
"best_model_checkpoint": "autotrain-kno3k-fiasf/checkpoint-678",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 678,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"grad_norm": 10.68883228302002,
"learning_rate": 3.0882352941176475e-05,
"loss": 0.8263,
"step": 45
},
{
"epoch": 0.4,
"grad_norm": 58.380653381347656,
"learning_rate": 4.852459016393443e-05,
"loss": 0.2512,
"step": 90
},
{
"epoch": 0.6,
"grad_norm": 0.1297217607498169,
"learning_rate": 4.491803278688525e-05,
"loss": 0.1081,
"step": 135
},
{
"epoch": 0.8,
"grad_norm": 0.23067115247249603,
"learning_rate": 4.122950819672131e-05,
"loss": 0.1776,
"step": 180
},
{
"epoch": 1.0,
"grad_norm": 0.0700414851307869,
"learning_rate": 3.754098360655738e-05,
"loss": 0.0775,
"step": 225
},
{
"epoch": 1.0,
"eval_accuracy": 0.9844530816213215,
"eval_f1_macro": 0.9790876623736272,
"eval_f1_micro": 0.9844530816213215,
"eval_f1_weighted": 0.9845629146174618,
"eval_loss": 0.05487915500998497,
"eval_precision_macro": 0.9728622841711356,
"eval_precision_micro": 0.9844530816213215,
"eval_precision_weighted": 0.9849390553498074,
"eval_recall_macro": 0.9856721822982112,
"eval_recall_micro": 0.9844530816213215,
"eval_recall_weighted": 0.9844530816213215,
"eval_runtime": 10.4593,
"eval_samples_per_second": 172.191,
"eval_steps_per_second": 10.804,
"step": 226
},
{
"epoch": 1.19,
"grad_norm": 0.03147374466061592,
"learning_rate": 3.39344262295082e-05,
"loss": 0.1134,
"step": 270
},
{
"epoch": 1.39,
"grad_norm": 0.15661084651947021,
"learning_rate": 3.0245901639344264e-05,
"loss": 0.069,
"step": 315
},
{
"epoch": 1.59,
"grad_norm": 0.04975114390254021,
"learning_rate": 2.6557377049180327e-05,
"loss": 0.0821,
"step": 360
},
{
"epoch": 1.79,
"grad_norm": 0.02913733199238777,
"learning_rate": 2.2868852459016393e-05,
"loss": 0.0636,
"step": 405
},
{
"epoch": 1.99,
"grad_norm": 0.03793076425790787,
"learning_rate": 1.918032786885246e-05,
"loss": 0.135,
"step": 450
},
{
"epoch": 2.0,
"eval_accuracy": 0.9922265408106608,
"eval_f1_macro": 0.9885732001605064,
"eval_f1_micro": 0.9922265408106608,
"eval_f1_weighted": 0.9922495320181636,
"eval_loss": 0.0289547611027956,
"eval_precision_macro": 0.9857016924464328,
"eval_precision_micro": 0.9922265408106608,
"eval_precision_weighted": 0.9923116897185307,
"eval_recall_macro": 0.991509527641691,
"eval_recall_micro": 0.9922265408106608,
"eval_recall_weighted": 0.9922265408106608,
"eval_runtime": 10.5141,
"eval_samples_per_second": 171.294,
"eval_steps_per_second": 10.747,
"step": 452
},
{
"epoch": 2.19,
"grad_norm": 23.56169891357422,
"learning_rate": 1.5491803278688525e-05,
"loss": 0.0508,
"step": 495
},
{
"epoch": 2.39,
"grad_norm": 0.0823952853679657,
"learning_rate": 1.1803278688524591e-05,
"loss": 0.0443,
"step": 540
},
{
"epoch": 2.59,
"grad_norm": 0.025019163265824318,
"learning_rate": 8.114754098360657e-06,
"loss": 0.1144,
"step": 585
},
{
"epoch": 2.79,
"grad_norm": 0.830748975276947,
"learning_rate": 4.426229508196722e-06,
"loss": 0.0624,
"step": 630
},
{
"epoch": 2.99,
"grad_norm": 1.670173168182373,
"learning_rate": 7.377049180327869e-07,
"loss": 0.131,
"step": 675
},
{
"epoch": 3.0,
"eval_accuracy": 0.9938922820655192,
"eval_f1_macro": 0.990657441189161,
"eval_f1_micro": 0.9938922820655192,
"eval_f1_weighted": 0.9939035669908675,
"eval_loss": 0.02586853690445423,
"eval_precision_macro": 0.9889770605232528,
"eval_precision_micro": 0.9938922820655192,
"eval_precision_weighted": 0.9939283176272281,
"eval_recall_macro": 0.9923605914714783,
"eval_recall_micro": 0.9938922820655192,
"eval_recall_weighted": 0.9938922820655192,
"eval_runtime": 10.4887,
"eval_samples_per_second": 171.709,
"eval_steps_per_second": 10.774,
"step": 678
}
],
"logging_steps": 45,
"max_steps": 678,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 1.3599465039988531e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}
|