File size: 5,041 Bytes
9986b96 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 |
{
"best_metric": 0.3634186694531522,
"best_model_checkpoint": "/xdisk/msurdeanu/enoriega/kw_pubmed/kw_pubmed_1000_0.0003/checkpoint-12",
"epoch": 1.1524390243902438,
"global_step": 52,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09,
"eval_accuracy": 0.3436084987809126,
"eval_loss": 4.372271537780762,
"eval_runtime": 16.6642,
"eval_samples_per_second": 600.088,
"eval_steps_per_second": 18.783,
"step": 4
},
{
"epoch": 0.11,
"learning_rate": 0.00029466666666666666,
"loss": 6.0386,
"step": 5
},
{
"epoch": 0.17,
"eval_accuracy": 0.34417450365726227,
"eval_loss": 4.2112579345703125,
"eval_runtime": 16.5968,
"eval_samples_per_second": 602.524,
"eval_steps_per_second": 18.859,
"step": 8
},
{
"epoch": 0.22,
"learning_rate": 0.0002893333333333333,
"loss": 3.7573,
"step": 10
},
{
"epoch": 0.26,
"eval_accuracy": 0.3634186694531522,
"eval_loss": 4.2079362869262695,
"eval_runtime": 16.5847,
"eval_samples_per_second": 602.965,
"eval_steps_per_second": 18.873,
"step": 12
},
{
"epoch": 0.33,
"learning_rate": 0.00028266666666666663,
"loss": 2.9944,
"step": 15
},
{
"epoch": 0.35,
"eval_accuracy": 0.3512713340299547,
"eval_loss": 4.3369622230529785,
"eval_runtime": 16.6084,
"eval_samples_per_second": 602.106,
"eval_steps_per_second": 18.846,
"step": 16
},
{
"epoch": 0.44,
"learning_rate": 0.000276,
"loss": 2.7048,
"step": 20
},
{
"epoch": 0.44,
"eval_accuracy": 0.30673110414489724,
"eval_loss": 4.859361171722412,
"eval_runtime": 16.6048,
"eval_samples_per_second": 602.234,
"eval_steps_per_second": 18.85,
"step": 20
},
{
"epoch": 0.52,
"eval_accuracy": 0.3382967607105538,
"eval_loss": 4.492859840393066,
"eval_runtime": 16.5439,
"eval_samples_per_second": 604.454,
"eval_steps_per_second": 18.919,
"step": 24
},
{
"epoch": 0.54,
"learning_rate": 0.00027066666666666667,
"loss": 2.9458,
"step": 25
},
{
"epoch": 0.61,
"eval_accuracy": 0.34077847439916403,
"eval_loss": 4.514556884765625,
"eval_runtime": 16.5866,
"eval_samples_per_second": 602.897,
"eval_steps_per_second": 18.871,
"step": 28
},
{
"epoch": 0.65,
"learning_rate": 0.00026399999999999997,
"loss": 2.3783,
"step": 30
},
{
"epoch": 0.7,
"eval_accuracy": 0.3429989550679206,
"eval_loss": 4.5680060386657715,
"eval_runtime": 16.5703,
"eval_samples_per_second": 603.491,
"eval_steps_per_second": 18.889,
"step": 32
},
{
"epoch": 0.76,
"learning_rate": 0.0002573333333333333,
"loss": 2.2485,
"step": 35
},
{
"epoch": 0.78,
"eval_accuracy": 0.34770114942528735,
"eval_loss": 4.509522914886475,
"eval_runtime": 16.5871,
"eval_samples_per_second": 602.877,
"eval_steps_per_second": 18.87,
"step": 36
},
{
"epoch": 0.87,
"learning_rate": 0.00025066666666666667,
"loss": 2.1701,
"step": 40
},
{
"epoch": 0.87,
"eval_accuracy": 0.3449146638801811,
"eval_loss": 4.4971489906311035,
"eval_runtime": 16.5577,
"eval_samples_per_second": 603.949,
"eval_steps_per_second": 18.904,
"step": 40
},
{
"epoch": 0.96,
"eval_accuracy": 0.33207070707070707,
"eval_loss": 4.7050604820251465,
"eval_runtime": 16.5693,
"eval_samples_per_second": 603.527,
"eval_steps_per_second": 18.89,
"step": 44
},
{
"epoch": 0.98,
"learning_rate": 0.000244,
"loss": 2.0861,
"step": 45
},
{
"epoch": 1.07,
"eval_accuracy": 0.3310257749912922,
"eval_loss": 4.761545658111572,
"eval_runtime": 16.5548,
"eval_samples_per_second": 604.054,
"eval_steps_per_second": 18.907,
"step": 48
},
{
"epoch": 1.11,
"learning_rate": 0.00023733333333333332,
"loss": 2.4168,
"step": 50
},
{
"epoch": 1.15,
"eval_accuracy": 0.33938523162661094,
"eval_loss": 4.7085795402526855,
"eval_runtime": 16.5811,
"eval_samples_per_second": 603.096,
"eval_steps_per_second": 18.877,
"step": 52
},
{
"epoch": 1.15,
"step": 52,
"total_flos": 3.18987289303776e+16,
"train_loss": 2.93648067345986,
"train_runtime": 1938.9621,
"train_samples_per_second": 947.283,
"train_steps_per_second": 0.116
}
],
"max_steps": 225,
"num_train_epochs": 5,
"total_flos": 3.18987289303776e+16,
"trial_name": null,
"trial_params": null
}
|