Training in progress, epoch 22, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1227009528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6753c78b7c0ef33c0e06df98fc2b17af826195a3d977ff76dfe9dc01bc829d36
|
| 3 |
size 1227009528
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2454133690
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:017a44d3ac93ea4bfa9fd21994bffd93889bffa070c7758ab5420752aaad3c5b
|
| 3 |
size 2454133690
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd02ebed0dfeff533894ca511f1b34ff916fe512d4945de5b16f69750a67af48
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4565350eeb4b76edcdc222b4a803476b9ee67ea5c3c110e2c10f3af6e808474b
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 34.
|
| 3 |
-
"best_model_checkpoint": "/kaggle/working/output/checkpoint-
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -2093,6 +2093,105 @@
|
|
| 2093 |
"eval_samples_per_second": 26.452,
|
| 2094 |
"eval_steps_per_second": 3.324,
|
| 2095 |
"step": 27405
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2096 |
}
|
| 2097 |
],
|
| 2098 |
"logging_steps": 100,
|
|
@@ -2107,7 +2206,7 @@
|
|
| 2107 |
"early_stopping_threshold": 0.0
|
| 2108 |
},
|
| 2109 |
"attributes": {
|
| 2110 |
-
"early_stopping_patience_counter":
|
| 2111 |
}
|
| 2112 |
},
|
| 2113 |
"TrainerControl": {
|
|
@@ -2121,7 +2220,7 @@
|
|
| 2121 |
"attributes": {}
|
| 2122 |
}
|
| 2123 |
},
|
| 2124 |
-
"total_flos":
|
| 2125 |
"train_batch_size": 8,
|
| 2126 |
"trial_name": null,
|
| 2127 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 34.53865432739258,
|
| 3 |
+
"best_model_checkpoint": "/kaggle/working/output/checkpoint-28710",
|
| 4 |
+
"epoch": 22.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 28710,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 2093 |
"eval_samples_per_second": 26.452,
|
| 2094 |
"eval_steps_per_second": 3.324,
|
| 2095 |
"step": 27405
|
| 2096 |
+
},
|
| 2097 |
+
{
|
| 2098 |
+
"epoch": 21.0727969348659,
|
| 2099 |
+
"grad_norm": 2.287121534347534,
|
| 2100 |
+
"learning_rate": 3.683572796934866e-05,
|
| 2101 |
+
"loss": 32.9962,
|
| 2102 |
+
"step": 27500
|
| 2103 |
+
},
|
| 2104 |
+
{
|
| 2105 |
+
"epoch": 21.149425287356323,
|
| 2106 |
+
"grad_norm": 2.5622124671936035,
|
| 2107 |
+
"learning_rate": 3.678783524904214e-05,
|
| 2108 |
+
"loss": 33.2565,
|
| 2109 |
+
"step": 27600
|
| 2110 |
+
},
|
| 2111 |
+
{
|
| 2112 |
+
"epoch": 21.226053639846743,
|
| 2113 |
+
"grad_norm": 2.2134974002838135,
|
| 2114 |
+
"learning_rate": 3.673994252873563e-05,
|
| 2115 |
+
"loss": 33.7442,
|
| 2116 |
+
"step": 27700
|
| 2117 |
+
},
|
| 2118 |
+
{
|
| 2119 |
+
"epoch": 21.302681992337163,
|
| 2120 |
+
"grad_norm": 2.574054002761841,
|
| 2121 |
+
"learning_rate": 3.669204980842912e-05,
|
| 2122 |
+
"loss": 33.7998,
|
| 2123 |
+
"step": 27800
|
| 2124 |
+
},
|
| 2125 |
+
{
|
| 2126 |
+
"epoch": 21.379310344827587,
|
| 2127 |
+
"grad_norm": 2.8479721546173096,
|
| 2128 |
+
"learning_rate": 3.6644157088122604e-05,
|
| 2129 |
+
"loss": 33.2015,
|
| 2130 |
+
"step": 27900
|
| 2131 |
+
},
|
| 2132 |
+
{
|
| 2133 |
+
"epoch": 21.455938697318008,
|
| 2134 |
+
"grad_norm": 4.845319747924805,
|
| 2135 |
+
"learning_rate": 3.659626436781609e-05,
|
| 2136 |
+
"loss": 33.7904,
|
| 2137 |
+
"step": 28000
|
| 2138 |
+
},
|
| 2139 |
+
{
|
| 2140 |
+
"epoch": 21.532567049808428,
|
| 2141 |
+
"grad_norm": 2.353726863861084,
|
| 2142 |
+
"learning_rate": 3.6548371647509584e-05,
|
| 2143 |
+
"loss": 33.7207,
|
| 2144 |
+
"step": 28100
|
| 2145 |
+
},
|
| 2146 |
+
{
|
| 2147 |
+
"epoch": 21.60919540229885,
|
| 2148 |
+
"grad_norm": 3.003556966781616,
|
| 2149 |
+
"learning_rate": 3.650047892720307e-05,
|
| 2150 |
+
"loss": 33.297,
|
| 2151 |
+
"step": 28200
|
| 2152 |
+
},
|
| 2153 |
+
{
|
| 2154 |
+
"epoch": 21.685823754789272,
|
| 2155 |
+
"grad_norm": 4.815252304077148,
|
| 2156 |
+
"learning_rate": 3.645258620689656e-05,
|
| 2157 |
+
"loss": 33.3036,
|
| 2158 |
+
"step": 28300
|
| 2159 |
+
},
|
| 2160 |
+
{
|
| 2161 |
+
"epoch": 21.762452107279692,
|
| 2162 |
+
"grad_norm": 3.0622081756591797,
|
| 2163 |
+
"learning_rate": 3.640469348659004e-05,
|
| 2164 |
+
"loss": 33.3661,
|
| 2165 |
+
"step": 28400
|
| 2166 |
+
},
|
| 2167 |
+
{
|
| 2168 |
+
"epoch": 21.839080459770116,
|
| 2169 |
+
"grad_norm": 3.3728883266448975,
|
| 2170 |
+
"learning_rate": 3.6356800766283525e-05,
|
| 2171 |
+
"loss": 32.8782,
|
| 2172 |
+
"step": 28500
|
| 2173 |
+
},
|
| 2174 |
+
{
|
| 2175 |
+
"epoch": 21.915708812260537,
|
| 2176 |
+
"grad_norm": 2.2338080406188965,
|
| 2177 |
+
"learning_rate": 3.630890804597701e-05,
|
| 2178 |
+
"loss": 33.0412,
|
| 2179 |
+
"step": 28600
|
| 2180 |
+
},
|
| 2181 |
+
{
|
| 2182 |
+
"epoch": 21.992337164750957,
|
| 2183 |
+
"grad_norm": 3.717360019683838,
|
| 2184 |
+
"learning_rate": 3.62610153256705e-05,
|
| 2185 |
+
"loss": 33.0318,
|
| 2186 |
+
"step": 28700
|
| 2187 |
+
},
|
| 2188 |
+
{
|
| 2189 |
+
"epoch": 22.0,
|
| 2190 |
+
"eval_loss": 34.53865432739258,
|
| 2191 |
+
"eval_runtime": 49.3318,
|
| 2192 |
+
"eval_samples_per_second": 26.454,
|
| 2193 |
+
"eval_steps_per_second": 3.324,
|
| 2194 |
+
"step": 28710
|
| 2195 |
}
|
| 2196 |
],
|
| 2197 |
"logging_steps": 100,
|
|
|
|
| 2206 |
"early_stopping_threshold": 0.0
|
| 2207 |
},
|
| 2208 |
"attributes": {
|
| 2209 |
+
"early_stopping_patience_counter": 0
|
| 2210 |
}
|
| 2211 |
},
|
| 2212 |
"TrainerControl": {
|
|
|
|
| 2220 |
"attributes": {}
|
| 2221 |
}
|
| 2222 |
},
|
| 2223 |
+
"total_flos": 3.0960839867037696e+16,
|
| 2224 |
"train_batch_size": 8,
|
| 2225 |
"trial_name": null,
|
| 2226 |
"trial_params": null
|