Training in progress, step 190000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96660d6f5a7546cbae5d80ec368bfbaaa75064d1479c94509ede1b3d6838f65f
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c901a2d1d260fa0bc2b99dd1f8c30cc7440abde8559ba2f2b780bf74e2ae733d
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ca7db713d4f0100abd3c2ce75a90131b2b1127a32f6d53a12e44189ac5bfd49
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28ef7b0271e31a74520c33608ebde7cd9f3ac788227fbc005c86bef695cd16c2
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c0b47f0086776db3094a2327d6fe5e3eeb30a5608648466c6de6c513e5229e7
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4bd4611b27cdbeb566b16b618fb459ca34885ac0e852cb51f4e80411f805aa94
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c3bf7d39d50349c13985b5d79e8e43f189781b9a084733bba5f6296b8118011
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e4cafc3e4f3bc8162dfbd038f1b8cf8d33435669ac5a883017100fd78daf053
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 3.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -2226,6 +2226,66 @@
|
|
2226 |
"learning_rate": 0.00010783062297404974,
|
2227 |
"loss": 0.338,
|
2228 |
"step": 185000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2229 |
}
|
2230 |
],
|
2231 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.2340405613569247,
|
5 |
+
"global_step": 190000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
2226 |
"learning_rate": 0.00010783062297404974,
|
2227 |
"loss": 0.338,
|
2228 |
"step": 185000
|
2229 |
+
},
|
2230 |
+
{
|
2231 |
+
"epoch": 3.16,
|
2232 |
+
"learning_rate": 0.00010762870236335414,
|
2233 |
+
"loss": 0.3379,
|
2234 |
+
"step": 185500
|
2235 |
+
},
|
2236 |
+
{
|
2237 |
+
"epoch": 3.17,
|
2238 |
+
"learning_rate": 0.0001074265090685204,
|
2239 |
+
"loss": 0.338,
|
2240 |
+
"step": 186000
|
2241 |
+
},
|
2242 |
+
{
|
2243 |
+
"epoch": 3.17,
|
2244 |
+
"learning_rate": 0.00010722404508511468,
|
2245 |
+
"loss": 0.3377,
|
2246 |
+
"step": 186500
|
2247 |
+
},
|
2248 |
+
{
|
2249 |
+
"epoch": 3.18,
|
2250 |
+
"learning_rate": 0.00010702131241137474,
|
2251 |
+
"loss": 0.3376,
|
2252 |
+
"step": 187000
|
2253 |
+
},
|
2254 |
+
{
|
2255 |
+
"epoch": 3.19,
|
2256 |
+
"learning_rate": 0.00010681831304819025,
|
2257 |
+
"loss": 0.3378,
|
2258 |
+
"step": 187500
|
2259 |
+
},
|
2260 |
+
{
|
2261 |
+
"epoch": 3.2,
|
2262 |
+
"learning_rate": 0.00010661504899908298,
|
2263 |
+
"loss": 0.3377,
|
2264 |
+
"step": 188000
|
2265 |
+
},
|
2266 |
+
{
|
2267 |
+
"epoch": 3.21,
|
2268 |
+
"learning_rate": 0.00010641192958446453,
|
2269 |
+
"loss": 0.3384,
|
2270 |
+
"step": 188500
|
2271 |
+
},
|
2272 |
+
{
|
2273 |
+
"epoch": 3.22,
|
2274 |
+
"learning_rate": 0.00010620814270384223,
|
2275 |
+
"loss": 0.3378,
|
2276 |
+
"step": 189000
|
2277 |
+
},
|
2278 |
+
{
|
2279 |
+
"epoch": 3.23,
|
2280 |
+
"learning_rate": 0.00010600450550732924,
|
2281 |
+
"loss": 0.3378,
|
2282 |
+
"step": 189500
|
2283 |
+
},
|
2284 |
+
{
|
2285 |
+
"epoch": 3.23,
|
2286 |
+
"learning_rate": 0.00010580020382426679,
|
2287 |
+
"loss": 0.3376,
|
2288 |
+
"step": 190000
|
2289 |
}
|
2290 |
],
|
2291 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c901a2d1d260fa0bc2b99dd1f8c30cc7440abde8559ba2f2b780bf74e2ae733d
|
3 |
size 201355195
|