joelniklaus commited on
Commit
821af66
1 Parent(s): c7f1a30

Training in progress, step 400000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec52e161c040be7e38dd28cdb6b7975772bf13ec1fffcbd27afb7335a6270d3a
3
  size 3480942553
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02a8f872099d0391c9507ce223e1e3ff4a42eb12b57cc249f652e861b7223e3f
3
  size 3480942553
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f2bc09c3b3d3f60b3345d5a4c9827dd55cb85a2dd43ba6fa5f3f3a535d72a5b
3
  size 1740493675
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37a0263f3d21f63b75bfbe41e9f1a282e52db05c95edbe95df44515b1ea2c1e2
3
  size 1740493675
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7536ea950af6d4098dda45ff8663f7a5dde8ce235a93ae14671349f23642027
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45bdf8a607dd22dd050193b5e8274ec1988a5df7ee95345fb0b30620efbfb73c
3
  size 13611
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7536ea950af6d4098dda45ff8663f7a5dde8ce235a93ae14671349f23642027
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45bdf8a607dd22dd050193b5e8274ec1988a5df7ee95345fb0b30620efbfb73c
3
  size 13611
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7536ea950af6d4098dda45ff8663f7a5dde8ce235a93ae14671349f23642027
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45bdf8a607dd22dd050193b5e8274ec1988a5df7ee95345fb0b30620efbfb73c
3
  size 13611
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7536ea950af6d4098dda45ff8663f7a5dde8ce235a93ae14671349f23642027
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45bdf8a607dd22dd050193b5e8274ec1988a5df7ee95345fb0b30620efbfb73c
3
  size 13611
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7536ea950af6d4098dda45ff8663f7a5dde8ce235a93ae14671349f23642027
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45bdf8a607dd22dd050193b5e8274ec1988a5df7ee95345fb0b30620efbfb73c
3
  size 13611
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7536ea950af6d4098dda45ff8663f7a5dde8ce235a93ae14671349f23642027
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45bdf8a607dd22dd050193b5e8274ec1988a5df7ee95345fb0b30620efbfb73c
3
  size 13611
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7536ea950af6d4098dda45ff8663f7a5dde8ce235a93ae14671349f23642027
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45bdf8a607dd22dd050193b5e8274ec1988a5df7ee95345fb0b30620efbfb73c
3
  size 13611
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7536ea950af6d4098dda45ff8663f7a5dde8ce235a93ae14671349f23642027
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45bdf8a607dd22dd050193b5e8274ec1988a5df7ee95345fb0b30620efbfb73c
3
  size 13611
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8f220426de5a076dbb6f66f54955d3a3fc0acbab10b1bd60cf9472b552bfdca
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e4ecef8b58c710458716a0153f8519567dd2a15c4728bc445f0af4d3fb15782
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.35,
5
- "global_step": 350000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2162,11 +2162,319 @@
2162
  "eval_samples_per_second": 86.09,
2163
  "eval_steps_per_second": 1.36,
2164
  "step": 350000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2165
  }
2166
  ],
2167
  "max_steps": 1000000,
2168
  "num_train_epochs": 9223372036854775807,
2169
- "total_flos": 2.08840704393216e+19,
2170
  "trial_name": null,
2171
  "trial_params": null
2172
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4,
5
+ "global_step": 400000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2162
  "eval_samples_per_second": 86.09,
2163
  "eval_steps_per_second": 1.36,
2164
  "step": 350000
2165
+ },
2166
+ {
2167
+ "epoch": 0.35,
2168
+ "learning_rate": 7.720883567456298e-05,
2169
+ "loss": 0.8479,
2170
+ "step": 351000
2171
+ },
2172
+ {
2173
+ "epoch": 0.35,
2174
+ "learning_rate": 7.70699658915369e-05,
2175
+ "loss": 0.8582,
2176
+ "step": 352000
2177
+ },
2178
+ {
2179
+ "epoch": 0.35,
2180
+ "learning_rate": 7.693080007570084e-05,
2181
+ "loss": 0.8729,
2182
+ "step": 353000
2183
+ },
2184
+ {
2185
+ "epoch": 0.35,
2186
+ "learning_rate": 7.679133974894983e-05,
2187
+ "loss": 0.8367,
2188
+ "step": 354000
2189
+ },
2190
+ {
2191
+ "epoch": 0.35,
2192
+ "learning_rate": 7.66515864363997e-05,
2193
+ "loss": 0.8272,
2194
+ "step": 355000
2195
+ },
2196
+ {
2197
+ "epoch": 0.36,
2198
+ "learning_rate": 7.651154166637025e-05,
2199
+ "loss": 0.8526,
2200
+ "step": 356000
2201
+ },
2202
+ {
2203
+ "epoch": 0.36,
2204
+ "learning_rate": 7.637120697036866e-05,
2205
+ "loss": 0.8495,
2206
+ "step": 357000
2207
+ },
2208
+ {
2209
+ "epoch": 0.36,
2210
+ "learning_rate": 7.623058388307269e-05,
2211
+ "loss": 0.8268,
2212
+ "step": 358000
2213
+ },
2214
+ {
2215
+ "epoch": 0.36,
2216
+ "learning_rate": 7.608967394231387e-05,
2217
+ "loss": 0.8643,
2218
+ "step": 359000
2219
+ },
2220
+ {
2221
+ "epoch": 0.36,
2222
+ "learning_rate": 7.594847868906076e-05,
2223
+ "loss": 0.8786,
2224
+ "step": 360000
2225
+ },
2226
+ {
2227
+ "epoch": 0.36,
2228
+ "learning_rate": 7.580699966740201e-05,
2229
+ "loss": 0.8769,
2230
+ "step": 361000
2231
+ },
2232
+ {
2233
+ "epoch": 0.36,
2234
+ "learning_rate": 7.566523842452958e-05,
2235
+ "loss": 0.8755,
2236
+ "step": 362000
2237
+ },
2238
+ {
2239
+ "epoch": 0.36,
2240
+ "learning_rate": 7.552319651072164e-05,
2241
+ "loss": 0.8843,
2242
+ "step": 363000
2243
+ },
2244
+ {
2245
+ "epoch": 0.36,
2246
+ "learning_rate": 7.538087547932585e-05,
2247
+ "loss": 0.8797,
2248
+ "step": 364000
2249
+ },
2250
+ {
2251
+ "epoch": 0.36,
2252
+ "learning_rate": 7.52382768867422e-05,
2253
+ "loss": 0.8719,
2254
+ "step": 365000
2255
+ },
2256
+ {
2257
+ "epoch": 0.37,
2258
+ "learning_rate": 7.509540229240601e-05,
2259
+ "loss": 0.8725,
2260
+ "step": 366000
2261
+ },
2262
+ {
2263
+ "epoch": 0.37,
2264
+ "learning_rate": 7.495225325877103e-05,
2265
+ "loss": 0.874,
2266
+ "step": 367000
2267
+ },
2268
+ {
2269
+ "epoch": 0.37,
2270
+ "learning_rate": 7.480883135129211e-05,
2271
+ "loss": 0.8795,
2272
+ "step": 368000
2273
+ },
2274
+ {
2275
+ "epoch": 0.37,
2276
+ "learning_rate": 7.466513813840825e-05,
2277
+ "loss": 0.8986,
2278
+ "step": 369000
2279
+ },
2280
+ {
2281
+ "epoch": 0.37,
2282
+ "learning_rate": 7.452117519152542e-05,
2283
+ "loss": 0.8921,
2284
+ "step": 370000
2285
+ },
2286
+ {
2287
+ "epoch": 0.37,
2288
+ "learning_rate": 7.437694408499933e-05,
2289
+ "loss": 0.8985,
2290
+ "step": 371000
2291
+ },
2292
+ {
2293
+ "epoch": 0.37,
2294
+ "learning_rate": 7.423244639611826e-05,
2295
+ "loss": 0.8932,
2296
+ "step": 372000
2297
+ },
2298
+ {
2299
+ "epoch": 0.37,
2300
+ "learning_rate": 7.408768370508576e-05,
2301
+ "loss": 0.9084,
2302
+ "step": 373000
2303
+ },
2304
+ {
2305
+ "epoch": 0.37,
2306
+ "learning_rate": 7.394265759500348e-05,
2307
+ "loss": 0.891,
2308
+ "step": 374000
2309
+ },
2310
+ {
2311
+ "epoch": 0.38,
2312
+ "learning_rate": 7.379736965185368e-05,
2313
+ "loss": 0.8804,
2314
+ "step": 375000
2315
+ },
2316
+ {
2317
+ "epoch": 0.38,
2318
+ "learning_rate": 7.365182146448205e-05,
2319
+ "loss": 0.8703,
2320
+ "step": 376000
2321
+ },
2322
+ {
2323
+ "epoch": 0.38,
2324
+ "learning_rate": 7.350601462458024e-05,
2325
+ "loss": 0.8582,
2326
+ "step": 377000
2327
+ },
2328
+ {
2329
+ "epoch": 0.38,
2330
+ "learning_rate": 7.335995072666848e-05,
2331
+ "loss": 0.8407,
2332
+ "step": 378000
2333
+ },
2334
+ {
2335
+ "epoch": 0.38,
2336
+ "learning_rate": 7.32136313680782e-05,
2337
+ "loss": 0.854,
2338
+ "step": 379000
2339
+ },
2340
+ {
2341
+ "epoch": 0.38,
2342
+ "learning_rate": 7.30670581489344e-05,
2343
+ "loss": 0.8649,
2344
+ "step": 380000
2345
+ },
2346
+ {
2347
+ "epoch": 0.38,
2348
+ "learning_rate": 7.292023267213835e-05,
2349
+ "loss": 0.854,
2350
+ "step": 381000
2351
+ },
2352
+ {
2353
+ "epoch": 0.38,
2354
+ "learning_rate": 7.277315654334997e-05,
2355
+ "loss": 0.8608,
2356
+ "step": 382000
2357
+ },
2358
+ {
2359
+ "epoch": 0.38,
2360
+ "learning_rate": 7.262583137097018e-05,
2361
+ "loss": 0.8471,
2362
+ "step": 383000
2363
+ },
2364
+ {
2365
+ "epoch": 0.38,
2366
+ "learning_rate": 7.247825876612353e-05,
2367
+ "loss": 0.8422,
2368
+ "step": 384000
2369
+ },
2370
+ {
2371
+ "epoch": 0.39,
2372
+ "learning_rate": 7.233044034264034e-05,
2373
+ "loss": 0.8405,
2374
+ "step": 385000
2375
+ },
2376
+ {
2377
+ "epoch": 0.39,
2378
+ "learning_rate": 7.218237771703921e-05,
2379
+ "loss": 0.8088,
2380
+ "step": 386000
2381
+ },
2382
+ {
2383
+ "epoch": 0.39,
2384
+ "learning_rate": 7.203407250850928e-05,
2385
+ "loss": 0.825,
2386
+ "step": 387000
2387
+ },
2388
+ {
2389
+ "epoch": 0.39,
2390
+ "learning_rate": 7.188552633889259e-05,
2391
+ "loss": 0.8299,
2392
+ "step": 388000
2393
+ },
2394
+ {
2395
+ "epoch": 0.39,
2396
+ "learning_rate": 7.173674083266624e-05,
2397
+ "loss": 0.8264,
2398
+ "step": 389000
2399
+ },
2400
+ {
2401
+ "epoch": 0.39,
2402
+ "learning_rate": 7.158771761692464e-05,
2403
+ "loss": 0.8145,
2404
+ "step": 390000
2405
+ },
2406
+ {
2407
+ "epoch": 0.39,
2408
+ "learning_rate": 7.143845832136188e-05,
2409
+ "loss": 0.8398,
2410
+ "step": 391000
2411
+ },
2412
+ {
2413
+ "epoch": 0.39,
2414
+ "learning_rate": 7.128896457825364e-05,
2415
+ "loss": 0.8496,
2416
+ "step": 392000
2417
+ },
2418
+ {
2419
+ "epoch": 0.39,
2420
+ "learning_rate": 7.113923802243957e-05,
2421
+ "loss": 0.8582,
2422
+ "step": 393000
2423
+ },
2424
+ {
2425
+ "epoch": 0.39,
2426
+ "learning_rate": 7.09892802913053e-05,
2427
+ "loss": 0.8667,
2428
+ "step": 394000
2429
+ },
2430
+ {
2431
+ "epoch": 0.4,
2432
+ "learning_rate": 7.083909302476453e-05,
2433
+ "loss": 0.8591,
2434
+ "step": 395000
2435
+ },
2436
+ {
2437
+ "epoch": 0.4,
2438
+ "learning_rate": 7.068867786524116e-05,
2439
+ "loss": 0.8623,
2440
+ "step": 396000
2441
+ },
2442
+ {
2443
+ "epoch": 0.4,
2444
+ "learning_rate": 7.053803645765128e-05,
2445
+ "loss": 0.8359,
2446
+ "step": 397000
2447
+ },
2448
+ {
2449
+ "epoch": 0.4,
2450
+ "learning_rate": 7.038717044938519e-05,
2451
+ "loss": 0.8499,
2452
+ "step": 398000
2453
+ },
2454
+ {
2455
+ "epoch": 0.4,
2456
+ "learning_rate": 7.023608149028937e-05,
2457
+ "loss": 0.8445,
2458
+ "step": 399000
2459
+ },
2460
+ {
2461
+ "epoch": 0.4,
2462
+ "learning_rate": 7.008477123264848e-05,
2463
+ "loss": 0.8213,
2464
+ "step": 400000
2465
+ },
2466
+ {
2467
+ "epoch": 0.4,
2468
+ "eval_loss": 0.45939022302627563,
2469
+ "eval_runtime": 195.195,
2470
+ "eval_samples_per_second": 25.615,
2471
+ "eval_steps_per_second": 0.405,
2472
+ "step": 400000
2473
  }
2474
  ],
2475
  "max_steps": 1000000,
2476
  "num_train_epochs": 9223372036854775807,
2477
+ "total_flos": 2.38675090735104e+19,
2478
  "trial_name": null,
2479
  "trial_params": null
2480
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c82b4195e2ad665930b30d576207a9ab7565bc09b17aba2630befee0f99fbd36
3
  size 3439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db82a1b292273e59a4cde694eb5b29617673aa0dcee2fd4598267bfb6eaa669f
3
  size 3439
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f2bc09c3b3d3f60b3345d5a4c9827dd55cb85a2dd43ba6fa5f3f3a535d72a5b
3
  size 1740493675
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37a0263f3d21f63b75bfbe41e9f1a282e52db05c95edbe95df44515b1ea2c1e2
3
  size 1740493675
runs/Feb17_16-41-47_t1v-n-15e54913-w-0/1676653311.9915628/events.out.tfevents.1676653311.t1v-n-15e54913-w-0.3049118.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:529f14567b9eb3b30554167380cebba00e33d5ab1363c2d9aec8276b0e183933
3
+ size 5479
runs/Feb17_16-41-47_t1v-n-15e54913-w-0/events.out.tfevents.1676653311.t1v-n-15e54913-w-0.3049118.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cecd689d24547e1af2694189d2f11648c0d89266a867a001ae080f2743ed5144
3
+ size 12089
runs/Feb24_02-53-48_t1v-n-15e54913-w-0/1677207440.1359367/events.out.tfevents.1677207440.t1v-n-15e54913-w-0.2105622.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0b8f24accbe16d2590aee98c51a5eb1cfb42f64344a5bc621dd2a8d39f51306
3
+ size 5479
runs/Feb24_02-53-48_t1v-n-15e54913-w-0/events.out.tfevents.1677207440.t1v-n-15e54913-w-0.2105622.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4a1ddfd98a25f153e4c36198bfcf09effbe12bdbfa60b593a3163153fc15395
3
+ size 3813
runs/Feb25_19-25-50_t1v-n-15e54913-w-0/1677353360.7470255/events.out.tfevents.1677353360.t1v-n-15e54913-w-0.2265434.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:543b4b6a3982df6f2cc8994797b44896ee7420e4ed18a7bece522c24366190fd
3
+ size 5479
runs/Feb25_19-25-50_t1v-n-15e54913-w-0/events.out.tfevents.1677353360.t1v-n-15e54913-w-0.2265434.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccabefbfbaa0ff0cc0c430d513126b217fe287c8557827607f13fa6dd5bcd1fd
3
+ size 12089
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c82b4195e2ad665930b30d576207a9ab7565bc09b17aba2630befee0f99fbd36
3
  size 3439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db82a1b292273e59a4cde694eb5b29617673aa0dcee2fd4598267bfb6eaa669f
3
  size 3439