hariniiiiiiiiii commited on
Commit
311485d
1 Parent(s): 8de7203

Training in progress, step 3500

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8fab46afba02b9664ca3eb5d4fc9d6f6e2f8b5882d6ad6a6f5bda047e6e1fbd
3
  size 4115013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dee4476ab05abceb3ef157f762adb7b7ff2d24c295242ef85bdc7aa9baf339c
3
  size 4115013
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:347082fb15486e1ce22829a5a9a17790976d005235f55b77b736bf422a4e49ef
3
  size 2329702453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c224c2d13e7b12f967f2ff294e904f86dfd319b281de69e29b4ca2a309522ccf
3
  size 2329702453
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b47b76e58d40ed6257704d7bde822b19860484ca61d1450fb6ff42df326b971d
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ace5200ce81b04d30bdcefc552d51b2d31acee8aa24f561db0693af839dd1d4
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e2936137cf5142f34c072a19508ea41ce0d6a55cae7261f6c7cb486aa65afbe
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cb02ae445517d5b6d8cceafc80181af9dac20207d12d240d963ba9d73872898
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.927135988164221,
5
- "global_step": 3000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2166,11 +2166,371 @@
2166
  "eval_samples_per_second": 0.213,
2167
  "eval_steps_per_second": 0.213,
2168
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2169
  }
2170
  ],
2171
  "max_steps": 3542,
2172
  "num_train_epochs": 7,
2173
- "total_flos": 7.297038322401485e+16,
2174
  "trial_name": null,
2175
  "trial_params": null
2176
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.915300209591912,
5
+ "global_step": 3500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2166
  "eval_samples_per_second": 0.213,
2167
  "eval_steps_per_second": 0.213,
2168
  "step": 3000
2169
+ },
2170
+ {
2171
+ "epoch": 5.95,
2172
+ "learning_rate": 7.705677867902664e-05,
2173
+ "loss": 0.0824,
2174
+ "step": 3010
2175
+ },
2176
+ {
2177
+ "epoch": 5.97,
2178
+ "learning_rate": 7.560834298957127e-05,
2179
+ "loss": 0.1048,
2180
+ "step": 3020
2181
+ },
2182
+ {
2183
+ "epoch": 5.99,
2184
+ "learning_rate": 7.415990730011588e-05,
2185
+ "loss": 0.1295,
2186
+ "step": 3030
2187
+ },
2188
+ {
2189
+ "epoch": 6.01,
2190
+ "learning_rate": 7.271147161066048e-05,
2191
+ "loss": 0.1296,
2192
+ "step": 3040
2193
+ },
2194
+ {
2195
+ "epoch": 6.03,
2196
+ "learning_rate": 7.12630359212051e-05,
2197
+ "loss": 0.1146,
2198
+ "step": 3050
2199
+ },
2200
+ {
2201
+ "epoch": 6.05,
2202
+ "learning_rate": 6.981460023174971e-05,
2203
+ "loss": 0.1158,
2204
+ "step": 3060
2205
+ },
2206
+ {
2207
+ "epoch": 6.07,
2208
+ "learning_rate": 6.836616454229433e-05,
2209
+ "loss": 0.094,
2210
+ "step": 3070
2211
+ },
2212
+ {
2213
+ "epoch": 6.09,
2214
+ "learning_rate": 6.691772885283893e-05,
2215
+ "loss": 0.1141,
2216
+ "step": 3080
2217
+ },
2218
+ {
2219
+ "epoch": 6.11,
2220
+ "learning_rate": 6.546929316338354e-05,
2221
+ "loss": 0.1025,
2222
+ "step": 3090
2223
+ },
2224
+ {
2225
+ "epoch": 6.13,
2226
+ "learning_rate": 6.402085747392817e-05,
2227
+ "loss": 0.1474,
2228
+ "step": 3100
2229
+ },
2230
+ {
2231
+ "epoch": 6.13,
2232
+ "eval_loss": 0.9422538876533508,
2233
+ "eval_rouge1": 0.20069541569541566,
2234
+ "eval_rouge2": 0.10303030303030303,
2235
+ "eval_rougeL": 0.19625097125097124,
2236
+ "eval_rougeLsum": 0.19848096348096347,
2237
+ "eval_runtime": 94.7246,
2238
+ "eval_samples_per_second": 0.211,
2239
+ "eval_steps_per_second": 0.211,
2240
+ "step": 3100
2241
+ },
2242
+ {
2243
+ "epoch": 6.15,
2244
+ "learning_rate": 6.257242178447277e-05,
2245
+ "loss": 0.09,
2246
+ "step": 3110
2247
+ },
2248
+ {
2249
+ "epoch": 6.17,
2250
+ "learning_rate": 6.112398609501739e-05,
2251
+ "loss": 0.1235,
2252
+ "step": 3120
2253
+ },
2254
+ {
2255
+ "epoch": 6.19,
2256
+ "learning_rate": 5.9675550405561996e-05,
2257
+ "loss": 0.0733,
2258
+ "step": 3130
2259
+ },
2260
+ {
2261
+ "epoch": 6.21,
2262
+ "learning_rate": 5.822711471610661e-05,
2263
+ "loss": 0.1035,
2264
+ "step": 3140
2265
+ },
2266
+ {
2267
+ "epoch": 6.22,
2268
+ "learning_rate": 5.6778679026651216e-05,
2269
+ "loss": 0.1027,
2270
+ "step": 3150
2271
+ },
2272
+ {
2273
+ "epoch": 6.24,
2274
+ "learning_rate": 5.533024333719583e-05,
2275
+ "loss": 0.0863,
2276
+ "step": 3160
2277
+ },
2278
+ {
2279
+ "epoch": 6.26,
2280
+ "learning_rate": 5.388180764774044e-05,
2281
+ "loss": 0.095,
2282
+ "step": 3170
2283
+ },
2284
+ {
2285
+ "epoch": 6.28,
2286
+ "learning_rate": 5.243337195828506e-05,
2287
+ "loss": 0.1103,
2288
+ "step": 3180
2289
+ },
2290
+ {
2291
+ "epoch": 6.3,
2292
+ "learning_rate": 5.098493626882966e-05,
2293
+ "loss": 0.1325,
2294
+ "step": 3190
2295
+ },
2296
+ {
2297
+ "epoch": 6.32,
2298
+ "learning_rate": 4.953650057937428e-05,
2299
+ "loss": 0.1052,
2300
+ "step": 3200
2301
+ },
2302
+ {
2303
+ "epoch": 6.32,
2304
+ "eval_loss": 0.9328528642654419,
2305
+ "eval_rouge1": 0.2023076923076923,
2306
+ "eval_rouge2": 0.1102272727272727,
2307
+ "eval_rougeL": 0.19999999999999998,
2308
+ "eval_rougeLsum": 0.2,
2309
+ "eval_runtime": 92.8302,
2310
+ "eval_samples_per_second": 0.215,
2311
+ "eval_steps_per_second": 0.215,
2312
+ "step": 3200
2313
+ },
2314
+ {
2315
+ "epoch": 6.34,
2316
+ "learning_rate": 4.808806488991889e-05,
2317
+ "loss": 0.1293,
2318
+ "step": 3210
2319
+ },
2320
+ {
2321
+ "epoch": 6.36,
2322
+ "learning_rate": 4.6639629200463506e-05,
2323
+ "loss": 0.1301,
2324
+ "step": 3220
2325
+ },
2326
+ {
2327
+ "epoch": 6.38,
2328
+ "learning_rate": 4.519119351100811e-05,
2329
+ "loss": 0.0873,
2330
+ "step": 3230
2331
+ },
2332
+ {
2333
+ "epoch": 6.4,
2334
+ "learning_rate": 4.3742757821552725e-05,
2335
+ "loss": 0.1017,
2336
+ "step": 3240
2337
+ },
2338
+ {
2339
+ "epoch": 6.42,
2340
+ "learning_rate": 4.2294322132097335e-05,
2341
+ "loss": 0.0846,
2342
+ "step": 3250
2343
+ },
2344
+ {
2345
+ "epoch": 6.44,
2346
+ "learning_rate": 4.084588644264195e-05,
2347
+ "loss": 0.0898,
2348
+ "step": 3260
2349
+ },
2350
+ {
2351
+ "epoch": 6.46,
2352
+ "learning_rate": 3.9397450753186555e-05,
2353
+ "loss": 0.1494,
2354
+ "step": 3270
2355
+ },
2356
+ {
2357
+ "epoch": 6.48,
2358
+ "learning_rate": 3.794901506373117e-05,
2359
+ "loss": 0.0742,
2360
+ "step": 3280
2361
+ },
2362
+ {
2363
+ "epoch": 6.5,
2364
+ "learning_rate": 3.650057937427578e-05,
2365
+ "loss": 0.0793,
2366
+ "step": 3290
2367
+ },
2368
+ {
2369
+ "epoch": 6.52,
2370
+ "learning_rate": 3.50521436848204e-05,
2371
+ "loss": 0.1203,
2372
+ "step": 3300
2373
+ },
2374
+ {
2375
+ "epoch": 6.52,
2376
+ "eval_loss": 0.9380243420600891,
2377
+ "eval_rouge1": 0.2023076923076923,
2378
+ "eval_rouge2": 0.1102272727272727,
2379
+ "eval_rougeL": 0.19999999999999998,
2380
+ "eval_rougeLsum": 0.2,
2381
+ "eval_runtime": 94.5066,
2382
+ "eval_samples_per_second": 0.212,
2383
+ "eval_steps_per_second": 0.212,
2384
+ "step": 3300
2385
+ },
2386
+ {
2387
+ "epoch": 6.54,
2388
+ "learning_rate": 3.360370799536501e-05,
2389
+ "loss": 0.1257,
2390
+ "step": 3310
2391
+ },
2392
+ {
2393
+ "epoch": 6.56,
2394
+ "learning_rate": 3.215527230590962e-05,
2395
+ "loss": 0.1177,
2396
+ "step": 3320
2397
+ },
2398
+ {
2399
+ "epoch": 6.58,
2400
+ "learning_rate": 3.070683661645423e-05,
2401
+ "loss": 0.1359,
2402
+ "step": 3330
2403
+ },
2404
+ {
2405
+ "epoch": 6.6,
2406
+ "learning_rate": 2.9258400926998842e-05,
2407
+ "loss": 0.1303,
2408
+ "step": 3340
2409
+ },
2410
+ {
2411
+ "epoch": 6.62,
2412
+ "learning_rate": 2.7809965237543452e-05,
2413
+ "loss": 0.0968,
2414
+ "step": 3350
2415
+ },
2416
+ {
2417
+ "epoch": 6.64,
2418
+ "learning_rate": 2.6361529548088065e-05,
2419
+ "loss": 0.1061,
2420
+ "step": 3360
2421
+ },
2422
+ {
2423
+ "epoch": 6.66,
2424
+ "learning_rate": 2.4913093858632675e-05,
2425
+ "loss": 0.1307,
2426
+ "step": 3370
2427
+ },
2428
+ {
2429
+ "epoch": 6.68,
2430
+ "learning_rate": 2.346465816917729e-05,
2431
+ "loss": 0.0981,
2432
+ "step": 3380
2433
+ },
2434
+ {
2435
+ "epoch": 6.7,
2436
+ "learning_rate": 2.20162224797219e-05,
2437
+ "loss": 0.0901,
2438
+ "step": 3390
2439
+ },
2440
+ {
2441
+ "epoch": 6.72,
2442
+ "learning_rate": 2.0567786790266515e-05,
2443
+ "loss": 0.1125,
2444
+ "step": 3400
2445
+ },
2446
+ {
2447
+ "epoch": 6.72,
2448
+ "eval_loss": 0.9421626925468445,
2449
+ "eval_rouge1": 0.18958041958041955,
2450
+ "eval_rouge2": 0.0977272727272727,
2451
+ "eval_rougeL": 0.18615384615384614,
2452
+ "eval_rougeLsum": 0.19,
2453
+ "eval_runtime": 96.4239,
2454
+ "eval_samples_per_second": 0.207,
2455
+ "eval_steps_per_second": 0.207,
2456
+ "step": 3400
2457
+ },
2458
+ {
2459
+ "epoch": 6.74,
2460
+ "learning_rate": 1.9119351100811125e-05,
2461
+ "loss": 0.0823,
2462
+ "step": 3410
2463
+ },
2464
+ {
2465
+ "epoch": 6.76,
2466
+ "learning_rate": 1.767091541135574e-05,
2467
+ "loss": 0.1016,
2468
+ "step": 3420
2469
+ },
2470
+ {
2471
+ "epoch": 6.78,
2472
+ "learning_rate": 1.6222479721900348e-05,
2473
+ "loss": 0.1172,
2474
+ "step": 3430
2475
+ },
2476
+ {
2477
+ "epoch": 6.8,
2478
+ "learning_rate": 1.477404403244496e-05,
2479
+ "loss": 0.0959,
2480
+ "step": 3440
2481
+ },
2482
+ {
2483
+ "epoch": 6.82,
2484
+ "learning_rate": 1.3325608342989572e-05,
2485
+ "loss": 0.1534,
2486
+ "step": 3450
2487
+ },
2488
+ {
2489
+ "epoch": 6.84,
2490
+ "learning_rate": 1.1877172653534183e-05,
2491
+ "loss": 0.125,
2492
+ "step": 3460
2493
+ },
2494
+ {
2495
+ "epoch": 6.86,
2496
+ "learning_rate": 1.0428736964078795e-05,
2497
+ "loss": 0.1221,
2498
+ "step": 3470
2499
+ },
2500
+ {
2501
+ "epoch": 6.88,
2502
+ "learning_rate": 8.980301274623406e-06,
2503
+ "loss": 0.1391,
2504
+ "step": 3480
2505
+ },
2506
+ {
2507
+ "epoch": 6.9,
2508
+ "learning_rate": 7.531865585168019e-06,
2509
+ "loss": 0.0986,
2510
+ "step": 3490
2511
+ },
2512
+ {
2513
+ "epoch": 6.92,
2514
+ "learning_rate": 6.083429895712631e-06,
2515
+ "loss": 0.1323,
2516
+ "step": 3500
2517
+ },
2518
+ {
2519
+ "epoch": 6.92,
2520
+ "eval_loss": 0.9433181881904602,
2521
+ "eval_rouge1": 0.19,
2522
+ "eval_rouge2": 0.0977272727272727,
2523
+ "eval_rougeL": 0.18615384615384617,
2524
+ "eval_rougeLsum": 0.19,
2525
+ "eval_runtime": 94.6833,
2526
+ "eval_samples_per_second": 0.211,
2527
+ "eval_steps_per_second": 0.211,
2528
+ "step": 3500
2529
  }
2530
  ],
2531
  "max_steps": 3542,
2532
  "num_train_epochs": 7,
2533
+ "total_flos": 8.516307584906035e+16,
2534
  "trial_name": null,
2535
  "trial_params": null
2536
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:347082fb15486e1ce22829a5a9a17790976d005235f55b77b736bf422a4e49ef
3
  size 2329702453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c224c2d13e7b12f967f2ff294e904f86dfd319b281de69e29b4ca2a309522ccf
3
  size 2329702453
runs/Feb09_05-19-49_2f481ea0b382/events.out.tfevents.1675920384.2f481ea0b382.229.4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fdd1cd3228b9e1861b5bb9722e297776fac3573f23a401d1935533fabc65f3f
3
- size 14426
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6b509b0a8c5d255f18347ea36ef5ec5e33b87ce49dd65c62b7d4e64b70e8b5a
3
+ size 24646