mohammadmahdinouri commited on
Commit
270b13d
·
verified ·
1 Parent(s): 765f91f

Training in progress, step 96000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22f1c37cc426dc46a70e9ba9431d95b3b817b015cb7a365920fba7cb1da04131
3
  size 487156538
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6d0fcc97e8f38ef4f177931a0b80e81f26fc2c3df44b1a834117199e9947a28
3
  size 487156538
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2dd6b659ab7b65c0d1882aa3c4598cd5d54ad5c9cccc80e4fe48f89095a0ac34
3
  size 1059459406
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22ff6e0e3931ffc4543840546a265cb104c9f8d044f751f1d1689af1785ac414
3
  size 1059459406
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:679d5a0473f97f636733470316fd86786ed42325e42384361148a8b340e7a238
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f16967f2c5425e2d3062a5c645e3099ddca9226921c4faea80b6f226c345b14e
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5905b280e26e8d88b4f51a4002ce7641eeded8cd7d7cd7d8ae9eb69a80d70016
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a57ad377b3c1d54cf00ec0f4a45ee8dcdfa0afda14d68b22cde2fefed3a794b
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69af864234b5e9471139d16353a90ba6758d66cf728856336954ab1232ff8d66
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db33e02267588a29827a8a403fef0f365d2173a37eee0f81da95484780955b80
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c0a03cf7d79472b567011bf3dcac8e639a555b7ca08e2eeba682b37c371ec47
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34b2c97eeef7042413901b15e9528f4663411f959a4a8a8086a8807b96ca132e
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da8924fd9ebf1bfc34c9d222c0eeb1de5a903b56bd5f2b099e5c970eea697fbe
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bee3f55096050a1b77c497eef45ddf3e44fe16bc128ff4c2f549ae26e06537c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.14072489615983977,
6
  "eval_steps": 500,
7
- "global_step": 95000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -33258,6 +33258,356 @@
33258
  "learning_rate": 0.00047666377903599896,
33259
  "loss": 16.2668,
33260
  "step": 95000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33261
  }
33262
  ],
33263
  "logging_steps": 20,
@@ -33277,7 +33627,7 @@
33277
  "attributes": {}
33278
  }
33279
  },
33280
- "total_flos": 2.1142852403132052e+20,
33281
  "train_batch_size": 48,
33282
  "trial_name": null,
33283
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.14220621085625915,
6
  "eval_steps": 500,
7
+ "global_step": 96000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
33258
  "learning_rate": 0.00047666377903599896,
33259
  "loss": 16.2668,
33260
  "step": 95000
33261
+ },
33262
+ {
33263
+ "epoch": 0.14075452245376815,
33264
+ "grad_norm": 8.0,
33265
+ "learning_rate": 0.00047665884010099135,
33266
+ "loss": 16.2326,
33267
+ "step": 95020
33268
+ },
33269
+ {
33270
+ "epoch": 0.14078414874769657,
33271
+ "grad_norm": 8.0625,
33272
+ "learning_rate": 0.0004766539011659838,
33273
+ "loss": 16.2558,
33274
+ "step": 95040
33275
+ },
33276
+ {
33277
+ "epoch": 0.14081377504162496,
33278
+ "grad_norm": 9.9375,
33279
+ "learning_rate": 0.0004766489622309762,
33280
+ "loss": 16.2363,
33281
+ "step": 95060
33282
+ },
33283
+ {
33284
+ "epoch": 0.14084340133555334,
33285
+ "grad_norm": 7.65625,
33286
+ "learning_rate": 0.0004766440232959687,
33287
+ "loss": 16.2672,
33288
+ "step": 95080
33289
+ },
33290
+ {
33291
+ "epoch": 0.14087302762948173,
33292
+ "grad_norm": 6.84375,
33293
+ "learning_rate": 0.0004766390843609611,
33294
+ "loss": 16.3032,
33295
+ "step": 95100
33296
+ },
33297
+ {
33298
+ "epoch": 0.14090265392341011,
33299
+ "grad_norm": 7.5625,
33300
+ "learning_rate": 0.00047663414542595354,
33301
+ "loss": 16.1972,
33302
+ "step": 95120
33303
+ },
33304
+ {
33305
+ "epoch": 0.1409322802173385,
33306
+ "grad_norm": 7.96875,
33307
+ "learning_rate": 0.00047662920649094593,
33308
+ "loss": 16.2712,
33309
+ "step": 95140
33310
+ },
33311
+ {
33312
+ "epoch": 0.1409619065112669,
33313
+ "grad_norm": 9.8125,
33314
+ "learning_rate": 0.00047662426755593843,
33315
+ "loss": 16.2999,
33316
+ "step": 95160
33317
+ },
33318
+ {
33319
+ "epoch": 0.14099153280519527,
33320
+ "grad_norm": 7.71875,
33321
+ "learning_rate": 0.0004766193286209308,
33322
+ "loss": 16.2646,
33323
+ "step": 95180
33324
+ },
33325
+ {
33326
+ "epoch": 0.14102115909912366,
33327
+ "grad_norm": 7.5,
33328
+ "learning_rate": 0.0004766143896859232,
33329
+ "loss": 16.2533,
33330
+ "step": 95200
33331
+ },
33332
+ {
33333
+ "epoch": 0.14105078539305205,
33334
+ "grad_norm": 8.4375,
33335
+ "learning_rate": 0.00047660945075091567,
33336
+ "loss": 16.2281,
33337
+ "step": 95220
33338
+ },
33339
+ {
33340
+ "epoch": 0.14108041168698043,
33341
+ "grad_norm": 6.65625,
33342
+ "learning_rate": 0.0004766045118159081,
33343
+ "loss": 16.2489,
33344
+ "step": 95240
33345
+ },
33346
+ {
33347
+ "epoch": 0.14111003798090882,
33348
+ "grad_norm": 8.3125,
33349
+ "learning_rate": 0.00047659957288090056,
33350
+ "loss": 16.2813,
33351
+ "step": 95260
33352
+ },
33353
+ {
33354
+ "epoch": 0.1411396642748372,
33355
+ "grad_norm": 9.0,
33356
+ "learning_rate": 0.00047659463394589296,
33357
+ "loss": 16.2566,
33358
+ "step": 95280
33359
+ },
33360
+ {
33361
+ "epoch": 0.1411692905687656,
33362
+ "grad_norm": 8.75,
33363
+ "learning_rate": 0.00047658969501088546,
33364
+ "loss": 16.2978,
33365
+ "step": 95300
33366
+ },
33367
+ {
33368
+ "epoch": 0.14119891686269398,
33369
+ "grad_norm": 7.34375,
33370
+ "learning_rate": 0.00047658475607587785,
33371
+ "loss": 16.2168,
33372
+ "step": 95320
33373
+ },
33374
+ {
33375
+ "epoch": 0.14122854315662237,
33376
+ "grad_norm": 7.8125,
33377
+ "learning_rate": 0.0004765798171408703,
33378
+ "loss": 16.2683,
33379
+ "step": 95340
33380
+ },
33381
+ {
33382
+ "epoch": 0.14125816945055075,
33383
+ "grad_norm": 11.0,
33384
+ "learning_rate": 0.0004765748782058627,
33385
+ "loss": 16.3143,
33386
+ "step": 95360
33387
+ },
33388
+ {
33389
+ "epoch": 0.14128779574447914,
33390
+ "grad_norm": 8.0,
33391
+ "learning_rate": 0.0004765699392708552,
33392
+ "loss": 16.2963,
33393
+ "step": 95380
33394
+ },
33395
+ {
33396
+ "epoch": 0.14131742203840753,
33397
+ "grad_norm": 7.96875,
33398
+ "learning_rate": 0.0004765650003358476,
33399
+ "loss": 16.2477,
33400
+ "step": 95400
33401
+ },
33402
+ {
33403
+ "epoch": 0.1413470483323359,
33404
+ "grad_norm": 8.8125,
33405
+ "learning_rate": 0.00047656006140084004,
33406
+ "loss": 16.2887,
33407
+ "step": 95420
33408
+ },
33409
+ {
33410
+ "epoch": 0.1413766746262643,
33411
+ "grad_norm": 6.875,
33412
+ "learning_rate": 0.00047655512246583243,
33413
+ "loss": 16.3026,
33414
+ "step": 95440
33415
+ },
33416
+ {
33417
+ "epoch": 0.14140630092019268,
33418
+ "grad_norm": 7.28125,
33419
+ "learning_rate": 0.00047655018353082493,
33420
+ "loss": 16.2857,
33421
+ "step": 95460
33422
+ },
33423
+ {
33424
+ "epoch": 0.14143592721412107,
33425
+ "grad_norm": 8.75,
33426
+ "learning_rate": 0.0004765452445958173,
33427
+ "loss": 16.2834,
33428
+ "step": 95480
33429
+ },
33430
+ {
33431
+ "epoch": 0.14146555350804946,
33432
+ "grad_norm": 9.3125,
33433
+ "learning_rate": 0.0004765403056608098,
33434
+ "loss": 16.1975,
33435
+ "step": 95500
33436
+ },
33437
+ {
33438
+ "epoch": 0.14149517980197784,
33439
+ "grad_norm": 8.5625,
33440
+ "learning_rate": 0.00047653536672580217,
33441
+ "loss": 16.2638,
33442
+ "step": 95520
33443
+ },
33444
+ {
33445
+ "epoch": 0.14152480609590623,
33446
+ "grad_norm": 7.53125,
33447
+ "learning_rate": 0.0004765304277907946,
33448
+ "loss": 16.209,
33449
+ "step": 95540
33450
+ },
33451
+ {
33452
+ "epoch": 0.14155443238983462,
33453
+ "grad_norm": 7.09375,
33454
+ "learning_rate": 0.00047652548885578706,
33455
+ "loss": 16.2836,
33456
+ "step": 95560
33457
+ },
33458
+ {
33459
+ "epoch": 0.141584058683763,
33460
+ "grad_norm": 9.375,
33461
+ "learning_rate": 0.00047652054992077946,
33462
+ "loss": 16.2385,
33463
+ "step": 95580
33464
+ },
33465
+ {
33466
+ "epoch": 0.1416136849776914,
33467
+ "grad_norm": 6.15625,
33468
+ "learning_rate": 0.00047651561098577196,
33469
+ "loss": 16.3201,
33470
+ "step": 95600
33471
+ },
33472
+ {
33473
+ "epoch": 0.14164331127161978,
33474
+ "grad_norm": 7.09375,
33475
+ "learning_rate": 0.00047651067205076435,
33476
+ "loss": 16.2798,
33477
+ "step": 95620
33478
+ },
33479
+ {
33480
+ "epoch": 0.14167293756554816,
33481
+ "grad_norm": 8.3125,
33482
+ "learning_rate": 0.0004765057331157568,
33483
+ "loss": 16.3551,
33484
+ "step": 95640
33485
+ },
33486
+ {
33487
+ "epoch": 0.14170256385947655,
33488
+ "grad_norm": 6.25,
33489
+ "learning_rate": 0.0004765007941807492,
33490
+ "loss": 16.244,
33491
+ "step": 95660
33492
+ },
33493
+ {
33494
+ "epoch": 0.14173219015340496,
33495
+ "grad_norm": 9.375,
33496
+ "learning_rate": 0.0004764958552457417,
33497
+ "loss": 16.2651,
33498
+ "step": 95680
33499
+ },
33500
+ {
33501
+ "epoch": 0.14176181644733335,
33502
+ "grad_norm": 14.3125,
33503
+ "learning_rate": 0.0004764909163107341,
33504
+ "loss": 16.2386,
33505
+ "step": 95700
33506
+ },
33507
+ {
33508
+ "epoch": 0.14179144274126174,
33509
+ "grad_norm": 7.15625,
33510
+ "learning_rate": 0.00047648597737572654,
33511
+ "loss": 16.2521,
33512
+ "step": 95720
33513
+ },
33514
+ {
33515
+ "epoch": 0.14182106903519012,
33516
+ "grad_norm": 7.5,
33517
+ "learning_rate": 0.00047648103844071893,
33518
+ "loss": 16.2774,
33519
+ "step": 95740
33520
+ },
33521
+ {
33522
+ "epoch": 0.1418506953291185,
33523
+ "grad_norm": 6.9375,
33524
+ "learning_rate": 0.00047647609950571143,
33525
+ "loss": 16.2116,
33526
+ "step": 95760
33527
+ },
33528
+ {
33529
+ "epoch": 0.1418803216230469,
33530
+ "grad_norm": 6.125,
33531
+ "learning_rate": 0.0004764711605707038,
33532
+ "loss": 16.2365,
33533
+ "step": 95780
33534
+ },
33535
+ {
33536
+ "epoch": 0.14190994791697528,
33537
+ "grad_norm": 8.8125,
33538
+ "learning_rate": 0.0004764662216356963,
33539
+ "loss": 16.267,
33540
+ "step": 95800
33541
+ },
33542
+ {
33543
+ "epoch": 0.14193957421090367,
33544
+ "grad_norm": 7.8125,
33545
+ "learning_rate": 0.00047646128270068867,
33546
+ "loss": 16.382,
33547
+ "step": 95820
33548
+ },
33549
+ {
33550
+ "epoch": 0.14196920050483205,
33551
+ "grad_norm": 10.5,
33552
+ "learning_rate": 0.00047645634376568117,
33553
+ "loss": 16.2225,
33554
+ "step": 95840
33555
+ },
33556
+ {
33557
+ "epoch": 0.14199882679876044,
33558
+ "grad_norm": 6.9375,
33559
+ "learning_rate": 0.00047645140483067356,
33560
+ "loss": 16.2772,
33561
+ "step": 95860
33562
+ },
33563
+ {
33564
+ "epoch": 0.14202845309268883,
33565
+ "grad_norm": 6.59375,
33566
+ "learning_rate": 0.00047644646589566596,
33567
+ "loss": 16.2962,
33568
+ "step": 95880
33569
+ },
33570
+ {
33571
+ "epoch": 0.14205807938661721,
33572
+ "grad_norm": 11.8125,
33573
+ "learning_rate": 0.00047644152696065846,
33574
+ "loss": 16.3097,
33575
+ "step": 95900
33576
+ },
33577
+ {
33578
+ "epoch": 0.1420877056805456,
33579
+ "grad_norm": 7.0625,
33580
+ "learning_rate": 0.00047643658802565085,
33581
+ "loss": 16.2627,
33582
+ "step": 95920
33583
+ },
33584
+ {
33585
+ "epoch": 0.142117331974474,
33586
+ "grad_norm": 7.75,
33587
+ "learning_rate": 0.0004764316490906433,
33588
+ "loss": 16.2114,
33589
+ "step": 95940
33590
+ },
33591
+ {
33592
+ "epoch": 0.14214695826840237,
33593
+ "grad_norm": 20.25,
33594
+ "learning_rate": 0.0004764267101556357,
33595
+ "loss": 16.1394,
33596
+ "step": 95960
33597
+ },
33598
+ {
33599
+ "epoch": 0.14217658456233076,
33600
+ "grad_norm": 6.84375,
33601
+ "learning_rate": 0.0004764217712206282,
33602
+ "loss": 16.261,
33603
+ "step": 95980
33604
+ },
33605
+ {
33606
+ "epoch": 0.14220621085625915,
33607
+ "grad_norm": 7.34375,
33608
+ "learning_rate": 0.0004764168322856206,
33609
+ "loss": 16.226,
33610
+ "step": 96000
33611
  }
33612
  ],
33613
  "logging_steps": 20,
 
33627
  "attributes": {}
33628
  }
33629
  },
33630
+ "total_flos": 2.1365408984116442e+20,
33631
  "train_batch_size": 48,
33632
  "trial_name": null,
33633
  "trial_params": null