rdemorais commited on
Commit
922bf2e
1 Parent(s): 86f15c7

Training in progress, step 24200

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83610511bf655017bef6d56a87dc4398518746f6cb634305881687d4e9150310
3
  size 2226478553
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:582d50a5a8f019141e250e840870fee5833dcefaefb338a10e0a481e3b50a51c
3
  size 2226478553
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5939e05ea79762928ba2ffbc3aab0bbb4a9497c4028bf156702d8652c176c5c3
3
  size 1113252715
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:505bf07c8a223940e75e097e032e480f06d37db6918c15d3e73cac6183432a98
3
  size 1113252715
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee1fbec3c9acaf1c116f8c0eed78eaf363d1d6ca3fedc4eb95e1c9cc80326ac3
3
  size 17563
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88a6c9c9d9a0e90d25aefe3b1eec75c6a8d9602de8536842eb2cffe609f70cab
3
  size 17563
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81d62c4b0994f9b5cb61c52fa5a98a0dd932b4d2df04a8e35e3e4f5ab8129258
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23bf027f9216fbe8b17c91852878854c7eb1a9b37ceb42d5492b73cb3332b194
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:557da7379fb4a9930f4429bc4aeda90eb5da85c9e16fa137579aefdcef084998
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4216a163f98df4253a3d2dda8c01570c7d3175dacb0f7d8217e0ca3a27141a6
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.9660751405429207,
5
- "global_step": 24000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -14406,11 +14406,131 @@
14406
  "learning_rate": 1.7895276330400616e-06,
14407
  "loss": 1.08,
14408
  "step": 24000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14409
  }
14410
  ],
14411
  "max_steps": 24414,
14412
  "num_train_epochs": 2,
14413
- "total_flos": 3.242550318207621e+18,
14414
  "trial_name": null,
14415
  "trial_params": null
14416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.9824590147147672,
5
+ "global_step": 24200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
14406
  "learning_rate": 1.7895276330400616e-06,
14407
  "loss": 1.08,
14408
  "step": 24000
14409
+ },
14410
+ {
14411
+ "epoch": 1.97,
14412
+ "learning_rate": 1.7468181429913726e-06,
14413
+ "loss": 1.0849,
14414
+ "step": 24010
14415
+ },
14416
+ {
14417
+ "epoch": 1.97,
14418
+ "learning_rate": 1.704108652942684e-06,
14419
+ "loss": 1.0995,
14420
+ "step": 24020
14421
+ },
14422
+ {
14423
+ "epoch": 1.97,
14424
+ "learning_rate": 1.661399162893995e-06,
14425
+ "loss": 1.0837,
14426
+ "step": 24030
14427
+ },
14428
+ {
14429
+ "epoch": 1.97,
14430
+ "learning_rate": 1.6186896728453061e-06,
14431
+ "loss": 1.0905,
14432
+ "step": 24040
14433
+ },
14434
+ {
14435
+ "epoch": 1.97,
14436
+ "learning_rate": 1.5759801827966177e-06,
14437
+ "loss": 1.0942,
14438
+ "step": 24050
14439
+ },
14440
+ {
14441
+ "epoch": 1.97,
14442
+ "learning_rate": 1.5332706927479286e-06,
14443
+ "loss": 1.0924,
14444
+ "step": 24060
14445
+ },
14446
+ {
14447
+ "epoch": 1.97,
14448
+ "learning_rate": 1.49056120269924e-06,
14449
+ "loss": 1.0925,
14450
+ "step": 24070
14451
+ },
14452
+ {
14453
+ "epoch": 1.97,
14454
+ "learning_rate": 1.447851712650551e-06,
14455
+ "loss": 1.0886,
14456
+ "step": 24080
14457
+ },
14458
+ {
14459
+ "epoch": 1.97,
14460
+ "learning_rate": 1.4051422226018622e-06,
14461
+ "loss": 1.1016,
14462
+ "step": 24090
14463
+ },
14464
+ {
14465
+ "epoch": 1.97,
14466
+ "learning_rate": 1.3624327325531735e-06,
14467
+ "loss": 1.0909,
14468
+ "step": 24100
14469
+ },
14470
+ {
14471
+ "epoch": 1.98,
14472
+ "learning_rate": 1.3197232425044844e-06,
14473
+ "loss": 1.1014,
14474
+ "step": 24110
14475
+ },
14476
+ {
14477
+ "epoch": 1.98,
14478
+ "learning_rate": 1.2770137524557958e-06,
14479
+ "loss": 1.0913,
14480
+ "step": 24120
14481
+ },
14482
+ {
14483
+ "epoch": 1.98,
14484
+ "learning_rate": 1.234304262407107e-06,
14485
+ "loss": 1.0932,
14486
+ "step": 24130
14487
+ },
14488
+ {
14489
+ "epoch": 1.98,
14490
+ "learning_rate": 1.191594772358418e-06,
14491
+ "loss": 1.1051,
14492
+ "step": 24140
14493
+ },
14494
+ {
14495
+ "epoch": 1.98,
14496
+ "learning_rate": 1.1488852823097293e-06,
14497
+ "loss": 1.0882,
14498
+ "step": 24150
14499
+ },
14500
+ {
14501
+ "epoch": 1.98,
14502
+ "learning_rate": 1.1061757922610405e-06,
14503
+ "loss": 1.0917,
14504
+ "step": 24160
14505
+ },
14506
+ {
14507
+ "epoch": 1.98,
14508
+ "learning_rate": 1.0634663022123516e-06,
14509
+ "loss": 1.0956,
14510
+ "step": 24170
14511
+ },
14512
+ {
14513
+ "epoch": 1.98,
14514
+ "learning_rate": 1.020756812163663e-06,
14515
+ "loss": 1.0957,
14516
+ "step": 24180
14517
+ },
14518
+ {
14519
+ "epoch": 1.98,
14520
+ "learning_rate": 9.780473221149738e-07,
14521
+ "loss": 1.0886,
14522
+ "step": 24190
14523
+ },
14524
+ {
14525
+ "epoch": 1.98,
14526
+ "learning_rate": 9.353378320662852e-07,
14527
+ "loss": 1.0992,
14528
+ "step": 24200
14529
  }
14530
  ],
14531
  "max_steps": 24414,
14532
  "num_train_epochs": 2,
14533
+ "total_flos": 3.269571533476485e+18,
14534
  "trial_name": null,
14535
  "trial_params": null
14536
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5939e05ea79762928ba2ffbc3aab0bbb4a9497c4028bf156702d8652c176c5c3
3
  size 1113252715
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:505bf07c8a223940e75e097e032e480f06d37db6918c15d3e73cac6183432a98
3
  size 1113252715