maesneako committed on
Commit
a3ffcb5
1 Parent(s): 288011f

Training in progress, step 36000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:901549b07aaf63b12bd802d712d91a5646fb35f565d1b05050f243886d4ebaa7
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9920ab3b7fcf347744f3df6a4d73d6e5424b66f7f9cac3561d3623f5918f8297
3
  size 995604017
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4098fae9838da744ec4fa71d34991cfebeae0470f963eb11f11a440dd2e9bcd
3
  size 510396521
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20e6bbfaf75a65090cf93fcbf806e6c68aa1ba846abaa532e34d979a48354dd6
3
  size 510396521
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dfee4e5f88f9c88746033f9a3b7585327a05941de93726140dc0ca30595f09e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac2d72c15bf5b0aed76c4f006a7cbdf876848f08a7f53ee5df9432db53697c40
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04256808ab66f22115493a5a889d290b950c9ad2888c8968213907e22a6aa234
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e96cb9ff72c5907038895fe00e41f20b04c0701ff63b53fc3111d5dcbacfe411
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": 3.926462411880493,
3
  "best_model_checkpoint": "./ES_corlec/checkpoint-2000",
4
- "epoch": 6.740681998413957,
5
- "global_step": 34000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -550,11 +550,43 @@
550
  "eval_samples_per_second": 42.212,
551
  "eval_steps_per_second": 2.639,
552
  "step": 34000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
553
  }
554
  ],
555
  "max_steps": 50440,
556
  "num_train_epochs": 10,
557
- "total_flos": 4.16374466688e+16,
558
  "trial_name": null,
559
  "trial_params": null
560
  }
 
1
  {
2
  "best_metric": 3.926462411880493,
3
  "best_model_checkpoint": "./ES_corlec/checkpoint-2000",
4
+ "epoch": 7.137192704203014,
5
+ "global_step": 36000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
550
  "eval_samples_per_second": 42.212,
551
  "eval_steps_per_second": 2.639,
552
  "step": 34000
553
+ },
554
+ {
555
+ "epoch": 6.84,
556
+ "learning_rate": 6.345541401273886e-06,
557
+ "loss": 0.6306,
558
+ "step": 34500
559
+ },
560
+ {
561
+ "epoch": 6.94,
562
+ "learning_rate": 6.1464968152866244e-06,
563
+ "loss": 0.6146,
564
+ "step": 35000
565
+ },
566
+ {
567
+ "epoch": 7.04,
568
+ "learning_rate": 5.947452229299363e-06,
569
+ "loss": 0.5998,
570
+ "step": 35500
571
+ },
572
+ {
573
+ "epoch": 7.14,
574
+ "learning_rate": 5.748407643312103e-06,
575
+ "loss": 0.5837,
576
+ "step": 36000
577
+ },
578
+ {
579
+ "epoch": 7.14,
580
+ "eval_loss": 5.752773761749268,
581
+ "eval_runtime": 959.2742,
582
+ "eval_samples_per_second": 42.2,
583
+ "eval_steps_per_second": 2.638,
584
+ "step": 36000
585
  }
586
  ],
587
  "max_steps": 50440,
588
  "num_train_epochs": 10,
589
+ "total_flos": 4.40860643136e+16,
590
  "trial_name": null,
591
  "trial_params": null
592
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4098fae9838da744ec4fa71d34991cfebeae0470f963eb11f11a440dd2e9bcd
3
  size 510396521
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20e6bbfaf75a65090cf93fcbf806e6c68aa1ba846abaa532e34d979a48354dd6
3
  size 510396521
stderr.slurm CHANGED
The diff for this file is too large to render. See raw diff
 
stdout.slurm CHANGED
@@ -82,3 +82,8 @@
82
  {'loss': 0.6436, 'learning_rate': 6.942675159235669e-06, 'epoch': 6.54}
83
  {'loss': 0.6435, 'learning_rate': 6.7436305732484085e-06, 'epoch': 6.64}
84
  {'loss': 0.635, 'learning_rate': 6.544585987261147e-06, 'epoch': 6.74}
 
 
 
 
 
 
82
  {'loss': 0.6436, 'learning_rate': 6.942675159235669e-06, 'epoch': 6.54}
83
  {'loss': 0.6435, 'learning_rate': 6.7436305732484085e-06, 'epoch': 6.64}
84
  {'loss': 0.635, 'learning_rate': 6.544585987261147e-06, 'epoch': 6.74}
85
+ {'eval_loss': 5.690598011016846, 'eval_runtime': 958.9891, 'eval_samples_per_second': 42.212, 'eval_steps_per_second': 2.639, 'epoch': 6.74}
86
+ {'loss': 0.6306, 'learning_rate': 6.345541401273886e-06, 'epoch': 6.84}
87
+ {'loss': 0.6146, 'learning_rate': 6.1464968152866244e-06, 'epoch': 6.94}
88
+ {'loss': 0.5998, 'learning_rate': 5.947452229299363e-06, 'epoch': 7.04}
89
+ {'loss': 0.5837, 'learning_rate': 5.748407643312103e-06, 'epoch': 7.14}