maesneako committed on
Commit
a3ffcb5
1 Parent(s): 288011f

Training in progress, step 36000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:901549b07aaf63b12bd802d712d91a5646fb35f565d1b05050f243886d4ebaa7
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9920ab3b7fcf347744f3df6a4d73d6e5424b66f7f9cac3561d3623f5918f8297
3
  size 995604017
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4098fae9838da744ec4fa71d34991cfebeae0470f963eb11f11a440dd2e9bcd
3
  size 510396521
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20e6bbfaf75a65090cf93fcbf806e6c68aa1ba846abaa532e34d979a48354dd6
3
  size 510396521
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dfee4e5f88f9c88746033f9a3b7585327a05941de93726140dc0ca30595f09e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac2d72c15bf5b0aed76c4f006a7cbdf876848f08a7f53ee5df9432db53697c40
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04256808ab66f22115493a5a889d290b950c9ad2888c8968213907e22a6aa234
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e96cb9ff72c5907038895fe00e41f20b04c0701ff63b53fc3111d5dcbacfe411
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": 3.926462411880493,
3
  "best_model_checkpoint": "./ES_corlec/checkpoint-2000",
4
- "epoch": 6.740681998413957,
5
- "global_step": 34000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -550,11 +550,43 @@
550
  "eval_samples_per_second": 42.212,
551
  "eval_steps_per_second": 2.639,
552
  "step": 34000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
553
  }
554
  ],
555
  "max_steps": 50440,
556
  "num_train_epochs": 10,
557
- "total_flos": 4.16374466688e+16,
558
  "trial_name": null,
559
  "trial_params": null
560
  }
 
1
  {
2
  "best_metric": 3.926462411880493,
3
  "best_model_checkpoint": "./ES_corlec/checkpoint-2000",
4
+ "epoch": 7.137192704203014,
5
+ "global_step": 36000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
550
  "eval_samples_per_second": 42.212,
551
  "eval_steps_per_second": 2.639,
552
  "step": 34000
553
+ },
554
+ {
555
+ "epoch": 6.84,
556
+ "learning_rate": 6.345541401273886e-06,
557
+ "loss": 0.6306,
558
+ "step": 34500
559
+ },
560
+ {
561
+ "epoch": 6.94,
562
+ "learning_rate": 6.1464968152866244e-06,
563
+ "loss": 0.6146,
564
+ "step": 35000
565
+ },
566
+ {
567
+ "epoch": 7.04,
568
+ "learning_rate": 5.947452229299363e-06,
569
+ "loss": 0.5998,
570
+ "step": 35500
571
+ },
572
+ {
573
+ "epoch": 7.14,
574
+ "learning_rate": 5.748407643312103e-06,
575
+ "loss": 0.5837,
576
+ "step": 36000
577
+ },
578
+ {
579
+ "epoch": 7.14,
580
+ "eval_loss": 5.752773761749268,
581
+ "eval_runtime": 959.2742,
582
+ "eval_samples_per_second": 42.2,
583
+ "eval_steps_per_second": 2.638,
584
+ "step": 36000
585
  }
586
  ],
587
  "max_steps": 50440,
588
  "num_train_epochs": 10,
589
+ "total_flos": 4.40860643136e+16,
590
  "trial_name": null,
591
  "trial_params": null
592
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4098fae9838da744ec4fa71d34991cfebeae0470f963eb11f11a440dd2e9bcd
3
  size 510396521
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20e6bbfaf75a65090cf93fcbf806e6c68aa1ba846abaa532e34d979a48354dd6
3
  size 510396521
stderr.slurm CHANGED
The diff for this file is too large to render. See raw diff
 
stdout.slurm CHANGED
@@ -82,3 +82,8 @@
82
  {'loss': 0.6436, 'learning_rate': 6.942675159235669e-06, 'epoch': 6.54}
83
  {'loss': 0.6435, 'learning_rate': 6.7436305732484085e-06, 'epoch': 6.64}
84
  {'loss': 0.635, 'learning_rate': 6.544585987261147e-06, 'epoch': 6.74}
 
 
 
 
 
 
82
  {'loss': 0.6436, 'learning_rate': 6.942675159235669e-06, 'epoch': 6.54}
83
  {'loss': 0.6435, 'learning_rate': 6.7436305732484085e-06, 'epoch': 6.64}
84
  {'loss': 0.635, 'learning_rate': 6.544585987261147e-06, 'epoch': 6.74}
85
+ {'eval_loss': 5.690598011016846, 'eval_runtime': 958.9891, 'eval_samples_per_second': 42.212, 'eval_steps_per_second': 2.639, 'epoch': 6.74}
86
+ {'loss': 0.6306, 'learning_rate': 6.345541401273886e-06, 'epoch': 6.84}
87
+ {'loss': 0.6146, 'learning_rate': 6.1464968152866244e-06, 'epoch': 6.94}
88
+ {'loss': 0.5998, 'learning_rate': 5.947452229299363e-06, 'epoch': 7.04}
89
+ {'loss': 0.5837, 'learning_rate': 5.748407643312103e-06, 'epoch': 7.14}