joelniklaus committed on
Commit 8ecaf7f
1 Parent(s): baa32fe

Training in progress, step 400000

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:16b43c9945e0ce2a283fd3b266beccfc5ef9a7c144aa23dde30ab8d128cf06e0
+oid sha256:a0e3bf6071dc5700a6bb7a51686fa9e2b9563ee56df64975b55dd82ededfa999
 size 3480942553
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:71115c415d1506074c53f32de1a48ac9cc9ec54aa63aafcaaabfa782a14a0bc7
+oid sha256:9065fc28ad294c806588c1f617b4d767cfb0a58efbd19946ba47348912d230e8
 size 1740493675
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51d1601a635d5dd526d1b3406bfa3992b00fdbf3b654ce60d678051842540cd2
+oid sha256:31b0ade93916a4adb6c726d07a59e23a30743a8c09687fa5f7c62d98e6c5f322
 size 13611
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51d1601a635d5dd526d1b3406bfa3992b00fdbf3b654ce60d678051842540cd2
+oid sha256:31b0ade93916a4adb6c726d07a59e23a30743a8c09687fa5f7c62d98e6c5f322
 size 13611
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51d1601a635d5dd526d1b3406bfa3992b00fdbf3b654ce60d678051842540cd2
+oid sha256:31b0ade93916a4adb6c726d07a59e23a30743a8c09687fa5f7c62d98e6c5f322
 size 13611
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51d1601a635d5dd526d1b3406bfa3992b00fdbf3b654ce60d678051842540cd2
+oid sha256:31b0ade93916a4adb6c726d07a59e23a30743a8c09687fa5f7c62d98e6c5f322
 size 13611
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51d1601a635d5dd526d1b3406bfa3992b00fdbf3b654ce60d678051842540cd2
+oid sha256:31b0ade93916a4adb6c726d07a59e23a30743a8c09687fa5f7c62d98e6c5f322
 size 13611
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51d1601a635d5dd526d1b3406bfa3992b00fdbf3b654ce60d678051842540cd2
+oid sha256:31b0ade93916a4adb6c726d07a59e23a30743a8c09687fa5f7c62d98e6c5f322
 size 13611
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51d1601a635d5dd526d1b3406bfa3992b00fdbf3b654ce60d678051842540cd2
+oid sha256:31b0ade93916a4adb6c726d07a59e23a30743a8c09687fa5f7c62d98e6c5f322
 size 13611
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51d1601a635d5dd526d1b3406bfa3992b00fdbf3b654ce60d678051842540cd2
+oid sha256:31b0ade93916a4adb6c726d07a59e23a30743a8c09687fa5f7c62d98e6c5f322
 size 13611
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8f220426de5a076dbb6f66f54955d3a3fc0acbab10b1bd60cf9472b552bfdca
+oid sha256:3e4ecef8b58c710458716a0153f8519567dd2a15c4728bc445f0af4d3fb15782
 size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.35,
-  "global_step": 350000,
+  "epoch": 0.4,
+  "global_step": 400000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2162,11 +2162,319 @@
       "eval_samples_per_second": 179.583,
       "eval_steps_per_second": 2.837,
       "step": 350000
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 7.720883567456298e-05,
+      "loss": 0.8192,
+      "step": 351000
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 7.70699658915369e-05,
+      "loss": 0.8033,
+      "step": 352000
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 7.693080007570084e-05,
+      "loss": 0.8635,
+      "step": 353000
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 7.679133974894983e-05,
+      "loss": 0.9045,
+      "step": 354000
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 7.66515864363997e-05,
+      "loss": 0.9211,
+      "step": 355000
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 7.651154166637025e-05,
+      "loss": 0.8919,
+      "step": 356000
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 7.637120697036866e-05,
+      "loss": 0.7926,
+      "step": 357000
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 7.623058388307269e-05,
+      "loss": 0.8066,
+      "step": 358000
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 7.608967394231387e-05,
+      "loss": 0.7629,
+      "step": 359000
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 7.594847868906076e-05,
+      "loss": 0.7645,
+      "step": 360000
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 7.580699966740201e-05,
+      "loss": 0.7605,
+      "step": 361000
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 7.566523842452958e-05,
+      "loss": 0.6428,
+      "step": 362000
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 7.552319651072164e-05,
+      "loss": 0.7243,
+      "step": 363000
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 7.538087547932585e-05,
+      "loss": 0.7994,
+      "step": 364000
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 7.52382768867422e-05,
+      "loss": 0.8268,
+      "step": 365000
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 7.509540229240601e-05,
+      "loss": 0.8183,
+      "step": 366000
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 7.495225325877103e-05,
+      "loss": 0.8421,
+      "step": 367000
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 7.480883135129211e-05,
+      "loss": 0.8821,
+      "step": 368000
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 7.466513813840825e-05,
+      "loss": 0.9094,
+      "step": 369000
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 7.452117519152542e-05,
+      "loss": 0.9117,
+      "step": 370000
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 7.437694408499933e-05,
+      "loss": 0.9155,
+      "step": 371000
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 7.423244639611826e-05,
+      "loss": 0.8478,
+      "step": 372000
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 7.408768370508576e-05,
+      "loss": 0.8097,
+      "step": 373000
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 7.394265759500348e-05,
+      "loss": 0.7847,
+      "step": 374000
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 7.379736965185368e-05,
+      "loss": 0.7732,
+      "step": 375000
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 7.365182146448205e-05,
+      "loss": 0.7711,
+      "step": 376000
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 7.350601462458024e-05,
+      "loss": 0.6801,
+      "step": 377000
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 7.335995072666848e-05,
+      "loss": 0.7877,
+      "step": 378000
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 7.32136313680782e-05,
+      "loss": 0.8227,
+      "step": 379000
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 7.30670581489344e-05,
+      "loss": 0.8378,
+      "step": 380000
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 7.292023267213835e-05,
+      "loss": 0.8633,
+      "step": 381000
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 7.277315654334997e-05,
+      "loss": 0.8532,
+      "step": 382000
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 7.262583137097018e-05,
+      "loss": 0.9001,
+      "step": 383000
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 7.247825876612353e-05,
+      "loss": 0.9273,
+      "step": 384000
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 7.233044034264034e-05,
+      "loss": 0.915,
+      "step": 385000
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 7.218237771703921e-05,
+      "loss": 0.9048,
+      "step": 386000
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 7.203407250850928e-05,
+      "loss": 0.8857,
+      "step": 387000
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 7.188552633889259e-05,
+      "loss": 0.8817,
+      "step": 388000
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 7.173674083266624e-05,
+      "loss": 0.8899,
+      "step": 389000
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 7.158771761692464e-05,
+      "loss": 0.8255,
+      "step": 390000
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 7.143845832136188e-05,
+      "loss": 0.8539,
+      "step": 391000
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 7.128896457825364e-05,
+      "loss": 0.752,
+      "step": 392000
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 7.113923802243957e-05,
+      "loss": 0.8269,
+      "step": 393000
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 7.09892802913053e-05,
+      "loss": 0.8373,
+      "step": 394000
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 7.083909302476453e-05,
+      "loss": 0.7937,
+      "step": 395000
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 7.068867786524116e-05,
+      "loss": 0.7235,
+      "step": 396000
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 7.053803645765128e-05,
+      "loss": 0.7541,
+      "step": 397000
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 7.038717044938519e-05,
+      "loss": 0.8472,
+      "step": 398000
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 7.023608149028937e-05,
+      "loss": 0.8973,
+      "step": 399000
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 7.008477123264848e-05,
+      "loss": 0.9159,
+      "step": 400000
+    },
+    {
+      "epoch": 0.4,
+      "eval_loss": 0.4945286214351654,
+      "eval_runtime": 24.5864,
+      "eval_samples_per_second": 203.364,
+      "eval_steps_per_second": 3.213,
+      "step": 400000
     }
   ],
   "max_steps": 1000000,
   "num_train_epochs": 9223372036854775807,
-  "total_flos": 2.08840704393216e+19,
+  "total_flos": 2.38675090735104e+19,
   "trial_name": null,
   "trial_params": null
 }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:71115c415d1506074c53f32de1a48ac9cc9ec54aa63aafcaaabfa782a14a0bc7
+oid sha256:9065fc28ad294c806588c1f617b4d767cfb0a58efbd19946ba47348912d230e8
 size 1740493675
runs/Jan25_18-10-29_t1v-n-91d26b69-w-0/events.out.tfevents.1674670292.t1v-n-91d26b69-w-0.1015672.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:341d59d5fc6f461c0c3c690204ff3ffd7089fce6480dcc98bf4f23e74ceab973
-size 45156
+oid sha256:121afe79ebfd8ce7e1ce0234cf8868be599cb43ec134dfbc673e27e05e71b069
+size 53432