Training in progress, step 5550, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +148 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4eaaf03b78bd99375228f4e3780fd0588fc02582773008769bf7177550d4b48
 size 527048968

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d491ec5c79e068346c07ac72051b147d92ec50b0b38fc5b4f05250ed8013d65
 size 527048968

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:87baf48f45b21dd7d9a1576417255bf546e8558ecd18cd75468fb9ffa32e54f8
 size 1054135994

 version https://git-lfs.github.com/spec/v1
+oid sha256:a4499a0d589d85027a908e783ca232b578c49e99556d4d108f652eaa7d4cd5da
 size 1054135994

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cde3dd12f91204388f748ef22c42d0af6362a11af96ae2767080c430a3556fd7
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1be3d3b48fb8768a9f6d52575aecdf595860f5cf577f03b3acc8148f472cbae2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d8a06f6e764a4c806b3b6aa6930ec3c05d14769ecbf5db87f5122a0c04e591e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a533c2b46d6abcf961f28fb57a403e6e075b91a6ddf8fd09d7df3b1d5f213cea
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.7166205048561096,
   "best_model_checkpoint": "./output/checkpoint-450",
-  "epoch": 234.7826086956522,
   "eval_steps": 150,
-  "global_step": 5400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5227,6 +5227,151 @@
       "EMA_steps_per_second": 25.563,
       "epoch": 234.7826086956522,
       "step": 5400
     }
   ],
   "logging_steps": 10,
@@ -5246,7 +5391,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.3884011525792563e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.7166205048561096,
   "best_model_checkpoint": "./output/checkpoint-450",
+  "epoch": 241.30434782608697,
   "eval_steps": 150,
+  "global_step": 5550,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "EMA_steps_per_second": 25.563,
       "epoch": 234.7826086956522,
       "step": 5400
+    },
+    {
+      "epoch": 235.2173913043478,
+      "grad_norm": 2.2689285278320312,
+      "learning_rate": 1.0855486739416115e-06,
+      "loss": 0.2335,
+      "step": 5410
+    },
+    {
+      "epoch": 235.65217391304347,
+      "grad_norm": 2.205592393875122,
+      "learning_rate": 1.085547275540868e-06,
+      "loss": 0.2066,
+      "step": 5420
+    },
+    {
+      "epoch": 236.08695652173913,
+      "grad_norm": 2.256180763244629,
+      "learning_rate": 1.0855456620030405e-06,
+      "loss": 0.2442,
+      "step": 5430
+    },
+    {
+      "epoch": 236.52173913043478,
+      "grad_norm": 2.169275999069214,
+      "learning_rate": 1.0855438333287692e-06,
+      "loss": 0.1983,
+      "step": 5440
+    },
+    {
+      "epoch": 236.95652173913044,
+      "grad_norm": 2.1479029655456543,
+      "learning_rate": 1.0855417895187786e-06,
+      "loss": 0.2359,
+      "step": 5450
+    },
+    {
+      "epoch": 237.3913043478261,
+      "grad_norm": 1.7530748844146729,
+      "learning_rate": 1.0855395305738789e-06,
+      "loss": 0.2375,
+      "step": 5460
+    },
+    {
+      "epoch": 237.82608695652175,
+      "grad_norm": 1.93467116355896,
+      "learning_rate": 1.0855370564949654e-06,
+      "loss": 0.2229,
+      "step": 5470
+    },
+    {
+      "epoch": 238.2608695652174,
+      "grad_norm": 3.3168399333953857,
+      "learning_rate": 1.0855343672830188e-06,
+      "loss": 0.2231,
+      "step": 5480
+    },
+    {
+      "epoch": 238.69565217391303,
+      "grad_norm": 2.073918342590332,
+      "learning_rate": 1.085531462939105e-06,
+      "loss": 0.223,
+      "step": 5490
+    },
+    {
+      "epoch": 239.1304347826087,
+      "grad_norm": 2.3649418354034424,
+      "learning_rate": 1.085528343464375e-06,
+      "loss": 0.2133,
+      "step": 5500
+    },
+    {
+      "epoch": 239.56521739130434,
+      "grad_norm": 2.719287395477295,
+      "learning_rate": 1.0855250088600655e-06,
+      "loss": 0.2752,
+      "step": 5510
+    },
+    {
+      "epoch": 240.0,
+      "grad_norm": 5.105301380157471,
+      "learning_rate": 1.0855214591274984e-06,
+      "loss": 0.1964,
+      "step": 5520
+    },
+    {
+      "epoch": 240.43478260869566,
+      "grad_norm": 1.5361961126327515,
+      "learning_rate": 1.0855176942680803e-06,
+      "loss": 0.2309,
+      "step": 5530
+    },
+    {
+      "epoch": 240.8695652173913,
+      "grad_norm": 2.835388660430908,
+      "learning_rate": 1.0855137142833035e-06,
+      "loss": 0.2229,
+      "step": 5540
+    },
+    {
+      "epoch": 241.30434782608697,
+      "grad_norm": 2.0795018672943115,
+      "learning_rate": 1.0855095191747456e-06,
+      "loss": 0.2335,
+      "step": 5550
+    },
+    {
+      "epoch": 241.30434782608697,
+      "eval_loss": 0.9892138242721558,
+      "eval_runtime": 0.5423,
+      "eval_samples_per_second": 18.441,
+      "eval_steps_per_second": 18.441,
+      "step": 5550
+    },
+    {
+      "Start_State_loss": 0.8609819412231445,
+      "Start_State_runtime": 0.3972,
+      "Start_State_samples_per_second": 25.175,
+      "Start_State_steps_per_second": 25.175,
+      "epoch": 241.30434782608697,
+      "step": 5550
+    },
+    {
+      "Raw_Model_loss": 0.9892138242721558,
+      "Raw_Model_runtime": 0.4138,
+      "Raw_Model_samples_per_second": 24.169,
+      "Raw_Model_steps_per_second": 24.169,
+      "epoch": 241.30434782608697,
+      "step": 5550
+    },
+    {
+      "SWA_loss": 0.831312358379364,
+      "SWA_runtime": 0.4189,
+      "SWA_samples_per_second": 23.872,
+      "SWA_steps_per_second": 23.872,
+      "epoch": 241.30434782608697,
+      "step": 5550
+    },
+    {
+      "EMA_loss": 0.8599440455436707,
+      "EMA_runtime": 0.4024,
+      "EMA_samples_per_second": 24.854,
+      "EMA_steps_per_second": 24.854,
+      "epoch": 241.30434782608697,
+      "step": 5550
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.4269374356277658e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null