CocoRoF committed
Commit 8d4e96d · verified · 1 Parent(s): a034fb5

Training in progress, step 4280, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9576c675d3783e38d1737c6a59363ea426ec11cb7ca3a89afb856ddcdfd62aeb
+oid sha256:517491d8759942f27beaaca2325a4b97a206a4bc50d01b37f43e9dbfa7a419a1
 size 368988278
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e409661b0f642fac9cad18fe922e5228292aae46d4deddb7b8d039a78c553a02
+oid sha256:149043e2c866159ce43cb83040a8df7ea2f79cc230e8dcb485ab8e3def1116c3
 size 1107079290
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83484e3d787e0e8d73b7b9884f130016f0b4f3305f1905ab1f975834ec2fd834
+oid sha256:6ffa9145f2f47a4ee9348b70f818bdbc6838c07fe673a33bbd23b4e0aa3723fd
 size 1000
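
The three binary files above are stored through Git LFS, so the diff only touches their pointer files: the version line and size stay the same and only the oid sha256 changes with the new checkpoint. As a minimal sketch (the helper functions below are illustrative and not part of this repository), one could confirm that a locally pulled blob still matches its committed pointer:

import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_text: str) -> dict:
    # A Git LFS pointer has three lines: the spec version URL, "oid sha256:<hex>", and "size <bytes>".
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    return {"oid": fields["oid"].removeprefix("sha256:"), "size": int(fields["size"])}

def matches_pointer(blob_path: Path, pointer: dict) -> bool:
    # Hash the local blob in chunks and compare its size and sha256 digest to the pointer.
    digest = hashlib.sha256()
    size = 0
    with blob_path.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return size == pointer["size"] and digest.hexdigest() == pointer["oid"]

# Example with the new model pointer committed above (blob assumed already pulled via `git lfs pull`).
pointer = parse_lfs_pointer(
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:517491d8759942f27beaaca2325a4b97a206a4bc50d01b37f43e9dbfa7a419a1\n"
    "size 368988278"
)
print(matches_pointer(Path("last-checkpoint/model.safetensors"), pointer))
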
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9343884135836715,
+  "epoch": 0.9997956025345286,
   "eval_steps": 2000,
-  "global_step": 4000,
+  "global_step": 4280,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2823,6 +2823,202 @@
       "eval_samples_per_second": 1678.827,
       "eval_steps_per_second": 52.47,
       "step": 4000
+    },
+    {
+      "epoch": 0.9367243846176307,
+      "grad_norm": 46.65625,
+      "learning_rate": 9.981704618557266e-07,
+      "loss": 86.0902,
+      "step": 4010
+    },
+    {
+      "epoch": 0.93906035565159,
+      "grad_norm": 44.65625,
+      "learning_rate": 9.98165899416464e-07,
+      "loss": 85.4592,
+      "step": 4020
+    },
+    {
+      "epoch": 0.9413963266855491,
+      "grad_norm": 44.34375,
+      "learning_rate": 9.981613369772013e-07,
+      "loss": 86.5728,
+      "step": 4030
+    },
+    {
+      "epoch": 0.9437322977195083,
+      "grad_norm": 46.03125,
+      "learning_rate": 9.98156774537939e-07,
+      "loss": 87.2485,
+      "step": 4040
+    },
+    {
+      "epoch": 0.9460682687534675,
+      "grad_norm": 48.28125,
+      "learning_rate": 9.981522120986765e-07,
+      "loss": 87.1623,
+      "step": 4050
+    },
+    {
+      "epoch": 0.9484042397874266,
+      "grad_norm": 47.96875,
+      "learning_rate": 9.981476496594138e-07,
+      "loss": 86.2034,
+      "step": 4060
+    },
+    {
+      "epoch": 0.9507402108213858,
+      "grad_norm": 48.25,
+      "learning_rate": 9.981430872201514e-07,
+      "loss": 86.5078,
+      "step": 4070
+    },
+    {
+      "epoch": 0.953076181855345,
+      "grad_norm": 44.53125,
+      "learning_rate": 9.981385247808888e-07,
+      "loss": 86.3279,
+      "step": 4080
+    },
+    {
+      "epoch": 0.9554121528893041,
+      "grad_norm": 45.6875,
+      "learning_rate": 9.981339623416264e-07,
+      "loss": 86.4747,
+      "step": 4090
+    },
+    {
+      "epoch": 0.9577481239232634,
+      "grad_norm": 47.53125,
+      "learning_rate": 9.981293999023637e-07,
+      "loss": 85.3221,
+      "step": 4100
+    },
+    {
+      "epoch": 0.9600840949572226,
+      "grad_norm": 47.15625,
+      "learning_rate": 9.981248374631013e-07,
+      "loss": 85.7835,
+      "step": 4110
+    },
+    {
+      "epoch": 0.9624200659911817,
+      "grad_norm": 45.96875,
+      "learning_rate": 9.981202750238387e-07,
+      "loss": 85.919,
+      "step": 4120
+    },
+    {
+      "epoch": 0.9647560370251409,
+      "grad_norm": 46.40625,
+      "learning_rate": 9.981157125845762e-07,
+      "loss": 86.6488,
+      "step": 4130
+    },
+    {
+      "epoch": 0.9670920080591001,
+      "grad_norm": 47.8125,
+      "learning_rate": 9.981111501453136e-07,
+      "loss": 86.7465,
+      "step": 4140
+    },
+    {
+      "epoch": 0.9694279790930592,
+      "grad_norm": 50.96875,
+      "learning_rate": 9.981065877060512e-07,
+      "loss": 85.8423,
+      "step": 4150
+    },
+    {
+      "epoch": 0.9717639501270184,
+      "grad_norm": 44.84375,
+      "learning_rate": 9.981020252667885e-07,
+      "loss": 86.4872,
+      "step": 4160
+    },
+    {
+      "epoch": 0.9740999211609777,
+      "grad_norm": 51.46875,
+      "learning_rate": 9.980974628275261e-07,
+      "loss": 86.9111,
+      "step": 4170
+    },
+    {
+      "epoch": 0.9764358921949368,
+      "grad_norm": 46.25,
+      "learning_rate": 9.980929003882635e-07,
+      "loss": 86.4476,
+      "step": 4180
+    },
+    {
+      "epoch": 0.978771863228896,
+      "grad_norm": 47.0625,
+      "learning_rate": 9.98088337949001e-07,
+      "loss": 86.3345,
+      "step": 4190
+    },
+    {
+      "epoch": 0.9811078342628552,
+      "grad_norm": 47.96875,
+      "learning_rate": 9.980837755097384e-07,
+      "loss": 87.4492,
+      "step": 4200
+    },
+    {
+      "epoch": 0.9834438052968143,
+      "grad_norm": 47.53125,
+      "learning_rate": 9.98079213070476e-07,
+      "loss": 87.3175,
+      "step": 4210
+    },
+    {
+      "epoch": 0.9857797763307735,
+      "grad_norm": 47.84375,
+      "learning_rate": 9.980746506312134e-07,
+      "loss": 85.7159,
+      "step": 4220
+    },
+    {
+      "epoch": 0.9881157473647327,
+      "grad_norm": 50.5,
+      "learning_rate": 9.98070088191951e-07,
+      "loss": 85.7232,
+      "step": 4230
+    },
+    {
+      "epoch": 0.9904517183986918,
+      "grad_norm": 47.1875,
+      "learning_rate": 9.980655257526883e-07,
+      "loss": 86.1964,
+      "step": 4240
+    },
+    {
+      "epoch": 0.992787689432651,
+      "grad_norm": 46.15625,
+      "learning_rate": 9.980609633134259e-07,
+      "loss": 86.2977,
+      "step": 4250
+    },
+    {
+      "epoch": 0.9951236604666102,
+      "grad_norm": 44.8125,
+      "learning_rate": 9.980564008741632e-07,
+      "loss": 85.6801,
+      "step": 4260
+    },
+    {
+      "epoch": 0.9974596315005694,
+      "grad_norm": 46.15625,
+      "learning_rate": 9.980518384349008e-07,
+      "loss": 85.8044,
+      "step": 4270
+    },
+    {
+      "epoch": 0.9997956025345286,
+      "grad_norm": 46.75,
+      "learning_rate": 9.980472759956384e-07,
+      "loss": 86.1971,
+      "step": 4280
     }
   ],
   "logging_steps": 10,
@@ -2837,12 +3033,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.10444663109845e+19,
+  "total_flos": 1.1817578952753414e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null