Training in progress, step 81000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c95d6bd6c8ef7be74648794bf804ea6641abfdb4b683e21f548a71f857055c4d
 size 487156538

 version https://git-lfs.github.com/spec/v1
+oid sha256:136551a11d925c9b7e277363afe91252d3c90b8dc5e5c43289e61107e3d62773
 size 487156538

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:77f02393a2e68235715b9461b42f89022b048fab626fba6fe23a96b132584f0d
 size 1059459406

 version https://git-lfs.github.com/spec/v1
+oid sha256:1982d8643a6b6bd91ce00559255789edb52f59cd150cda1ba9a3365e1689ee9f
 size 1059459406

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76361476c5f64c0bf49b3517f575b313775e07413868efaa9e59b739826961ff
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:53e04a561bdac90ef1f6c25ff5574afc68c7428cdda288bd54c70b9fc50dd7f9
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6827d9fffaa2c28a86003e8c710ebd966143a8a4fb5c72ae89825e5cbb629648
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:7561f948920604751f7cf826d2cf58f5e293444c817431c5d2aa2bead82cc641
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a41f160cdde62d6e3a1a90c84cc4de5e2ed3bd84b93a6360ed82af50a916d89f
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae0860552be41b41d133150b6b246cf17216ad8cec8467e427463a8701a5e9f2
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a808512b91eba0cbda0edf83c48a9fdb39d09e9a32863b9796c9d7e8b7d4b81c
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:2910f8dc686565600a07229cef4e507c0697eae3a7e9385ae4e913b2a8f189a5
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d73d6a55f40d828827c6493d8d4e36859284046429b1cc4d61ff3be96f72f5ef
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:38edd545bc4f01de3f608883af1908fbe14efdd33931e3adde347eb4fa00e55f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.1185051757135493,
   "eval_steps": 500,
-  "global_step": 80000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -28008,6 +28008,356 @@
       "learning_rate": 0.0004803679802916738,
       "loss": 16.5126,
       "step": 80000
     }
   ],
   "logging_steps": 20,
@@ -28027,7 +28377,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.7804505344505597e+20,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.11998649040996866,
   "eval_steps": 500,
+  "global_step": 81000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004803679802916738,
       "loss": 16.5126,
       "step": 80000
+    },
+    {
+      "epoch": 0.11853480200747768,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.0004803630413566662,
+      "loss": 16.5302,
+      "step": 80020
+    },
+    {
+      "epoch": 0.11856442830140607,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004803581024216586,
+      "loss": 16.4854,
+      "step": 80040
+    },
+    {
+      "epoch": 0.11859405459533445,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.000480353163486651,
+      "loss": 16.4548,
+      "step": 80060
+    },
+    {
+      "epoch": 0.11862368088926284,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.0004803482245516435,
+      "loss": 16.5227,
+      "step": 80080
+    },
+    {
+      "epoch": 0.11865330718319123,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004803432856166359,
+      "loss": 16.4541,
+      "step": 80100
+    },
+    {
+      "epoch": 0.11868293347711961,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.00048033834668162836,
+      "loss": 16.4769,
+      "step": 80120
+    },
+    {
+      "epoch": 0.118712559771048,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.00048033340774662075,
+      "loss": 16.433,
+      "step": 80140
+    },
+    {
+      "epoch": 0.11874218606497638,
+      "grad_norm": 10.75,
+      "learning_rate": 0.00048032846881161325,
+      "loss": 16.4778,
+      "step": 80160
+    },
+    {
+      "epoch": 0.11877181235890477,
+      "grad_norm": 8.5,
+      "learning_rate": 0.00048032352987660565,
+      "loss": 16.458,
+      "step": 80180
+    },
+    {
+      "epoch": 0.11880143865283316,
+      "grad_norm": 5.875,
+      "learning_rate": 0.0004803185909415981,
+      "loss": 16.4083,
+      "step": 80200
+    },
+    {
+      "epoch": 0.11883106494676154,
+      "grad_norm": 7.96875,
+      "learning_rate": 0.00048031365200659054,
+      "loss": 16.5132,
+      "step": 80220
+    },
+    {
+      "epoch": 0.11886069124068993,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.000480308713071583,
+      "loss": 16.4829,
+      "step": 80240
+    },
+    {
+      "epoch": 0.11889031753461833,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.0004803037741365754,
+      "loss": 16.4883,
+      "step": 80260
+    },
+    {
+      "epoch": 0.11891994382854672,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.00048029883520156783,
+      "loss": 16.4839,
+      "step": 80280
+    },
+    {
+      "epoch": 0.1189495701224751,
+      "grad_norm": 16.5,
+      "learning_rate": 0.0004802938962665603,
+      "loss": 16.474,
+      "step": 80300
+    },
+    {
+      "epoch": 0.11897919641640349,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004802889573315527,
+      "loss": 16.5142,
+      "step": 80320
+    },
+    {
+      "epoch": 0.11900882271033188,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004802840183965451,
+      "loss": 16.474,
+      "step": 80340
+    },
+    {
+      "epoch": 0.11903844900426026,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.0004802790794615375,
+      "loss": 16.4679,
+      "step": 80360
+    },
+    {
+      "epoch": 0.11906807529818865,
+      "grad_norm": 11.25,
+      "learning_rate": 0.00048027414052653,
+      "loss": 16.4848,
+      "step": 80380
+    },
+    {
+      "epoch": 0.11909770159211704,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004802692015915224,
+      "loss": 16.4804,
+      "step": 80400
+    },
+    {
+      "epoch": 0.11912732788604542,
+      "grad_norm": 9.625,
+      "learning_rate": 0.00048026426265651486,
+      "loss": 16.4446,
+      "step": 80420
+    },
+    {
+      "epoch": 0.11915695417997381,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.00048025932372150725,
+      "loss": 16.4631,
+      "step": 80440
+    },
+    {
+      "epoch": 0.1191865804739022,
+      "grad_norm": 7.5,
+      "learning_rate": 0.00048025438478649975,
+      "loss": 16.4251,
+      "step": 80460
+    },
+    {
+      "epoch": 0.11921620676783058,
+      "grad_norm": 6.5,
+      "learning_rate": 0.00048024944585149215,
+      "loss": 16.552,
+      "step": 80480
+    },
+    {
+      "epoch": 0.11924583306175897,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.0004802445069164846,
+      "loss": 16.4699,
+      "step": 80500
+    },
+    {
+      "epoch": 0.11927545935568735,
+      "grad_norm": 11.1875,
+      "learning_rate": 0.00048023956798147704,
+      "loss": 16.4617,
+      "step": 80520
+    },
+    {
+      "epoch": 0.11930508564961574,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004802346290464695,
+      "loss": 16.4773,
+      "step": 80540
+    },
+    {
+      "epoch": 0.11933471194354413,
+      "grad_norm": 7.53125,
+      "learning_rate": 0.0004802296901114619,
+      "loss": 16.4283,
+      "step": 80560
+    },
+    {
+      "epoch": 0.11936433823747253,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.00048022475117645433,
+      "loss": 16.5017,
+      "step": 80580
+    },
+    {
+      "epoch": 0.11939396453140091,
+      "grad_norm": 7.9375,
+      "learning_rate": 0.0004802198122414468,
+      "loss": 16.4556,
+      "step": 80600
+    },
+    {
+      "epoch": 0.1194235908253293,
+      "grad_norm": 8.5,
+      "learning_rate": 0.00048021487330643923,
+      "loss": 16.448,
+      "step": 80620
+    },
+    {
+      "epoch": 0.11945321711925769,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004802099343714316,
+      "loss": 16.4593,
+      "step": 80640
+    },
+    {
+      "epoch": 0.11948284341318607,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.000480204995436424,
+      "loss": 16.5178,
+      "step": 80660
+    },
+    {
+      "epoch": 0.11951246970711446,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.0004802000565014165,
+      "loss": 16.4132,
+      "step": 80680
+    },
+    {
+      "epoch": 0.11954209600104285,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.0004801951175664089,
+      "loss": 16.4124,
+      "step": 80700
+    },
+    {
+      "epoch": 0.11957172229497123,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.00048019017863140136,
+      "loss": 16.4544,
+      "step": 80720
+    },
+    {
+      "epoch": 0.11960134858889962,
+      "grad_norm": 9.8125,
+      "learning_rate": 0.00048018523969639375,
+      "loss": 16.4505,
+      "step": 80740
+    },
+    {
+      "epoch": 0.119630974882828,
+      "grad_norm": 8.0,
+      "learning_rate": 0.00048018030076138626,
+      "loss": 16.4634,
+      "step": 80760
+    },
+    {
+      "epoch": 0.11966060117675639,
+      "grad_norm": 9.8125,
+      "learning_rate": 0.00048017536182637865,
+      "loss": 16.4973,
+      "step": 80780
+    },
+    {
+      "epoch": 0.11969022747068478,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.0004801704228913711,
+      "loss": 16.4454,
+      "step": 80800
+    },
+    {
+      "epoch": 0.11971985376461317,
+      "grad_norm": 9.0,
+      "learning_rate": 0.00048016548395636354,
+      "loss": 16.3708,
+      "step": 80820
+    },
+    {
+      "epoch": 0.11974948005854155,
+      "grad_norm": 7.4375,
+      "learning_rate": 0.000480160545021356,
+      "loss": 16.4656,
+      "step": 80840
+    },
+    {
+      "epoch": 0.11977910635246994,
+      "grad_norm": 7.9375,
+      "learning_rate": 0.0004801556060863484,
+      "loss": 16.4999,
+      "step": 80860
+    },
+    {
+      "epoch": 0.11980873264639832,
+      "grad_norm": 8.125,
+      "learning_rate": 0.00048015066715134083,
+      "loss": 16.3914,
+      "step": 80880
+    },
+    {
+      "epoch": 0.11983835894032673,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004801457282163333,
+      "loss": 16.4897,
+      "step": 80900
+    },
+    {
+      "epoch": 0.11986798523425511,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.00048014078928132573,
+      "loss": 16.4603,
+      "step": 80920
+    },
+    {
+      "epoch": 0.1198976115281835,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004801358503463181,
+      "loss": 16.4485,
+      "step": 80940
+    },
+    {
+      "epoch": 0.11992723782211188,
+      "grad_norm": 7.625,
+      "learning_rate": 0.00048013091141131057,
+      "loss": 16.4565,
+      "step": 80960
+    },
+    {
+      "epoch": 0.11995686411604027,
+      "grad_norm": 9.9375,
+      "learning_rate": 0.000480125972476303,
+      "loss": 16.4224,
+      "step": 80980
+    },
+    {
+      "epoch": 0.11998649040996866,
+      "grad_norm": 13.0625,
+      "learning_rate": 0.0004801210335412954,
+      "loss": 16.473,
+      "step": 81000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 1.802706127351395e+20,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null