Training in progress, step 96000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:22f1c37cc426dc46a70e9ba9431d95b3b817b015cb7a365920fba7cb1da04131
 size 487156538

 version https://git-lfs.github.com/spec/v1
+oid sha256:f6d0fcc97e8f38ef4f177931a0b80e81f26fc2c3df44b1a834117199e9947a28
 size 487156538

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2dd6b659ab7b65c0d1882aa3c4598cd5d54ad5c9cccc80e4fe48f89095a0ac34
 size 1059459406

 version https://git-lfs.github.com/spec/v1
+oid sha256:22ff6e0e3931ffc4543840546a265cb104c9f8d044f751f1d1689af1785ac414
 size 1059459406

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:679d5a0473f97f636733470316fd86786ed42325e42384361148a8b340e7a238
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:f16967f2c5425e2d3062a5c645e3099ddca9226921c4faea80b6f226c345b14e
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5905b280e26e8d88b4f51a4002ce7641eeded8cd7d7cd7d8ae9eb69a80d70016
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:0a57ad377b3c1d54cf00ec0f4a45ee8dcdfa0afda14d68b22cde2fefed3a794b
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69af864234b5e9471139d16353a90ba6758d66cf728856336954ab1232ff8d66
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:db33e02267588a29827a8a403fef0f365d2173a37eee0f81da95484780955b80
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c0a03cf7d79472b567011bf3dcac8e639a555b7ca08e2eeba682b37c371ec47
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:34b2c97eeef7042413901b15e9528f4663411f959a4a8a8086a8807b96ca132e
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da8924fd9ebf1bfc34c9d222c0eeb1de5a903b56bd5f2b099e5c970eea697fbe
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9bee3f55096050a1b77c497eef45ddf3e44fe16bc128ff4c2f549ae26e06537c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.14072489615983977,
   "eval_steps": 500,
-  "global_step": 95000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -33258,6 +33258,356 @@
       "learning_rate": 0.00047666377903599896,
       "loss": 16.2668,
       "step": 95000
     }
   ],
   "logging_steps": 20,
@@ -33277,7 +33627,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.1142852403132052e+20,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.14220621085625915,
   "eval_steps": 500,
+  "global_step": 96000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00047666377903599896,
       "loss": 16.2668,
       "step": 95000
+    },
+    {
+      "epoch": 0.14075452245376815,
+      "grad_norm": 8.0,
+      "learning_rate": 0.00047665884010099135,
+      "loss": 16.2326,
+      "step": 95020
+    },
+    {
+      "epoch": 0.14078414874769657,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004766539011659838,
+      "loss": 16.2558,
+      "step": 95040
+    },
+    {
+      "epoch": 0.14081377504162496,
+      "grad_norm": 9.9375,
+      "learning_rate": 0.0004766489622309762,
+      "loss": 16.2363,
+      "step": 95060
+    },
+    {
+      "epoch": 0.14084340133555334,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.0004766440232959687,
+      "loss": 16.2672,
+      "step": 95080
+    },
+    {
+      "epoch": 0.14087302762948173,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.0004766390843609611,
+      "loss": 16.3032,
+      "step": 95100
+    },
+    {
+      "epoch": 0.14090265392341011,
+      "grad_norm": 7.5625,
+      "learning_rate": 0.00047663414542595354,
+      "loss": 16.1972,
+      "step": 95120
+    },
+    {
+      "epoch": 0.1409322802173385,
+      "grad_norm": 7.96875,
+      "learning_rate": 0.00047662920649094593,
+      "loss": 16.2712,
+      "step": 95140
+    },
+    {
+      "epoch": 0.1409619065112669,
+      "grad_norm": 9.8125,
+      "learning_rate": 0.00047662426755593843,
+      "loss": 16.2999,
+      "step": 95160
+    },
+    {
+      "epoch": 0.14099153280519527,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.0004766193286209308,
+      "loss": 16.2646,
+      "step": 95180
+    },
+    {
+      "epoch": 0.14102115909912366,
+      "grad_norm": 7.5,
+      "learning_rate": 0.0004766143896859232,
+      "loss": 16.2533,
+      "step": 95200
+    },
+    {
+      "epoch": 0.14105078539305205,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.00047660945075091567,
+      "loss": 16.2281,
+      "step": 95220
+    },
+    {
+      "epoch": 0.14108041168698043,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004766045118159081,
+      "loss": 16.2489,
+      "step": 95240
+    },
+    {
+      "epoch": 0.14111003798090882,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.00047659957288090056,
+      "loss": 16.2813,
+      "step": 95260
+    },
+    {
+      "epoch": 0.1411396642748372,
+      "grad_norm": 9.0,
+      "learning_rate": 0.00047659463394589296,
+      "loss": 16.2566,
+      "step": 95280
+    },
+    {
+      "epoch": 0.1411692905687656,
+      "grad_norm": 8.75,
+      "learning_rate": 0.00047658969501088546,
+      "loss": 16.2978,
+      "step": 95300
+    },
+    {
+      "epoch": 0.14119891686269398,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.00047658475607587785,
+      "loss": 16.2168,
+      "step": 95320
+    },
+    {
+      "epoch": 0.14122854315662237,
+      "grad_norm": 7.8125,
+      "learning_rate": 0.0004765798171408703,
+      "loss": 16.2683,
+      "step": 95340
+    },
+    {
+      "epoch": 0.14125816945055075,
+      "grad_norm": 11.0,
+      "learning_rate": 0.0004765748782058627,
+      "loss": 16.3143,
+      "step": 95360
+    },
+    {
+      "epoch": 0.14128779574447914,
+      "grad_norm": 8.0,
+      "learning_rate": 0.0004765699392708552,
+      "loss": 16.2963,
+      "step": 95380
+    },
+    {
+      "epoch": 0.14131742203840753,
+      "grad_norm": 7.96875,
+      "learning_rate": 0.0004765650003358476,
+      "loss": 16.2477,
+      "step": 95400
+    },
+    {
+      "epoch": 0.1413470483323359,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.00047656006140084004,
+      "loss": 16.2887,
+      "step": 95420
+    },
+    {
+      "epoch": 0.1413766746262643,
+      "grad_norm": 6.875,
+      "learning_rate": 0.00047655512246583243,
+      "loss": 16.3026,
+      "step": 95440
+    },
+    {
+      "epoch": 0.14140630092019268,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.00047655018353082493,
+      "loss": 16.2857,
+      "step": 95460
+    },
+    {
+      "epoch": 0.14143592721412107,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004765452445958173,
+      "loss": 16.2834,
+      "step": 95480
+    },
+    {
+      "epoch": 0.14146555350804946,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004765403056608098,
+      "loss": 16.1975,
+      "step": 95500
+    },
+    {
+      "epoch": 0.14149517980197784,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.00047653536672580217,
+      "loss": 16.2638,
+      "step": 95520
+    },
+    {
+      "epoch": 0.14152480609590623,
+      "grad_norm": 7.53125,
+      "learning_rate": 0.0004765304277907946,
+      "loss": 16.209,
+      "step": 95540
+    },
+    {
+      "epoch": 0.14155443238983462,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.00047652548885578706,
+      "loss": 16.2836,
+      "step": 95560
+    },
+    {
+      "epoch": 0.141584058683763,
+      "grad_norm": 9.375,
+      "learning_rate": 0.00047652054992077946,
+      "loss": 16.2385,
+      "step": 95580
+    },
+    {
+      "epoch": 0.1416136849776914,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.00047651561098577196,
+      "loss": 16.3201,
+      "step": 95600
+    },
+    {
+      "epoch": 0.14164331127161978,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.00047651067205076435,
+      "loss": 16.2798,
+      "step": 95620
+    },
+    {
+      "epoch": 0.14167293756554816,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.0004765057331157568,
+      "loss": 16.3551,
+      "step": 95640
+    },
+    {
+      "epoch": 0.14170256385947655,
+      "grad_norm": 6.25,
+      "learning_rate": 0.0004765007941807492,
+      "loss": 16.244,
+      "step": 95660
+    },
+    {
+      "epoch": 0.14173219015340496,
+      "grad_norm": 9.375,
+      "learning_rate": 0.0004764958552457417,
+      "loss": 16.2651,
+      "step": 95680
+    },
+    {
+      "epoch": 0.14176181644733335,
+      "grad_norm": 14.3125,
+      "learning_rate": 0.0004764909163107341,
+      "loss": 16.2386,
+      "step": 95700
+    },
+    {
+      "epoch": 0.14179144274126174,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.00047648597737572654,
+      "loss": 16.2521,
+      "step": 95720
+    },
+    {
+      "epoch": 0.14182106903519012,
+      "grad_norm": 7.5,
+      "learning_rate": 0.00047648103844071893,
+      "loss": 16.2774,
+      "step": 95740
+    },
+    {
+      "epoch": 0.1418506953291185,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.00047647609950571143,
+      "loss": 16.2116,
+      "step": 95760
+    },
+    {
+      "epoch": 0.1418803216230469,
+      "grad_norm": 6.125,
+      "learning_rate": 0.0004764711605707038,
+      "loss": 16.2365,
+      "step": 95780
+    },
+    {
+      "epoch": 0.14190994791697528,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004764662216356963,
+      "loss": 16.267,
+      "step": 95800
+    },
+    {
+      "epoch": 0.14193957421090367,
+      "grad_norm": 7.8125,
+      "learning_rate": 0.00047646128270068867,
+      "loss": 16.382,
+      "step": 95820
+    },
+    {
+      "epoch": 0.14196920050483205,
+      "grad_norm": 10.5,
+      "learning_rate": 0.00047645634376568117,
+      "loss": 16.2225,
+      "step": 95840
+    },
+    {
+      "epoch": 0.14199882679876044,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.00047645140483067356,
+      "loss": 16.2772,
+      "step": 95860
+    },
+    {
+      "epoch": 0.14202845309268883,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.00047644646589566596,
+      "loss": 16.2962,
+      "step": 95880
+    },
+    {
+      "epoch": 0.14205807938661721,
+      "grad_norm": 11.8125,
+      "learning_rate": 0.00047644152696065846,
+      "loss": 16.3097,
+      "step": 95900
+    },
+    {
+      "epoch": 0.1420877056805456,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.00047643658802565085,
+      "loss": 16.2627,
+      "step": 95920
+    },
+    {
+      "epoch": 0.142117331974474,
+      "grad_norm": 7.75,
+      "learning_rate": 0.0004764316490906433,
+      "loss": 16.2114,
+      "step": 95940
+    },
+    {
+      "epoch": 0.14214695826840237,
+      "grad_norm": 20.25,
+      "learning_rate": 0.0004764267101556357,
+      "loss": 16.1394,
+      "step": 95960
+    },
+    {
+      "epoch": 0.14217658456233076,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.0004764217712206282,
+      "loss": 16.261,
+      "step": 95980
+    },
+    {
+      "epoch": 0.14220621085625915,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.0004764168322856206,
+      "loss": 16.226,
+      "step": 96000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 2.1365408984116442e+20,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null