Training in progress, step 55000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:032e5ecb2dab53150bbaf3c11fbb8a4e9ba7451f8029ca8a4a75b9c764ed4ca3
 size 487156538

 version https://git-lfs.github.com/spec/v1
+oid sha256:436bf79533e258070c96b4760436afa1f9251b1590c7ae2a2f60dc7519e9b64b
 size 487156538

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc3071b91c3d9f34aa409b00330a89c7035dd64aec3ad8af8c2d0d3d08e04916
 size 1059459406

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab2a7667c52f9cc64e61b137dc5df66439292fcc32acda2e7782c8372f9c8172
 size 1059459406

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02330612259bf0ffdcebbdc862309eac5e6da0f6e632646393673582eff58b76
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:61a042d8f729c4f51ba538ba4c747cf1d8cbb1b59cf032f3422995a579b49f8a
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f8c3d8df5bbb17e62aa6823857f159f584d750ec4ae412f9867691a7828e5c9
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:63b64a2edbbd5cf896abcb8f817b204e5a511d27c7efe13e0a92b23dc6a3b777
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:575c0a93289c37982b1579a31f28f9195ea8ec12a0c5b7b286d351b318ae6d53
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:0540aa39d91dc61087d3dd380d7b7750dc1d19afff10c530b1d0895a416cf32f
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cdf30ebe94a891d128b17692f16c1969a63407269859c9cbaf399c6ea35d8af7
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:47442bd8ad617950fc9791e10321850b084b057926e59d24f6b5e09aefa3043b
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cfaaa443ff50c514e8d740e179deb3f101e73d9201b92424d8bf52ab5c7dfc99
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e1887d8c6d2dc250cfb0b7c57e61e4fa0abc40fda0dbe8977a6841b90daceb70
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.07999099360664577,
   "eval_steps": 500,
-  "global_step": 54000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -18908,6 +18908,356 @@
       "learning_rate": 0.0004867885958015101,
       "loss": 16.9579,
       "step": 54000
     }
   ],
   "logging_steps": 20,
@@ -18927,7 +19277,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.2018037472162847e+20,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.08147230830306514,
   "eval_steps": 500,
+  "global_step": 55000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004867885958015101,
       "loss": 16.9579,
       "step": 54000
+    },
+    {
+      "epoch": 0.08002061990057416,
+      "grad_norm": 8.375,
+      "learning_rate": 0.00048678365686650256,
+      "loss": 16.8883,
+      "step": 54020
+    },
+    {
+      "epoch": 0.08005024619450254,
+      "grad_norm": 6.28125,
+      "learning_rate": 0.000486778717931495,
+      "loss": 16.9452,
+      "step": 54040
+    },
+    {
+      "epoch": 0.08007987248843093,
+      "grad_norm": 8.25,
+      "learning_rate": 0.00048677377899648746,
+      "loss": 16.8946,
+      "step": 54060
+    },
+    {
+      "epoch": 0.08010949878235932,
+      "grad_norm": 8.5,
+      "learning_rate": 0.00048676884006147985,
+      "loss": 16.9801,
+      "step": 54080
+    },
+    {
+      "epoch": 0.0801391250762877,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.0004867639011264723,
+      "loss": 16.9925,
+      "step": 54100
+    },
+    {
+      "epoch": 0.08016875137021609,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00048675896219146475,
+      "loss": 16.9666,
+      "step": 54120
+    },
+    {
+      "epoch": 0.08019837766414448,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.0004867540232564572,
+      "loss": 16.9161,
+      "step": 54140
+    },
+    {
+      "epoch": 0.08022800395807286,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.0004867490843214496,
+      "loss": 17.0152,
+      "step": 54160
+    },
+    {
+      "epoch": 0.08025763025200125,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.00048674414538644204,
+      "loss": 16.9116,
+      "step": 54180
+    },
+    {
+      "epoch": 0.08028725654592965,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.0004867392064514345,
+      "loss": 16.9337,
+      "step": 54200
+    },
+    {
+      "epoch": 0.08031688283985804,
+      "grad_norm": 8.375,
+      "learning_rate": 0.00048673426751642693,
+      "loss": 16.9931,
+      "step": 54220
+    },
+    {
+      "epoch": 0.08034650913378642,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.0004867293285814193,
+      "loss": 17.0016,
+      "step": 54240
+    },
+    {
+      "epoch": 0.08037613542771481,
+      "grad_norm": 11.3125,
+      "learning_rate": 0.0004867243896464118,
+      "loss": 16.9347,
+      "step": 54260
+    },
+    {
+      "epoch": 0.0804057617216432,
+      "grad_norm": 9.875,
+      "learning_rate": 0.0004867194507114042,
+      "loss": 16.9192,
+      "step": 54280
+    },
+    {
+      "epoch": 0.08043538801557158,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004867145117763966,
+      "loss": 16.8667,
+      "step": 54300
+    },
+    {
+      "epoch": 0.08046501430949997,
+      "grad_norm": 11.875,
+      "learning_rate": 0.00048670957284138906,
+      "loss": 16.9558,
+      "step": 54320
+    },
+    {
+      "epoch": 0.08049464060342836,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004867046339063815,
+      "loss": 17.0024,
+      "step": 54340
+    },
+    {
+      "epoch": 0.08052426689735674,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00048669969497137396,
+      "loss": 16.9562,
+      "step": 54360
+    },
+    {
+      "epoch": 0.08055389319128513,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.00048669475603636635,
+      "loss": 16.9164,
+      "step": 54380
+    },
+    {
+      "epoch": 0.08058351948521351,
+      "grad_norm": 9.625,
+      "learning_rate": 0.0004866898171013588,
+      "loss": 16.9818,
+      "step": 54400
+    },
+    {
+      "epoch": 0.0806131457791419,
+      "grad_norm": 6.875,
+      "learning_rate": 0.00048668487816635125,
+      "loss": 16.9353,
+      "step": 54420
+    },
+    {
+      "epoch": 0.08064277207307029,
+      "grad_norm": 9.25,
+      "learning_rate": 0.0004866799392313437,
+      "loss": 16.9368,
+      "step": 54440
+    },
+    {
+      "epoch": 0.08067239836699867,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004866750002963361,
+      "loss": 16.9073,
+      "step": 54460
+    },
+    {
+      "epoch": 0.08070202466092706,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.00048667006136132854,
+      "loss": 16.9283,
+      "step": 54480
+    },
+    {
+      "epoch": 0.08073165095485545,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.000486665122426321,
+      "loss": 16.9585,
+      "step": 54500
+    },
+    {
+      "epoch": 0.08076127724878385,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.00048666018349131343,
+      "loss": 16.9718,
+      "step": 54520
+    },
+    {
+      "epoch": 0.08079090354271223,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004866552445563058,
+      "loss": 16.974,
+      "step": 54540
+    },
+    {
+      "epoch": 0.08082052983664062,
+      "grad_norm": 7.25,
+      "learning_rate": 0.0004866503056212983,
+      "loss": 16.9887,
+      "step": 54560
+    },
+    {
+      "epoch": 0.080850156130569,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004866453666862907,
+      "loss": 17.0524,
+      "step": 54580
+    },
+    {
+      "epoch": 0.08087978242449739,
+      "grad_norm": 7.375,
+      "learning_rate": 0.00048664042775128317,
+      "loss": 16.9761,
+      "step": 54600
+    },
+    {
+      "epoch": 0.08090940871842578,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.00048663548881627556,
+      "loss": 16.8978,
+      "step": 54620
+    },
+    {
+      "epoch": 0.08093903501235417,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.000486630549881268,
+      "loss": 16.9383,
+      "step": 54640
+    },
+    {
+      "epoch": 0.08096866130628255,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.00048662561094626046,
+      "loss": 16.9231,
+      "step": 54660
+    },
+    {
+      "epoch": 0.08099828760021094,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.00048662067201125285,
+      "loss": 16.9352,
+      "step": 54680
+    },
+    {
+      "epoch": 0.08102791389413933,
+      "grad_norm": 11.6875,
+      "learning_rate": 0.0004866157330762453,
+      "loss": 16.9842,
+      "step": 54700
+    },
+    {
+      "epoch": 0.08105754018806771,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00048661079414123775,
+      "loss": 16.9285,
+      "step": 54720
+    },
+    {
+      "epoch": 0.0810871664819961,
+      "grad_norm": 6.28125,
+      "learning_rate": 0.0004866058552062302,
+      "loss": 16.9533,
+      "step": 54740
+    },
+    {
+      "epoch": 0.08111679277592448,
+      "grad_norm": 10.5,
+      "learning_rate": 0.0004866009162712226,
+      "loss": 16.9167,
+      "step": 54760
+    },
+    {
+      "epoch": 0.08114641906985287,
+      "grad_norm": 12.5,
+      "learning_rate": 0.00048659597733621504,
+      "loss": 16.8854,
+      "step": 54780
+    },
+    {
+      "epoch": 0.08117604536378126,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004865910384012075,
+      "loss": 16.9078,
+      "step": 54800
+    },
+    {
+      "epoch": 0.08120567165770964,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.00048658609946619993,
+      "loss": 16.9198,
+      "step": 54820
+    },
+    {
+      "epoch": 0.08123529795163804,
+      "grad_norm": 7.9375,
+      "learning_rate": 0.0004865811605311923,
+      "loss": 16.9367,
+      "step": 54840
+    },
+    {
+      "epoch": 0.08126492424556643,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.0004865762215961848,
+      "loss": 16.9474,
+      "step": 54860
+    },
+    {
+      "epoch": 0.08129455053949482,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004865712826611772,
+      "loss": 16.93,
+      "step": 54880
+    },
+    {
+      "epoch": 0.0813241768334232,
+      "grad_norm": 7.25,
+      "learning_rate": 0.00048656634372616967,
+      "loss": 16.9968,
+      "step": 54900
+    },
+    {
+      "epoch": 0.08135380312735159,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.00048656140479116206,
+      "loss": 16.9388,
+      "step": 54920
+    },
+    {
+      "epoch": 0.08138342942127998,
+      "grad_norm": 6.375,
+      "learning_rate": 0.00048655646585615456,
+      "loss": 16.9977,
+      "step": 54940
+    },
+    {
+      "epoch": 0.08141305571520836,
+      "grad_norm": 8.125,
+      "learning_rate": 0.00048655152692114696,
+      "loss": 16.9571,
+      "step": 54960
+    },
+    {
+      "epoch": 0.08144268200913675,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.00048654658798613935,
+      "loss": 16.9437,
+      "step": 54980
+    },
+    {
+      "epoch": 0.08147230830306514,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.0004865416490511318,
+      "loss": 16.92,
+      "step": 55000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 1.224059432458917e+20,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null