Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:673c0fd25aad70c75d3f499f30aac14e17d1e7abc034f7c31a39eb9b65ddbb65
 size 90207248

 version https://git-lfs.github.com/spec/v1
+oid sha256:f38f215518259f735781b211269fb82f23fcb38f060482603f3c091fb912bd41
 size 90207248

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:926a2278a8b67cd44d1cac56d23a2e20d212cb120fd474aae48e6c92a8e345f8
 size 180543866

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee2a21035e37876689ed9e488a2543ef20f3dca73daf2b2adc44735daaa6bc2a
 size 180543866

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b82a82b62fd7dafca6561e8fe621552ce66691b2085d5f41737544c535236043
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:916b6e6ff54130f9bc5a23621f0060f1dbe84ec9ee8f86c0e90a5e512d62b29d
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8c520ea3a5a077b393ca2acec5a9a074f6659506623b2e8a0859e3f7b98745d
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:5f2a5a9968751ed0fe6b385bb4ad5fda107a081be940fe6d46cd01af0cd8f0ef
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d69530eaa2827d30b997bf402cb2c5b9adeff9da52871b01616f5c6fb19c984
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:0535a7cbd13e63e35725ebb04dd72895198d09d6fc3ab0428ef1a66109f14657
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ee64e973b427d161e87f05ca8566825f286aa5acf05f1b643dd631b376f5076
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:bbcf8640bf32df4cc46ad2c7fad413cd358157cf8645db24f942ec10981d4623
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.6616689562797546,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.2991772625280479,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 114.167,
       "eval_steps_per_second": 14.398,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.91521434075136e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.6534409523010254,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.5983545250560958,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 114.167,
       "eval_steps_per_second": 14.398,
       "step": 25
+    },
+    {
+      "epoch": 0.31114435302916976,
+      "grad_norm": 3.2467966079711914,
+      "learning_rate": 5e-05,
+      "loss": 0.7302,
+      "step": 26
+    },
+    {
+      "epoch": 0.3231114435302917,
+      "grad_norm": 2.911585807800293,
+      "learning_rate": 4.6729843538492847e-05,
+      "loss": 0.6855,
+      "step": 27
+    },
+    {
+      "epoch": 0.33507853403141363,
+      "grad_norm": 1.2173607349395752,
+      "learning_rate": 4.347369038899744e-05,
+      "loss": 0.6575,
+      "step": 28
+    },
+    {
+      "epoch": 0.3470456245325355,
+      "grad_norm": 0.9109578728675842,
+      "learning_rate": 4.0245483899193595e-05,
+      "loss": 0.6314,
+      "step": 29
+    },
+    {
+      "epoch": 0.35901271503365745,
+      "grad_norm": 2.12790584564209,
+      "learning_rate": 3.705904774487396e-05,
+      "loss": 0.6653,
+      "step": 30
+    },
+    {
+      "epoch": 0.3709798055347794,
+      "grad_norm": 1.9173645973205566,
+      "learning_rate": 3.392802673484193e-05,
+      "loss": 0.6826,
+      "step": 31
+    },
+    {
+      "epoch": 0.38294689603590126,
+      "grad_norm": 1.5864529609680176,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 0.6398,
+      "step": 32
+    },
+    {
+      "epoch": 0.3949139865370232,
+      "grad_norm": 0.49075672030448914,
+      "learning_rate": 2.7885565489049946e-05,
+      "loss": 0.6084,
+      "step": 33
+    },
+    {
+      "epoch": 0.4068810770381451,
+      "grad_norm": 1.548568606376648,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 0.625,
+      "step": 34
+    },
+    {
+      "epoch": 0.418848167539267,
+      "grad_norm": 1.5237716436386108,
+      "learning_rate": 2.2221488349019903e-05,
+      "loss": 0.6102,
+      "step": 35
+    },
+    {
+      "epoch": 0.43081525804038895,
+      "grad_norm": 1.6438311338424683,
+      "learning_rate": 1.9561928549563968e-05,
+      "loss": 0.6662,
+      "step": 36
+    },
+    {
+      "epoch": 0.44278234854151083,
+      "grad_norm": 0.4927654564380646,
+      "learning_rate": 1.703270924499656e-05,
+      "loss": 0.6654,
+      "step": 37
+    },
+    {
+      "epoch": 0.45474943904263276,
+      "grad_norm": 1.176863670349121,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 0.6997,
+      "step": 38
+    },
+    {
+      "epoch": 0.4667165295437547,
+      "grad_norm": 1.7410774230957031,
+      "learning_rate": 1.2408009626051137e-05,
+      "loss": 0.6752,
+      "step": 39
+    },
+    {
+      "epoch": 0.4786836200448766,
+      "grad_norm": 0.8015233874320984,
+      "learning_rate": 1.0332332985438248e-05,
+      "loss": 0.6267,
+      "step": 40
+    },
+    {
+      "epoch": 0.4906507105459985,
+      "grad_norm": 0.6555165648460388,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 0.6412,
+      "step": 41
+    },
+    {
+      "epoch": 0.5026178010471204,
+      "grad_norm": 1.1170567274093628,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.6525,
+      "step": 42
+    },
+    {
+      "epoch": 0.5145848915482424,
+      "grad_norm": 0.7687378525733948,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 0.6476,
+      "step": 43
+    },
+    {
+      "epoch": 0.5265519820493643,
+      "grad_norm": 0.5519921183586121,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.6052,
+      "step": 44
+    },
+    {
+      "epoch": 0.5385190725504861,
+      "grad_norm": 1.3455817699432373,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 0.6142,
+      "step": 45
+    },
+    {
+      "epoch": 0.550486163051608,
+      "grad_norm": 1.2510255575180054,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.6408,
+      "step": 46
+    },
+    {
+      "epoch": 0.56245325355273,
+      "grad_norm": 1.7463011741638184,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 0.6705,
+      "step": 47
+    },
+    {
+      "epoch": 0.5744203440538519,
+      "grad_norm": 1.1176273822784424,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 0.5998,
+      "step": 48
+    },
+    {
+      "epoch": 0.5863874345549738,
+      "grad_norm": 1.4608560800552368,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 0.6772,
+      "step": 49
+    },
+    {
+      "epoch": 0.5983545250560958,
+      "grad_norm": 1.8109873533248901,
+      "learning_rate": 0.0,
+      "loss": 0.6619,
+      "step": 50
+    },
+    {
+      "epoch": 0.5983545250560958,
+      "eval_loss": 0.6534409523010254,
+      "eval_runtime": 5.1055,
+      "eval_samples_per_second": 110.274,
+      "eval_steps_per_second": 13.907,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.83042868150272e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null