Training in progress, step 150, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e762cf6a3a2ab8c7ef6d797dd3046d5202edc07279fd3934be764ab4494e860e
 size 628216

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2b843e151cf6cc050775016450368ab902a8c5210bcf1e434a77e4e5db56396
 size 628216

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:817f229bf6104f21d245d238139b7b01c02895cda19d6fa3fa446965e577338a
 size 352122

 version https://git-lfs.github.com/spec/v1
+oid sha256:f627aa887a52e41a3875178e7cfd439b0d0e0b932a590286b92421354b0669b7
 size 352122

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd263ef204c71aa9702008ec3c5eb1f40817d75dec957fe2a4f68fdb00b0c3a7
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1123d437e2018100b779616cd1029668a10695d67192092e06b2bcb9892a29fa
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a5f2068254def458b7bc10b909f0d55b265dc2a0afb1369a10fd37948299a71a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d094abeb22730ed8a1a30db0af3a1ac1bd1a9d1fcc8794319a4b828d3c7261c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.71716022491455,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.03713330857779428,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,364 @@
       "eval_samples_per_second": 132.837,
       "eval_steps_per_second": 33.268,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -759,7 +1117,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 17077528166400.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.709113121032715,
+  "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.055699962866691426,
   "eval_steps": 50,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 132.837,
       "eval_steps_per_second": 33.268,
       "step": 100
+    },
+    {
+      "epoch": 0.03750464166357222,
+      "grad_norm": 0.30814334750175476,
+      "learning_rate": 5.246999999999999e-05,
+      "loss": 10.7375,
+      "step": 101
+    },
+    {
+      "epoch": 0.037875974749350165,
+      "grad_norm": 0.34333688020706177,
+      "learning_rate": 5.194e-05,
+      "loss": 10.7405,
+      "step": 102
+    },
+    {
+      "epoch": 0.03824730783512811,
+      "grad_norm": 0.396445631980896,
+      "learning_rate": 5.141e-05,
+      "loss": 10.7225,
+      "step": 103
+    },
+    {
+      "epoch": 0.038618640920906055,
+      "grad_norm": 0.3750787079334259,
+      "learning_rate": 5.088e-05,
+      "loss": 10.7264,
+      "step": 104
+    },
+    {
+      "epoch": 0.038989974006684,
+      "grad_norm": 0.30554020404815674,
+      "learning_rate": 5.035e-05,
+      "loss": 10.7261,
+      "step": 105
+    },
+    {
+      "epoch": 0.03936130709246194,
+      "grad_norm": 0.35283979773521423,
+      "learning_rate": 4.9819999999999994e-05,
+      "loss": 10.7309,
+      "step": 106
+    },
+    {
+      "epoch": 0.03973264017823988,
+      "grad_norm": 0.39950183033943176,
+      "learning_rate": 4.929e-05,
+      "loss": 10.7223,
+      "step": 107
+    },
+    {
+      "epoch": 0.04010397326401782,
+      "grad_norm": 0.38336852192878723,
+      "learning_rate": 4.876e-05,
+      "loss": 10.7209,
+      "step": 108
+    },
+    {
+      "epoch": 0.040475306349795764,
+      "grad_norm": 0.362358421087265,
+      "learning_rate": 4.823e-05,
+      "loss": 10.735,
+      "step": 109
+    },
+    {
+      "epoch": 0.04084663943557371,
+      "grad_norm": 0.397513747215271,
+      "learning_rate": 4.7699999999999994e-05,
+      "loss": 10.7304,
+      "step": 110
+    },
+    {
+      "epoch": 0.041217972521351655,
+      "grad_norm": 0.4068267047405243,
+      "learning_rate": 4.717e-05,
+      "loss": 10.7297,
+      "step": 111
+    },
+    {
+      "epoch": 0.0415893056071296,
+      "grad_norm": 0.38471511006355286,
+      "learning_rate": 4.6639999999999994e-05,
+      "loss": 10.754,
+      "step": 112
+    },
+    {
+      "epoch": 0.04196063869290754,
+      "grad_norm": 0.3469901978969574,
+      "learning_rate": 4.611e-05,
+      "loss": 10.7255,
+      "step": 113
+    },
+    {
+      "epoch": 0.04233197177868548,
+      "grad_norm": 0.3917114734649658,
+      "learning_rate": 4.558e-05,
+      "loss": 10.72,
+      "step": 114
+    },
+    {
+      "epoch": 0.04270330486446342,
+      "grad_norm": 0.3135025203227997,
+      "learning_rate": 4.505e-05,
+      "loss": 10.7204,
+      "step": 115
+    },
+    {
+      "epoch": 0.043074637950241364,
+      "grad_norm": 0.32310202717781067,
+      "learning_rate": 4.4519999999999994e-05,
+      "loss": 10.7334,
+      "step": 116
+    },
+    {
+      "epoch": 0.04344597103601931,
+      "grad_norm": 0.316651314496994,
+      "learning_rate": 4.399e-05,
+      "loss": 10.7357,
+      "step": 117
+    },
+    {
+      "epoch": 0.043817304121797254,
+      "grad_norm": 0.3423415422439575,
+      "learning_rate": 4.346e-05,
+      "loss": 10.7195,
+      "step": 118
+    },
+    {
+      "epoch": 0.044188637207575196,
+      "grad_norm": 0.3235597610473633,
+      "learning_rate": 4.293e-05,
+      "loss": 10.7321,
+      "step": 119
+    },
+    {
+      "epoch": 0.04455997029335314,
+      "grad_norm": 0.33004501461982727,
+      "learning_rate": 4.2399999999999994e-05,
+      "loss": 10.7148,
+      "step": 120
+    },
+    {
+      "epoch": 0.04493130337913108,
+      "grad_norm": 0.3902532756328583,
+      "learning_rate": 4.187e-05,
+      "loss": 10.7344,
+      "step": 121
+    },
+    {
+      "epoch": 0.04530263646490902,
+      "grad_norm": 0.30236899852752686,
+      "learning_rate": 4.134e-05,
+      "loss": 10.7194,
+      "step": 122
+    },
+    {
+      "epoch": 0.04567396955068696,
+      "grad_norm": 0.38607534766197205,
+      "learning_rate": 4.081e-05,
+      "loss": 10.7046,
+      "step": 123
+    },
+    {
+      "epoch": 0.04604530263646491,
+      "grad_norm": 0.3628384470939636,
+      "learning_rate": 4.028e-05,
+      "loss": 10.7198,
+      "step": 124
+    },
+    {
+      "epoch": 0.046416635722242854,
+      "grad_norm": 0.36593976616859436,
+      "learning_rate": 3.975e-05,
+      "loss": 10.7281,
+      "step": 125
+    },
+    {
+      "epoch": 0.046787968808020795,
+      "grad_norm": 0.3433733880519867,
+      "learning_rate": 3.9219999999999994e-05,
+      "loss": 10.7148,
+      "step": 126
+    },
+    {
+      "epoch": 0.04715930189379874,
+      "grad_norm": 0.37441202998161316,
+      "learning_rate": 3.869e-05,
+      "loss": 10.7132,
+      "step": 127
+    },
+    {
+      "epoch": 0.04753063497957668,
+      "grad_norm": 0.3574247658252716,
+      "learning_rate": 3.816e-05,
+      "loss": 10.7207,
+      "step": 128
+    },
+    {
+      "epoch": 0.04790196806535462,
+      "grad_norm": 0.3976617157459259,
+      "learning_rate": 3.763e-05,
+      "loss": 10.7027,
+      "step": 129
+    },
+    {
+      "epoch": 0.04827330115113257,
+      "grad_norm": 0.4094492495059967,
+      "learning_rate": 3.7099999999999994e-05,
+      "loss": 10.7168,
+      "step": 130
+    },
+    {
+      "epoch": 0.04864463423691051,
+      "grad_norm": 0.3910907506942749,
+      "learning_rate": 3.657e-05,
+      "loss": 10.6894,
+      "step": 131
+    },
+    {
+      "epoch": 0.04901596732268845,
+      "grad_norm": 0.347520112991333,
+      "learning_rate": 3.604e-05,
+      "loss": 10.7075,
+      "step": 132
+    },
+    {
+      "epoch": 0.049387300408466395,
+      "grad_norm": 0.33360084891319275,
+      "learning_rate": 3.551e-05,
+      "loss": 10.7175,
+      "step": 133
+    },
+    {
+      "epoch": 0.049758633494244336,
+      "grad_norm": 0.3098675608634949,
+      "learning_rate": 3.498e-05,
+      "loss": 10.6943,
+      "step": 134
+    },
+    {
+      "epoch": 0.05012996658002228,
+      "grad_norm": 0.417607843875885,
+      "learning_rate": 3.445e-05,
+      "loss": 10.696,
+      "step": 135
+    },
+    {
+      "epoch": 0.05050129966580022,
+      "grad_norm": 0.37847384810447693,
+      "learning_rate": 3.392e-05,
+      "loss": 10.7031,
+      "step": 136
+    },
+    {
+      "epoch": 0.05087263275157817,
+      "grad_norm": 0.34361228346824646,
+      "learning_rate": 3.339e-05,
+      "loss": 10.714,
+      "step": 137
+    },
+    {
+      "epoch": 0.05124396583735611,
+      "grad_norm": 0.4933507740497589,
+      "learning_rate": 3.286e-05,
+      "loss": 10.6758,
+      "step": 138
+    },
+    {
+      "epoch": 0.05161529892313405,
+      "grad_norm": 0.412986695766449,
+      "learning_rate": 3.233e-05,
+      "loss": 10.6963,
+      "step": 139
+    },
+    {
+      "epoch": 0.051986632008911994,
+      "grad_norm": 0.43772485852241516,
+      "learning_rate": 3.1799999999999994e-05,
+      "loss": 10.7149,
+      "step": 140
+    },
+    {
+      "epoch": 0.052357965094689936,
+      "grad_norm": 0.37518948316574097,
+      "learning_rate": 3.1270000000000004e-05,
+      "loss": 10.6951,
+      "step": 141
+    },
+    {
+      "epoch": 0.05272929818046788,
+      "grad_norm": 0.36650022864341736,
+      "learning_rate": 3.074e-05,
+      "loss": 10.6925,
+      "step": 142
+    },
+    {
+      "epoch": 0.05310063126624582,
+      "grad_norm": 0.4543534219264984,
+      "learning_rate": 3.0209999999999997e-05,
+      "loss": 10.683,
+      "step": 143
+    },
+    {
+      "epoch": 0.05347196435202377,
+      "grad_norm": 0.47826236486434937,
+      "learning_rate": 2.9679999999999997e-05,
+      "loss": 10.7077,
+      "step": 144
+    },
+    {
+      "epoch": 0.05384329743780171,
+      "grad_norm": 0.3722932040691376,
+      "learning_rate": 2.915e-05,
+      "loss": 10.6916,
+      "step": 145
+    },
+    {
+      "epoch": 0.05421463052357965,
+      "grad_norm": 0.5447331666946411,
+      "learning_rate": 2.8619999999999997e-05,
+      "loss": 10.7065,
+      "step": 146
+    },
+    {
+      "epoch": 0.05458596360935759,
+      "grad_norm": 0.42991727590560913,
+      "learning_rate": 2.8089999999999997e-05,
+      "loss": 10.6734,
+      "step": 147
+    },
+    {
+      "epoch": 0.054957296695135535,
+      "grad_norm": 0.5424541234970093,
+      "learning_rate": 2.756e-05,
+      "loss": 10.6609,
+      "step": 148
+    },
+    {
+      "epoch": 0.05532862978091348,
+      "grad_norm": 0.6233406066894531,
+      "learning_rate": 2.703e-05,
+      "loss": 10.6865,
+      "step": 149
+    },
+    {
+      "epoch": 0.055699962866691426,
+      "grad_norm": 0.635688066482544,
+      "learning_rate": 2.6499999999999997e-05,
+      "loss": 10.6352,
+      "step": 150
+    },
+    {
+      "epoch": 0.055699962866691426,
+      "eval_loss": 10.709113121032715,
+      "eval_runtime": 8.5126,
+      "eval_samples_per_second": 133.215,
+      "eval_steps_per_second": 33.362,
+      "step": 150
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 25616292249600.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null