Training in progress, step 2520, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +144 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79dbc21c20485148f3d5103ccb74438d521b3e24f1af3ff2eb11ab60a18d09c6
 size 50503544

 version https://git-lfs.github.com/spec/v1
+oid sha256:bd41ebaf0126bcbc9cb857ac5378df1eabfc3bdefaf0428fc58c9548b03fb716
 size 50503544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:30cdb38be8ef4f72aaf8bd15a8abf5ecc17f7245cde262cdfc919b1aedf403fc
 size 25986468

 version https://git-lfs.github.com/spec/v1
+oid sha256:2e1fc2cc37d953fa48873d103c237d567951d0ea49cb5dad104417190acaae8c
 size 25986468

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cad609a9755c89c6b502f894c0f517f77e1ef69f34e4a31f5b485de169798ec
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f7f712a67228f3bcffa37c019c35e988ecd34b25e0960d7fa983f4ae67e4c1e5
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b638b56b9dc8398196af3daa4ccf42a12d198b3dc3ab675be9fa49de3b5e01a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:dfd166862c66a199e9571546b71d0678fd9a94525d7d5a7b2b2e955704972144
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.023890294134616852,
   "best_model_checkpoint": "miner_id_24/checkpoint-2500",
-  "epoch": 1.3958682300390843,
   "eval_steps": 100,
-  "global_step": 2500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -17715,6 +17715,146 @@
       "eval_samples_per_second": 27.795,
       "eval_steps_per_second": 6.949,
       "step": 2500
     }
   ],
   "logging_steps": 1,
@@ -17738,12 +17878,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.02936877203456e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.023890294134616852,
   "best_model_checkpoint": "miner_id_24/checkpoint-2500",
+  "epoch": 1.4070351758793969,
   "eval_steps": 100,
+  "global_step": 2520,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 27.795,
       "eval_steps_per_second": 6.949,
       "step": 2500
+    },
+    {
+      "epoch": 1.3964265773311,
+      "grad_norm": 0.06104286387562752,
+      "learning_rate": 2.8275418474443814e-08,
+      "loss": 0.0183,
+      "step": 2501
+    },
+    {
+      "epoch": 1.3969849246231156,
+      "grad_norm": 0.06953344494104385,
+      "learning_rate": 2.537750647535475e-08,
+      "loss": 0.0209,
+      "step": 2502
+    },
+    {
+      "epoch": 1.3975432719151313,
+      "grad_norm": 0.08410750329494476,
+      "learning_rate": 2.26362126006352e-08,
+      "loss": 0.0195,
+      "step": 2503
+    },
+    {
+      "epoch": 1.398101619207147,
+      "grad_norm": 0.07485437393188477,
+      "learning_rate": 2.0051541144749943e-08,
+      "loss": 0.0232,
+      "step": 2504
+    },
+    {
+      "epoch": 1.3986599664991624,
+      "grad_norm": 0.06569929420948029,
+      "learning_rate": 1.762349615677117e-08,
+      "loss": 0.0212,
+      "step": 2505
+    },
+    {
+      "epoch": 1.3992183137911782,
+      "grad_norm": 0.05091328173875809,
+      "learning_rate": 1.5352081440422884e-08,
+      "loss": 0.016,
+      "step": 2506
+    },
+    {
+      "epoch": 1.3997766610831937,
+      "grad_norm": 0.045666009187698364,
+      "learning_rate": 1.3237300554069798e-08,
+      "loss": 0.0142,
+      "step": 2507
+    },
+    {
+      "epoch": 1.4003350083752093,
+      "grad_norm": 0.05261168256402016,
+      "learning_rate": 1.1279156810684034e-08,
+      "loss": 0.0145,
+      "step": 2508
+    },
+    {
+      "epoch": 1.400893355667225,
+      "grad_norm": 0.05364307388663292,
+      "learning_rate": 9.477653277834009e-09,
+      "loss": 0.0143,
+      "step": 2509
+    },
+    {
+      "epoch": 1.4014517029592406,
+      "grad_norm": 0.07531214505434036,
+      "learning_rate": 7.83279277773996e-09,
+      "loss": 0.0189,
+      "step": 2510
+    },
+    {
+      "epoch": 1.4020100502512562,
+      "grad_norm": 0.07607190310955048,
+      "learning_rate": 6.344577887185121e-09,
+      "loss": 0.0206,
+      "step": 2511
+    },
+    {
+      "epoch": 1.402568397543272,
+      "grad_norm": 0.08191350847482681,
+      "learning_rate": 5.01301093758233e-09,
+      "loss": 0.0211,
+      "step": 2512
+    },
+    {
+      "epoch": 1.4031267448352875,
+      "grad_norm": 0.07381971925497055,
+      "learning_rate": 3.838094014940729e-09,
+      "loss": 0.0184,
+      "step": 2513
+    },
+    {
+      "epoch": 1.4036850921273032,
+      "grad_norm": 0.06222749873995781,
+      "learning_rate": 2.8198289598435625e-09,
+      "loss": 0.0184,
+      "step": 2514
+    },
+    {
+      "epoch": 1.4042434394193188,
+      "grad_norm": 0.07456585019826889,
+      "learning_rate": 1.958217367514781e-09,
+      "loss": 0.0239,
+      "step": 2515
+    },
+    {
+      "epoch": 1.4048017867113345,
+      "grad_norm": 0.07716374844312668,
+      "learning_rate": 1.2532605877080273e-09,
+      "loss": 0.0228,
+      "step": 2516
+    },
+    {
+      "epoch": 1.4053601340033501,
+      "grad_norm": 0.08897180110216141,
+      "learning_rate": 7.049597248065532e-10,
+      "loss": 0.0247,
+      "step": 2517
+    },
+    {
+      "epoch": 1.4059184812953658,
+      "grad_norm": 0.061453305184841156,
+      "learning_rate": 3.13315637756606e-10,
+      "loss": 0.0199,
+      "step": 2518
+    },
+    {
+      "epoch": 1.4064768285873814,
+      "grad_norm": 0.07491685450077057,
+      "learning_rate": 7.832894011183811e-11,
+      "loss": 0.0209,
+      "step": 2519
+    },
+    {
+      "epoch": 1.4070351758793969,
+      "grad_norm": 0.10247643291950226,
+      "learning_rate": 0.0,
+      "loss": 0.0198,
+      "step": 2520
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.0376037222108365e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null