Training in progress, step 5000, checkpoint

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:035738fa0b0cf26c1523a2cbaaa821bc209024897fb82257fbb088cffe5dd105
 size 4785762744

 version https://git-lfs.github.com/spec/v1
+oid sha256:af33858c5c886a868e427a6a837ff04fb423bca594c03aabb38293223f09a777
 size 4785762744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bcdfe18bd5d30faf69277fc553b17fc82cc47a083c89239fa36292531ae3105b
 size 3497859804

 version https://git-lfs.github.com/spec/v1
+oid sha256:00184d8d018217b8c5c28eb0dbe786eac72e9d32d132ef69b0bfab9f29bee0c3
 size 3497859804

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d8f12723c1cb91835f294867ed69b1fbd53831c2f03d745ef893e5654a0aed9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b5bb9a16abc726ae10edbd1c8badac548eb870d1e2b85f486141fc47114a01df
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.4644030668127055,
   "eval_steps": 500,
-  "global_step": 4500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -322,6 +322,41 @@
       "learning_rate": 9.399852479563775e-06,
       "loss": 0.494,
       "step": 4500
     }
   ],
   "logging_steps": 100,
@@ -341,7 +376,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.1638170935806853e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.738225629791895,
   "eval_steps": 500,
+  "global_step": 5000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.399852479563775e-06,
       "loss": 0.494,
       "step": 4500
+    },
+    {
+      "epoch": 2.5191675794085433,
+      "grad_norm": 1.5433528423309326,
+      "learning_rate": 7.623880462991801e-06,
+      "loss": 0.475,
+      "step": 4600
+    },
+    {
+      "epoch": 2.573932092004381,
+      "grad_norm": 1.8533953428268433,
+      "learning_rate": 6.019928913893208e-06,
+      "loss": 0.4692,
+      "step": 4700
+    },
+    {
+      "epoch": 2.628696604600219,
+      "grad_norm": 1.9672038555145264,
+      "learning_rate": 4.594508869576164e-06,
+      "loss": 0.4806,
+      "step": 4800
+    },
+    {
+      "epoch": 2.6834611171960567,
+      "grad_norm": 1.722920536994934,
+      "learning_rate": 3.35340664141246e-06,
+      "loss": 0.4629,
+      "step": 4900
+    },
+    {
+      "epoch": 2.738225629791895,
+      "grad_norm": 2.070141315460205,
+      "learning_rate": 2.301660326042443e-06,
+      "loss": 0.4596,
+      "step": 5000
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 2.406826165707178e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null