Training in progress, step 635, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3e2031f7a699e8ef5bcbb1fa8c3e083bf4011adc2087af257fdd7fcd1d2160e
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:4d9ca924d8d90e795e86de8369f2c6d48dc26135b122cf5d4b6c82671b86a4ed
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:071fd869a99418f429ea9573dbc75263ba429d3b76f73105073623a19e90d13e
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:129495fdf41d513360a3ce9d47abca649f2bf128467121d3f900e1098d9d5e75
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2d7c573c8b86ae1dd5b87ff18be79cde55bc2e7808d0b786a684f7321060ea8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:098658a8b5f74b202fbb5e29652962f5112c3b574b4ce341756735375ee72208
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d2580ef175e3eaadc95b4a9970cf624730cbbd91e97d33400a87932d531bfdd4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:17863c6ebccc1ff43b06a14aeb4d77d5b06180d5676f46d6243e5056d6bc48af
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8152701391135555,
   "eval_steps": 386,
-  "global_step": 630,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4433,6 +4433,41 @@
       "learning_rate": 9.439013679899262e-05,
       "loss": 0.8106,
       "step": 630
     }
   ],
   "logging_steps": 1,
@@ -4452,7 +4487,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.043004181669478e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.8217405370430282,
   "eval_steps": 386,
+  "global_step": 635,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.439013679899262e-05,
       "loss": 0.8106,
       "step": 630
+    },
+    {
+      "epoch": 0.8165642186994501,
+      "grad_norm": 0.7175205945968628,
+      "learning_rate": 9.437114747270612e-05,
+      "loss": 0.8622,
+      "step": 631
+    },
+    {
+      "epoch": 0.8178582982853445,
+      "grad_norm": 0.8083619475364685,
+      "learning_rate": 9.435212797814798e-05,
+      "loss": 0.8608,
+      "step": 632
+    },
+    {
+      "epoch": 0.8191523778712391,
+      "grad_norm": 0.9113181233406067,
+      "learning_rate": 9.433307832824974e-05,
+      "loss": 0.9445,
+      "step": 633
+    },
+    {
+      "epoch": 0.8204464574571336,
+      "grad_norm": 0.7915831208229065,
+      "learning_rate": 9.431399853596336e-05,
+      "loss": 0.8651,
+      "step": 634
+    },
+    {
+      "epoch": 0.8217405370430282,
+      "grad_norm": 0.7277971506118774,
+      "learning_rate": 9.429488861426137e-05,
+      "loss": 0.8799,
+      "step": 635
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 7.098901040254157e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null