Training in progress, step 695, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a3b487bcd9f0f8a52ebb009ad90b8ecda6c53af66c9e15949f86cd4ee4c5493
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2818821d2bd85c742350df6f93ddc6ceaa5a28fbd9c64e00f2455a5ea94572d
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:963768aacc28d56da4770e8791826eb46d62fef62323535d71a32db43b751e99
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:e3c3d6d6a41f72e4e2daea557637ea5b684d9fb7a2db23f4008a8c201310880c
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:679bf09455ce8b50952b872f5ea495d76ca0e5689b5a5b2ac13810d836dad521
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c791046602634dc7125db7c8a6fa9643238ca36be61916506dd41a5029b29c2a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eb6690e503326fa9d439d70768c7431f28556349fb205663edc4009a63fe4c77
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:743b3e06741f3e617852c286974506bffbbfec118a8156be26ba7327d67612e1
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8929149142672275,
   "eval_steps": 386,
-  "global_step": 690,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4853,6 +4853,41 @@
       "learning_rate": 9.319783184592142e-05,
       "loss": 0.9691,
       "step": 690
     }
   ],
   "logging_steps": 1,
@@ -4872,7 +4907,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.713766484685619e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.8993853121967001,
   "eval_steps": 386,
+  "global_step": 695,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.319783184592142e-05,
       "loss": 0.9691,
       "step": 690
+    },
+    {
+      "epoch": 0.8942089938531219,
+      "grad_norm": 0.6824143528938293,
+      "learning_rate": 9.317705600932164e-05,
+      "loss": 0.7027,
+      "step": 691
+    },
+    {
+      "epoch": 0.8955030734390165,
+      "grad_norm": 0.8860282301902771,
+      "learning_rate": 9.315625081632191e-05,
+      "loss": 0.9419,
+      "step": 692
+    },
+    {
+      "epoch": 0.896797153024911,
+      "grad_norm": 0.9011827707290649,
+      "learning_rate": 9.313541628106777e-05,
+      "loss": 0.8468,
+      "step": 693
+    },
+    {
+      "epoch": 0.8980912326108056,
+      "grad_norm": 0.836565375328064,
+      "learning_rate": 9.311455241772482e-05,
+      "loss": 1.0456,
+      "step": 694
+    },
+    {
+      "epoch": 0.8993853121967001,
+      "grad_norm": 0.819817841053009,
+      "learning_rate": 9.309365924047853e-05,
+      "loss": 0.9606,
+      "step": 695
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 7.769663343270298e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null