Training in progress, step 400, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -286,9 +286,9 @@ print(embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
-# tensor([[1.0000, 0.5792, 0.1311],
-#         [0.5792, 1.0000, 0.1977],
-#         [0.1311, 0.1977, 1.0000]])
 ```
 <!--
@@ -1228,6 +1228,10 @@ You can finetune this model on your own dataset.
 | 0.0018 | 100  | 1.1661        |
 | 0.0027 | 150  | 1.2107        |
 | 0.0035 | 200  | 0.9855        |
 ### Framework Versions

 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
+# tensor([[1.0000, 0.5864, 0.0912],
+#         [0.5864, 1.0000, 0.1606],
+#         [0.0912, 0.1606, 1.0000]])
 ```
 <!--
 | 0.0018 | 100  | 1.1661        |
 | 0.0027 | 150  | 1.2107        |
 | 0.0035 | 200  | 0.9855        |
+| 0.0044 | 250  | 0.9431        |
+| 0.0053 | 300  | 0.8566        |
+| 0.0062 | 350  | 0.8697        |
+| 0.0071 | 400  | 0.8099        |
 ### Framework Versions

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f953fb0f9519003bc3cc56f7c0e14b4c7a0cf25c6b003c8ae75b96f4ae4d170b
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:b7d724aabf27686f2b045b9371c8395667163c1ae289641d98c04d67af681113
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c1aa2b4c3b6dfcc06be400285e90b262bb36d49374d8348b40cd77fa14c0c90
 size 180609611

 version https://git-lfs.github.com/spec/v1
+oid sha256:3833c1087a6615d0a9885551a669b19c4bbab287f26ffb8c0f28cb2d0ecbe7c9
 size 180609611

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:771b8080df542b78be2854f4e9393672f71c5efd3b22a27b765fd12c00d11b11
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:254790d1ccb1862da57b7a7e387a7afb33ae0567b70cc27cd18377286c351f30
 size 14645

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:124625e167eb28acbfc793cfcb3e8a08b32e7fea06501462bc9e420a5e1beb2a
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:30858f23bcb22d0baef45bd4add9d6fa474141308c12653c706077b87d932e49
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:42b8c82da689c3defc876679e8ba8bd56df03c2bb1d400cb4fa8209aae1fd7e2
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:c72cad2d603ca6a379ea320d48002510dd9f27b4826fefb43b627b20d6176f70
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.003534130868866074,
   "eval_steps": 500,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -36,6 +36,34 @@
       "learning_rate": 1.7579505300353357e-06,
       "loss": 0.9855,
       "step": 200
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.007068261737732148,
   "eval_steps": 500,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.7579505300353357e-06,
       "loss": 0.9855,
       "step": 200
+    },
+    {
+      "epoch": 0.004417663586082593,
+      "grad_norm": 3.2607851028442383,
+      "learning_rate": 2.199646643109541e-06,
+      "loss": 0.9431,
+      "step": 250
+    },
+    {
+      "epoch": 0.005301196303299111,
+      "grad_norm": 6.517599105834961,
+      "learning_rate": 2.6413427561837457e-06,
+      "loss": 0.8566,
+      "step": 300
+    },
+    {
+      "epoch": 0.00618472902051563,
+      "grad_norm": 2.8523333072662354,
+      "learning_rate": 3.0830388692579506e-06,
+      "loss": 0.8697,
+      "step": 350
+    },
+    {
+      "epoch": 0.007068261737732148,
+      "grad_norm": 3.460226058959961,
+      "learning_rate": 3.5247349823321555e-06,
+      "loss": 0.8099,
+      "step": 400
     }
   ],
   "logging_steps": 50,