MohamedAhmedAE committed
Commit 0a31dd4
1 Parent(s): f54e427

Training in progress, step 26600

adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:be84fb38c3787d4d9a0a842fe86f337de52b2ffe7a5d69a4f2be10ca449aaede
+ oid sha256:a66649ed360eedf7f38576a034cb81a9f0d0d85b4e760a08a7af7c644b300130
  size 167832240
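
These are Git LFS pointer stubs rather than the weights themselves; the oid is the SHA-256 of the real file. A minimal sketch (plain hashlib, assuming the weights file has already been downloaded next to the script) for checking a local adapter_model.safetensors against the new pointer:

import hashlib

def lfs_sha256(path, chunk_size=1 << 20):
    # Hash the file in chunks; the LFS pointer's oid is the SHA-256 of the file contents.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "a66649ed360eedf7f38576a034cb81a9f0d0d85b4e760a08a7af7c644b300130"
assert lfs_sha256("adapter_model.safetensors") == expected, "file does not match LFS pointer"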
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
    "rank_pattern": {},
    "revision": null,
    "target_modules": [
-     "v_proj",
-     "o_proj",
-     "gate_proj",
      "k_proj",
+     "down_proj",
      "up_proj",
      "q_proj",
-     "down_proj"
+     "gate_proj",
+     "o_proj",
+     "v_proj"
    ],
    "task_type": "CAUSAL_LM",
    "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:be84fb38c3787d4d9a0a842fe86f337de52b2ffe7a5d69a4f2be10ca449aaede
+ oid sha256:09e7569231f9d48775234e444dc3002609a2aaf23f9fbb03afc31dd08b174acd
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c96cb8f0f1a681c15e8ed2760660f6d33cb92861d81a47b036784d07497c5099
+ oid sha256:1795d0f5907365f50ee8bf592e588c51cf0c1336607a8958053b6df1866c85ef
  size 85736914
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:20c68222f3372262392758b273879ed4f661c3fb7cc6b05e1cf45273c18d2035
+ oid sha256:e1e0be0fbdc67baa64bde42b3f62fb0cbfa6b616dea815616465446e745a61cd
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:40e9ee5e09ade4ca277911e9ac0e424525fe43efa67c8554ac479a544373b55d
+ oid sha256:afd4eb6473181bbf6da6b1613c063360450d1f8ec75384d51547bd12664e8b3a
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
    "best_metric": null,
    "best_model_checkpoint": null,
-   "epoch": 0.31412327702564524,
+   "epoch": 0.07674602790967469,
    "eval_steps": 2000,
-   "global_step": 26400,
+   "global_step": 25800,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
@@ -910,35 +910,14 @@
      "learning_rate": 1.998839744298062e-05,
      "loss": 1.5509,
      "step": 25800
-   },
-   {
-     "epoch": 0.31,
-     "grad_norm": 0.8327608704566956,
-     "learning_rate": 1.9812018045085563e-05,
-     "loss": 1.5483,
-     "step": 26000
-   },
-   {
-     "epoch": 0.31,
-     "grad_norm": 0.7521975636482239,
-     "learning_rate": 1.980912147882786e-05,
-     "loss": 1.5296,
-     "step": 26200
-   },
-   {
-     "epoch": 0.31,
-     "grad_norm": 0.7781311869621277,
-     "learning_rate": 1.9806202981642514e-05,
-     "loss": 1.5346,
-     "step": 26400
    }
  ],
  "logging_steps": 200,
- "max_steps": 420215,
+ "max_steps": 1680865,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 200,
- "total_flos": 7.248865905092444e+17,
+ "total_flos": 5.996940170325443e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c09e4d0caa94d3d4acabbf865ee69cc816a077a03533c04bf2c5e62bf08ec171
+ oid sha256:e88463ac0cd6182d9b1cd0ac83ec16eba34109ec5568773d1ab337bb23a66942
  size 4920
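
training_args.bin is the pickled TrainingArguments object that transformers.Trainer stores with each checkpoint. A sketch, assuming a recent PyTorch (which needs weights_only=False to unpickle non-tensor objects), for loading it and comparing run settings between commits:

import torch

# Load the serialized TrainingArguments and print a few fields that also appear
# in trainer_state.json (batch size 1, save_steps 200, 5 epochs).
args = torch.load("last-checkpoint/training_args.bin", weights_only=False)
print(args.per_device_train_batch_size, args.save_steps, args.num_train_epochs)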