Training in progress, step 200

Files changed (5) hide show

adapter_config.json CHANGED Viewed

@@ -19,36 +19,44 @@
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 64,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "34.self_attn.q_proj",
-    "cross_attn.v_proj",
-    "model.layers.29.self_attn.q_proj",
-    "39.self_attn.q_proj",
-    "32.self_attn.v_proj",
     "37.self_attn.q_proj",
-    "36.self_attn.q_proj",
-    "32.self_attn.q_proj",
-    "39.self_attn.v_proj",
-    "38.self_attn.q_proj",
-    "model.layers.30.self_attn.v_proj",
-    "model.layers.30.self_attn.q_proj",
-    "36.self_attn.v_proj",
     "model.layers.31.self_attn.v_proj",
     "model.layers.29.self_attn.v_proj",
-    "model.layers.28.self_attn.v_proj",
-    "35.self_attn.q_proj",
-    "33.self_attn.v_proj",
-    "34.self_attn.v_proj",
     "cross_attn.q_proj",
-    "35.self_attn.v_proj",
     "model.layers.31.self_attn.q_proj",
-    "33.self_attn.q_proj",
-    "model.layers.28.self_attn.q_proj",
     "37.self_attn.v_proj",
-    "38.self_attn.v_proj"
   ],
   "task_type": null,
   "use_dora": false,

   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "r": 128,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "model.layers.26.self_attn.q_proj",
+    "model.layers.26.self_attn.v_proj",
     "37.self_attn.q_proj",
+    "model.layers.28.self_attn.q_proj",
+    "33.self_attn.q_proj",
+    "model.layers.25.self_attn.v_proj",
+    "34.self_attn.v_proj",
+    "35.self_attn.q_proj",
+    "model.layers.24.self_attn.v_proj",
+    "model.layers.24.self_attn.q_proj",
+    "32.self_attn.v_proj",
     "model.layers.31.self_attn.v_proj",
+    "model.layers.27.self_attn.q_proj",
+    "32.self_attn.q_proj",
     "model.layers.29.self_attn.v_proj",
     "cross_attn.q_proj",
+    "model.layers.25.self_attn.q_proj",
+    "34.self_attn.q_proj",
+    "39.self_attn.q_proj",
+    "model.layers.30.self_attn.q_proj",
+    "model.layers.30.self_attn.v_proj",
     "model.layers.31.self_attn.q_proj",
+    "cross_attn.v_proj",
+    "33.self_attn.v_proj",
+    "36.self_attn.q_proj",
+    "model.layers.28.self_attn.v_proj",
+    "model.layers.29.self_attn.q_proj",
+    "model.layers.27.self_attn.v_proj",
+    "38.self_attn.q_proj",
+    "38.self_attn.v_proj",
+    "35.self_attn.v_proj",
     "37.self_attn.v_proj",
+    "36.self_attn.v_proj",
+    "39.self_attn.v_proj"
   ],
   "task_type": null,
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9fb2c01ce3a29c117fb69882b98e855a9ea37150b438dd3f22a5980f1747a00c
-size 57944064

 version https://git-lfs.github.com/spec/v1
+oid sha256:da9afacf9cc90f0c016c9712112ee9d3c60394c8c3a0b0ba34a2c64ff346590c
+size 143143408

runs/Oct04_03-27-21_8c14532e02d0/events.out.tfevents.1728012459.8c14532e02d0.4026.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:0b767b0cf2a51e3f5fbafd697c2ce7a404938ba81d50d7bf5e06e142d7abe960
+size 8280

runs/Oct04_03-28-57_8c14532e02d0/events.out.tfevents.1728012544.8c14532e02d0.10929.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:1da97c642d363c9ae81e174583a0eb2fb91bd270c4ea763ab13fc804f943bc8f
+size 9947

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:011bed7360ee4582bebfe264b69968e7878c8ebaeb6d56a75eadf257ae0c0161
 size 5240

 version https://git-lfs.github.com/spec/v1
+oid sha256:bbf94266cd0875964cb83f3aaf7579414b5fc7dc247f9807d79605262ca24f50
 size 5240