Training in progress, step 200
Browse files
adapter_config.json
CHANGED
@@ -19,36 +19,44 @@
|
|
19 |
"megatron_core": "megatron.core",
|
20 |
"modules_to_save": null,
|
21 |
"peft_type": "LORA",
|
22 |
-
"r":
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
-
"
|
27 |
-
"
|
28 |
-
"model.layers.29.self_attn.q_proj",
|
29 |
-
"39.self_attn.q_proj",
|
30 |
-
"32.self_attn.v_proj",
|
31 |
"37.self_attn.q_proj",
|
32 |
-
"
|
33 |
-
"
|
34 |
-
"
|
35 |
-
"
|
36 |
-
"
|
37 |
-
"model.layers.
|
38 |
-
"
|
|
|
39 |
"model.layers.31.self_attn.v_proj",
|
|
|
|
|
40 |
"model.layers.29.self_attn.v_proj",
|
41 |
-
"model.layers.28.self_attn.v_proj",
|
42 |
-
"35.self_attn.q_proj",
|
43 |
-
"33.self_attn.v_proj",
|
44 |
-
"34.self_attn.v_proj",
|
45 |
"cross_attn.q_proj",
|
46 |
-
"
|
|
|
|
|
|
|
|
|
47 |
"model.layers.31.self_attn.q_proj",
|
48 |
-
"
|
49 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
"37.self_attn.v_proj",
|
51 |
-
"
|
|
|
52 |
],
|
53 |
"task_type": null,
|
54 |
"use_dora": false,
|
|
|
19 |
"megatron_core": "megatron.core",
|
20 |
"modules_to_save": null,
|
21 |
"peft_type": "LORA",
|
22 |
+
"r": 128,
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
+
"model.layers.26.self_attn.q_proj",
|
27 |
+
"model.layers.26.self_attn.v_proj",
|
|
|
|
|
|
|
28 |
"37.self_attn.q_proj",
|
29 |
+
"model.layers.28.self_attn.q_proj",
|
30 |
+
"33.self_attn.q_proj",
|
31 |
+
"model.layers.25.self_attn.v_proj",
|
32 |
+
"34.self_attn.v_proj",
|
33 |
+
"35.self_attn.q_proj",
|
34 |
+
"model.layers.24.self_attn.v_proj",
|
35 |
+
"model.layers.24.self_attn.q_proj",
|
36 |
+
"32.self_attn.v_proj",
|
37 |
"model.layers.31.self_attn.v_proj",
|
38 |
+
"model.layers.27.self_attn.q_proj",
|
39 |
+
"32.self_attn.q_proj",
|
40 |
"model.layers.29.self_attn.v_proj",
|
|
|
|
|
|
|
|
|
41 |
"cross_attn.q_proj",
|
42 |
+
"model.layers.25.self_attn.q_proj",
|
43 |
+
"34.self_attn.q_proj",
|
44 |
+
"39.self_attn.q_proj",
|
45 |
+
"model.layers.30.self_attn.q_proj",
|
46 |
+
"model.layers.30.self_attn.v_proj",
|
47 |
"model.layers.31.self_attn.q_proj",
|
48 |
+
"cross_attn.v_proj",
|
49 |
+
"33.self_attn.v_proj",
|
50 |
+
"36.self_attn.q_proj",
|
51 |
+
"model.layers.28.self_attn.v_proj",
|
52 |
+
"model.layers.29.self_attn.q_proj",
|
53 |
+
"model.layers.27.self_attn.v_proj",
|
54 |
+
"38.self_attn.q_proj",
|
55 |
+
"38.self_attn.v_proj",
|
56 |
+
"35.self_attn.v_proj",
|
57 |
"37.self_attn.v_proj",
|
58 |
+
"36.self_attn.v_proj",
|
59 |
+
"39.self_attn.v_proj"
|
60 |
],
|
61 |
"task_type": null,
|
62 |
"use_dora": false,
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da9afacf9cc90f0c016c9712112ee9d3c60394c8c3a0b0ba34a2c64ff346590c
|
3 |
+
size 143143408
|
runs/Oct04_03-27-21_8c14532e02d0/events.out.tfevents.1728012459.8c14532e02d0.4026.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b767b0cf2a51e3f5fbafd697c2ce7a404938ba81d50d7bf5e06e142d7abe960
|
3 |
+
size 8280
|
runs/Oct04_03-28-57_8c14532e02d0/events.out.tfevents.1728012544.8c14532e02d0.10929.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1da97c642d363c9ae81e174583a0eb2fb91bd270c4ea763ab13fc804f943bc8f
|
3 |
+
size 9947
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bbf94266cd0875964cb83f3aaf7579414b5fc7dc247f9807d79605262ca24f50
|
3 |
size 5240
|