Upload folder using huggingface_hub
Browse files
- README.md +7 -7
- config.json +2 -2
- mergekit_config.yml +9 -16
- model-00001-of-00003.safetensors +2 -2
- model-00002-of-00003.safetensors +2 -2
- model-00003-of-00003.safetensors +2 -2
- model.safetensors.index.json +1 -1
README.md
CHANGED
@@ -3,26 +3,26 @@ tags:
|
|
3 |
- merge
|
4 |
- mergekit
|
5 |
- lazymergekit
|
6 |
-
- llm-jp-1.3b
|
7 |
base_model:
|
8 |
-
- llm-jp-1.3b
|
9 |
-
- llm-jp-1.3b
|
10 |
---
|
11 |
|
12 |
# llmjp-linear
|
13 |
|
14 |
llmjp-linear is a merge of the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
|
15 |
-
* [llm-jp-1.3b](https://huggingface.co/llm-jp-1.3b)
|
16 |
-
* [llm-jp-1.3b](https://huggingface.co/llm-jp-1.3b)
|
17 |
|
18 |
## 🧩 Configuration
|
19 |
|
20 |
```yaml
|
21 |
models:
|
22 |
-
- model: llm-jp-1.3b
|
23 |
parameters:
|
24 |
weight: 0.5
|
25 |
-
- model: llm-jp-1.3b
|
26 |
parameters:
|
27 |
weight: 0.5
|
28 |
merge_method: linear
|
|
|
3 |
- merge
|
4 |
- mergekit
|
5 |
- lazymergekit
|
6 |
+
- llm-jp/llm-jp-1.3b-v1.0
|
7 |
base_model:
|
8 |
+
- llm-jp/llm-jp-1.3b-v1.0
|
9 |
+
- llm-jp/llm-jp-1.3b-v1.0
|
10 |
---
|
11 |
|
12 |
# llmjp-linear
|
13 |
|
14 |
llmjp-linear is a merge of the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
|
15 |
+
* [llm-jp/llm-jp-1.3b-v1.0](https://huggingface.co/llm-jp/llm-jp-1.3b-v1.0)
|
16 |
+
* [llm-jp/llm-jp-1.3b-v1.0](https://huggingface.co/llm-jp/llm-jp-1.3b-v1.0)
|
17 |
|
18 |
## 🧩 Configuration
|
19 |
|
20 |
```yaml
|
21 |
models:
|
22 |
+
- model: llm-jp/llm-jp-1.3b-v1.0
|
23 |
parameters:
|
24 |
weight: 0.5
|
25 |
+
- model: llm-jp/llm-jp-1.3b-v1.0
|
26 |
parameters:
|
27 |
weight: 0.5
|
28 |
merge_method: linear
|
config.json
CHANGED
@@ -16,7 +16,7 @@
|
|
16 |
"n_embd": 2048,
|
17 |
"n_head": 16,
|
18 |
"n_inner": 8192,
|
19 |
-
"n_layer":
|
20 |
"n_positions": 2048,
|
21 |
"reorder_and_upcast_attn": false,
|
22 |
"resid_pdrop": 0.1,
|
@@ -27,7 +27,7 @@
|
|
27 |
"summary_proj_to_labels": true,
|
28 |
"summary_type": "cls_index",
|
29 |
"summary_use_proj": true,
|
30 |
-
"torch_dtype": "
|
31 |
"transformers_version": "4.41.2",
|
32 |
"use_cache": true,
|
33 |
"vocab_size": 50688
|
|
|
16 |
"n_embd": 2048,
|
17 |
"n_head": 16,
|
18 |
"n_inner": 8192,
|
19 |
+
"n_layer": 24,
|
20 |
"n_positions": 2048,
|
21 |
"reorder_and_upcast_attn": false,
|
22 |
"resid_pdrop": 0.1,
|
|
|
27 |
"summary_proj_to_labels": true,
|
28 |
"summary_type": "cls_index",
|
29 |
"summary_use_proj": true,
|
30 |
+
"torch_dtype": "float16",
|
31 |
"transformers_version": "4.41.2",
|
32 |
"use_cache": true,
|
33 |
"vocab_size": 50688
|
mergekit_config.yml
CHANGED
@@ -1,17 +1,10 @@
|
|
1 |
|
2 |
-
|
3 |
-
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
t:
|
12 |
-
- filter: self_attn
|
13 |
-
value: [0, 0.5, 0.3, 0.7, 1]
|
14 |
-
- filter: mlp
|
15 |
-
value: [1, 0.5, 0.7, 0.3, 0]
|
16 |
-
- value: 0.5
|
17 |
-
dtype: bfloat16
|
|
|
1 |
|
2 |
+
models:
|
3 |
+
- model: llm-jp/llm-jp-1.3b-v1.0
|
4 |
+
parameters:
|
5 |
+
weight: 0.5
|
6 |
+
- model: llm-jp/llm-jp-1.3b-v1.0
|
7 |
+
parameters:
|
8 |
+
weight: 0.5
|
9 |
+
merge_method: linear
|
10 |
+
dtype: float16
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:03388c67f4332f079a7dda8beb61739340414ac249cfbc7cb3903b63f6837cdd
|
3 |
+
size 973623320
|
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:126a50fd40f9f0029d965abebd01582597d002ee3ed1eb3f08b09b01fd9e5478
|
3 |
+
size 973619112
|
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:915ae68df4d16804e481e02ac58c8c81136fa42f055802f498bd7f02c42cf399
|
3 |
+
size 685999640
|
model.safetensors.index.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"metadata": {"mergekit_version": "0.0.4.2", "total_size": 2431778816}, "weight_map": {"transformer.h.0.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.0.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.0.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.0.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.0.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.0.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.0.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.0.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.1.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.1.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.1.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.1.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.1.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.1.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.1.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.1.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.10.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.10.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.10.attn.c_proj.bias": "model-00001-of-00003.safetensors", 
"transformer.h.10.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.10.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.10.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.10.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.10.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.11.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.11.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.11.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.11.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.11.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.11.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.11.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.11.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.12.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.12.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.12.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.12.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.12.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.12.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.12.ln_2.bias": "model-00001-of-00003.safetensors", 
"transformer.h.12.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.13.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.13.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.13.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.13.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.13.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.13.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.13.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.13.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.14.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.14.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.14.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.14.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.14.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.14.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.14.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.14.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.14.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.14.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.14.mlp.c_proj.bias": "model-00001-of-00003.safetensors", 
"transformer.h.14.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.15.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.15.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.15.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.15.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.15.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.15.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.15.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.15.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.16.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.16.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.16.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.16.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.16.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.16.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.16.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.16.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.17.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.17.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.17.attn.c_proj.bias": "model-00001-of-00003.safetensors", 
"transformer.h.17.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.17.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.17.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.17.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.17.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.18.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.18.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.18.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.18.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.18.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.18.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.18.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.18.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.18.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.18.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.18.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.18.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.19.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.19.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.19.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.19.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.19.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.19.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.19.ln_2.bias": "model-00002-of-00003.safetensors", 
"transformer.h.19.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.2.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.2.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.2.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.2.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.2.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.2.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.2.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.2.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.20.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.20.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.20.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.20.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.20.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.20.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.20.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.20.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.20.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.20.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.20.mlp.c_proj.bias": "model-00002-of-00003.safetensors", 
"transformer.h.20.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.21.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.21.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.21.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.21.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.21.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.21.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.21.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.21.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.3.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.3.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.3.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.3.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.3.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.3.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.3.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.3.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.4.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.4.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.4.attn.c_proj.bias": "model-00002-of-00003.safetensors", 
"transformer.h.4.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.4.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.4.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.4.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.4.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.4.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.4.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.4.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.4.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.5.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.5.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.5.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.5.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.5.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.5.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.5.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.5.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.5.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.5.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.5.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.5.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.6.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.6.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.6.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.6.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.6.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.6.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.6.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.6.ln_2.weight": 
"model-00002-of-00003.safetensors", "transformer.h.6.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.6.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.6.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.6.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.7.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.7.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.7.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.7.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.7.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.7.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.7.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.7.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.7.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.7.mlp.c_fc.weight": "model-00003-of-00003.safetensors", "transformer.h.7.mlp.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.7.mlp.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.8.attn.c_attn.bias": "model-00003-of-00003.safetensors", "transformer.h.8.attn.c_attn.weight": "model-00003-of-00003.safetensors", "transformer.h.8.attn.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.8.attn.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.8.ln_1.bias": "model-00003-of-00003.safetensors", "transformer.h.8.ln_1.weight": "model-00003-of-00003.safetensors", "transformer.h.8.ln_2.bias": "model-00003-of-00003.safetensors", "transformer.h.8.ln_2.weight": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.c_fc.bias": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.c_fc.weight": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.c_proj.weight": "model-00003-of-00003.safetensors", 
"transformer.h.9.attn.c_attn.bias": "model-00003-of-00003.safetensors", "transformer.h.9.attn.c_attn.weight": "model-00003-of-00003.safetensors", "transformer.h.9.attn.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.9.attn.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.9.ln_1.bias": "model-00003-of-00003.safetensors", "transformer.h.9.ln_1.weight": "model-00003-of-00003.safetensors", "transformer.h.9.ln_2.bias": "model-00003-of-00003.safetensors", "transformer.h.9.ln_2.weight": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.c_fc.bias": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.c_fc.weight": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.ln_f.bias": "model-00003-of-00003.safetensors", "transformer.ln_f.weight": "model-00003-of-00003.safetensors", "transformer.wpe.weight": "model-00003-of-00003.safetensors", "transformer.wte.weight": "model-00003-of-00003.safetensors"}}
|
|
|
1 |
+
{"metadata": {"mergekit_version": "0.0.4.2", "total_size": 2633211904}, "weight_map": {"transformer.h.0.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.0.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.0.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.0.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.0.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.0.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.0.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.0.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.1.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.1.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.1.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.1.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.1.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.1.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.1.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.1.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.10.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.10.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.10.attn.c_proj.bias": "model-00001-of-00003.safetensors", 
"transformer.h.10.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.10.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.10.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.10.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.10.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.11.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.11.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.11.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.11.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.11.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.11.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.11.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.11.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.12.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.12.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.12.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.12.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.12.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.12.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.12.ln_2.bias": "model-00001-of-00003.safetensors", 
"transformer.h.12.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.13.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.13.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.13.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.13.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.13.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.13.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.13.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.13.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.14.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.14.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.14.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.14.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.14.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.14.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.14.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.14.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.14.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.14.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.14.mlp.c_proj.bias": "model-00001-of-00003.safetensors", 
"transformer.h.14.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.15.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.15.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.15.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.15.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.15.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.15.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.15.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.15.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.16.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.16.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.16.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.16.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.16.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.16.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.16.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.16.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.17.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.17.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.17.attn.c_proj.bias": "model-00001-of-00003.safetensors", 
"transformer.h.17.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.17.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.17.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.17.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.17.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.18.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.18.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.18.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.18.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.18.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.18.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.18.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.18.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.18.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.18.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.18.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.18.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.19.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.19.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.19.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.19.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.19.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.19.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.19.ln_2.bias": "model-00002-of-00003.safetensors", 
"transformer.h.19.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.2.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.2.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.2.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.2.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.2.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.2.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.2.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.2.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.20.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.20.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.20.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.20.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.20.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.20.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.20.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.20.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.20.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.20.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.20.mlp.c_proj.bias": "model-00002-of-00003.safetensors", 
"transformer.h.20.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.21.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.21.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.21.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.21.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.21.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.21.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.21.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.21.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.22.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.22.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.22.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.22.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.22.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.22.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.22.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.22.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.22.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.22.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.22.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.22.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.23.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.23.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.23.attn.c_proj.bias": "model-00002-of-00003.safetensors", 
"transformer.h.23.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.23.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.23.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.23.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.23.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.23.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.23.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.23.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.23.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.3.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.3.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.3.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.3.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.3.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.3.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.3.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.3.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.4.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.4.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.4.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.4.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.4.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.4.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.4.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.4.ln_2.weight": 
"model-00002-of-00003.safetensors", "transformer.h.4.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.4.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.4.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.4.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.5.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.5.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.5.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.5.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.5.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.5.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.5.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.5.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.5.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.5.mlp.c_fc.weight": "model-00003-of-00003.safetensors", "transformer.h.5.mlp.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.5.mlp.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.6.attn.c_attn.bias": "model-00003-of-00003.safetensors", "transformer.h.6.attn.c_attn.weight": "model-00003-of-00003.safetensors", "transformer.h.6.attn.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.6.attn.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.6.ln_1.bias": "model-00003-of-00003.safetensors", "transformer.h.6.ln_1.weight": "model-00003-of-00003.safetensors", "transformer.h.6.ln_2.bias": "model-00003-of-00003.safetensors", "transformer.h.6.ln_2.weight": "model-00003-of-00003.safetensors", "transformer.h.6.mlp.c_fc.bias": "model-00003-of-00003.safetensors", "transformer.h.6.mlp.c_fc.weight": "model-00003-of-00003.safetensors", "transformer.h.6.mlp.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.6.mlp.c_proj.weight": "model-00003-of-00003.safetensors", 
"transformer.h.7.attn.c_attn.bias": "model-00003-of-00003.safetensors", "transformer.h.7.attn.c_attn.weight": "model-00003-of-00003.safetensors", "transformer.h.7.attn.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.7.attn.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.7.ln_1.bias": "model-00003-of-00003.safetensors", "transformer.h.7.ln_1.weight": "model-00003-of-00003.safetensors", "transformer.h.7.ln_2.bias": "model-00003-of-00003.safetensors", "transformer.h.7.ln_2.weight": "model-00003-of-00003.safetensors", "transformer.h.7.mlp.c_fc.bias": "model-00003-of-00003.safetensors", "transformer.h.7.mlp.c_fc.weight": "model-00003-of-00003.safetensors", "transformer.h.7.mlp.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.7.mlp.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.8.attn.c_attn.bias": "model-00003-of-00003.safetensors", "transformer.h.8.attn.c_attn.weight": "model-00003-of-00003.safetensors", "transformer.h.8.attn.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.8.attn.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.8.ln_1.bias": "model-00003-of-00003.safetensors", "transformer.h.8.ln_1.weight": "model-00003-of-00003.safetensors", "transformer.h.8.ln_2.bias": "model-00003-of-00003.safetensors", "transformer.h.8.ln_2.weight": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.c_fc.bias": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.c_fc.weight": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.9.attn.c_attn.bias": "model-00003-of-00003.safetensors", "transformer.h.9.attn.c_attn.weight": "model-00003-of-00003.safetensors", "transformer.h.9.attn.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.9.attn.c_proj.weight": "model-00003-of-00003.safetensors", 
"transformer.h.9.ln_1.bias": "model-00003-of-00003.safetensors", "transformer.h.9.ln_1.weight": "model-00003-of-00003.safetensors", "transformer.h.9.ln_2.bias": "model-00003-of-00003.safetensors", "transformer.h.9.ln_2.weight": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.c_fc.bias": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.c_fc.weight": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.ln_f.bias": "model-00003-of-00003.safetensors", "transformer.ln_f.weight": "model-00003-of-00003.safetensors", "transformer.wpe.weight": "model-00003-of-00003.safetensors", "transformer.wte.weight": "model-00003-of-00003.safetensors"}}
|