LilRg committed on
Commit
74c8364
1 Parent(s): 92ab15d

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model:
3
+ - microsoft/Phi-3-small-128k-instruct
4
+ - microsoft/Phi-3-small-8k-instruct
5
+ tags:
6
+ - merge
7
+ - mergekit
8
+ - lazymergekit
9
+ - microsoft/Phi-3-small-128k-instruct
10
+ - microsoft/Phi-3-small-8k-instruct
11
+ ---
12
+
13
+ # PRYMMAL-ECE-7B-SLERP-V1
14
+
15
+ PRYMMAL-ECE-7B-SLERP-V1 is a merge of the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
16
+ * [microsoft/Phi-3-small-128k-instruct](https://huggingface.co/microsoft/Phi-3-small-128k-instruct)
17
+ * [microsoft/Phi-3-small-8k-instruct](https://huggingface.co/microsoft/Phi-3-small-8k-instruct)
18
+
19
+ ## 🧩 Configuration
20
+
21
+ ```yaml
22
+ slices:
23
+ - sources:
24
+ - model: microsoft/Phi-3-small-128k-instruct
25
+ layer_range: [0, 32]
26
+ - model: microsoft/Phi-3-small-8k-instruct
27
+ layer_range: [0, 32]
28
+ merge_method: slerp
29
+ base_model: microsoft/Phi-3-small-128k-instruct
30
+ parameters:
31
+ t:
32
+ - filter: self_attn
33
+ value: [0, 0.5, 0.3, 0.7, 1]
34
+ - filter: mlp
35
+ value: [1, 0.5, 0.7, 0.3, 0]
36
+ - value: 0.5
37
+ dtype: bfloat16
38
+ ```
39
+
40
+ ## 💻 Usage
41
+
42
+ ```python
43
+ !pip install -qU transformers accelerate
44
+
45
+ from transformers import AutoTokenizer
46
+ import transformers
47
+ import torch
48
+
49
+ model = "LilRg/PRYMMAL-ECE-7B-SLERP-V1"
50
+ messages = [{"role": "user", "content": "What is a large language model?"}]
51
+
52
+ tokenizer = AutoTokenizer.from_pretrained(model)
53
+ prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
54
+ pipeline = transformers.pipeline(
55
+ "text-generation",
56
+ model=model,
57
+ torch_dtype=torch.float16,
58
+ device_map="auto",
59
+ )
60
+
61
+ outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
62
+ print(outputs[0]["generated_text"])
63
+ ```
cl100k_base.tiktoken ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/Phi-3-small-128k-instruct",
3
+ "architectures": [
4
+ "Phi3SmallForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout_prob": 0.0,
8
+ "auto_map": {
9
+ "AutoConfig": "microsoft/Phi-3-small-128k-instruct--configuration_phi3_small.Phi3SmallConfig",
10
+ "AutoModelForCausalLM": "microsoft/Phi-3-small-128k-instruct--modeling_phi3_small.Phi3SmallForCausalLM",
11
+ "AutoTokenizer": "microsoft/Phi-3-small-128k-instruct--tokenization_phi3_small.Phi3SmallTokenizer"
12
+ },
13
+ "blocksparse_block_size": 64,
14
+ "blocksparse_homo_head_pattern": false,
15
+ "blocksparse_num_local_blocks": 16,
16
+ "blocksparse_triton_kernel_block_size": 64,
17
+ "blocksparse_vert_stride": 8,
18
+ "bos_token_id": 100257,
19
+ "dense_attention_every_n_layers": 2,
20
+ "embedding_dropout_prob": 0.1,
21
+ "eos_token_id": 100257,
22
+ "ff_dim_multiplier": null,
23
+ "ff_intermediate_size": 14336,
24
+ "ffn_dropout_prob": 0.1,
25
+ "gegelu_limit": 20.0,
26
+ "gegelu_pad_to_256": true,
27
+ "hidden_act": "gegelu",
28
+ "hidden_size": 4096,
29
+ "initializer_range": 0.02,
30
+ "layer_norm_epsilon": 1e-05,
31
+ "max_position_embeddings": 131072,
32
+ "model_type": "phi3small",
33
+ "mup_attn_multiplier": 1.0,
34
+ "mup_embedding_multiplier": 10.0,
35
+ "mup_use_scaling": true,
36
+ "mup_width_multiplier": 8.0,
37
+ "num_attention_heads": 32,
38
+ "num_hidden_layers": 32,
39
+ "num_key_value_heads": 8,
40
+ "original_max_position_embeddings": 8192,
41
+ "pad_sequence_to_multiple_of_64": true,
42
+ "reorder_and_upcast_attn": false,
43
+ "rope_embedding_base": 1000000,
44
+ "rope_position_scale": 1.0,
45
+ "rope_scaling": {
46
+ "long_factor": [
47
+ 1.0,
48
+ 1.01,
49
+ 1.01,
50
+ 1.02,
51
+ 1.04,
52
+ 1.04,
53
+ 1.04,
54
+ 1.05,
55
+ 1.05,
56
+ 1.06,
57
+ 1.07,
58
+ 1.08,
59
+ 1.08,
60
+ 1.08,
61
+ 1.08,
62
+ 1.08,
63
+ 1.08,
64
+ 1.08,
65
+ 1.09,
66
+ 1.09,
67
+ 1.2,
68
+ 2.31,
69
+ 3.76,
70
+ 9.38,
71
+ 10.1,
72
+ 10.8,
73
+ 18.1,
74
+ 25.2,
75
+ 25.3,
76
+ 26.1,
77
+ 26.6,
78
+ 30.2,
79
+ 33.0,
80
+ 41.5,
81
+ 44.4,
82
+ 44.8,
83
+ 50.2,
84
+ 51.9,
85
+ 59.3,
86
+ 62.7,
87
+ 66.1,
88
+ 66.3,
89
+ 85.8,
90
+ 89.3,
91
+ 90.0,
92
+ 99.9,
93
+ 107.0,
94
+ 110.0,
95
+ 111.0,
96
+ 117.0,
97
+ 118.0,
98
+ 121.0,
99
+ 122.0,
100
+ 127.0,
101
+ 127.0,
102
+ 128.0,
103
+ 128.0,
104
+ 128.0,
105
+ 128.0,
106
+ 128.0,
107
+ 128.0,
108
+ 129.0,
109
+ 129.0,
110
+ 129.0
111
+ ],
112
+ "long_mscale": 1.1902380714238083,
113
+ "original_max_position_embeddings": 8192,
114
+ "short_factor": [
115
+ 1.02,
116
+ 1.02,
117
+ 1.05,
118
+ 1.05,
119
+ 1.06,
120
+ 1.08,
121
+ 1.08,
122
+ 1.08,
123
+ 1.08,
124
+ 1.12,
125
+ 1.1800000000000002,
126
+ 1.1900000000000002,
127
+ 1.1900000000000002,
128
+ 1.2100000000000002,
129
+ 1.2300000000000002,
130
+ 1.2400000000000002,
131
+ 1.2400000000000002,
132
+ 1.2500000000000002,
133
+ 1.3000000000000003,
134
+ 1.3100000000000003,
135
+ 1.4600000000000004,
136
+ 1.5100000000000005,
137
+ 1.7000000000000006,
138
+ 1.9300000000000008,
139
+ 2.080000000000001,
140
+ 2.4399999999999933,
141
+ 3.2199999999999767,
142
+ 3.4499999999999718,
143
+ 3.579999999999969,
144
+ 4.669999999999946,
145
+ 4.779999999999943,
146
+ 5.999999999999917,
147
+ 6.009999999999917,
148
+ 6.4199999999999084,
149
+ 6.619999999999904,
150
+ 7.189999999999892,
151
+ 7.3099999999998895,
152
+ 7.339999999999889,
153
+ 7.479999999999886,
154
+ 9.749999999999837,
155
+ 10.919999999999812,
156
+ 11.219999999999805,
157
+ 11.749999999999794,
158
+ 11.979999999999789,
159
+ 13.239999999999762,
160
+ 13.579999999999755,
161
+ 13.669999999999753,
162
+ 13.82999999999975,
163
+ 14.009999999999746,
164
+ 14.679999999999731,
165
+ 14.889999999999727,
166
+ 15.769999999999708,
167
+ 15.769999999999708,
168
+ 15.819999999999707,
169
+ 15.839999999999707,
170
+ 15.919999999999705,
171
+ 16.029999999999703,
172
+ 16.12999999999972,
173
+ 16.44999999999977,
174
+ 16.44999999999977,
175
+ 16.77999999999982,
176
+ 16.83999999999983,
177
+ 16.83999999999983,
178
+ 16.889999999999837
179
+ ],
180
+ "short_mscale": 1.0,
181
+ "type": "su"
182
+ },
183
+ "torch_dtype": "bfloat16",
184
+ "transformers_version": "4.44.2",
185
+ "use_cache": true,
186
+ "vocab_size": 100352
187
+ }
mergekit_config.yml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ slices:
3
+ - sources:
4
+ - model: microsoft/Phi-3-small-128k-instruct
5
+ layer_range: [0, 32]
6
+ - model: microsoft/Phi-3-small-8k-instruct
7
+ layer_range: [0, 32]
8
+ merge_method: slerp
9
+ base_model: microsoft/Phi-3-small-128k-instruct
10
+ parameters:
11
+ t:
12
+ - filter: self_attn
13
+ value: [0, 0.5, 0.3, 0.7, 1]
14
+ - filter: mlp
15
+ value: [1, 0.5, 0.7, 0.3, 0]
16
+ - value: 0.5
17
+ dtype: bfloat16
model-00001-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1a23c9ab1479aa4ac2209ed8adad413250eeba52cf71e83c0d285b36a68ddb0
3
+ size 822083712
model-00002-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d27f87a6aa177be0cf900c6f9c44f25c7f06bfcae32e2ddb1c7619af39d523fb
3
+ size 939623272
model-00003-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f98548c390b6b80525e8d86c636712d080974ca44f1d450b18f027c47c965517
3
+ size 872655528
model-00004-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:193d4689c8e479b7b9cb7d75627d32935c75c7600cb2841470baac1ca89ee07e
3
+ size 872655552
model-00005-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7bdae4d8c3f61b4e1fe58e0b798039482d31a8201d63d68d5619dacdb17930f
3
+ size 872655552
model-00006-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:041c2b274190a9392d9b451f47f7a7927c00a9b427ab50823d33a3bb7e0b8076
3
+ size 872655552
model-00007-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9f55b5223bc20e92ceea5abf29b765d8fade6fe06023ffff34c95a03ae68ae0
3
+ size 872655552
model-00008-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c4797e7e17dd3fd41a89da1e558d35ed57b55462d5f16298753a823ce1b1340
3
+ size 872655544
model-00009-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d335ea056ec36ca8558d3965e4a9ee61a783f79ffa33c2ac4439c46d875461ed
3
+ size 872655544
model-00010-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50d2794841c03b740e4ccf40bf0f260d95e77a333b6869f54664d0a19c33d406
3
+ size 872655552
model-00011-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d286dbf9be63f1ec842bea32d42ae1b0d5dbe5b329759672a2163cb9ce70c886
3
+ size 872655552
model-00012-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f126e531d330f1cc99ddbf665709d3c996f285594a8cc4e88fa169c6ce5f8e4
3
+ size 872655552
model-00013-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35911ae605e95bbb4b83e3fec2e597e027d47eed850f72d499ee56206a40e7d7
3
+ size 872655552
model-00014-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cb7528c99ea141827bb220931acad54b41f53f63830f1b739b2315ad8c29a3a
3
+ size 872655536
model-00015-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:103e51b9baead04dfae6b63110a3f842e8832f0eb9889663a932adf1cea9fbb4
3
+ size 872655544
model-00016-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:556cf6496ec60a6ee4487c6e5fb4fb6594376ea601da3793e067d1a638381b09
3
+ size 872655528
model-00017-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73ce15feea7c804e7af4714cd35bcbd2d84ff548ecdbb385478af7c800239c09
3
+ size 872655528
model-00018-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2dea660fb0e3579c77b84f82955106ac162207a078b015e62e43667607e76d9
3
+ size 755132552
model.safetensors.index.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata": {"mergekit_version": "0.0.4.4", "total_size": 15606628352}, "weight_map": {"lm_head.weight": "model-00001-of-00018.safetensors", "model.embed_tokens.weight": "model-00002-of-00018.safetensors", "model.final_layernorm.bias": "model-00002-of-00018.safetensors", "model.final_layernorm.weight": "model-00002-of-00018.safetensors", "model.layers.0.input_layernorm.bias": "model-00002-of-00018.safetensors", "model.layers.0.input_layernorm.weight": "model-00002-of-00018.safetensors", "model.layers.0.mlp.down_proj.bias": "model-00002-of-00018.safetensors", "model.layers.0.mlp.down_proj.weight": "model-00002-of-00018.safetensors", "model.layers.0.mlp.up_proj.bias": "model-00002-of-00018.safetensors", "model.layers.0.mlp.up_proj.weight": "model-00003-of-00018.safetensors", "model.layers.0.post_attention_layernorm.bias": "model-00003-of-00018.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00003-of-00018.safetensors", "model.layers.0.self_attn.dense.bias": "model-00003-of-00018.safetensors", "model.layers.0.self_attn.dense.weight": "model-00003-of-00018.safetensors", "model.layers.0.self_attn.query_key_value.bias": "model-00003-of-00018.safetensors", "model.layers.0.self_attn.query_key_value.weight": "model-00003-of-00018.safetensors", "model.layers.1.input_layernorm.bias": "model-00003-of-00018.safetensors", "model.layers.1.input_layernorm.weight": "model-00003-of-00018.safetensors", "model.layers.1.mlp.down_proj.bias": "model-00003-of-00018.safetensors", "model.layers.1.mlp.down_proj.weight": "model-00003-of-00018.safetensors", "model.layers.1.mlp.up_proj.bias": "model-00003-of-00018.safetensors", "model.layers.1.mlp.up_proj.weight": "model-00003-of-00018.safetensors", "model.layers.1.post_attention_layernorm.bias": "model-00003-of-00018.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00003-of-00018.safetensors", "model.layers.1.self_attn.dense.bias": "model-00003-of-00018.safetensors", 
"model.layers.1.self_attn.dense.weight": "model-00003-of-00018.safetensors", "model.layers.1.self_attn.query_key_value.bias": "model-00003-of-00018.safetensors", "model.layers.1.self_attn.query_key_value.weight": "model-00003-of-00018.safetensors", "model.layers.10.input_layernorm.bias": "model-00003-of-00018.safetensors", "model.layers.10.input_layernorm.weight": "model-00003-of-00018.safetensors", "model.layers.10.mlp.down_proj.bias": "model-00003-of-00018.safetensors", "model.layers.10.mlp.down_proj.weight": "model-00003-of-00018.safetensors", "model.layers.10.mlp.up_proj.bias": "model-00003-of-00018.safetensors", "model.layers.10.mlp.up_proj.weight": "model-00004-of-00018.safetensors", "model.layers.10.post_attention_layernorm.bias": "model-00004-of-00018.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00004-of-00018.safetensors", "model.layers.10.self_attn.dense.bias": "model-00004-of-00018.safetensors", "model.layers.10.self_attn.dense.weight": "model-00004-of-00018.safetensors", "model.layers.10.self_attn.query_key_value.bias": "model-00004-of-00018.safetensors", "model.layers.10.self_attn.query_key_value.weight": "model-00004-of-00018.safetensors", "model.layers.11.input_layernorm.bias": "model-00004-of-00018.safetensors", "model.layers.11.input_layernorm.weight": "model-00004-of-00018.safetensors", "model.layers.11.mlp.down_proj.bias": "model-00004-of-00018.safetensors", "model.layers.11.mlp.down_proj.weight": "model-00004-of-00018.safetensors", "model.layers.11.mlp.up_proj.bias": "model-00004-of-00018.safetensors", "model.layers.11.mlp.up_proj.weight": "model-00004-of-00018.safetensors", "model.layers.11.post_attention_layernorm.bias": "model-00004-of-00018.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00004-of-00018.safetensors", "model.layers.11.self_attn.dense.bias": "model-00004-of-00018.safetensors", "model.layers.11.self_attn.dense.weight": "model-00004-of-00018.safetensors", 
"model.layers.11.self_attn.query_key_value.bias": "model-00004-of-00018.safetensors", "model.layers.11.self_attn.query_key_value.weight": "model-00004-of-00018.safetensors", "model.layers.12.input_layernorm.bias": "model-00004-of-00018.safetensors", "model.layers.12.input_layernorm.weight": "model-00004-of-00018.safetensors", "model.layers.12.mlp.down_proj.bias": "model-00004-of-00018.safetensors", "model.layers.12.mlp.down_proj.weight": "model-00004-of-00018.safetensors", "model.layers.12.mlp.up_proj.bias": "model-00004-of-00018.safetensors", "model.layers.12.mlp.up_proj.weight": "model-00005-of-00018.safetensors", "model.layers.12.post_attention_layernorm.bias": "model-00005-of-00018.safetensors", "model.layers.12.post_attention_layernorm.weight": "model-00005-of-00018.safetensors", "model.layers.12.self_attn.dense.bias": "model-00005-of-00018.safetensors", "model.layers.12.self_attn.dense.weight": "model-00005-of-00018.safetensors", "model.layers.12.self_attn.query_key_value.bias": "model-00005-of-00018.safetensors", "model.layers.12.self_attn.query_key_value.weight": "model-00005-of-00018.safetensors", "model.layers.13.input_layernorm.bias": "model-00005-of-00018.safetensors", "model.layers.13.input_layernorm.weight": "model-00005-of-00018.safetensors", "model.layers.13.mlp.down_proj.bias": "model-00005-of-00018.safetensors", "model.layers.13.mlp.down_proj.weight": "model-00005-of-00018.safetensors", "model.layers.13.mlp.up_proj.bias": "model-00005-of-00018.safetensors", "model.layers.13.mlp.up_proj.weight": "model-00005-of-00018.safetensors", "model.layers.13.post_attention_layernorm.bias": "model-00005-of-00018.safetensors", "model.layers.13.post_attention_layernorm.weight": "model-00005-of-00018.safetensors", "model.layers.13.self_attn.dense.bias": "model-00005-of-00018.safetensors", "model.layers.13.self_attn.dense.weight": "model-00005-of-00018.safetensors", "model.layers.13.self_attn.query_key_value.bias": "model-00005-of-00018.safetensors", 
"model.layers.13.self_attn.query_key_value.weight": "model-00005-of-00018.safetensors", "model.layers.14.input_layernorm.bias": "model-00005-of-00018.safetensors", "model.layers.14.input_layernorm.weight": "model-00005-of-00018.safetensors", "model.layers.14.mlp.down_proj.bias": "model-00005-of-00018.safetensors", "model.layers.14.mlp.down_proj.weight": "model-00005-of-00018.safetensors", "model.layers.14.mlp.up_proj.bias": "model-00005-of-00018.safetensors", "model.layers.14.mlp.up_proj.weight": "model-00006-of-00018.safetensors", "model.layers.14.post_attention_layernorm.bias": "model-00006-of-00018.safetensors", "model.layers.14.post_attention_layernorm.weight": "model-00006-of-00018.safetensors", "model.layers.14.self_attn.dense.bias": "model-00006-of-00018.safetensors", "model.layers.14.self_attn.dense.weight": "model-00006-of-00018.safetensors", "model.layers.14.self_attn.query_key_value.bias": "model-00006-of-00018.safetensors", "model.layers.14.self_attn.query_key_value.weight": "model-00006-of-00018.safetensors", "model.layers.15.input_layernorm.bias": "model-00006-of-00018.safetensors", "model.layers.15.input_layernorm.weight": "model-00006-of-00018.safetensors", "model.layers.15.mlp.down_proj.bias": "model-00006-of-00018.safetensors", "model.layers.15.mlp.down_proj.weight": "model-00006-of-00018.safetensors", "model.layers.15.mlp.up_proj.bias": "model-00006-of-00018.safetensors", "model.layers.15.mlp.up_proj.weight": "model-00006-of-00018.safetensors", "model.layers.15.post_attention_layernorm.bias": "model-00006-of-00018.safetensors", "model.layers.15.post_attention_layernorm.weight": "model-00006-of-00018.safetensors", "model.layers.15.self_attn.dense.bias": "model-00006-of-00018.safetensors", "model.layers.15.self_attn.dense.weight": "model-00006-of-00018.safetensors", "model.layers.15.self_attn.query_key_value.bias": "model-00006-of-00018.safetensors", "model.layers.15.self_attn.query_key_value.weight": "model-00006-of-00018.safetensors", 
"model.layers.16.input_layernorm.bias": "model-00006-of-00018.safetensors", "model.layers.16.input_layernorm.weight": "model-00006-of-00018.safetensors", "model.layers.16.mlp.down_proj.bias": "model-00006-of-00018.safetensors", "model.layers.16.mlp.down_proj.weight": "model-00006-of-00018.safetensors", "model.layers.16.mlp.up_proj.bias": "model-00006-of-00018.safetensors", "model.layers.16.mlp.up_proj.weight": "model-00007-of-00018.safetensors", "model.layers.16.post_attention_layernorm.bias": "model-00007-of-00018.safetensors", "model.layers.16.post_attention_layernorm.weight": "model-00007-of-00018.safetensors", "model.layers.16.self_attn.dense.bias": "model-00007-of-00018.safetensors", "model.layers.16.self_attn.dense.weight": "model-00007-of-00018.safetensors", "model.layers.16.self_attn.query_key_value.bias": "model-00007-of-00018.safetensors", "model.layers.16.self_attn.query_key_value.weight": "model-00007-of-00018.safetensors", "model.layers.17.input_layernorm.bias": "model-00007-of-00018.safetensors", "model.layers.17.input_layernorm.weight": "model-00007-of-00018.safetensors", "model.layers.17.mlp.down_proj.bias": "model-00007-of-00018.safetensors", "model.layers.17.mlp.down_proj.weight": "model-00007-of-00018.safetensors", "model.layers.17.mlp.up_proj.bias": "model-00007-of-00018.safetensors", "model.layers.17.mlp.up_proj.weight": "model-00007-of-00018.safetensors", "model.layers.17.post_attention_layernorm.bias": "model-00007-of-00018.safetensors", "model.layers.17.post_attention_layernorm.weight": "model-00007-of-00018.safetensors", "model.layers.17.self_attn.dense.bias": "model-00007-of-00018.safetensors", "model.layers.17.self_attn.dense.weight": "model-00007-of-00018.safetensors", "model.layers.17.self_attn.query_key_value.bias": "model-00007-of-00018.safetensors", "model.layers.17.self_attn.query_key_value.weight": "model-00007-of-00018.safetensors", "model.layers.18.input_layernorm.bias": "model-00007-of-00018.safetensors", 
"model.layers.18.input_layernorm.weight": "model-00007-of-00018.safetensors", "model.layers.18.mlp.down_proj.bias": "model-00007-of-00018.safetensors", "model.layers.18.mlp.down_proj.weight": "model-00007-of-00018.safetensors", "model.layers.18.mlp.up_proj.bias": "model-00007-of-00018.safetensors", "model.layers.18.mlp.up_proj.weight": "model-00008-of-00018.safetensors", "model.layers.18.post_attention_layernorm.bias": "model-00008-of-00018.safetensors", "model.layers.18.post_attention_layernorm.weight": "model-00008-of-00018.safetensors", "model.layers.18.self_attn.dense.bias": "model-00008-of-00018.safetensors", "model.layers.18.self_attn.dense.weight": "model-00008-of-00018.safetensors", "model.layers.18.self_attn.query_key_value.bias": "model-00008-of-00018.safetensors", "model.layers.18.self_attn.query_key_value.weight": "model-00008-of-00018.safetensors", "model.layers.19.input_layernorm.bias": "model-00008-of-00018.safetensors", "model.layers.19.input_layernorm.weight": "model-00008-of-00018.safetensors", "model.layers.19.mlp.down_proj.bias": "model-00008-of-00018.safetensors", "model.layers.19.mlp.down_proj.weight": "model-00008-of-00018.safetensors", "model.layers.19.mlp.up_proj.bias": "model-00008-of-00018.safetensors", "model.layers.19.mlp.up_proj.weight": "model-00008-of-00018.safetensors", "model.layers.19.post_attention_layernorm.bias": "model-00008-of-00018.safetensors", "model.layers.19.post_attention_layernorm.weight": "model-00008-of-00018.safetensors", "model.layers.19.self_attn.dense.bias": "model-00008-of-00018.safetensors", "model.layers.19.self_attn.dense.weight": "model-00008-of-00018.safetensors", "model.layers.19.self_attn.query_key_value.bias": "model-00008-of-00018.safetensors", "model.layers.19.self_attn.query_key_value.weight": "model-00008-of-00018.safetensors", "model.layers.2.input_layernorm.bias": "model-00008-of-00018.safetensors", "model.layers.2.input_layernorm.weight": "model-00008-of-00018.safetensors", 
"model.layers.2.mlp.down_proj.bias": "model-00008-of-00018.safetensors", "model.layers.2.mlp.down_proj.weight": "model-00008-of-00018.safetensors", "model.layers.2.mlp.up_proj.bias": "model-00008-of-00018.safetensors", "model.layers.2.mlp.up_proj.weight": "model-00009-of-00018.safetensors", "model.layers.2.post_attention_layernorm.bias": "model-00009-of-00018.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00009-of-00018.safetensors", "model.layers.2.self_attn.dense.bias": "model-00009-of-00018.safetensors", "model.layers.2.self_attn.dense.weight": "model-00009-of-00018.safetensors", "model.layers.2.self_attn.query_key_value.bias": "model-00009-of-00018.safetensors", "model.layers.2.self_attn.query_key_value.weight": "model-00009-of-00018.safetensors", "model.layers.20.input_layernorm.bias": "model-00009-of-00018.safetensors", "model.layers.20.input_layernorm.weight": "model-00009-of-00018.safetensors", "model.layers.20.mlp.down_proj.bias": "model-00009-of-00018.safetensors", "model.layers.20.mlp.down_proj.weight": "model-00009-of-00018.safetensors", "model.layers.20.mlp.up_proj.bias": "model-00009-of-00018.safetensors", "model.layers.20.mlp.up_proj.weight": "model-00009-of-00018.safetensors", "model.layers.20.post_attention_layernorm.bias": "model-00009-of-00018.safetensors", "model.layers.20.post_attention_layernorm.weight": "model-00009-of-00018.safetensors", "model.layers.20.self_attn.dense.bias": "model-00009-of-00018.safetensors", "model.layers.20.self_attn.dense.weight": "model-00009-of-00018.safetensors", "model.layers.20.self_attn.query_key_value.bias": "model-00009-of-00018.safetensors", "model.layers.20.self_attn.query_key_value.weight": "model-00009-of-00018.safetensors", "model.layers.21.input_layernorm.bias": "model-00009-of-00018.safetensors", "model.layers.21.input_layernorm.weight": "model-00009-of-00018.safetensors", "model.layers.21.mlp.down_proj.bias": "model-00009-of-00018.safetensors", 
"model.layers.21.mlp.down_proj.weight": "model-00009-of-00018.safetensors", "model.layers.21.mlp.up_proj.bias": "model-00009-of-00018.safetensors", "model.layers.21.mlp.up_proj.weight": "model-00010-of-00018.safetensors", "model.layers.21.post_attention_layernorm.bias": "model-00010-of-00018.safetensors", "model.layers.21.post_attention_layernorm.weight": "model-00010-of-00018.safetensors", "model.layers.21.self_attn.dense.bias": "model-00010-of-00018.safetensors", "model.layers.21.self_attn.dense.weight": "model-00010-of-00018.safetensors", "model.layers.21.self_attn.query_key_value.bias": "model-00010-of-00018.safetensors", "model.layers.21.self_attn.query_key_value.weight": "model-00010-of-00018.safetensors", "model.layers.22.input_layernorm.bias": "model-00010-of-00018.safetensors", "model.layers.22.input_layernorm.weight": "model-00010-of-00018.safetensors", "model.layers.22.mlp.down_proj.bias": "model-00010-of-00018.safetensors", "model.layers.22.mlp.down_proj.weight": "model-00010-of-00018.safetensors", "model.layers.22.mlp.up_proj.bias": "model-00010-of-00018.safetensors", "model.layers.22.mlp.up_proj.weight": "model-00010-of-00018.safetensors", "model.layers.22.post_attention_layernorm.bias": "model-00010-of-00018.safetensors", "model.layers.22.post_attention_layernorm.weight": "model-00010-of-00018.safetensors", "model.layers.22.self_attn.dense.bias": "model-00010-of-00018.safetensors", "model.layers.22.self_attn.dense.weight": "model-00010-of-00018.safetensors", "model.layers.22.self_attn.query_key_value.bias": "model-00010-of-00018.safetensors", "model.layers.22.self_attn.query_key_value.weight": "model-00010-of-00018.safetensors", "model.layers.23.input_layernorm.bias": "model-00010-of-00018.safetensors", "model.layers.23.input_layernorm.weight": "model-00010-of-00018.safetensors", "model.layers.23.mlp.down_proj.bias": "model-00010-of-00018.safetensors", "model.layers.23.mlp.down_proj.weight": "model-00010-of-00018.safetensors", 
"model.layers.23.mlp.up_proj.bias": "model-00010-of-00018.safetensors", "model.layers.23.mlp.up_proj.weight": "model-00011-of-00018.safetensors", "model.layers.23.post_attention_layernorm.bias": "model-00011-of-00018.safetensors", "model.layers.23.post_attention_layernorm.weight": "model-00011-of-00018.safetensors", "model.layers.23.self_attn.dense.bias": "model-00011-of-00018.safetensors", "model.layers.23.self_attn.dense.weight": "model-00011-of-00018.safetensors", "model.layers.23.self_attn.query_key_value.bias": "model-00011-of-00018.safetensors", "model.layers.23.self_attn.query_key_value.weight": "model-00011-of-00018.safetensors", "model.layers.24.input_layernorm.bias": "model-00011-of-00018.safetensors", "model.layers.24.input_layernorm.weight": "model-00011-of-00018.safetensors", "model.layers.24.mlp.down_proj.bias": "model-00011-of-00018.safetensors", "model.layers.24.mlp.down_proj.weight": "model-00011-of-00018.safetensors", "model.layers.24.mlp.up_proj.bias": "model-00011-of-00018.safetensors", "model.layers.24.mlp.up_proj.weight": "model-00011-of-00018.safetensors", "model.layers.24.post_attention_layernorm.bias": "model-00011-of-00018.safetensors", "model.layers.24.post_attention_layernorm.weight": "model-00011-of-00018.safetensors", "model.layers.24.self_attn.dense.bias": "model-00011-of-00018.safetensors", "model.layers.24.self_attn.dense.weight": "model-00011-of-00018.safetensors", "model.layers.24.self_attn.query_key_value.bias": "model-00011-of-00018.safetensors", "model.layers.24.self_attn.query_key_value.weight": "model-00011-of-00018.safetensors", "model.layers.25.input_layernorm.bias": "model-00011-of-00018.safetensors", "model.layers.25.input_layernorm.weight": "model-00011-of-00018.safetensors", "model.layers.25.mlp.down_proj.bias": "model-00011-of-00018.safetensors", "model.layers.25.mlp.down_proj.weight": "model-00011-of-00018.safetensors", "model.layers.25.mlp.up_proj.bias": "model-00011-of-00018.safetensors", 
"model.layers.25.mlp.up_proj.weight": "model-00012-of-00018.safetensors", "model.layers.25.post_attention_layernorm.bias": "model-00012-of-00018.safetensors", "model.layers.25.post_attention_layernorm.weight": "model-00012-of-00018.safetensors", "model.layers.25.self_attn.dense.bias": "model-00012-of-00018.safetensors", "model.layers.25.self_attn.dense.weight": "model-00012-of-00018.safetensors", "model.layers.25.self_attn.query_key_value.bias": "model-00012-of-00018.safetensors", "model.layers.25.self_attn.query_key_value.weight": "model-00012-of-00018.safetensors", "model.layers.26.input_layernorm.bias": "model-00012-of-00018.safetensors", "model.layers.26.input_layernorm.weight": "model-00012-of-00018.safetensors", "model.layers.26.mlp.down_proj.bias": "model-00012-of-00018.safetensors", "model.layers.26.mlp.down_proj.weight": "model-00012-of-00018.safetensors", "model.layers.26.mlp.up_proj.bias": "model-00012-of-00018.safetensors", "model.layers.26.mlp.up_proj.weight": "model-00012-of-00018.safetensors", "model.layers.26.post_attention_layernorm.bias": "model-00012-of-00018.safetensors", "model.layers.26.post_attention_layernorm.weight": "model-00012-of-00018.safetensors", "model.layers.26.self_attn.dense.bias": "model-00012-of-00018.safetensors", "model.layers.26.self_attn.dense.weight": "model-00012-of-00018.safetensors", "model.layers.26.self_attn.query_key_value.bias": "model-00012-of-00018.safetensors", "model.layers.26.self_attn.query_key_value.weight": "model-00012-of-00018.safetensors", "model.layers.27.input_layernorm.bias": "model-00012-of-00018.safetensors", "model.layers.27.input_layernorm.weight": "model-00012-of-00018.safetensors", "model.layers.27.mlp.down_proj.bias": "model-00012-of-00018.safetensors", "model.layers.27.mlp.down_proj.weight": "model-00012-of-00018.safetensors", "model.layers.27.mlp.up_proj.bias": "model-00012-of-00018.safetensors", "model.layers.27.mlp.up_proj.weight": "model-00013-of-00018.safetensors", 
"model.layers.27.post_attention_layernorm.bias": "model-00013-of-00018.safetensors", "model.layers.27.post_attention_layernorm.weight": "model-00013-of-00018.safetensors", "model.layers.27.self_attn.dense.bias": "model-00013-of-00018.safetensors", "model.layers.27.self_attn.dense.weight": "model-00013-of-00018.safetensors", "model.layers.27.self_attn.query_key_value.bias": "model-00013-of-00018.safetensors", "model.layers.27.self_attn.query_key_value.weight": "model-00013-of-00018.safetensors", "model.layers.28.input_layernorm.bias": "model-00013-of-00018.safetensors", "model.layers.28.input_layernorm.weight": "model-00013-of-00018.safetensors", "model.layers.28.mlp.down_proj.bias": "model-00013-of-00018.safetensors", "model.layers.28.mlp.down_proj.weight": "model-00013-of-00018.safetensors", "model.layers.28.mlp.up_proj.bias": "model-00013-of-00018.safetensors", "model.layers.28.mlp.up_proj.weight": "model-00013-of-00018.safetensors", "model.layers.28.post_attention_layernorm.bias": "model-00013-of-00018.safetensors", "model.layers.28.post_attention_layernorm.weight": "model-00013-of-00018.safetensors", "model.layers.28.self_attn.dense.bias": "model-00013-of-00018.safetensors", "model.layers.28.self_attn.dense.weight": "model-00013-of-00018.safetensors", "model.layers.28.self_attn.query_key_value.bias": "model-00013-of-00018.safetensors", "model.layers.28.self_attn.query_key_value.weight": "model-00013-of-00018.safetensors", "model.layers.29.input_layernorm.bias": "model-00013-of-00018.safetensors", "model.layers.29.input_layernorm.weight": "model-00013-of-00018.safetensors", "model.layers.29.mlp.down_proj.bias": "model-00013-of-00018.safetensors", "model.layers.29.mlp.down_proj.weight": "model-00013-of-00018.safetensors", "model.layers.29.mlp.up_proj.bias": "model-00013-of-00018.safetensors", "model.layers.29.mlp.up_proj.weight": "model-00014-of-00018.safetensors", "model.layers.29.post_attention_layernorm.bias": "model-00014-of-00018.safetensors", 
"model.layers.29.post_attention_layernorm.weight": "model-00014-of-00018.safetensors", "model.layers.29.self_attn.dense.bias": "model-00014-of-00018.safetensors", "model.layers.29.self_attn.dense.weight": "model-00014-of-00018.safetensors", "model.layers.29.self_attn.query_key_value.bias": "model-00014-of-00018.safetensors", "model.layers.29.self_attn.query_key_value.weight": "model-00014-of-00018.safetensors", "model.layers.3.input_layernorm.bias": "model-00014-of-00018.safetensors", "model.layers.3.input_layernorm.weight": "model-00014-of-00018.safetensors", "model.layers.3.mlp.down_proj.bias": "model-00014-of-00018.safetensors", "model.layers.3.mlp.down_proj.weight": "model-00014-of-00018.safetensors", "model.layers.3.mlp.up_proj.bias": "model-00014-of-00018.safetensors", "model.layers.3.mlp.up_proj.weight": "model-00014-of-00018.safetensors", "model.layers.3.post_attention_layernorm.bias": "model-00014-of-00018.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00014-of-00018.safetensors", "model.layers.3.self_attn.dense.bias": "model-00014-of-00018.safetensors", "model.layers.3.self_attn.dense.weight": "model-00014-of-00018.safetensors", "model.layers.3.self_attn.query_key_value.bias": "model-00014-of-00018.safetensors", "model.layers.3.self_attn.query_key_value.weight": "model-00014-of-00018.safetensors", "model.layers.30.input_layernorm.bias": "model-00014-of-00018.safetensors", "model.layers.30.input_layernorm.weight": "model-00014-of-00018.safetensors", "model.layers.30.mlp.down_proj.bias": "model-00014-of-00018.safetensors", "model.layers.30.mlp.down_proj.weight": "model-00014-of-00018.safetensors", "model.layers.30.mlp.up_proj.bias": "model-00014-of-00018.safetensors", "model.layers.30.mlp.up_proj.weight": "model-00015-of-00018.safetensors", "model.layers.30.post_attention_layernorm.bias": "model-00015-of-00018.safetensors", "model.layers.30.post_attention_layernorm.weight": "model-00015-of-00018.safetensors", 
"model.layers.30.self_attn.dense.bias": "model-00015-of-00018.safetensors", "model.layers.30.self_attn.dense.weight": "model-00015-of-00018.safetensors", "model.layers.30.self_attn.query_key_value.bias": "model-00015-of-00018.safetensors", "model.layers.30.self_attn.query_key_value.weight": "model-00015-of-00018.safetensors", "model.layers.31.input_layernorm.bias": "model-00015-of-00018.safetensors", "model.layers.31.input_layernorm.weight": "model-00015-of-00018.safetensors", "model.layers.31.mlp.down_proj.bias": "model-00015-of-00018.safetensors", "model.layers.31.mlp.down_proj.weight": "model-00015-of-00018.safetensors", "model.layers.31.mlp.up_proj.bias": "model-00015-of-00018.safetensors", "model.layers.31.mlp.up_proj.weight": "model-00015-of-00018.safetensors", "model.layers.31.post_attention_layernorm.bias": "model-00015-of-00018.safetensors", "model.layers.31.post_attention_layernorm.weight": "model-00015-of-00018.safetensors", "model.layers.31.self_attn.dense.bias": "model-00015-of-00018.safetensors", "model.layers.31.self_attn.dense.weight": "model-00015-of-00018.safetensors", "model.layers.31.self_attn.query_key_value.bias": "model-00015-of-00018.safetensors", "model.layers.31.self_attn.query_key_value.weight": "model-00015-of-00018.safetensors", "model.layers.4.input_layernorm.bias": "model-00015-of-00018.safetensors", "model.layers.4.input_layernorm.weight": "model-00015-of-00018.safetensors", "model.layers.4.mlp.down_proj.bias": "model-00015-of-00018.safetensors", "model.layers.4.mlp.down_proj.weight": "model-00015-of-00018.safetensors", "model.layers.4.mlp.up_proj.bias": "model-00015-of-00018.safetensors", "model.layers.4.mlp.up_proj.weight": "model-00016-of-00018.safetensors", "model.layers.4.post_attention_layernorm.bias": "model-00016-of-00018.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00016-of-00018.safetensors", "model.layers.4.self_attn.dense.bias": "model-00016-of-00018.safetensors", 
"model.layers.4.self_attn.dense.weight": "model-00016-of-00018.safetensors", "model.layers.4.self_attn.query_key_value.bias": "model-00016-of-00018.safetensors", "model.layers.4.self_attn.query_key_value.weight": "model-00016-of-00018.safetensors", "model.layers.5.input_layernorm.bias": "model-00016-of-00018.safetensors", "model.layers.5.input_layernorm.weight": "model-00016-of-00018.safetensors", "model.layers.5.mlp.down_proj.bias": "model-00016-of-00018.safetensors", "model.layers.5.mlp.down_proj.weight": "model-00016-of-00018.safetensors", "model.layers.5.mlp.up_proj.bias": "model-00016-of-00018.safetensors", "model.layers.5.mlp.up_proj.weight": "model-00016-of-00018.safetensors", "model.layers.5.post_attention_layernorm.bias": "model-00016-of-00018.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00016-of-00018.safetensors", "model.layers.5.self_attn.dense.bias": "model-00016-of-00018.safetensors", "model.layers.5.self_attn.dense.weight": "model-00016-of-00018.safetensors", "model.layers.5.self_attn.query_key_value.bias": "model-00016-of-00018.safetensors", "model.layers.5.self_attn.query_key_value.weight": "model-00016-of-00018.safetensors", "model.layers.6.input_layernorm.bias": "model-00016-of-00018.safetensors", "model.layers.6.input_layernorm.weight": "model-00016-of-00018.safetensors", "model.layers.6.mlp.down_proj.bias": "model-00016-of-00018.safetensors", "model.layers.6.mlp.down_proj.weight": "model-00016-of-00018.safetensors", "model.layers.6.mlp.up_proj.bias": "model-00016-of-00018.safetensors", "model.layers.6.mlp.up_proj.weight": "model-00017-of-00018.safetensors", "model.layers.6.post_attention_layernorm.bias": "model-00017-of-00018.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00017-of-00018.safetensors", "model.layers.6.self_attn.dense.bias": "model-00017-of-00018.safetensors", "model.layers.6.self_attn.dense.weight": "model-00017-of-00018.safetensors", 
"model.layers.6.self_attn.query_key_value.bias": "model-00017-of-00018.safetensors", "model.layers.6.self_attn.query_key_value.weight": "model-00017-of-00018.safetensors", "model.layers.7.input_layernorm.bias": "model-00017-of-00018.safetensors", "model.layers.7.input_layernorm.weight": "model-00017-of-00018.safetensors", "model.layers.7.mlp.down_proj.bias": "model-00017-of-00018.safetensors", "model.layers.7.mlp.down_proj.weight": "model-00017-of-00018.safetensors", "model.layers.7.mlp.up_proj.bias": "model-00017-of-00018.safetensors", "model.layers.7.mlp.up_proj.weight": "model-00017-of-00018.safetensors", "model.layers.7.post_attention_layernorm.bias": "model-00017-of-00018.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00017-of-00018.safetensors", "model.layers.7.self_attn.dense.bias": "model-00017-of-00018.safetensors", "model.layers.7.self_attn.dense.weight": "model-00017-of-00018.safetensors", "model.layers.7.self_attn.query_key_value.bias": "model-00017-of-00018.safetensors", "model.layers.7.self_attn.query_key_value.weight": "model-00017-of-00018.safetensors", "model.layers.8.input_layernorm.bias": "model-00017-of-00018.safetensors", "model.layers.8.input_layernorm.weight": "model-00017-of-00018.safetensors", "model.layers.8.mlp.down_proj.bias": "model-00017-of-00018.safetensors", "model.layers.8.mlp.down_proj.weight": "model-00017-of-00018.safetensors", "model.layers.8.mlp.up_proj.bias": "model-00017-of-00018.safetensors", "model.layers.8.mlp.up_proj.weight": "model-00018-of-00018.safetensors", "model.layers.8.post_attention_layernorm.bias": "model-00018-of-00018.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00018-of-00018.safetensors", "model.layers.8.self_attn.dense.bias": "model-00018-of-00018.safetensors", "model.layers.8.self_attn.dense.weight": "model-00018-of-00018.safetensors", "model.layers.8.self_attn.query_key_value.bias": "model-00018-of-00018.safetensors", 
"model.layers.8.self_attn.query_key_value.weight": "model-00018-of-00018.safetensors", "model.layers.9.input_layernorm.bias": "model-00018-of-00018.safetensors", "model.layers.9.input_layernorm.weight": "model-00018-of-00018.safetensors", "model.layers.9.mlp.down_proj.bias": "model-00018-of-00018.safetensors", "model.layers.9.mlp.down_proj.weight": "model-00018-of-00018.safetensors", "model.layers.9.mlp.up_proj.bias": "model-00018-of-00018.safetensors", "model.layers.9.mlp.up_proj.weight": "model-00018-of-00018.safetensors", "model.layers.9.post_attention_layernorm.bias": "model-00018-of-00018.safetensors", "model.layers.9.post_attention_layernorm.weight": "model-00018-of-00018.safetensors", "model.layers.9.self_attn.dense.bias": "model-00018-of-00018.safetensors", "model.layers.9.self_attn.dense.weight": "model-00018-of-00018.safetensors", "model.layers.9.self_attn.query_key_value.bias": "model-00018-of-00018.safetensors", "model.layers.9.self_attn.query_key_value.weight": "model-00018-of-00018.safetensors"}}
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>"
5
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": "ad85cab62be398dc90203c4377a4ccbf090fbb36",
3
+ "_from_auto": true,
4
+ "added_tokens_decoder": {},
5
+ "auto_map": {
6
+ "AutoTokenizer": [
7
+ "tokenization_phi3_small.Phi3SmallTokenizer",
8
+ "tokenization_phi3_small.Phi3SmallTokenizer"
9
+ ]
10
+ },
11
+ "bos_token": "<|endoftext|>",
12
+ "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
13
+ "clean_up_tokenization_spaces": true,
14
+ "eos_token": "<|endoftext|>",
15
+ "model_max_length": 131072,
16
+ "pad_token": "<|endoftext|>",
17
+ "revision": null,
18
+ "tokenizer_class": "Phi3SmallTokenizer",
19
+ "trust_remote_code": true
20
+ }