CryptAL committed
Commit 947e870
Parent: ccab034

Upload LlamaForCausalLM
config.json CHANGED
@@ -1,16 +1,16 @@
 {
-  "_name_or_path": "/home/paperspace/models/tm14b1/",
+  "_name_or_path": "/home/paperspace/models/tm14B1/",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
-  "bos_token_id": 1,
-  "eos_token_id": 2,
+  "bos_token_id": 100257,
+  "eos_token_id": 100257,
   "hidden_act": "silu",
   "hidden_size": 5120,
   "initializer_range": 0.02,
-  "intermediate_size": 13824,
+  "intermediate_size": 14100,
   "max_position_embeddings": 4096,
   "mlp_bias": false,
   "model_type": "llama",
@@ -25,5 +25,5 @@
   "torch_dtype": "bfloat16",
   "transformers_version": "4.44.1",
   "use_cache": true,
-  "vocab_size": 32000
+  "vocab_size": 100288
 }
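The retokenization is visible above: bos/eos move from the Llama-2 defaults (1 and 2) to 100257, and the vocabulary grows from 32000 to 100288. 100257 is the <|endoftext|> id in tiktoken's cl100k_base, so the new values are consistent with a cl100k-style tokenizer, with the embedding padded out to a multiple of 64. A minimal sanity-check sketch, not part of the commit ("path/to/repo" is a placeholder for a local clone):

# Sketch: confirm a local download matches the committed config values.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("path/to/repo")  # placeholder path

# Values introduced by this commit (see diff above).
assert config.bos_token_id == 100257
assert config.eos_token_id == 100257
assert config.vocab_size == 100288
assert config.intermediate_size == 14100
print("config.json matches the committed values")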
generation_config.json CHANGED
@@ -1,10 +1,6 @@
 {
-  "bos_token_id": 1,
-  "do_sample": true,
-  "eos_token_id": 2,
-  "max_length": 4096,
-  "pad_token_id": 0,
-  "temperature": 0.6,
-  "top_p": 0.9,
+  "_from_model_config": true,
+  "bos_token_id": 100257,
+  "eos_token_id": 100257,
   "transformers_version": "4.44.1"
 }
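Note that the sampling defaults (do_sample, temperature 0.6, top_p 0.9, max_length, pad_token_id) were dropped rather than retargeted: with _from_model_config set, the file now only pins the new token ids, and transformers falls back to greedy decoding unless sampling parameters are passed at generation time. A short sketch of the resulting behaviour (path is a placeholder):

# Sketch: how the slimmed-down generation_config.json behaves at load time.
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("path/to/repo")  # placeholder path
print(gen_cfg.do_sample)      # False -> greedy decoding by default
print(gen_cfg.eos_token_id)   # 100257, matching config.json

# The old sampling behaviour can still be requested per call, e.g.:
#   model.generate(**inputs, do_sample=True, temperature=0.6, top_p=0.9)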
model-00001-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d16ae12d206903962c589dc4687a2abcc3e03705a26b17cf5b80698e3cbf0358
-size 4978265800
+oid sha256:144d1bee9d0db88256602fd58807428472419f3d422210af4edfcb21b0f1be23
+size 4989139424
model-00002-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b38e6e14de170f3de5d89e5cae66b7f35216d3fa80d489f020f0214dc360cc3d
-size 4970422232
+oid sha256:c2e15bf6935931537f325e5279cdd4719d680489deced5c95f2048364de353e2
+size 4893847152
model-00003-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be21d30c0328def90b901ebdd57a5e1fb433c3f69057492c04bcec22bbb4e1da
-size 4970422256
+oid sha256:f8c131198b4abcd06e0ef59f447e56cca24774486b44ef76d78e6432086421cc
+size 4998725736
model-00004-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7b35ad53f9951b84c0e9423e926d7f910bb89ec9802154b3f155441c9a5ca53d
-size 4933701504
+oid sha256:e39d9ae5eedf01635d2d5c13e9c7d906005cfcc322ee16610bd8a24edba27a9d
+size 4998725728
model-00005-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e57cfc836452bbc2f7a447a51efc0d40ae5daa9857f5e31fb627198977139070
-size 4933722216
+oid sha256:d17fc5b5fc0b1f67aaed743c631502fe439a1633433812f29866639d4a4a0085
+size 4985823096
model-00006-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6578c544525b23ba5ce603086e16fd459c7ea4ff5bbebf917fbb37e0cbd8fdc8
-size 1245236920
+oid sha256:194fdabb6398dbd880f15be8ce125bb88960584857644b611675349859697d5a
+size 2903196856
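Each .safetensors entry above is a Git LFS pointer, so the diff records only the new oid and size. The pointer doubles as a checksum for a download; a sketch for shard 1, assuming the files sit in the current directory:

# Sketch: verify a downloaded shard against its LFS pointer (oid/size above).
import hashlib

def sha256_of(path: str, chunk: int = 1 << 20) -> str:
    # Stream the file so multi-GB shards don't need to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk):
            h.update(block)
    return h.hexdigest()

expected = "144d1bee9d0db88256602fd58807428472419f3d422210af4edfcb21b0f1be23"
assert sha256_of("model-00001-of-00006.safetensors") == expected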
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 26031728640
+    "total_size": 27769415680
   },
   "weight_map": {
     "lm_head.weight": "model-00006-of-00006.safetensors",
@@ -50,32 +50,32 @@
   "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
   "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
   "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
-  "model.layers.13.input_layernorm.weight": "model-00002-of-00006.safetensors",
-  "model.layers.13.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+  "model.layers.13.input_layernorm.weight": "model-00003-of-00006.safetensors",
+  "model.layers.13.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
   "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
   "model.layers.13.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
-  "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+  "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
   "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
   "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
   "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
   "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
-  "model.layers.14.input_layernorm.weight": "model-00002-of-00006.safetensors",
-  "model.layers.14.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
-  "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
-  "model.layers.14.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
-  "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
-  "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
-  "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
-  "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
-  "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+  "model.layers.14.input_layernorm.weight": "model-00003-of-00006.safetensors",
+  "model.layers.14.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+  "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+  "model.layers.14.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+  "model.layers.14.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+  "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+  "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+  "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+  "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
   "model.layers.15.input_layernorm.weight": "model-00003-of-00006.safetensors",
   "model.layers.15.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
   "model.layers.15.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
   "model.layers.15.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
   "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
-  "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+  "model.layers.15.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
   "model.layers.15.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
-  "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+  "model.layers.15.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
   "model.layers.15.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
   "model.layers.16.input_layernorm.weight": "model-00003-of-00006.safetensors",
   "model.layers.16.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
@@ -131,24 +131,24 @@
   "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
   "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
   "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
-  "model.layers.21.input_layernorm.weight": "model-00003-of-00006.safetensors",
-  "model.layers.21.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+  "model.layers.21.input_layernorm.weight": "model-00004-of-00006.safetensors",
+  "model.layers.21.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
   "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
-  "model.layers.21.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
-  "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+  "model.layers.21.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+  "model.layers.21.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
   "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
   "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
   "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
   "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
-  "model.layers.22.input_layernorm.weight": "model-00003-of-00006.safetensors",
-  "model.layers.22.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
-  "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
-  "model.layers.22.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
-  "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
-  "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
-  "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
-  "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
-  "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+  "model.layers.22.input_layernorm.weight": "model-00004-of-00006.safetensors",
+  "model.layers.22.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+  "model.layers.22.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+  "model.layers.22.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+  "model.layers.22.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+  "model.layers.22.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+  "model.layers.22.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+  "model.layers.22.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+  "model.layers.22.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
   "model.layers.23.input_layernorm.weight": "model-00004-of-00006.safetensors",
   "model.layers.23.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
   "model.layers.23.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
@@ -203,11 +203,11 @@
   "model.layers.28.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
   "model.layers.28.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
   "model.layers.28.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
-  "model.layers.29.input_layernorm.weight": "model-00004-of-00006.safetensors",
-  "model.layers.29.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
-  "model.layers.29.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
-  "model.layers.29.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
-  "model.layers.29.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+  "model.layers.29.input_layernorm.weight": "model-00005-of-00006.safetensors",
+  "model.layers.29.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+  "model.layers.29.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+  "model.layers.29.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+  "model.layers.29.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
   "model.layers.29.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
   "model.layers.29.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
   "model.layers.29.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
@@ -223,13 +223,13 @@
   "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
   "model.layers.30.input_layernorm.weight": "model-00005-of-00006.safetensors",
   "model.layers.30.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
-  "model.layers.30.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
-  "model.layers.30.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+  "model.layers.30.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+  "model.layers.30.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
   "model.layers.30.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
-  "model.layers.30.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
-  "model.layers.30.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
-  "model.layers.30.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
-  "model.layers.30.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+  "model.layers.30.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+  "model.layers.30.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+  "model.layers.30.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+  "model.layers.30.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
   "model.layers.31.input_layernorm.weight": "model-00005-of-00006.safetensors",
   "model.layers.31.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
   "model.layers.31.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
@@ -284,24 +284,24 @@
   "model.layers.36.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
   "model.layers.36.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
   "model.layers.36.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
-  "model.layers.37.input_layernorm.weight": "model-00005-of-00006.safetensors",
-  "model.layers.37.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
-  "model.layers.37.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
-  "model.layers.37.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
-  "model.layers.37.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
-  "model.layers.37.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
-  "model.layers.37.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+  "model.layers.37.input_layernorm.weight": "model-00006-of-00006.safetensors",
+  "model.layers.37.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
+  "model.layers.37.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
+  "model.layers.37.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
+  "model.layers.37.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
+  "model.layers.37.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
+  "model.layers.37.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
   "model.layers.37.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
-  "model.layers.37.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+  "model.layers.37.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
   "model.layers.38.input_layernorm.weight": "model-00006-of-00006.safetensors",
   "model.layers.38.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
-  "model.layers.38.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+  "model.layers.38.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
   "model.layers.38.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
   "model.layers.38.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
-  "model.layers.38.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
-  "model.layers.38.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
-  "model.layers.38.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
-  "model.layers.38.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+  "model.layers.38.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
+  "model.layers.38.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
+  "model.layers.38.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
+  "model.layers.38.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
   "model.layers.39.input_layernorm.weight": "model-00006-of-00006.safetensors",
   "model.layers.39.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
   "model.layers.39.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
@@ -329,24 +329,24 @@
   "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
   "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
   "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
-  "model.layers.6.input_layernorm.weight": "model-00001-of-00006.safetensors",
-  "model.layers.6.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
-  "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
-  "model.layers.6.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
-  "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+  "model.layers.6.input_layernorm.weight": "model-00002-of-00006.safetensors",
+  "model.layers.6.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+  "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+  "model.layers.6.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+  "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
   "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
-  "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+  "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
   "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
-  "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+  "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
   "model.layers.7.input_layernorm.weight": "model-00002-of-00006.safetensors",
   "model.layers.7.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
   "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
   "model.layers.7.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
   "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
-  "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
-  "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
-  "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
-  "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+  "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+  "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+  "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+  "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
   "model.layers.8.input_layernorm.weight": "model-00002-of-00006.safetensors",
   "model.layers.8.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
   "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",