jtatman committed on
Commit
575a07a
1 Parent(s): 9138034

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -36,8 +36,8 @@ experts:
36
  - "Optimize this C# script."
37
  - "Implement this feature using JavaScript."
38
  - "Convert this HTML structure into a more efficient design."
39
- negative_prompts:
40
  - "Assist me with writing a program that"
 
41
  - "How do you"
42
  - "Explain the concept of"
43
  - "Give an overview of"
@@ -49,11 +49,29 @@ experts:
49
  - "Answer this question"
50
  - source_model: Locutusque/TinyMistral-248M-v2.5-Instruct
51
  positive_prompts:
52
- - "Assist me with writing a program that"
53
  - "How do you"
54
  - "Explain the concept of"
55
  - "Give an overview of"
 
 
 
 
 
 
56
  negative_prompts:
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  - "Help me debug this code."
58
  - "Optimize this C# script."
59
  - "Implement this feature using JavaScript."
@@ -64,8 +82,27 @@ experts:
64
  - "Summarize"
65
  - "Make a recommendation on"
66
  - "Answer this question"
67
- - source_model: Locutusque/TinyMistral-248M-v2-Instruct
68
  - source_model: Locutusque/TinyMistral-248M-Instruct
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  gate_mode: hidden
70
  ```
71
 
 
36
  - "Optimize this C# script."
37
  - "Implement this feature using JavaScript."
38
  - "Convert this HTML structure into a more efficient design."
 
39
  - "Assist me with writing a program that"
40
+ negative_prompts:
41
  - "How do you"
42
  - "Explain the concept of"
43
  - "Give an overview of"
 
49
  - "Answer this question"
50
  - source_model: Locutusque/TinyMistral-248M-v2.5-Instruct
51
  positive_prompts:
 
52
  - "How do you"
53
  - "Explain the concept of"
54
  - "Give an overview of"
55
+ - "Compare and contrast between"
56
+ - "Provide information about"
57
+ - "Help me understand"
58
+ - "Summarize"
59
+ - "Make a recommendation on"
60
+ - "Answer this question"
61
  negative_prompts:
62
+ - "Help me debug this code."
63
+ - "Optimize this C# script."
64
+ - "Implement this feature using JavaScript."
65
+ - "Convert this HTML structure into a more efficient design."
66
+ - "Assist me with writing a program that"
67
+ - source_model: Locutusque/TinyMistral-248M-v2-Instruct
68
+ positive_prompts:
69
+ - "How do I incorporate visual elements into my writing?"
70
+ negative_prompts:
71
+ - "Help me debug this code."
72
+ - "Optimize this C# script."
73
+ - "Implement this feature using JavaScript."
74
+ - "Convert this HTML structure into a more efficient design."
75
  - "Help me debug this code."
76
  - "Optimize this C# script."
77
  - "Implement this feature using JavaScript."
 
82
  - "Summarize"
83
  - "Make a recommendation on"
84
  - "Answer this question"
 
85
  - source_model: Locutusque/TinyMistral-248M-Instruct
86
+ positive_prompts:
87
+ - "Craft me a list of some nice places to visit around the world. "
88
+ - "Write me a story"
89
+ - "Write me an essay"
90
+ negative_prompts:
91
+ - "Help me debug this code."
92
+ - "Optimize this C# script."
93
+ - "Implement this feature using JavaScript."
94
+ - "Convert this HTML structure into a more efficient design."
95
+ - "Help me debug this code."
96
+ - "Optimize this C# script."
97
+ - "Implement this feature using JavaScript."
98
+ - "Convert this HTML structure into a more efficient design."
99
+ - "Compare and contrast between"
100
+ - "Provide information about"
101
+ - "Help me understand"
102
+ - "Summarize"
103
+ - "Make a recommendation on"
104
+ - "Answer this question"
105
+
106
  gate_mode: hidden
107
  ```
108
 
config.json CHANGED
@@ -16,7 +16,7 @@
16
  "num_experts_per_tok": 2,
17
  "num_hidden_layers": 12,
18
  "num_key_value_heads": 8,
19
- "num_local_experts": 3,
20
  "output_router_logits": false,
21
  "rms_norm_eps": 1e-06,
22
  "rope_theta": 10000.0,
 
16
  "num_experts_per_tok": 2,
17
  "num_hidden_layers": 12,
18
  "num_key_value_heads": 8,
19
+ "num_local_experts": 4,
20
  "output_router_logits": false,
21
  "rms_norm_eps": 1e-06,
22
  "rope_theta": 10000.0,
mergekit_moe_config.yml CHANGED
@@ -8,6 +8,7 @@ experts:
8
  - "Implement this feature using JavaScript."
9
  - "Convert this HTML structure into a more efficient design."
10
  - "Assist me with writing a program that"
 
11
  - "How do you"
12
  - "Explain the concept of"
13
  - "Give an overview of"
@@ -17,13 +18,60 @@ experts:
17
  - "Summarize"
18
  - "Make a recommendation on"
19
  - "Answer this question"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  - source_model: Locutusque/TinyMistral-248M-v2-Instruct
21
  positive_prompts:
22
- - "write a short story about depressed bears and a long winter."
23
- - "how does this passage reflect the authors bias?"
24
- - "develop this idea into"
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  - source_model: Locutusque/TinyMistral-248M-Instruct
26
  positive_prompts:
27
- - "analyze this design"
28
- - "produce a design that"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  gate_mode: hidden
 
8
  - "Implement this feature using JavaScript."
9
  - "Convert this HTML structure into a more efficient design."
10
  - "Assist me with writing a program that"
11
+ negative_prompts:
12
  - "How do you"
13
  - "Explain the concept of"
14
  - "Give an overview of"
 
18
  - "Summarize"
19
  - "Make a recommendation on"
20
  - "Answer this question"
21
+ - source_model: Locutusque/TinyMistral-248M-v2.5-Instruct
22
+ positive_prompts:
23
+ - "How do you"
24
+ - "Explain the concept of"
25
+ - "Give an overview of"
26
+ - "Compare and contrast between"
27
+ - "Provide information about"
28
+ - "Help me understand"
29
+ - "Summarize"
30
+ - "Make a recommendation on"
31
+ - "Answer this question"
32
+ negative_prompts:
33
+ - "Help me debug this code."
34
+ - "Optimize this C# script."
35
+ - "Implement this feature using JavaScript."
36
+ - "Convert this HTML structure into a more efficient design."
37
+ - "Assist me with writing a program that"
38
  - source_model: Locutusque/TinyMistral-248M-v2-Instruct
39
  positive_prompts:
40
+ - "How do I incorporate visual elements into my writing?"
41
+ negative_prompts:
42
+ - "Help me debug this code."
43
+ - "Optimize this C# script."
44
+ - "Implement this feature using JavaScript."
45
+ - "Convert this HTML structure into a more efficient design."
46
+ - "Help me debug this code."
47
+ - "Optimize this C# script."
48
+ - "Implement this feature using JavaScript."
49
+ - "Convert this HTML structure into a more efficient design."
50
+ - "Compare and contrast between"
51
+ - "Provide information about"
52
+ - "Help me understand"
53
+ - "Summarize"
54
+ - "Make a recommendation on"
55
+ - "Answer this question"
56
  - source_model: Locutusque/TinyMistral-248M-Instruct
57
  positive_prompts:
58
+ - "Craft me a list of some nice places to visit around the world. "
59
+ - "Write me a story"
60
+ - "Write me an essay"
61
+ negative_prompts:
62
+ - "Help me debug this code."
63
+ - "Optimize this C# script."
64
+ - "Implement this feature using JavaScript."
65
+ - "Convert this HTML structure into a more efficient design."
66
+ - "Help me debug this code."
67
+ - "Optimize this C# script."
68
+ - "Implement this feature using JavaScript."
69
+ - "Convert this HTML structure into a more efficient design."
70
+ - "Compare and contrast between"
71
+ - "Provide information about"
72
+ - "Help me understand"
73
+ - "Summarize"
74
+ - "Make a recommendation on"
75
+ - "Answer this question"
76
+
77
  gate_mode: hidden
model-00001-of-00001.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe78835f9511ce7dd33d1fa48bfcded5f0a89095d2508bc222ee2a9c35530c2b
3
- size 1100125288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09a6143c521ff0c20f1df03d4a92db7e6a97a71e25ad40f9de4092d3c2fe2a48
3
+ size 1402144424
model.safetensors.index.json CHANGED
@@ -1 +1 @@
1
- {"metadata": {"mergekit_version": "0.0.4"}, "weight_map": {"model.embed_tokens.weight": "model-00001-of-00001.safetensors", "model.norm.weight": "model-00001-of-00001.safetensors", "lm_head.weight": "model-00001-of-00001.safetensors", "model.layers.0.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.1.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.2.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.3.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.4.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.5.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.6.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.7.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.8.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.9.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.10.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.11.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", 
"model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.0.w3.weight": 
"model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", 
"model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.1.w1.weight": 
"model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", 
"model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", 
"model.layers.9.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", 
"model.layers.10.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.11.self_attn.o_proj.weight": 
"model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors"}}
 
1
+ {"metadata": {"mergekit_version": "0.0.4"}, "weight_map": {"model.embed_tokens.weight": "model-00001-of-00001.safetensors", "model.norm.weight": "model-00001-of-00001.safetensors", "lm_head.weight": "model-00001-of-00001.safetensors", "model.layers.0.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.1.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.2.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.3.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.4.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.5.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.6.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.7.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.8.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.9.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.10.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.11.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", 
"model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.2.w3.weight": 
"model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", 
"model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.1.w2.weight": 
"model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", 
"model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.0.w1.weight": 
"model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", 
"model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", 
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.6.self_attn.v_proj.weight": 
"model-00001-of-00001.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.gate.weight": 
"model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors"}}