Isaak-Carter commited on
Commit
cf43ac5
1 Parent(s): fb0c2c3

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - moe
5
+ - frankenmoe
6
+ - merge
7
+ - mergekit
8
+ - lazymergekit
9
+ - Felladrin/TinyMistral-248M-Chat-v1
10
+ - Felladrin/TinyMistral-248M-Chat-v1
11
+ - Felladrin/TinyMistral-248M-Chat-v1
12
+ - Felladrin/TinyMistral-248M-Chat-v1
13
+ - Felladrin/TinyMistral-248M-Chat-v1
14
+ - Felladrin/TinyMistral-248M-Chat-v1
15
+ - Felladrin/TinyMistral-248M-Chat-v1
16
+ - Felladrin/TinyMistral-248M-Chat-v1
17
+ - Felladrin/TinyMistral-248M-Chat-v1
18
+ - Felladrin/TinyMistral-248M-Chat-v1
19
+ - Felladrin/TinyMistral-248M-Chat-v1
20
+ - Felladrin/TinyMistral-248M-Chat-v1
21
+ - Felladrin/TinyMistral-248M-Chat-v1
22
+ - Felladrin/TinyMistral-248M-Chat-v1
23
+ - Felladrin/TinyMistral-248M-Chat-v1
24
+ - Felladrin/TinyMistral-248M-Chat-v1
25
+ base_model:
26
+ - Felladrin/TinyMistral-248M-Chat-v1
27
+ - Felladrin/TinyMistral-248M-Chat-v1
28
+ - Felladrin/TinyMistral-248M-Chat-v1
29
+ - Felladrin/TinyMistral-248M-Chat-v1
30
+ - Felladrin/TinyMistral-248M-Chat-v1
31
+ - Felladrin/TinyMistral-248M-Chat-v1
32
+ - Felladrin/TinyMistral-248M-Chat-v1
33
+ - Felladrin/TinyMistral-248M-Chat-v1
34
+ - Felladrin/TinyMistral-248M-Chat-v1
35
+ - Felladrin/TinyMistral-248M-Chat-v1
36
+ - Felladrin/TinyMistral-248M-Chat-v1
37
+ - Felladrin/TinyMistral-248M-Chat-v1
38
+ - Felladrin/TinyMistral-248M-Chat-v1
39
+ - Felladrin/TinyMistral-248M-Chat-v1
40
+ - Felladrin/TinyMistral-248M-Chat-v1
41
+ - Felladrin/TinyMistral-248M-Chat-v1
42
+ ---
43
+
44
+ # SmalJ.O.S.I.E.-16x248M-Chat
45
+
46
+ SmalJ.O.S.I.E.-16x248M-Chat is a Mixture of Experts (MoE) made with the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
47
+ * [Felladrin/TinyMistral-248M-Chat-v1](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v1)
48
+ * [Felladrin/TinyMistral-248M-Chat-v1](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v1)
49
+ * [Felladrin/TinyMistral-248M-Chat-v1](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v1)
50
+ * [Felladrin/TinyMistral-248M-Chat-v1](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v1)
51
+ * [Felladrin/TinyMistral-248M-Chat-v1](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v1)
52
+ * [Felladrin/TinyMistral-248M-Chat-v1](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v1)
53
+ * [Felladrin/TinyMistral-248M-Chat-v1](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v1)
54
+ * [Felladrin/TinyMistral-248M-Chat-v1](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v1)
55
+ * [Felladrin/TinyMistral-248M-Chat-v1](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v1)
56
+ * [Felladrin/TinyMistral-248M-Chat-v1](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v1)
57
+ * [Felladrin/TinyMistral-248M-Chat-v1](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v1)
58
+ * [Felladrin/TinyMistral-248M-Chat-v1](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v1)
59
+ * [Felladrin/TinyMistral-248M-Chat-v1](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v1)
60
+ * [Felladrin/TinyMistral-248M-Chat-v1](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v1)
61
+ * [Felladrin/TinyMistral-248M-Chat-v1](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v1)
62
+ * [Felladrin/TinyMistral-248M-Chat-v1](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v1)
63
+
64
+ ## 🧩 Configuration
65
+
66
+ ```yaml
+ base_model: Felladrin/TinyMistral-248M-Chat-v1
67
+ dtype: float32
68
+ gate_mode: hidden
69
+ experts:
70
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
71
+ positive_prompts:
72
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
73
+
74
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
75
+ positive_prompts:
76
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
77
+
78
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
79
+ positive_prompts:
80
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
81
+
82
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
83
+ positive_prompts:
84
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
85
+
86
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
87
+ positive_prompts:
88
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
89
+
90
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
91
+ positive_prompts:
92
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
93
+
94
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
95
+ positive_prompts:
96
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
97
+
98
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
99
+ positive_prompts:
100
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
101
+
102
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
103
+ positive_prompts:
104
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
105
+
106
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
107
+ positive_prompts:
108
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
109
+
110
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
111
+ positive_prompts:
112
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
113
+
114
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
115
+ positive_prompts:
116
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
117
+
118
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
119
+ positive_prompts:
120
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
121
+
122
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
123
+ positive_prompts:
124
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
125
+
126
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
127
+ positive_prompts:
128
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
129
+
130
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
131
+ positive_prompts:
132
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
133
+ ```
134
+
135
+ ## 💻 Usage
136
+
137
+ ```python
138
+ !pip install -qU transformers bitsandbytes accelerate
139
+
140
+ from transformers import AutoTokenizer
141
+ import transformers
142
+ import torch
143
+
144
+ model = "Isaak-Carter/SmalJ.O.S.I.E.-16x248M-Chat"
145
+
146
+ tokenizer = AutoTokenizer.from_pretrained(model)
147
+ pipeline = transformers.pipeline(
148
+ "text-generation",
149
+ model=model,
150
+ model_kwargs={"torch_dtype": torch.float16, "load_in_4bit": True},
151
+ )
152
+
153
+ messages = [{"role": "user", "content": "Explain what a Mixture of Experts is in less than 100 words."}]
154
+ prompt = pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
155
+ outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
156
+ print(outputs[0]["generated_text"])
157
+ ```
added_tokens.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "<|bos|>": 32000,
3
+ "<|endoftext|>": 32001,
4
+ "<|im_end|>": 32003,
5
+ "<|im_start|>": 32004,
6
+ "[PAD]": 32002
7
+ }
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Felladrin/TinyMistral-248M-Chat-v1",
3
+ "architectures": [
4
+ "MixtralForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 32000,
8
+ "eos_token_id": 32003,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 1024,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4096,
13
+ "max_position_embeddings": 2048,
14
+ "model_type": "mixtral",
15
+ "num_attention_heads": 32,
16
+ "num_experts_per_tok": 2,
17
+ "num_hidden_layers": 12,
18
+ "num_key_value_heads": 8,
19
+ "num_local_experts": 16,
20
+ "output_router_logits": false,
21
+ "rms_norm_eps": 1e-06,
22
+ "rope_theta": 10000.0,
23
+ "router_aux_loss_coef": 0.001,
24
+ "sliding_window": null,
25
+ "tie_word_embeddings": false,
26
+ "torch_dtype": "float32",
27
+ "transformers_version": "4.38.2",
28
+ "use_cache": true,
29
+ "vocab_size": 32005
30
+ }
mergekit_moe_config.yml ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: Felladrin/TinyMistral-248M-Chat-v1
2
+ dtype: float32
3
+ gate_mode: hidden
4
+ experts:
5
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
6
+ positive_prompts:
7
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
8
+
9
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
10
+ positive_prompts:
11
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
12
+
13
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
14
+ positive_prompts:
15
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
16
+
17
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
18
+ positive_prompts:
19
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
20
+
21
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
22
+ positive_prompts:
23
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
24
+
25
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
26
+ positive_prompts:
27
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
28
+
29
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
30
+ positive_prompts:
31
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
32
+
33
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
34
+ positive_prompts:
35
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
36
+
37
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
38
+ positive_prompts:
39
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
40
+
41
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
42
+ positive_prompts:
43
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
44
+
45
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
46
+ positive_prompts:
47
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
48
+
49
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
50
+ positive_prompts:
51
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
52
+
53
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
54
+ positive_prompts:
55
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
56
+
57
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
58
+ positive_prompts:
59
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
60
+
61
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
62
+ positive_prompts:
63
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
64
+
65
+ - source_model: Felladrin/TinyMistral-248M-Chat-v1
66
+ positive_prompts:
67
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
model-00001-of-00001.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75a56026b57e73b64f978756279626b078191512895c85afe6af7c4b37ffa818
3
+ size 10052663928
model.safetensors.index.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata": {"mergekit_version": "0.0.4"}, "weight_map": {"model.embed_tokens.weight": "model-00001-of-00001.safetensors", "model.norm.weight": "model-00001-of-00001.safetensors", "lm_head.weight": "model-00001-of-00001.safetensors", "model.layers.0.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.1.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.2.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.3.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.4.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.5.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.6.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.7.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.8.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.9.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.10.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.11.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00001.safetensors", 
"model.layers.0.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.14.w3.weight": 
"model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", 
"model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.9.w3.weight": 
"model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00001.safetensors", 
"model.layers.5.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.4.w3.weight": 
"model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00001.safetensors", 
"model.layers.8.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.15.w3.weight": 
"model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00001.safetensors", 
"model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.10.w2.weight": 
"model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00001.safetensors", 
"model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.5.w2.weight": 
"model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00001.safetensors", 
"model.layers.4.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.0.w2.weight": 
"model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00001.safetensors", 
"model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.11.w2.weight": 
"model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", 
"model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.6.w2.weight": 
"model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00001.safetensors", 
"model.layers.0.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.1.w1.weight": 
"model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00001.safetensors", 
"model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.12.w1.weight": 
"model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", 
"model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.7.w1.weight": 
"model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00001.safetensors", 
"model.layers.8.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.2.w1.weight": 
"model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00001.safetensors", 
"model.layers.11.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.q_proj.weight": 
"model-00001-of-00001.safetensors", "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", 
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", 
"model.layers.5.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.6.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.7.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.8.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.9.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.10.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.11.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors"}}
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|bos|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|im_end|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|bos|>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "32000": {
30
+ "content": "<|bos|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "32001": {
38
+ "content": "<|endoftext|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "32002": {
46
+ "content": "[PAD]",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "32003": {
54
+ "content": "<|im_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "32004": {
62
+ "content": "<|im_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ }
69
+ },
70
+ "additional_special_tokens": [],
71
+ "bos_token": "<|bos|>",
72
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
73
+ "clean_up_tokenization_spaces": false,
74
+ "eos_token": "<|im_end|>",
75
+ "legacy": true,
76
+ "max_length": 1536,
77
+ "model_max_length": 2048,
78
+ "pad_token": "<|bos|>",
79
+ "sp_model_kwargs": {},
80
+ "spaces_between_special_tokens": false,
81
+ "stride": 0,
82
+ "tokenizer_class": "LlamaTokenizer",
83
+ "truncation_side": "right",
84
+ "truncation_strategy": "longest_first",
85
+ "unk_token": "<unk>",
86
+ "use_default_system_prompt": false
87
+ }