anemll committed on
Commit
ab50654
·
verified ·
1 Parent(s): 24cfe65

Add files using upload-large-folder tool

Browse files
Files changed (44) hide show
  1. .DS_Store +0 -0
  2. .cache/.DS_Store +0 -0
  3. .gitattributes +1 -0
  4. README.md +549 -0
  5. chat_template.jinja +266 -0
  6. config.json +1900 -0
  7. generation_config.json +14 -0
  8. model-00001-of-00003.safetensors +3 -0
  9. model-00002-of-00003.safetensors +3 -0
  10. model-00003-of-00003.safetensors +3 -0
  11. model.safetensors.index.json +0 -0
  12. packed_experts_mixed_full/layer_00.bin +3 -0
  13. packed_experts_mixed_full/layer_01.bin +3 -0
  14. packed_experts_mixed_full/layer_02.bin +3 -0
  15. packed_experts_mixed_full/layer_03.bin +3 -0
  16. packed_experts_mixed_full/layer_04.bin +3 -0
  17. packed_experts_mixed_full/layer_05.bin +3 -0
  18. packed_experts_mixed_full/layer_06.bin +3 -0
  19. packed_experts_mixed_full/layer_07.bin +3 -0
  20. packed_experts_mixed_full/layer_08.bin +3 -0
  21. packed_experts_mixed_full/layer_09.bin +3 -0
  22. packed_experts_mixed_full/layer_10.bin +3 -0
  23. packed_experts_mixed_full/layer_11.bin +3 -0
  24. packed_experts_mixed_full/layer_12.bin +3 -0
  25. packed_experts_mixed_full/layer_13.bin +3 -0
  26. packed_experts_mixed_full/layer_14.bin +3 -0
  27. packed_experts_mixed_full/layer_15.bin +3 -0
  28. packed_experts_mixed_full/layer_16.bin +3 -0
  29. packed_experts_mixed_full/layer_17.bin +3 -0
  30. packed_experts_mixed_full/layer_18.bin +3 -0
  31. packed_experts_mixed_full/layer_19.bin +3 -0
  32. packed_experts_mixed_full/layer_20.bin +3 -0
  33. packed_experts_mixed_full/layer_21.bin +3 -0
  34. packed_experts_mixed_full/layer_22.bin +3 -0
  35. packed_experts_mixed_full/layer_23.bin +3 -0
  36. packed_experts_mixed_full/layer_24.bin +3 -0
  37. packed_experts_mixed_full/layer_25.bin +3 -0
  38. packed_experts_mixed_full/layer_26.bin +3 -0
  39. packed_experts_mixed_full/layer_27.bin +3 -0
  40. packed_experts_mixed_full/layer_28.bin +3 -0
  41. packed_experts_mixed_full/layer_29.bin +3 -0
  42. packed_experts_mixed_full/layout.json +3735 -0
  43. tokenizer.json +3 -0
  44. tokenizer_config.json +94 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.cache/.DS_Store ADDED
Binary file (6.15 kB). View file
 
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,549 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ license_link: https://ai.google.dev/gemma/docs/gemma_4_license
4
+ pipeline_tag: image-text-to-text
5
+ base_model: google/gemma-4-26B-A4B-it
6
+ tags:
7
+ - gemma4
8
+ - unsloth
9
+ - gemma
10
+ - google
11
+ ---
12
+ # Read our How to [Run Gemma 4 Guide!](https://docs.unsloth.ai/models/gemma-4)
13
+
14
+ To run MLX:
15
+ ```
16
+ curl -fsSL https://raw.githubusercontent.com/unslothai/unsloth/refs/heads/main/install_gemma4_mlx.sh | sh
17
+ source ~/.unsloth/unsloth_gemma4_mlx/bin/activate
18
+ python -m mlx_lm chat --model unsloth/gemma-4-26b-a4b-it-UD-MLX-3bit --max-tokens 200
19
+ ```
20
+
21
+ <div>
22
+ <p style="margin-top: 0;margin-bottom: 0;">
23
+ <em>See <a href="https://unsloth.ai/docs/basics/unsloth-dynamic-v2.0-gguf">Unsloth Dynamic 2.0 GGUFs</a> for our quantization benchmarks.</em>
24
+ </p>
25
+ <div style="display: flex; gap: 5px; align-items: center; ">
26
+ <a href="https://github.com/unslothai/unsloth/">
27
+ <img src="https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png" width="133">
28
+ </a>
29
+ <a href="https://discord.gg/unsloth">
30
+ <img src="https://github.com/unslothai/unsloth/raw/main/images/Discord%20button.png" width="173">
31
+ </a>
32
+ <a href="https://unsloth.ai/docs/models/gemma-4">
33
+ <img src="https://raw.githubusercontent.com/unslothai/unsloth/refs/heads/main/images/documentation%20green%20button.png" width="143">
34
+ </a>
35
+ </div>
36
+ </div>
37
+
38
+ Gemma 4 can now be run and fine-tuned in [Unsloth Studio](https://unsloth.ai/docs/new/studio). [Read our guide](https://unsloth.ai/docs/models/gemma-4).
39
+
40
+ See all versions of Gemma 4 (GGUF, 16-bit etc.) [in our collection](https://huggingface.co/collections/unsloth/gemma-4).
41
+
42
+ ---
43
+
44
+ <div align="center">
45
+ <img src="https://ai.google.dev/gemma/images/gemma4_banner.png">
46
+ </div>
47
+
48
+
49
+ <p align="center">
50
+ <a href="https://huggingface.co/collections/google/gemma-4" target="_blank">Hugging Face</a> |
51
+ <a href="https://github.com/google-gemma" target="_blank">GitHub</a> |
52
+ <a href="https://blog.google/innovation-and-ai/technology/developers-tools/gemma-4/" target="_blank">Launch Blog</a> |
53
+ <a href="https://ai.google.dev/gemma/docs/core" target="_blank">Documentation</a>
54
+ <br>
55
+ <b>License</b>: <a href="https://ai.google.dev/gemma/docs/gemma_4_license" target="_blank">Apache 2.0</a> | <b>Authors</b>: <a href="https://deepmind.google/models/gemma/" target="_blank">Google DeepMind</a>
56
+ </p>
57
+
58
+ Gemma is a family of open models built by Google DeepMind. Gemma 4 models are multimodal, handling text and image input (with audio supported on small models) and generating text output. This release includes open-weights models in both pre-trained and instruction-tuned variants. Gemma 4 features a context window of up to 256K tokens and maintains multilingual support in over 140 languages.
59
+
60
+ Featuring both Dense and Mixture-of-Experts (MoE) architectures, Gemma 4 is well-suited for tasks like text generation, coding, and reasoning. The models are available in four distinct sizes: **E2B**, **E4B**, **26B A4B**, and **31B**. Their diverse sizes make them deployable in environments ranging from high-end phones to laptops and servers, democratizing access to state-of-the-art AI.
61
+
62
+ Gemma 4 introduces key **capability and architectural advancements**:
63
+
64
+ * **Reasoning** – All models in the family are designed as highly capable reasoners, with configurable thinking modes.
65
+
66
+ * **Extended Multimodalities** – Processes Text, Image with variable aspect ratio and resolution support (all models), Video, and Audio (featured natively on the E2B and E4B models).
67
+
68
+ * **Diverse & Efficient Architectures** – Offers Dense and Mixture-of-Experts (MoE) variants of different sizes for scalable deployment.
69
+
70
+ * **Optimized for On-Device** – Smaller models are specifically designed for efficient local execution on laptops and mobile devices.
71
+
72
+ * **Increased Context Window** – The small models feature a 128K context window, while the medium models support 256K.
73
+
74
+ * **Enhanced Coding & Agentic Capabilities** – Achieves notable improvements in coding benchmarks alongside native function-calling support, powering highly capable autonomous agents.
75
+
76
+ * **Native System Prompt Support** – Gemma 4 introduces native support for the `system` role, enabling more structured and controllable conversations.
77
+
78
+ ## **Models Overview**
79
+
80
+ Gemma 4 models are designed to deliver frontier-level performance at each size, targeting deployment scenarios from mobile and edge devices (E2B, E4B) to consumer GPUs and workstations (26B A4B, 31B). They are well-suited for reasoning, agentic workflows, coding, and multimodal understanding.
81
+
82
+ The models employ a hybrid attention mechanism that interleaves local sliding window attention with full global attention, ensuring the final layer is always global. This hybrid design delivers the processing speed and low memory footprint of a lightweight model without sacrificing the deep awareness required for complex, long-context tasks. To optimize memory for long contexts, global layers feature unified Keys and Values, and apply Proportional RoPE (p-RoPE).
83
+
84
+ ### Dense Models
85
+
86
+ | Property | E2B | E4B | 31B Dense |
87
+ | :---- | :---- | :---- | :---- |
88
+ | **Total Parameters** | 2.3B effective (5.1B with embeddings) | 4.5B effective (8B with embeddings) | 30.7B |
89
+ | **Layers** | 35 | 42 | 60 |
90
+ | **Sliding Window** | 512 tokens | 512 tokens | 1024 tokens |
91
+ | **Context Length** | 128K tokens | 128K tokens | 256K tokens |
92
+ | **Vocabulary Size** | 262K | 262K | 262K |
93
+ | **Supported Modalities** | Text, Image, Audio | Text, Image, Audio | Text, Image |
94
+ | **Vision Encoder Parameters** | *~150M* | *~150M* | *~550M* |
95
+ | **Audio Encoder Parameters** | *~300M* | *~300M* | No Audio |
96
+
97
+ The "E" in E2B and E4B stands for "effective" parameters. The smaller models incorporate Per-Layer Embeddings (PLE) to maximize parameter efficiency in on-device deployments. Rather than adding more layers or parameters to the model, PLE gives each decoder layer its own small embedding for every token. These embedding tables are large but are only used for quick lookups, which is why the effective parameter count is much smaller than the total.
98
+
99
+ ### Mixture-of-Experts (MoE) Model
100
+
101
+ | Property | 26B A4B MoE |
102
+ | :---- | :---- |
103
+ | **Total Parameters** | 25.2B |
104
+ | **Active Parameters** | 3.8B |
105
+ | **Layers** | 30 |
106
+ | **Sliding Window** | 1024 tokens |
107
+ | **Context Length** | 256K tokens |
108
+ | **Vocabulary Size** | 262K |
109
+ | **Expert Count** | 8 active / 128 total and 1 shared |
110
+ | **Supported Modalities** | Text, Image |
111
+ | **Vision Encoder Parameters** | *~550M* |
112
+
113
+ The "A" in 26B A4B stands for "active parameters" in contrast to the total number of parameters the model contains. By only activating a 4B subset of parameters during inference, the Mixture-of-Experts model runs much faster than its 26B total might suggest. This makes it an excellent choice for fast inference compared to the dense 31B model since it runs almost as fast as a 4B-parameter model.
114
+
115
+ ## **Benchmark Results**
116
+
117
+ These models were evaluated against a large collection of different datasets and metrics to cover different aspects of text generation. Evaluation results marked in the table are for instruction-tuned models.
118
+
119
+ | | Gemma 4 31B | Gemma 4 26B A4B | Gemma 4 E4B | Gemma 4 E2B | Gemma 3 27B (no think) |
120
+ | :---- | :---- | :---- | :---- | :---- | :---- |
121
+ | MMLU Pro | 85.2% | 82.6% | 69.4% | 60.0% | 67.6% |
122
+ | AIME 2026 no tools | 89.2% | 88.3% | 42.5% | 37.5% | 20.8% |
123
+ | LiveCodeBench v6 | 80.0% | 77.1% | 52.0% | 44.0% | 29.1% |
124
+ | Codeforces ELO | 2150 | 1718 | 940 | 633 | 110 |
125
+ | GPQA Diamond | 84.3% | 82.3% | 58.6% | 43.4% | 42.4% |
126
+ | Tau2 (average over 3) | 76.9% | 68.2% | 42.2% | 24.5% | 16.2% |
127
+ | HLE no tools | 19.5% | 8.7% | - | - | - |
128
+ | HLE with search | 26.5% | 17.2% | - | - | - |
129
+ | BigBench Extra Hard | 74.4% | 64.8% | 33.1% | 21.9% | 19.3% |
130
+ | MMMLU | 88.4% | 86.3% | 76.6% | 67.4% | 70.7% |
131
+ | **Vision** | | | | | |
132
+ | MMMU Pro | 76.9% | 73.8% | 52.6% | 44.2% | 49.7% |
133
+ | OmniDocBench 1.5 (average edit distance, lower is better) | 0.131 | 0.149 | 0.181 | 0.290 | 0.365 |
134
+ | MATH-Vision | 85.6% | 82.4% | 59.5% | 52.4% | 46.0% |
135
+ | MedXPertQA MM | 61.3% | 58.1% | 28.7% | 23.5% | - |
136
+ | **Audio** | | | | | |
137
+ | CoVoST | - | - | 35.54 | 33.47 | - |
138
+ | FLEURS (lower is better) | - | - | 0.08 | 0.09 | - |
139
+ | **Long Context** | | | | | |
140
+ | MRCR v2 8 needle 128k (average) | 66.4% | 44.1% | 25.4% | 19.1% | 13.5% |
141
+
142
+ ## **Core Capabilities**
143
+
144
+ Gemma 4 models handle a broad range of tasks across text, vision, and audio. Key capabilities include:
145
+
146
+ * **Thinking** – Built-in reasoning mode that lets the model think step-by-step before answering.
147
+ * **Long Context** – Context windows of up to 128K tokens (E2B/E4B) and 256K tokens (26B A4B/31B).
148
+ * **Image Understanding** – Object detection, Document/PDF parsing, screen and UI understanding, chart comprehension, OCR (including multilingual), handwriting recognition, and pointing. Images can be processed at variable aspect ratios and resolutions.
149
+ * **Video Understanding** – Analyze video by processing sequences of frames.
150
+ * **Interleaved Multimodal Input** – Freely mix text and images in any order within a single prompt.
151
+ * **Function Calling** – Native support for structured tool use, enabling agentic workflows.
152
+ * **Coding** – Code generation, completion, and correction.
153
+ * **Multilingual** – Out-of-the-box support for 35+ languages, pre-trained on 140+ languages.
154
+ * **Audio** (E2B and E4B only) – Automatic speech recognition (ASR) and speech-to-translated-text translation across multiple languages.
155
+
156
+ ## Getting Started
157
+
158
+ You can use all Gemma 4 models with the latest version of Transformers. To get started, install the necessary dependencies in your environment:
159
+
160
+ `pip install -U transformers torch accelerate`
161
+
162
+ Once you have everything installed, you can proceed to load the model with the code below:
163
+
164
+ ```python
165
+ from transformers import AutoProcessor, AutoModelForCausalLM
166
+
167
+ MODEL_ID = "google/gemma-4-26B-A4B-it"
168
+
169
+ # Load model
170
+ processor = AutoProcessor.from_pretrained(MODEL_ID)
171
+ model = AutoModelForCausalLM.from_pretrained(
172
+ MODEL_ID,
173
+ dtype="auto",
174
+ device_map="auto"
175
+ )
176
+ ```
177
+
178
+ Once the model is loaded, you can start generating output:
179
+
180
+ ```python
181
+ # Prompt
182
+ messages = [
183
+ {"role": "system", "content": "You are a helpful assistant."},
184
+ {"role": "user", "content": "Write a short joke about saving RAM."},
185
+ ]
186
+
187
+ # Process input
188
+ text = processor.apply_chat_template(
189
+ messages,
190
+ tokenize=False,
191
+ add_generation_prompt=True,
192
+ enable_thinking=False
193
+ )
194
+ inputs = processor(text=text, return_tensors="pt").to(model.device)
195
+ input_len = inputs["input_ids"].shape[-1]
196
+
197
+ # Generate output
198
+ outputs = model.generate(**inputs, max_new_tokens=1024)
199
+ response = processor.decode(outputs[0][input_len:], skip_special_tokens=False)
200
+
201
+ # Parse output
202
+ processor.parse_response(response)
203
+ ```
204
+
205
+ To enable reasoning, set `enable_thinking=True` and the `parse_response` function will take care of parsing the thinking output.
206
+
207
+ Below, you will also find snippets for processing audio (E2B and E4B only), images, and video alongside text:
208
+
209
+ <details>
210
+ <summary>Code for processing Audio</summary>
211
+
212
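+ Instead of using `AutoModelForCausalLM`, you can use `AutoModelForMultimodalLM` to process audio. To use it, make sure to install the following packages (note that audio input is supported only by the E2B and E4B models, so replace the `MODEL_ID` in the snippet below with one of those checkpoints):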
213
+
214
+
215
+ `pip install -U transformers torch torchvision librosa accelerate`
216
+
217
+ You can then load the model with the code below:
218
+
219
+ ```python
220
+ from transformers import AutoProcessor, AutoModelForMultimodalLM
221
+
222
+ MODEL_ID = "google/gemma-4-26B-A4B-it"
223
+
224
+ # Load model
225
+ processor = AutoProcessor.from_pretrained(MODEL_ID)
226
+ model = AutoModelForMultimodalLM.from_pretrained(
227
+ MODEL_ID,
228
+ dtype="auto",
229
+ device_map="auto"
230
+ )
231
+ ```
232
+
233
+ Once the model is loaded, you can start generating output by directly referencing the audio URL in the prompt:
234
+
235
+
236
+ ```python
237
+ # Prompt - add audio before text
238
+ messages = [
239
+ {
240
+ "role": "user",
241
+ "content": [
242
+ {"type": "audio", "audio": "https://raw.githubusercontent.com/google-gemma/cookbook/refs/heads/main/Demos/sample-data/journal1.wav"},
243
+ {"type": "text", "text": "Transcribe the following speech segment in its original language. Follow these specific instructions for formatting the answer:\n* Only output the transcription, with no newlines.\n* When transcribing numbers, write the digits, i.e. write 1.7 and not one point seven, and write 3 instead of three."},
244
+ ]
245
+ }
246
+ ]
247
+
248
+ # Process input
249
+ inputs = processor.apply_chat_template(
250
+ messages,
251
+ tokenize=True,
252
+ return_dict=True,
253
+ return_tensors="pt",
254
+ add_generation_prompt=True,
255
+ ).to(model.device)
256
+ input_len = inputs["input_ids"].shape[-1]
257
+
258
+ # Generate output
259
+ outputs = model.generate(**inputs, max_new_tokens=512)
260
+ response = processor.decode(outputs[0][input_len:], skip_special_tokens=False)
261
+
262
+ # Parse output
263
+ processor.parse_response(response)
264
+ ```
265
+
266
+ </details>
267
+
268
+ <details>
269
+ <summary>Code for processing Images</summary>
270
+
271
+ Instead of using `AutoModelForCausalLM`, you can use `AutoModelForMultimodalLM` to process images. To use it, make sure to install the following packages:
272
+
273
+
274
+ `pip install -U transformers torch torchvision accelerate`
275
+
276
+ You can then load the model with the code below:
277
+
278
+ ```python
279
+ from transformers import AutoProcessor, AutoModelForMultimodalLM
280
+
281
+ MODEL_ID = "google/gemma-4-26B-A4B-it"
282
+
283
+ # Load model
284
+ processor = AutoProcessor.from_pretrained(MODEL_ID)
285
+ model = AutoModelForMultimodalLM.from_pretrained(
286
+ MODEL_ID,
287
+ dtype="auto",
288
+ device_map="auto"
289
+ )
290
+ ```
291
+
292
+ Once the model is loaded, you can start generating output by directly referencing the image URL in the prompt:
293
+
294
+
295
+ ```python
296
+ # Prompt - add image before text
297
+ messages = [
298
+ {
299
+ "role": "user", "content": [
300
+ {"type": "image", "url": "https://raw.githubusercontent.com/google-gemma/cookbook/refs/heads/main/Demos/sample-data/GoldenGate.png"},
301
+ {"type": "text", "text": "What is shown in this image?"}
302
+ ]
303
+ }
304
+ ]
305
+
306
+ # Process input
307
+ inputs = processor.apply_chat_template(
308
+ messages,
309
+ tokenize=True,
310
+ return_dict=True,
311
+ return_tensors="pt",
312
+ add_generation_prompt=True,
313
+ ).to(model.device)
314
+ input_len = inputs["input_ids"].shape[-1]
315
+
316
+ # Generate output
317
+ outputs = model.generate(**inputs, max_new_tokens=512)
318
+ response = processor.decode(outputs[0][input_len:], skip_special_tokens=False)
319
+
320
+ # Parse output
321
+ processor.parse_response(response)
322
+ ```
323
+
324
+ </details>
325
+
326
+
327
+ <details>
328
+ <summary>Code for processing Videos</summary>
329
+
330
+ Instead of using `AutoModelForCausalLM`, you can use `AutoModelForMultimodalLM` to process videos. To use it, make sure to install the following packages:
331
+
332
+ `pip install -U transformers torch torchvision librosa accelerate`
333
+
334
+ You can then load the model with the code below:
335
+
336
+ ```python
337
+ from transformers import AutoProcessor, AutoModelForMultimodalLM
338
+
339
+ MODEL_ID = "google/gemma-4-26B-A4B-it"
340
+
341
+ # Load model
342
+ processor = AutoProcessor.from_pretrained(MODEL_ID)
343
+ model = AutoModelForMultimodalLM.from_pretrained(
344
+ MODEL_ID,
345
+ dtype="auto",
346
+ device_map="auto"
347
+ )
348
+ ```
349
+
350
+ Once the model is loaded, you can start generating output by directly referencing the video URL in the prompt:
351
+
352
+
353
+ ```python
354
+ # Prompt - add video before text
355
+ messages = [
356
+ {
357
+ 'role': 'user',
358
+ 'content': [
359
+ {"type": "video", "video": "https://github.com/bebechien/gemma/raw/refs/heads/main/videos/ForBiggerBlazes.mp4"},
360
+ {'type': 'text', 'text': 'Describe this video.'}
361
+ ]
362
+ }
363
+ ]
364
+
365
+ # Process input
366
+ inputs = processor.apply_chat_template(
367
+ messages,
368
+ tokenize=True,
369
+ return_dict=True,
370
+ return_tensors="pt",
371
+ add_generation_prompt=True,
372
+ ).to(model.device)
373
+ input_len = inputs["input_ids"].shape[-1]
374
+
375
+ # Generate output
376
+ outputs = model.generate(**inputs, max_new_tokens=512)
377
+ response = processor.decode(outputs[0][input_len:], skip_special_tokens=False)
378
+
379
+ # Parse output
380
+ processor.parse_response(response)
381
+ ```
382
+
383
+ </details>
384
+
385
+
386
+ ## **Best Practices**
387
+
388
+ For the best performance, use these configurations and best practices:
389
+
390
+ ### 1. Sampling Parameters
391
+
392
+ Use the following standardized sampling configuration across all use cases:
393
+
394
+ * `temperature=1.0`
395
+ * `top_p=0.95`
396
+ * `top_k=64`
397
+
398
+ ### 2. Thinking Mode Configuration
399
+
400
+ In contrast to Gemma 3, Gemma 4 models use the standard `system`, `assistant`, and `user` roles. To properly manage the thinking process, use the following control tokens:
401
+
402
+ * **Trigger Thinking:** Thinking is enabled by including the `<|think|>` token at the start of the system prompt. To disable thinking, remove the token.
403
+ * **Standard Generation:** When thinking is enabled, the model will output its internal reasoning followed by the final answer using this structure:
404
+ `<|channel>thought\n`**[Internal reasoning]**`<channel|>`**[Final answer]**
405
+ * **Disabled Thinking Behavior:** For all models except for the E2B and E4B variants, if thinking is disabled, the model will still generate the tags but with an empty thought block:
406
+ `<|channel>thought\n<channel|>`**[Final answer]**
407
+
408
+ > [!Note]
409
+ > Note that many libraries like Transformers and llama.cpp handle the complexities of the chat template for you.
410
+
411
+ ### 3. Multi-Turn Conversations
412
+
413
+ * **No Thinking Content in History**: In multi-turn conversations, the historical model output should only include the final response. Thoughts from previous model turns must *not be added* before the next user turn begins.
414
+
415
+ ### 4. Modality Order
416
+
417
+ * For optimal performance with multimodal inputs, place image and/or audio content **before** the text in your prompt.
418
+
419
+ ### 5. Variable Image Resolution
420
+
421
+ Aside from variable aspect ratios, Gemma 4 supports variable image resolution through a configurable visual token budget, which controls how many tokens are used to represent an image. A higher token budget preserves more visual detail at the cost of additional compute, while a lower budget enables faster inference for tasks that don't require fine-grained understanding.
422
+
423
+ * The supported token budgets are: **70**, **140**, **280**, **560**, and **1120**.
424
+ * Use *lower budgets* for classification, captioning, or video understanding, where faster inference and processing many frames outweigh fine-grained detail.
425
+ * Use *higher budgets* for tasks like OCR, document parsing, or reading small text.
426
+
427
+ ### 6. Audio
428
+
429
+ Use the following prompt structures for audio processing:
430
+
431
+ * **Audio Speech Recognition (ASR)**
432
+
433
+ ```text
434
+ Transcribe the following speech segment in {LANGUAGE} into {LANGUAGE} text.
435
+
436
+ Follow these specific instructions for formatting the answer:
437
+ * Only output the transcription, with no newlines.
438
+ * When transcribing numbers, write the digits, i.e. write 1.7 and not one point seven, and write 3 instead of three.
439
+ ```
440
+
441
+ * **Automatic Speech Translation (AST)**
442
+
443
+ ```text
444
+ Transcribe the following speech segment in {SOURCE_LANGUAGE}, then translate it into {TARGET_LANGUAGE}.
445
+ When formatting the answer, first output the transcription in {SOURCE_LANGUAGE}, then one newline, then output the string '{TARGET_LANGUAGE}: ', then the translation in {TARGET_LANGUAGE}.
446
+ ```
447
+
448
+ ### 7. Audio and Video Length
449
+
450
+ All models support image inputs and can process videos as sequences of frames, whereas only the E2B and E4B models also support audio inputs. Audio supports a maximum length of 30 seconds. Video supports a maximum length of 60 seconds, assuming the frames are processed at one frame per second.
451
+
452
+ ## **Model Data**
453
+
454
+ Data used for model training and how the data was processed.
455
+
456
+ ### **Training Dataset**
457
+
458
+ Our pre-training dataset is a large-scale, diverse collection of data encompassing a wide range of domains and modalities, including web documents, code, images, and audio, with a cutoff date of January 2025. Here are the key components:
459
+
460
+ * **Web Documents**: A diverse collection of web text ensures the model is exposed to a broad range of linguistic styles, topics, and vocabulary. The training dataset includes content in over 140 languages.
461
+ * **Code**: Exposing the model to code helps it to learn the syntax and patterns of programming languages, which improves its ability to generate code and understand code-related questions.
462
+ * **Mathematics**: Training on mathematical text helps the model learn logical reasoning and symbolic representation, and helps it address mathematical queries.
463
+ * **Images**: A wide range of images enables the model to perform image analysis and visual data extraction tasks.
464
+
465
+ The combination of these diverse data sources is crucial for training a powerful multimodal model that can handle a wide variety of different tasks and data formats.
466
+
467
+ ### **Data Preprocessing**
468
+
469
+ Here are the key data cleaning and filtering methods applied to the training data:
470
+
471
+ * **CSAM Filtering**: Rigorous CSAM (Child Sexual Abuse Material) filtering was applied at multiple stages in the data preparation process to ensure the exclusion of harmful and illegal content.
472
+ * **Sensitive Data Filtering**: As part of making Gemma pre-trained models safe and reliable, automated techniques were used to filter out certain personal information and other sensitive data from training sets.
473
+ * **Additional methods**: Filtering based on content quality and safety in line with [our policies](https://ai.google/static/documents/ai-responsibility-update-published-february-2025.pdf).
474
+
475
+ ## **Ethics and Safety**
476
+
477
+ As open models become central to enterprise infrastructure, provenance and security are paramount. Developed by Google DeepMind, Gemma 4 undergoes the same rigorous safety evaluations as our proprietary Gemini models.
478
+
479
+ ### **Evaluation Approach**
480
+
481
+ Gemma 4 models were developed in partnership with internal safety and responsible AI teams. A range of automated as well as human evaluations were conducted to help improve model safety. These evaluations align with [Google’s AI principles](https://ai.google/principles/), as well as safety policies, which aim to prevent our generative AI models from generating harmful content, including:
482
+
483
+ * Content related to child sexual abuse material and exploitation
484
+ * Dangerous content (e.g., promoting suicide, or instructing in activities that could cause real-world harm)
485
+ * Sexually explicit content
486
+ * Hate speech (e.g., dehumanizing members of protected groups)
487
+ * Harassment (e.g., encouraging violence against people)
488
+
489
+ ### **Evaluation Results**
490
+
491
+ For all areas of safety testing, we saw major improvements in all categories of content safety relative to previous Gemma models. Overall, Gemma 4 models significantly outperform Gemma 3 and 3n models in improving safety, while keeping unjustified refusals low. All testing was conducted without safety filters to evaluate the model capabilities and behaviors. For both text-to-text and image-to-text, and across all model sizes, the model produced minimal policy violations, and showed significant improvements over previous Gemma models' performance.
492
+
493
+ ## **Usage and Limitations**
494
+
495
+ These models have certain limitations that users should be aware of.
496
+
497
+ ### **Intended Usage**
498
+
499
+ Multimodal models (capable of processing vision, language, and/or audio) have a wide range of applications across various industries and domains. The following list of potential uses is not comprehensive. The purpose of this list is to provide contextual information about the possible use-cases that the model creators considered as part of model training and development.
500
+
501
+ * **Content Creation and Communication**
502
+ * **Text Generation**: These models can be used to generate creative text formats such as poems, scripts, code, marketing copy, and email drafts.
503
+ * **Chatbots and Conversational AI**: Power conversational interfaces for customer service, virtual assistants, or interactive applications.
504
+ * **Text Summarization**: Generate concise summaries of a text corpus, research papers, or reports.
505
+ * **Image Data Extraction**: These models can be used to extract, interpret, and summarize visual data for text communications.
506
+ * **Audio Processing and Interaction**: The smaller models (E2B and E4B) can analyze and interpret audio inputs, enabling voice-driven interactions and transcriptions.
507
+ * **Research and Education**
508
+ * **Natural Language Processing (NLP) and VLM Research**: These models can serve as a foundation for researchers to experiment with VLM and NLP techniques, develop algorithms, and contribute to the advancement of the field.
509
+ * **Language Learning Tools**: Support interactive language learning experiences, aiding in grammar correction or providing writing practice.
510
+ * **Knowledge Exploration**: Assist researchers in exploring large bodies of text by generating summaries or answering questions about specific topics.
511
+
512
+ ### **Limitations**
513
+
514
+ * **Training Data**
515
+ * The quality and diversity of the training data significantly influence the model's capabilities. Biases or gaps in the training data can lead to limitations in the model's responses.
516
+ * The scope of the training dataset determines the subject areas the model can handle effectively.
517
+ * **Context and Task Complexity**
518
+ * Models perform well on tasks that can be framed with clear prompts and instructions. Open-ended or highly complex tasks might be challenging.
519
+ * A model's performance can be influenced by the amount of context provided (longer context generally leads to better outputs, up to a certain point).
520
+ * **Language Ambiguity and Nuance**
521
+ * Natural language is inherently complex. Models might struggle to grasp subtle nuances, sarcasm, or figurative language.
522
+ * **Factual Accuracy**
523
+ * Models generate responses based on information they learned from their training datasets, but they are not knowledge bases. They may generate incorrect or outdated factual statements.
524
+ * **Common Sense**
525
+ * Models rely on statistical patterns in language. They might lack the ability to apply common sense reasoning in certain situations.
526
+
527
+ ### **Ethical Considerations and Risks**
528
+
529
+ The development of vision-language models (VLMs) raises several ethical concerns. In creating an open model, we have carefully considered the following:
530
+
531
+ * **Bias and Fairness**
532
+ * VLMs trained on large-scale, real-world text and image data can reflect socio-cultural biases embedded in the training material. Gemma 4 models underwent careful scrutiny, input data pre-processing, and post-training evaluations as reported in this card to help mitigate the risk of these biases.
533
+ * **Misinformation and Misuse**
534
+ * VLMs can be misused to generate text that is false, misleading, or harmful.
535
+ * Guidelines are provided for responsible use of the model; see the [Responsible Generative AI Toolkit](https://ai.google.dev/responsible).
536
+ * **Transparency and Accountability**
537
+ * This model card summarizes details on the models' architecture, capabilities, limitations, and evaluation processes.
538
+ * A responsibly developed open model offers the opportunity to share innovation by making VLM technology accessible to developers and researchers across the AI ecosystem.
539
+
540
+ **Risks identified and mitigations**:
541
+
542
+ * **Generation of harmful content**: Mechanisms and guidelines for content safety are essential. Developers are encouraged to exercise caution and implement appropriate content safety safeguards based on their specific product policies and application use cases.
543
+ * **Misuse for malicious purposes**: Technical limitations and developer and end-user education can help mitigate against malicious applications of VLMs. Educational resources and reporting mechanisms for users to flag misuse are provided.
544
+ * **Privacy violations**: Models were trained on data filtered for removal of certain personal information and other sensitive data. Developers are encouraged to adhere to privacy regulations with privacy-preserving techniques.
545
+ * **Perpetuation of biases**: Developers are encouraged to perform continuous monitoring (using evaluation metrics and human review) and to explore de-biasing techniques during model training, fine-tuning, and other use cases.
546
+
547
+ ### **Benefits**
548
+
549
+ At the time of release, this family of models provides high-performance open vision-language model implementations that, compared to similarly sized models, are designed from the ground up for responsible AI development.
chat_template.jinja ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {#- format_parameters(properties, required): renders each entry of a schema `properties` mapping as `name:{...}`, sorted by key and comma-separated. Keys listed in standard_keys are skipped. The `required` argument is accepted but not read inside this macro. add_comma records whether a field has already been written inside the current `name:{...}` so that a separator is only emitted between fields. -#}{%- macro format_parameters(properties, required) -%}
2
+ {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
3
+ {%- set ns = namespace(found_first=false) -%}
4
+ {%- for key, value in properties | dictsort -%}
5
+ {%- set add_comma = false -%}
6
+ {%- if key not in standard_keys -%}
7
+ {%- if ns.found_first %},{% endif -%}
8
+ {%- set ns.found_first = true -%}
9
+ {{ key }}:{
10
+ {%- if value['description'] -%}
11
+ description:<|"|>{{ value['description'] }}<|"|>
12
+ {%- set add_comma = true -%}
13
+ {%- endif -%}
14
+ {%- if value['nullable'] %}
15
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
16
+ nullable:true
17
+ {%- endif -%}
18
+ {%- if value['type'] | upper == 'STRING' -%}
19
+ {%- if value['enum'] -%}
20
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
21
+ enum:{{ format_argument(value['enum']) }}
22
+ {%- endif -%}
23
+ {%- elif value['type'] | upper == 'OBJECT' -%}
24
+ {#- The separator goes through add_comma: no leading comma when nothing precedes `properties`, and the later `type` field still gets its comma -#}{%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}properties:{
25
+ {%- if value['properties'] is defined and value['properties'] is mapping -%}
26
+ {{- format_parameters(value['properties'], value['required'] | default([])) -}}
27
+ {%- elif value is mapping -%}
28
+ {{- format_parameters(value, value['required'] | default([])) -}}
29
+ {%- endif -%}
30
+ }
31
+ {%- if value['required'] -%}
32
+ ,required:[
33
+ {%- for item in value['required'] | default([]) -%}
34
+ <|"|>{{- item -}}<|"|>
35
+ {%- if not loop.last %},{% endif -%}
36
+ {%- endfor -%}
37
+ ]
38
+ {%- endif -%}
39
+ {%- elif value['type'] | upper == 'ARRAY' -%}
40
+ {%- if value['items'] is mapping and value['items'] -%}
41
+ {#- Same separator handling as for `properties` in the OBJECT branch -#}{%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}items:{
42
+ {%- set ns_items = namespace(found_first=false) -%}
43
+ {%- for item_key, item_value in value['items'] | dictsort -%}
44
+ {%- if item_value is not none -%}
45
+ {%- if ns_items.found_first %},{% endif -%}
46
+ {%- set ns_items.found_first = true -%}
47
+ {%- if item_key == 'properties' -%}
48
+ properties:{
49
+ {%- if item_value is mapping -%}
50
+ {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
51
+ {%- endif -%}
52
+ }
53
+ {%- elif item_key == 'required' -%}
54
+ required:[
55
+ {%- for req_item in item_value -%}
56
+ <|"|>{{- req_item -}}<|"|>
57
+ {%- if not loop.last %},{% endif -%}
58
+ {%- endfor -%}
59
+ ]
60
+ {%- elif item_key == 'type' -%}
61
+ {%- if item_value is string -%}
62
+ type:{{ format_argument(item_value | upper) }}
63
+ {%- else -%}
64
+ type:{{ format_argument(item_value | map('upper') | list) }}
65
+ {%- endif -%}
66
+ {%- else -%}
67
+ {{ item_key }}:{{ format_argument(item_value) }}
68
+ {%- endif -%}
69
+ {%- endif -%}
70
+ {%- endfor -%}
71
+ }
72
+ {%- endif -%}
73
+ {%- endif -%}
74
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
75
+ type:<|"|>{{ value['type'] | upper }}<|"|>}
76
+ {%- endif -%}
77
+ {%- endfor -%}
78
+ {%- endmacro -%}
79
+ {#- format_function_declaration(tool_data): renders one tool as `declaration:NAME{description:...,parameters:{...},response:{...}}` from tool_data['function']. The parameters and response objects are always closed, and a comma is written after a field only when another field follows it. -#}{%- macro format_function_declaration(tool_data) -%}
80
+ declaration:{{- tool_data['function']['name'] -}}{description:<|"|>{{- tool_data['function']['description'] -}}<|"|>
81
+ {%- set params = tool_data['function']['parameters'] -%}
82
+ {%- if params -%}
83
+ ,parameters:{
84
+ {%- if params['properties'] -%}
85
+ properties:{ {{- format_parameters(params['properties'], params['required']) -}} }{%- if params['required'] or params['type'] %},{% endif -%}
86
+ {%- endif -%}
87
+ {%- if params['required'] -%}
88
+ required:[
89
+ {%- for item in params['required'] -%}
90
+ <|"|>{{- item -}}<|"|>
91
+ {{- ',' if not loop.last -}}
92
+ {%- endfor -%}
93
+ ]{%- if params['type'] %},{% endif -%}
94
+ {%- endif -%}
95
+ {%- if params['type'] -%}
96
+ type:<|"|>{{- params['type'] | upper -}}<|"|>
97
+ {%- endif -%}{#- The brace on the next line closes `parameters:{` even when params has no type -#}
98
+ }{%- endif -%}
99
+ {%- if 'response' in tool_data['function'] -%}
100
+ {%- set response_declaration = tool_data['function']['response'] -%}
101
+ ,response:{
102
+ {%- if response_declaration['description'] -%}
103
+ description:<|"|>{{- response_declaration['description'] -}}<|"|>{%- if response_declaration['type'] | upper == 'OBJECT' %},{% endif -%}
104
+ {%- endif -%}
105
+ {%- if response_declaration['type'] | upper == 'OBJECT' -%}
106
+ type:<|"|>{{- response_declaration['type'] | upper -}}<|"|>
107
+ {%- endif -%}{#- The brace on the next line closes `response:{` even when the type is not OBJECT -#}
108
+ }{%- endif -%}
109
+ }
110
+ {%- endmacro -%}
111
+ {#- format_argument(argument, escape_keys=True): serialises a value recursively. Strings are wrapped in <|"|> markers, booleans become true/false, mappings become {key:value,...} sorted by key (keys are wrapped in <|"|> only when escape_keys is true), other sequences become [item,...], and anything else is printed as-is. -#}{%- macro format_argument(argument, escape_keys=True) -%}
112
+ {#- The string test comes before the sequence test below because Jinja's `sequence` test also accepts strings -#}{%- if argument is string -%}
113
+ {{- '<|"|>' + argument + '<|"|>' -}}
114
+ {%- elif argument is boolean -%}
115
+ {{- 'true' if argument else 'false' -}}
116
+ {%- elif argument is mapping -%}
117
+ {{- '{' -}}
118
+ {#- found_first is kept on a namespace so the assignment inside the loop is visible on the next iteration -#}{%- set ns = namespace(found_first=false) -%}
119
+ {%- for key, value in argument | dictsort -%}
120
+ {%- if ns.found_first %},{% endif -%}
121
+ {%- set ns.found_first = true -%}
122
+ {%- if escape_keys -%}
123
+ {{- '<|"|>' + key + '<|"|>' -}}
124
+ {%- else -%}
125
+ {{- key -}}
126
+ {%- endif -%}
127
+ :{{- format_argument(value, escape_keys=escape_keys) -}}
128
+ {%- endfor -%}
129
+ {{- '}' -}}
130
+ {%- elif argument is sequence -%}
131
+ {{- '[' -}}
132
+ {%- for item in argument -%}
133
+ {{- format_argument(item, escape_keys=escape_keys) -}}
134
+ {%- if not loop.last %},{% endif -%}
135
+ {%- endfor -%}
136
+ {{- ']' -}}
137
+ {#- Fallback: the value is printed with Jinja's default conversion; NOTE(review): a none value would presumably print as None rather than null, confirm this is intended -#}{%- else -%}
138
+ {{- argument -}}
139
+ {%- endif -%}
140
+ {%- endmacro -%}
141
+ {#- strip_thinking(text): returns text with every `<|channel>...<channel|>` span removed and the result trimmed. -#}{%- macro strip_thinking(text) -%}
142
+ {%- set ns = namespace(result='') -%}
143
+ {#- Splitting on the closing marker leaves each opening marker, if any, inside one piece -#}{%- for part in text.split('<channel|>') -%}
144
+ {%- if '<|channel>' in part -%}
145
+ {#- Keep only the text that precedes the opening marker in this piece -#}{%- set ns.result = ns.result + part.split('<|channel>')[0] -%}
146
+ {%- else -%}
147
+ {%- set ns.result = ns.result + part -%}
148
+ {%- endif -%}
149
+ {%- endfor -%}
150
+ {{- ns.result | trim -}}
151
+ {%- endmacro -%}
152
+
153
+ {#- ns.prev_message_type records the kind of segment rendered last and is read when the generation prompt is emitted. loop_messages is the list iterated by the message loop; a leading system/developer message is removed from it once rendered in the system turn. -#}{%- set ns = namespace(prev_message_type=None) -%}
154
+ {%- set loop_messages = messages -%}
155
+ {{ bos_token }}
156
+ {#- Open a system turn when thinking is enabled, tools are supplied, or the first message has role system or developer -#}
157
+ {%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
158
+ {{- '<|turn>system\n' -}}
159
+
160
+ {#- The <|think|> token is written first, before any system text or tool declarations -#}
161
+ {%- if enable_thinking is defined and enable_thinking -%}
162
+ {{- '<|think|>' -}}
163
+ {%- set ns.prev_message_type = 'think' -%}
164
+ {%- endif -%}
165
+
166
+ {#- The system/developer content is rendered here, so it is dropped from loop_messages -#}{%- if messages[0]['role'] in ['system', 'developer'] -%}
167
+ {{- messages[0]['content'] | trim -}}
168
+ {%- set loop_messages = messages[1:] -%}
169
+ {%- endif -%}
170
+
171
+ {#- Each tool declaration is wrapped in <|tool>...<tool|> -#}{%- if tools -%}
172
+ {%- for tool in tools %}
173
+ {{- '<|tool>' -}}
174
+ {{- format_function_declaration(tool) | trim -}}
175
+ {{- '<tool|>' -}}
176
+ {%- endfor %}
177
+ {%- set ns.prev_message_type = 'tool' -%}
178
+ {%- endif -%}
179
+
180
+ {{- '<turn|>\n' -}}
181
+ {%- endif %}
182
+
183
+ {#- Render each message in loop_messages as one turn: `<|turn>ROLE`, then tool calls, tool responses and content. The role 'assistant' is emitted as 'model'. -#}
184
+ {%- for message in loop_messages -%}
185
+ {#- Reset for every message, so after the loop the value reflects the last message only -#}{%- set ns.prev_message_type = None -%}
186
+ {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
187
+ {{- '<|turn>' + role + '\n' }}
188
+
189
+ {#- Tool calls: `<|tool_call>call:NAME{...}<tool_call|>`; mapping arguments are sorted by key with unquoted keys, string arguments are inserted verbatim -#}{%- if message['tool_calls'] -%}
190
+ {%- for tool_call in message['tool_calls'] -%}
191
+ {%- set function = tool_call['function'] -%}
192
+ {{- '<|tool_call>call:' + function['name'] + '{' -}}
193
+ {%- if function['arguments'] is mapping -%}
194
+ {%- set ns_args = namespace(found_first=false) -%}
195
+ {%- for key, value in function['arguments'] | dictsort -%}
196
+ {%- if ns_args.found_first %},{% endif -%}
197
+ {%- set ns_args.found_first = true -%}
198
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
199
+ {%- endfor -%}
200
+ {%- elif function['arguments'] is string -%}
201
+ {{- function['arguments'] -}}
202
+ {%- endif -%}
203
+ {{- '}<tool_call|>' -}}
204
+ {%- endfor -%}
205
+ {%- set ns.prev_message_type = 'tool_call' -%}
206
+ {%- endif -%}
207
+
208
+ {%- if message['tool_responses'] -%}
209
+ {#- Each tool response is wrapped in <|tool_response>...<tool_response|>. A mapping response is expanded key by key; any other response is emitted under a single `value` key. The name falls back to 'unknown' when undefined. -#}
210
+ {%- for tool_response in message['tool_responses'] -%}
211
+ {{- '<|tool_response>' -}}
212
+ {%- if tool_response['response'] is mapping -%}
213
+ {{- 'response:' + tool_response['name'] | default('unknown') + '{' -}}
214
+ {%- for key, value in tool_response['response'] | dictsort -%}
215
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
216
+ {%- if not loop.last %},{% endif -%}
217
+ {%- endfor -%}
218
+ {{- '}' -}}
219
+ {%- else -%}
220
+ {{- 'response:' + tool_response['name'] | default('unknown') + '{value:' + format_argument(tool_response['response'], escape_keys=False) + '}' -}}
221
+ {%- endif -%}
222
+ {{- '<tool_response|>' -}}
223
+ {%- endfor -%}
224
+ {%- set ns.prev_message_type = 'tool_response' -%}
225
+ {%- endif -%}
226
+
227
+ {#- Content: model text goes through strip_thinking, other text is trimmed; image, audio and video items become placeholder tokens -#}{%- if message['content'] is string -%}
228
+ {%- if role == 'model' -%}
229
+ {{- strip_thinking(message['content']) -}}
230
+ {%- else -%}
231
+ {{- message['content'] | trim -}}
232
+ {%- endif -%}
233
+ {%- elif message['content'] is sequence -%}
234
+ {%- for item in message['content'] -%}
235
+ {%- if item['type'] == 'text' -%}
236
+ {%- if role == 'model' -%}
237
+ {{- strip_thinking(item['text']) -}}
238
+ {%- else -%}
239
+ {{- item['text'] | trim -}}
240
+ {%- endif -%}
241
+ {%- elif item['type'] == 'image' -%}
242
+ {{- '\n\n<|image|>\n\n' -}}
243
+ {%- set ns.prev_message_type = 'image' -%}
244
+ {%- elif item['type'] == 'audio' -%}
245
+ {{- '<|audio|>' -}}
246
+ {%- set ns.prev_message_type = 'audio' -%}
247
+ {%- elif item['type'] == 'video' -%}
248
+ {{- '\n\n<|video|>\n\n' -}}
249
+ {%- set ns.prev_message_type = 'video' -%}
250
+ {%- endif -%}
251
+ {%- endfor -%}
252
+ {%- endif -%}
253
+
254
+ {#- The turn is left open (no <turn|>) when the message carries tool responses and no content -#}{%- if not (message['tool_responses'] and not message['content']) -%}
255
+ {{- '<turn|>\n' -}}
256
+ {%- endif -%}
257
+ {%- endfor -%}
258
+
259
+ {#- Generation prompt: a model turn is opened unless ns.prev_message_type is 'tool_response'. When enable_thinking is undefined or false, an empty thought channel is also emitted. -#}{%- if add_generation_prompt -%}
260
+ {%- if ns.prev_message_type != 'tool_response' -%}
261
+ {{- '<|turn>model\n' -}}
262
+ {%- endif -%}
263
+ {%- if not enable_thinking | default(false) -%}
264
+ {{- '<|channel>thought\n<channel|>' -}}
265
+ {%- endif -%}
266
+ {%- endif -%}
config.json ADDED
@@ -0,0 +1,1900 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Gemma4ForConditionalGeneration"
4
+ ],
5
+ "audio_config": null,
6
+ "audio_token_id": 258881,
7
+ "boa_token_id": 256000,
8
+ "boi_token_id": 255999,
9
+ "dtype": "bfloat16",
10
+ "eoa_token_id": 258883,
11
+ "eoa_token_index": 258883,
12
+ "eoi_token_id": 258882,
13
+ "eos_token_id": [
14
+ 1,
15
+ 106,
16
+ 50
17
+ ],
18
+ "image_token_id": 258880,
19
+ "initializer_range": 0.02,
20
+ "model_type": "gemma4",
21
+ "quantization": {
22
+ "bits": 8,
23
+ "group_size": 64,
24
+ "language_model.embed_tokens": {
25
+ "bits": 6,
26
+ "group_size": 64
27
+ },
28
+ "language_model.layers.0.mlp.down_proj": {
29
+ "bits": 3,
30
+ "group_size": 64
31
+ },
32
+ "language_model.layers.0.mlp.gate_proj": {
33
+ "bits": 3,
34
+ "group_size": 64
35
+ },
36
+ "language_model.layers.0.mlp.up_proj": {
37
+ "bits": 3,
38
+ "group_size": 64
39
+ },
40
+ "language_model.layers.0.sparse_moe.router.proj": {
41
+ "bits": 3,
42
+ "group_size": 64
43
+ },
44
+ "language_model.layers.1.mlp.down_proj": {
45
+ "bits": 4,
46
+ "group_size": 64
47
+ },
48
+ "language_model.layers.1.mlp.gate_proj": {
49
+ "bits": 4,
50
+ "group_size": 64
51
+ },
52
+ "language_model.layers.1.mlp.up_proj": {
53
+ "bits": 4,
54
+ "group_size": 64
55
+ },
56
+ "language_model.layers.1.sparse_moe.router.proj": {
57
+ "bits": 3,
58
+ "group_size": 64
59
+ },
60
+ "language_model.layers.10.mlp.down_proj": {
61
+ "bits": 3,
62
+ "group_size": 64
63
+ },
64
+ "language_model.layers.10.mlp.gate_proj": {
65
+ "bits": 3,
66
+ "group_size": 64
67
+ },
68
+ "language_model.layers.10.mlp.up_proj": {
69
+ "bits": 3,
70
+ "group_size": 64
71
+ },
72
+ "language_model.layers.10.sparse_moe.router.proj": {
73
+ "bits": 3,
74
+ "group_size": 64
75
+ },
76
+ "language_model.layers.11.mlp.down_proj": {
77
+ "bits": 3,
78
+ "group_size": 64
79
+ },
80
+ "language_model.layers.11.mlp.gate_proj": {
81
+ "bits": 3,
82
+ "group_size": 64
83
+ },
84
+ "language_model.layers.11.mlp.up_proj": {
85
+ "bits": 3,
86
+ "group_size": 64
87
+ },
88
+ "language_model.layers.11.sparse_moe.router.proj": {
89
+ "bits": 3,
90
+ "group_size": 64
91
+ },
92
+ "language_model.layers.12.mlp.down_proj": {
93
+ "bits": 3,
94
+ "group_size": 64
95
+ },
96
+ "language_model.layers.12.mlp.gate_proj": {
97
+ "bits": 3,
98
+ "group_size": 64
99
+ },
100
+ "language_model.layers.12.mlp.up_proj": {
101
+ "bits": 3,
102
+ "group_size": 64
103
+ },
104
+ "language_model.layers.12.sparse_moe.router.proj": {
105
+ "bits": 3,
106
+ "group_size": 64
107
+ },
108
+ "language_model.layers.13.mlp.down_proj": {
109
+ "bits": 3,
110
+ "group_size": 64
111
+ },
112
+ "language_model.layers.13.mlp.gate_proj": {
113
+ "bits": 3,
114
+ "group_size": 64
115
+ },
116
+ "language_model.layers.13.mlp.up_proj": {
117
+ "bits": 3,
118
+ "group_size": 64
119
+ },
120
+ "language_model.layers.13.sparse_moe.router.proj": {
121
+ "bits": 3,
122
+ "group_size": 64
123
+ },
124
+ "language_model.layers.14.mlp.down_proj": {
125
+ "bits": 3,
126
+ "group_size": 64
127
+ },
128
+ "language_model.layers.14.mlp.gate_proj": {
129
+ "bits": 3,
130
+ "group_size": 64
131
+ },
132
+ "language_model.layers.14.mlp.up_proj": {
133
+ "bits": 3,
134
+ "group_size": 64
135
+ },
136
+ "language_model.layers.14.sparse_moe.router.proj": {
137
+ "bits": 3,
138
+ "group_size": 64
139
+ },
140
+ "language_model.layers.15.mlp.down_proj": {
141
+ "bits": 3,
142
+ "group_size": 64
143
+ },
144
+ "language_model.layers.15.mlp.gate_proj": {
145
+ "bits": 3,
146
+ "group_size": 64
147
+ },
148
+ "language_model.layers.15.mlp.up_proj": {
149
+ "bits": 3,
150
+ "group_size": 64
151
+ },
152
+ "language_model.layers.15.sparse_moe.router.proj": {
153
+ "bits": 3,
154
+ "group_size": 64
155
+ },
156
+ "language_model.layers.16.mlp.down_proj": {
157
+ "bits": 3,
158
+ "group_size": 64
159
+ },
160
+ "language_model.layers.16.mlp.gate_proj": {
161
+ "bits": 3,
162
+ "group_size": 64
163
+ },
164
+ "language_model.layers.16.mlp.up_proj": {
165
+ "bits": 3,
166
+ "group_size": 64
167
+ },
168
+ "language_model.layers.16.sparse_moe.router.proj": {
169
+ "bits": 3,
170
+ "group_size": 64
171
+ },
172
+ "language_model.layers.17.mlp.down_proj": {
173
+ "bits": 4,
174
+ "group_size": 64
175
+ },
176
+ "language_model.layers.17.mlp.gate_proj": {
177
+ "bits": 4,
178
+ "group_size": 64
179
+ },
180
+ "language_model.layers.17.mlp.up_proj": {
181
+ "bits": 4,
182
+ "group_size": 64
183
+ },
184
+ "language_model.layers.17.sparse_moe.router.proj": {
185
+ "bits": 3,
186
+ "group_size": 64
187
+ },
188
+ "language_model.layers.18.mlp.down_proj": {
189
+ "bits": 4,
190
+ "group_size": 64
191
+ },
192
+ "language_model.layers.18.mlp.gate_proj": {
193
+ "bits": 4,
194
+ "group_size": 64
195
+ },
196
+ "language_model.layers.18.mlp.up_proj": {
197
+ "bits": 4,
198
+ "group_size": 64
199
+ },
200
+ "language_model.layers.18.sparse_moe.router.proj": {
201
+ "bits": 3,
202
+ "group_size": 64
203
+ },
204
+ "language_model.layers.19.mlp.down_proj": {
205
+ "bits": 4,
206
+ "group_size": 64
207
+ },
208
+ "language_model.layers.19.mlp.gate_proj": {
209
+ "bits": 4,
210
+ "group_size": 64
211
+ },
212
+ "language_model.layers.19.mlp.up_proj": {
213
+ "bits": 4,
214
+ "group_size": 64
215
+ },
216
+ "language_model.layers.19.sparse_moe.router.proj": {
217
+ "bits": 3,
218
+ "group_size": 64
219
+ },
220
+ "language_model.layers.2.mlp.down_proj": {
221
+ "bits": 4,
222
+ "group_size": 64
223
+ },
224
+ "language_model.layers.2.mlp.gate_proj": {
225
+ "bits": 4,
226
+ "group_size": 64
227
+ },
228
+ "language_model.layers.2.mlp.up_proj": {
229
+ "bits": 4,
230
+ "group_size": 64
231
+ },
232
+ "language_model.layers.2.sparse_moe.router.proj": {
233
+ "bits": 3,
234
+ "group_size": 64
235
+ },
236
+ "language_model.layers.20.mlp.down_proj": {
237
+ "bits": 3,
238
+ "group_size": 64
239
+ },
240
+ "language_model.layers.20.mlp.gate_proj": {
241
+ "bits": 3,
242
+ "group_size": 64
243
+ },
244
+ "language_model.layers.20.mlp.up_proj": {
245
+ "bits": 3,
246
+ "group_size": 64
247
+ },
248
+ "language_model.layers.20.sparse_moe.router.proj": {
249
+ "bits": 3,
250
+ "group_size": 64
251
+ },
252
+ "language_model.layers.21.mlp.down_proj": {
253
+ "bits": 3,
254
+ "group_size": 64
255
+ },
256
+ "language_model.layers.21.mlp.gate_proj": {
257
+ "bits": 3,
258
+ "group_size": 64
259
+ },
260
+ "language_model.layers.21.mlp.up_proj": {
261
+ "bits": 3,
262
+ "group_size": 64
263
+ },
264
+ "language_model.layers.21.sparse_moe.router.proj": {
265
+ "bits": 3,
266
+ "group_size": 64
267
+ },
268
+ "language_model.layers.22.mlp.down_proj": {
269
+ "bits": 3,
270
+ "group_size": 64
271
+ },
272
+ "language_model.layers.22.mlp.gate_proj": {
273
+ "bits": 3,
274
+ "group_size": 64
275
+ },
276
+ "language_model.layers.22.mlp.up_proj": {
277
+ "bits": 3,
278
+ "group_size": 64
279
+ },
280
+ "language_model.layers.22.sparse_moe.router.proj": {
281
+ "bits": 3,
282
+ "group_size": 64
283
+ },
284
+ "language_model.layers.23.mlp.down_proj": {
285
+ "bits": 3,
286
+ "group_size": 64
287
+ },
288
+ "language_model.layers.23.mlp.gate_proj": {
289
+ "bits": 3,
290
+ "group_size": 64
291
+ },
292
+ "language_model.layers.23.mlp.up_proj": {
293
+ "bits": 3,
294
+ "group_size": 64
295
+ },
296
+ "language_model.layers.23.sparse_moe.router.proj": {
297
+ "bits": 3,
298
+ "group_size": 64
299
+ },
300
+ "language_model.layers.24.mlp.down_proj": {
301
+ "bits": 3,
302
+ "group_size": 64
303
+ },
304
+ "language_model.layers.24.mlp.gate_proj": {
305
+ "bits": 3,
306
+ "group_size": 64
307
+ },
308
+ "language_model.layers.24.mlp.up_proj": {
309
+ "bits": 3,
310
+ "group_size": 64
311
+ },
312
+ "language_model.layers.24.sparse_moe.router.proj": {
313
+ "bits": 3,
314
+ "group_size": 64
315
+ },
316
+ "language_model.layers.25.mlp.down_proj": {
317
+ "bits": 3,
318
+ "group_size": 64
319
+ },
320
+ "language_model.layers.25.mlp.gate_proj": {
321
+ "bits": 3,
322
+ "group_size": 64
323
+ },
324
+ "language_model.layers.25.mlp.up_proj": {
325
+ "bits": 3,
326
+ "group_size": 64
327
+ },
328
+ "language_model.layers.25.sparse_moe.router.proj": {
329
+ "bits": 3,
330
+ "group_size": 64
331
+ },
332
+ "language_model.layers.26.mlp.down_proj": {
333
+ "bits": 4,
334
+ "group_size": 64
335
+ },
336
+ "language_model.layers.26.mlp.gate_proj": {
337
+ "bits": 4,
338
+ "group_size": 64
339
+ },
340
+ "language_model.layers.26.mlp.up_proj": {
341
+ "bits": 4,
342
+ "group_size": 64
343
+ },
344
+ "language_model.layers.26.sparse_moe.router.proj": {
345
+ "bits": 3,
346
+ "group_size": 64
347
+ },
348
+ "language_model.layers.27.mlp.down_proj": {
349
+ "bits": 4,
350
+ "group_size": 64
351
+ },
352
+ "language_model.layers.27.mlp.gate_proj": {
353
+ "bits": 4,
354
+ "group_size": 64
355
+ },
356
+ "language_model.layers.27.mlp.up_proj": {
357
+ "bits": 4,
358
+ "group_size": 64
359
+ },
360
+ "language_model.layers.27.sparse_moe.router.proj": {
361
+ "bits": 3,
362
+ "group_size": 64
363
+ },
364
+ "language_model.layers.28.mlp.down_proj": {
365
+ "bits": 4,
366
+ "group_size": 64
367
+ },
368
+ "language_model.layers.28.mlp.gate_proj": {
369
+ "bits": 4,
370
+ "group_size": 64
371
+ },
372
+ "language_model.layers.28.mlp.up_proj": {
373
+ "bits": 4,
374
+ "group_size": 64
375
+ },
376
+ "language_model.layers.28.sparse_moe.router.proj": {
377
+ "bits": 3,
378
+ "group_size": 64
379
+ },
380
+ "language_model.layers.29.mlp.down_proj": {
381
+ "bits": 5,
382
+ "group_size": 64
383
+ },
384
+ "language_model.layers.29.mlp.gate_proj": {
385
+ "bits": 5,
386
+ "group_size": 64
387
+ },
388
+ "language_model.layers.29.mlp.up_proj": {
389
+ "bits": 5,
390
+ "group_size": 64
391
+ },
392
+ "language_model.layers.29.sparse_moe.router.proj": {
393
+ "bits": 3,
394
+ "group_size": 64
395
+ },
396
+ "language_model.layers.3.mlp.down_proj": {
397
+ "bits": 4,
398
+ "group_size": 64
399
+ },
400
+ "language_model.layers.3.mlp.gate_proj": {
401
+ "bits": 4,
402
+ "group_size": 64
403
+ },
404
+ "language_model.layers.3.mlp.up_proj": {
405
+ "bits": 4,
406
+ "group_size": 64
407
+ },
408
+ "language_model.layers.3.sparse_moe.router.proj": {
409
+ "bits": 3,
410
+ "group_size": 64
411
+ },
412
+ "language_model.layers.4.mlp.down_proj": {
413
+ "bits": 3,
414
+ "group_size": 64
415
+ },
416
+ "language_model.layers.4.mlp.gate_proj": {
417
+ "bits": 3,
418
+ "group_size": 64
419
+ },
420
+ "language_model.layers.4.mlp.up_proj": {
421
+ "bits": 3,
422
+ "group_size": 64
423
+ },
424
+ "language_model.layers.4.sparse_moe.router.proj": {
425
+ "bits": 3,
426
+ "group_size": 64
427
+ },
428
+ "language_model.layers.5.mlp.down_proj": {
429
+ "bits": 3,
430
+ "group_size": 64
431
+ },
432
+ "language_model.layers.5.mlp.gate_proj": {
433
+ "bits": 3,
434
+ "group_size": 64
435
+ },
436
+ "language_model.layers.5.mlp.up_proj": {
437
+ "bits": 3,
438
+ "group_size": 64
439
+ },
440
+ "language_model.layers.5.sparse_moe.router.proj": {
441
+ "bits": 3,
442
+ "group_size": 64
443
+ },
444
+ "language_model.layers.6.mlp.down_proj": {
445
+ "bits": 3,
446
+ "group_size": 64
447
+ },
448
+ "language_model.layers.6.mlp.gate_proj": {
449
+ "bits": 3,
450
+ "group_size": 64
451
+ },
452
+ "language_model.layers.6.mlp.up_proj": {
453
+ "bits": 3,
454
+ "group_size": 64
455
+ },
456
+ "language_model.layers.6.sparse_moe.router.proj": {
457
+ "bits": 3,
458
+ "group_size": 64
459
+ },
460
+ "language_model.layers.7.mlp.down_proj": {
461
+ "bits": 3,
462
+ "group_size": 64
463
+ },
464
+ "language_model.layers.7.mlp.gate_proj": {
465
+ "bits": 3,
466
+ "group_size": 64
467
+ },
468
+ "language_model.layers.7.mlp.up_proj": {
469
+ "bits": 3,
470
+ "group_size": 64
471
+ },
472
+ "language_model.layers.7.sparse_moe.router.proj": {
473
+ "bits": 3,
474
+ "group_size": 64
475
+ },
476
+ "language_model.layers.8.mlp.down_proj": {
477
+ "bits": 3,
478
+ "group_size": 64
479
+ },
480
+ "language_model.layers.8.mlp.gate_proj": {
481
+ "bits": 3,
482
+ "group_size": 64
483
+ },
484
+ "language_model.layers.8.mlp.up_proj": {
485
+ "bits": 3,
486
+ "group_size": 64
487
+ },
488
+ "language_model.layers.8.sparse_moe.router.proj": {
489
+ "bits": 3,
490
+ "group_size": 64
491
+ },
492
+ "language_model.layers.9.mlp.down_proj": {
493
+ "bits": 3,
494
+ "group_size": 64
495
+ },
496
+ "language_model.layers.9.mlp.gate_proj": {
497
+ "bits": 3,
498
+ "group_size": 64
499
+ },
500
+ "language_model.layers.9.mlp.up_proj": {
501
+ "bits": 3,
502
+ "group_size": 64
503
+ },
504
+ "language_model.layers.9.sparse_moe.router.proj": {
505
+ "bits": 3,
506
+ "group_size": 64
507
+ }
508
+ },
509
+ "quantization_config": {
510
+ "bits": 3,
511
+ "group_size": 64,
512
+ "language_model.model.embed_tokens": {
513
+ "bits": 6,
514
+ "group_size": 64
515
+ },
516
+ "language_model.model.layers.0.experts.switch_glu.down_proj": {
517
+ "bits": 3,
518
+ "group_size": 64
519
+ },
520
+ "language_model.model.layers.0.experts.switch_glu.gate_proj": {
521
+ "bits": 3,
522
+ "group_size": 64
523
+ },
524
+ "language_model.model.layers.0.experts.switch_glu.up_proj": {
525
+ "bits": 3,
526
+ "group_size": 64
527
+ },
528
+ "language_model.model.layers.0.mlp.down_proj": {
529
+ "bits": 3,
530
+ "group_size": 64
531
+ },
532
+ "language_model.model.layers.0.mlp.gate_proj": {
533
+ "bits": 3,
534
+ "group_size": 64
535
+ },
536
+ "language_model.model.layers.0.mlp.up_proj": {
537
+ "bits": 3,
538
+ "group_size": 64
539
+ },
540
+ "language_model.model.layers.0.router.proj": {
541
+ "bits": 3,
542
+ "group_size": 64
543
+ },
544
+ "language_model.model.layers.0.self_attn.k_proj": {
545
+ "bits": 8,
546
+ "group_size": 64
547
+ },
548
+ "language_model.model.layers.0.self_attn.o_proj": {
549
+ "bits": 8,
550
+ "group_size": 64
551
+ },
552
+ "language_model.model.layers.0.self_attn.q_proj": {
553
+ "bits": 8,
554
+ "group_size": 64
555
+ },
556
+ "language_model.model.layers.0.self_attn.v_proj": {
557
+ "bits": 8,
558
+ "group_size": 64
559
+ },
560
+ "language_model.model.layers.1.experts.switch_glu.down_proj": {
561
+ "bits": 3,
562
+ "group_size": 64
563
+ },
564
+ "language_model.model.layers.1.experts.switch_glu.gate_proj": {
565
+ "bits": 3,
566
+ "group_size": 64
567
+ },
568
+ "language_model.model.layers.1.experts.switch_glu.up_proj": {
569
+ "bits": 3,
570
+ "group_size": 64
571
+ },
572
+ "language_model.model.layers.1.mlp.down_proj": {
573
+ "bits": 4,
574
+ "group_size": 64
575
+ },
576
+ "language_model.model.layers.1.mlp.gate_proj": {
577
+ "bits": 4,
578
+ "group_size": 64
579
+ },
580
+ "language_model.model.layers.1.mlp.up_proj": {
581
+ "bits": 4,
582
+ "group_size": 64
583
+ },
584
+ "language_model.model.layers.1.router.proj": {
585
+ "bits": 3,
586
+ "group_size": 64
587
+ },
588
+ "language_model.model.layers.1.self_attn.k_proj": {
589
+ "bits": 8,
590
+ "group_size": 64
591
+ },
592
+ "language_model.model.layers.1.self_attn.o_proj": {
593
+ "bits": 8,
594
+ "group_size": 64
595
+ },
596
+ "language_model.model.layers.1.self_attn.q_proj": {
597
+ "bits": 8,
598
+ "group_size": 64
599
+ },
600
+ "language_model.model.layers.1.self_attn.v_proj": {
601
+ "bits": 8,
602
+ "group_size": 64
603
+ },
604
+ "language_model.model.layers.10.experts.switch_glu.down_proj": {
605
+ "bits": 3,
606
+ "group_size": 64
607
+ },
608
+ "language_model.model.layers.10.experts.switch_glu.gate_proj": {
609
+ "bits": 3,
610
+ "group_size": 64
611
+ },
612
+ "language_model.model.layers.10.experts.switch_glu.up_proj": {
613
+ "bits": 3,
614
+ "group_size": 64
615
+ },
616
+ "language_model.model.layers.10.mlp.down_proj": {
617
+ "bits": 3,
618
+ "group_size": 64
619
+ },
620
+ "language_model.model.layers.10.mlp.gate_proj": {
621
+ "bits": 3,
622
+ "group_size": 64
623
+ },
624
+ "language_model.model.layers.10.mlp.up_proj": {
625
+ "bits": 3,
626
+ "group_size": 64
627
+ },
628
+ "language_model.model.layers.10.router.proj": {
629
+ "bits": 3,
630
+ "group_size": 64
631
+ },
632
+ "language_model.model.layers.10.self_attn.k_proj": {
633
+ "bits": 8,
634
+ "group_size": 64
635
+ },
636
+ "language_model.model.layers.10.self_attn.o_proj": {
637
+ "bits": 8,
638
+ "group_size": 64
639
+ },
640
+ "language_model.model.layers.10.self_attn.q_proj": {
641
+ "bits": 8,
642
+ "group_size": 64
643
+ },
644
+ "language_model.model.layers.10.self_attn.v_proj": {
645
+ "bits": 8,
646
+ "group_size": 64
647
+ },
648
+ "language_model.model.layers.11.experts.switch_glu.down_proj": {
649
+ "bits": 3,
650
+ "group_size": 64
651
+ },
652
+ "language_model.model.layers.11.experts.switch_glu.gate_proj": {
653
+ "bits": 3,
654
+ "group_size": 64
655
+ },
656
+ "language_model.model.layers.11.experts.switch_glu.up_proj": {
657
+ "bits": 3,
658
+ "group_size": 64
659
+ },
660
+ "language_model.model.layers.11.mlp.down_proj": {
661
+ "bits": 3,
662
+ "group_size": 64
663
+ },
664
+ "language_model.model.layers.11.mlp.gate_proj": {
665
+ "bits": 3,
666
+ "group_size": 64
667
+ },
668
+ "language_model.model.layers.11.mlp.up_proj": {
669
+ "bits": 3,
670
+ "group_size": 64
671
+ },
672
+ "language_model.model.layers.11.router.proj": {
673
+ "bits": 3,
674
+ "group_size": 64
675
+ },
676
+ "language_model.model.layers.11.self_attn.k_proj": {
677
+ "bits": 8,
678
+ "group_size": 64
679
+ },
680
+ "language_model.model.layers.11.self_attn.o_proj": {
681
+ "bits": 8,
682
+ "group_size": 64
683
+ },
684
+ "language_model.model.layers.11.self_attn.q_proj": {
685
+ "bits": 8,
686
+ "group_size": 64
687
+ },
688
+ "language_model.model.layers.12.experts.switch_glu.down_proj": {
689
+ "bits": 3,
690
+ "group_size": 64
691
+ },
692
+ "language_model.model.layers.12.experts.switch_glu.gate_proj": {
693
+ "bits": 3,
694
+ "group_size": 64
695
+ },
696
+ "language_model.model.layers.12.experts.switch_glu.up_proj": {
697
+ "bits": 3,
698
+ "group_size": 64
699
+ },
700
+ "language_model.model.layers.12.mlp.down_proj": {
701
+ "bits": 3,
702
+ "group_size": 64
703
+ },
704
+ "language_model.model.layers.12.mlp.gate_proj": {
705
+ "bits": 3,
706
+ "group_size": 64
707
+ },
708
+ "language_model.model.layers.12.mlp.up_proj": {
709
+ "bits": 3,
710
+ "group_size": 64
711
+ },
712
+ "language_model.model.layers.12.router.proj": {
713
+ "bits": 3,
714
+ "group_size": 64
715
+ },
716
+ "language_model.model.layers.12.self_attn.k_proj": {
717
+ "bits": 8,
718
+ "group_size": 64
719
+ },
720
+ "language_model.model.layers.12.self_attn.o_proj": {
721
+ "bits": 8,
722
+ "group_size": 64
723
+ },
724
+ "language_model.model.layers.12.self_attn.q_proj": {
725
+ "bits": 8,
726
+ "group_size": 64
727
+ },
728
+ "language_model.model.layers.12.self_attn.v_proj": {
729
+ "bits": 8,
730
+ "group_size": 64
731
+ },
732
+ "language_model.model.layers.13.experts.switch_glu.down_proj": {
733
+ "bits": 3,
734
+ "group_size": 64
735
+ },
736
+ "language_model.model.layers.13.experts.switch_glu.gate_proj": {
737
+ "bits": 3,
738
+ "group_size": 64
739
+ },
740
+ "language_model.model.layers.13.experts.switch_glu.up_proj": {
741
+ "bits": 3,
742
+ "group_size": 64
743
+ },
744
+ "language_model.model.layers.13.mlp.down_proj": {
745
+ "bits": 3,
746
+ "group_size": 64
747
+ },
748
+ "language_model.model.layers.13.mlp.gate_proj": {
749
+ "bits": 3,
750
+ "group_size": 64
751
+ },
752
+ "language_model.model.layers.13.mlp.up_proj": {
753
+ "bits": 3,
754
+ "group_size": 64
755
+ },
756
+ "language_model.model.layers.13.router.proj": {
757
+ "bits": 3,
758
+ "group_size": 64
759
+ },
760
+ "language_model.model.layers.13.self_attn.k_proj": {
761
+ "bits": 8,
762
+ "group_size": 64
763
+ },
764
+ "language_model.model.layers.13.self_attn.o_proj": {
765
+ "bits": 8,
766
+ "group_size": 64
767
+ },
768
+ "language_model.model.layers.13.self_attn.q_proj": {
769
+ "bits": 8,
770
+ "group_size": 64
771
+ },
772
+ "language_model.model.layers.13.self_attn.v_proj": {
773
+ "bits": 8,
774
+ "group_size": 64
775
+ },
776
+ "language_model.model.layers.14.experts.switch_glu.down_proj": {
777
+ "bits": 3,
778
+ "group_size": 64
779
+ },
780
+ "language_model.model.layers.14.experts.switch_glu.gate_proj": {
781
+ "bits": 3,
782
+ "group_size": 64
783
+ },
784
+ "language_model.model.layers.14.experts.switch_glu.up_proj": {
785
+ "bits": 3,
786
+ "group_size": 64
787
+ },
788
+ "language_model.model.layers.14.mlp.down_proj": {
789
+ "bits": 3,
790
+ "group_size": 64
791
+ },
792
+ "language_model.model.layers.14.mlp.gate_proj": {
793
+ "bits": 3,
794
+ "group_size": 64
795
+ },
796
+ "language_model.model.layers.14.mlp.up_proj": {
797
+ "bits": 3,
798
+ "group_size": 64
799
+ },
800
+ "language_model.model.layers.14.router.proj": {
801
+ "bits": 3,
802
+ "group_size": 64
803
+ },
804
+ "language_model.model.layers.14.self_attn.k_proj": {
805
+ "bits": 8,
806
+ "group_size": 64
807
+ },
808
+ "language_model.model.layers.14.self_attn.o_proj": {
809
+ "bits": 8,
810
+ "group_size": 64
811
+ },
812
+ "language_model.model.layers.14.self_attn.q_proj": {
813
+ "bits": 8,
814
+ "group_size": 64
815
+ },
816
+ "language_model.model.layers.14.self_attn.v_proj": {
817
+ "bits": 8,
818
+ "group_size": 64
819
+ },
820
+ "language_model.model.layers.15.experts.switch_glu.down_proj": {
821
+ "bits": 3,
822
+ "group_size": 64
823
+ },
824
+ "language_model.model.layers.15.experts.switch_glu.gate_proj": {
825
+ "bits": 3,
826
+ "group_size": 64
827
+ },
828
+ "language_model.model.layers.15.experts.switch_glu.up_proj": {
829
+ "bits": 3,
830
+ "group_size": 64
831
+ },
832
+ "language_model.model.layers.15.mlp.down_proj": {
833
+ "bits": 3,
834
+ "group_size": 64
835
+ },
836
+ "language_model.model.layers.15.mlp.gate_proj": {
837
+ "bits": 3,
838
+ "group_size": 64
839
+ },
840
+ "language_model.model.layers.15.mlp.up_proj": {
841
+ "bits": 3,
842
+ "group_size": 64
843
+ },
844
+ "language_model.model.layers.15.router.proj": {
845
+ "bits": 3,
846
+ "group_size": 64
847
+ },
848
+ "language_model.model.layers.15.self_attn.k_proj": {
849
+ "bits": 8,
850
+ "group_size": 64
851
+ },
852
+ "language_model.model.layers.15.self_attn.o_proj": {
853
+ "bits": 8,
854
+ "group_size": 64
855
+ },
856
+ "language_model.model.layers.15.self_attn.q_proj": {
857
+ "bits": 8,
858
+ "group_size": 64
859
+ },
860
+ "language_model.model.layers.15.self_attn.v_proj": {
861
+ "bits": 8,
862
+ "group_size": 64
863
+ },
864
+ "language_model.model.layers.16.experts.switch_glu.down_proj": {
865
+ "bits": 3,
866
+ "group_size": 64
867
+ },
868
+ "language_model.model.layers.16.experts.switch_glu.gate_proj": {
869
+ "bits": 3,
870
+ "group_size": 64
871
+ },
872
+ "language_model.model.layers.16.experts.switch_glu.up_proj": {
873
+ "bits": 3,
874
+ "group_size": 64
875
+ },
876
+ "language_model.model.layers.16.mlp.down_proj": {
877
+ "bits": 3,
878
+ "group_size": 64
879
+ },
880
+ "language_model.model.layers.16.mlp.gate_proj": {
881
+ "bits": 3,
882
+ "group_size": 64
883
+ },
884
+ "language_model.model.layers.16.mlp.up_proj": {
885
+ "bits": 3,
886
+ "group_size": 64
887
+ },
888
+ "language_model.model.layers.16.router.proj": {
889
+ "bits": 3,
890
+ "group_size": 64
891
+ },
892
+ "language_model.model.layers.16.self_attn.k_proj": {
893
+ "bits": 8,
894
+ "group_size": 64
895
+ },
896
+ "language_model.model.layers.16.self_attn.o_proj": {
897
+ "bits": 8,
898
+ "group_size": 64
899
+ },
900
+ "language_model.model.layers.16.self_attn.q_proj": {
901
+ "bits": 8,
902
+ "group_size": 64
903
+ },
904
+ "language_model.model.layers.16.self_attn.v_proj": {
905
+ "bits": 8,
906
+ "group_size": 64
907
+ },
908
+ "language_model.model.layers.17.experts.switch_glu.down_proj": {
909
+ "bits": 3,
910
+ "group_size": 64
911
+ },
912
+ "language_model.model.layers.17.experts.switch_glu.gate_proj": {
913
+ "bits": 3,
914
+ "group_size": 64
915
+ },
916
+ "language_model.model.layers.17.experts.switch_glu.up_proj": {
917
+ "bits": 3,
918
+ "group_size": 64
919
+ },
920
+ "language_model.model.layers.17.mlp.down_proj": {
921
+ "bits": 4,
922
+ "group_size": 64
923
+ },
924
+ "language_model.model.layers.17.mlp.gate_proj": {
925
+ "bits": 4,
926
+ "group_size": 64
927
+ },
928
+ "language_model.model.layers.17.mlp.up_proj": {
929
+ "bits": 4,
930
+ "group_size": 64
931
+ },
932
+ "language_model.model.layers.17.router.proj": {
933
+ "bits": 3,
934
+ "group_size": 64
935
+ },
936
+ "language_model.model.layers.17.self_attn.k_proj": {
937
+ "bits": 8,
938
+ "group_size": 64
939
+ },
940
+ "language_model.model.layers.17.self_attn.o_proj": {
941
+ "bits": 8,
942
+ "group_size": 64
943
+ },
944
+ "language_model.model.layers.17.self_attn.q_proj": {
945
+ "bits": 8,
946
+ "group_size": 64
947
+ },
948
+ "language_model.model.layers.18.experts.switch_glu.down_proj": {
949
+ "bits": 3,
950
+ "group_size": 64
951
+ },
952
+ "language_model.model.layers.18.experts.switch_glu.gate_proj": {
953
+ "bits": 3,
954
+ "group_size": 64
955
+ },
956
+ "language_model.model.layers.18.experts.switch_glu.up_proj": {
957
+ "bits": 3,
958
+ "group_size": 64
959
+ },
960
+ "language_model.model.layers.18.mlp.down_proj": {
961
+ "bits": 4,
962
+ "group_size": 64
963
+ },
964
+ "language_model.model.layers.18.mlp.gate_proj": {
965
+ "bits": 4,
966
+ "group_size": 64
967
+ },
968
+ "language_model.model.layers.18.mlp.up_proj": {
969
+ "bits": 4,
970
+ "group_size": 64
971
+ },
972
+ "language_model.model.layers.18.router.proj": {
973
+ "bits": 3,
974
+ "group_size": 64
975
+ },
976
+ "language_model.model.layers.18.self_attn.k_proj": {
977
+ "bits": 8,
978
+ "group_size": 64
979
+ },
980
+ "language_model.model.layers.18.self_attn.o_proj": {
981
+ "bits": 8,
982
+ "group_size": 64
983
+ },
984
+ "language_model.model.layers.18.self_attn.q_proj": {
985
+ "bits": 8,
986
+ "group_size": 64
987
+ },
988
+ "language_model.model.layers.18.self_attn.v_proj": {
989
+ "bits": 8,
990
+ "group_size": 64
991
+ },
992
+ "language_model.model.layers.19.experts.switch_glu.down_proj": {
993
+ "bits": 3,
994
+ "group_size": 64
995
+ },
996
+ "language_model.model.layers.19.experts.switch_glu.gate_proj": {
997
+ "bits": 3,
998
+ "group_size": 64
999
+ },
1000
+ "language_model.model.layers.19.experts.switch_glu.up_proj": {
1001
+ "bits": 3,
1002
+ "group_size": 64
1003
+ },
1004
+ "language_model.model.layers.19.mlp.down_proj": {
1005
+ "bits": 4,
1006
+ "group_size": 64
1007
+ },
1008
+ "language_model.model.layers.19.mlp.gate_proj": {
1009
+ "bits": 4,
1010
+ "group_size": 64
1011
+ },
1012
+ "language_model.model.layers.19.mlp.up_proj": {
1013
+ "bits": 4,
1014
+ "group_size": 64
1015
+ },
1016
+ "language_model.model.layers.19.router.proj": {
1017
+ "bits": 3,
1018
+ "group_size": 64
1019
+ },
1020
+ "language_model.model.layers.19.self_attn.k_proj": {
1021
+ "bits": 8,
1022
+ "group_size": 64
1023
+ },
1024
+ "language_model.model.layers.19.self_attn.o_proj": {
1025
+ "bits": 8,
1026
+ "group_size": 64
1027
+ },
1028
+ "language_model.model.layers.19.self_attn.q_proj": {
1029
+ "bits": 8,
1030
+ "group_size": 64
1031
+ },
1032
+ "language_model.model.layers.19.self_attn.v_proj": {
1033
+ "bits": 8,
1034
+ "group_size": 64
1035
+ },
1036
+ "language_model.model.layers.2.experts.switch_glu.down_proj": {
1037
+ "bits": 3,
1038
+ "group_size": 64
1039
+ },
1040
+ "language_model.model.layers.2.experts.switch_glu.gate_proj": {
1041
+ "bits": 3,
1042
+ "group_size": 64
1043
+ },
1044
+ "language_model.model.layers.2.experts.switch_glu.up_proj": {
1045
+ "bits": 3,
1046
+ "group_size": 64
1047
+ },
1048
+ "language_model.model.layers.2.mlp.down_proj": {
1049
+ "bits": 4,
1050
+ "group_size": 64
1051
+ },
1052
+ "language_model.model.layers.2.mlp.gate_proj": {
1053
+ "bits": 4,
1054
+ "group_size": 64
1055
+ },
1056
+ "language_model.model.layers.2.mlp.up_proj": {
1057
+ "bits": 4,
1058
+ "group_size": 64
1059
+ },
1060
+ "language_model.model.layers.2.router.proj": {
1061
+ "bits": 3,
1062
+ "group_size": 64
1063
+ },
1064
+ "language_model.model.layers.2.self_attn.k_proj": {
1065
+ "bits": 8,
1066
+ "group_size": 64
1067
+ },
1068
+ "language_model.model.layers.2.self_attn.o_proj": {
1069
+ "bits": 8,
1070
+ "group_size": 64
1071
+ },
1072
+ "language_model.model.layers.2.self_attn.q_proj": {
1073
+ "bits": 8,
1074
+ "group_size": 64
1075
+ },
1076
+ "language_model.model.layers.2.self_attn.v_proj": {
1077
+ "bits": 8,
1078
+ "group_size": 64
1079
+ },
1080
+ "language_model.model.layers.20.experts.switch_glu.down_proj": {
1081
+ "bits": 3,
1082
+ "group_size": 64
1083
+ },
1084
+ "language_model.model.layers.20.experts.switch_glu.gate_proj": {
1085
+ "bits": 3,
1086
+ "group_size": 64
1087
+ },
1088
+ "language_model.model.layers.20.experts.switch_glu.up_proj": {
1089
+ "bits": 3,
1090
+ "group_size": 64
1091
+ },
1092
+ "language_model.model.layers.20.mlp.down_proj": {
1093
+ "bits": 3,
1094
+ "group_size": 64
1095
+ },
1096
+ "language_model.model.layers.20.mlp.gate_proj": {
1097
+ "bits": 3,
1098
+ "group_size": 64
1099
+ },
1100
+ "language_model.model.layers.20.mlp.up_proj": {
1101
+ "bits": 3,
1102
+ "group_size": 64
1103
+ },
1104
+ "language_model.model.layers.20.router.proj": {
1105
+ "bits": 3,
1106
+ "group_size": 64
1107
+ },
1108
+ "language_model.model.layers.20.self_attn.k_proj": {
1109
+ "bits": 8,
1110
+ "group_size": 64
1111
+ },
1112
+ "language_model.model.layers.20.self_attn.o_proj": {
1113
+ "bits": 8,
1114
+ "group_size": 64
1115
+ },
1116
+ "language_model.model.layers.20.self_attn.q_proj": {
1117
+ "bits": 8,
1118
+ "group_size": 64
1119
+ },
1120
+ "language_model.model.layers.20.self_attn.v_proj": {
1121
+ "bits": 8,
1122
+ "group_size": 64
1123
+ },
1124
+ "language_model.model.layers.21.experts.switch_glu.down_proj": {
1125
+ "bits": 3,
1126
+ "group_size": 64
1127
+ },
1128
+ "language_model.model.layers.21.experts.switch_glu.gate_proj": {
1129
+ "bits": 3,
1130
+ "group_size": 64
1131
+ },
1132
+ "language_model.model.layers.21.experts.switch_glu.up_proj": {
1133
+ "bits": 3,
1134
+ "group_size": 64
1135
+ },
1136
+ "language_model.model.layers.21.mlp.down_proj": {
1137
+ "bits": 3,
1138
+ "group_size": 64
1139
+ },
1140
+ "language_model.model.layers.21.mlp.gate_proj": {
1141
+ "bits": 3,
1142
+ "group_size": 64
1143
+ },
1144
+ "language_model.model.layers.21.mlp.up_proj": {
1145
+ "bits": 3,
1146
+ "group_size": 64
1147
+ },
1148
+ "language_model.model.layers.21.router.proj": {
1149
+ "bits": 3,
1150
+ "group_size": 64
1151
+ },
1152
+ "language_model.model.layers.21.self_attn.k_proj": {
1153
+ "bits": 8,
1154
+ "group_size": 64
1155
+ },
1156
+ "language_model.model.layers.21.self_attn.o_proj": {
1157
+ "bits": 8,
1158
+ "group_size": 64
1159
+ },
1160
+ "language_model.model.layers.21.self_attn.q_proj": {
1161
+ "bits": 8,
1162
+ "group_size": 64
1163
+ },
1164
+ "language_model.model.layers.21.self_attn.v_proj": {
1165
+ "bits": 8,
1166
+ "group_size": 64
1167
+ },
1168
+ "language_model.model.layers.22.experts.switch_glu.down_proj": {
1169
+ "bits": 3,
1170
+ "group_size": 64
1171
+ },
1172
+ "language_model.model.layers.22.experts.switch_glu.gate_proj": {
1173
+ "bits": 3,
1174
+ "group_size": 64
1175
+ },
1176
+ "language_model.model.layers.22.experts.switch_glu.up_proj": {
1177
+ "bits": 3,
1178
+ "group_size": 64
1179
+ },
1180
+ "language_model.model.layers.22.mlp.down_proj": {
1181
+ "bits": 3,
1182
+ "group_size": 64
1183
+ },
1184
+ "language_model.model.layers.22.mlp.gate_proj": {
1185
+ "bits": 3,
1186
+ "group_size": 64
1187
+ },
1188
+ "language_model.model.layers.22.mlp.up_proj": {
1189
+ "bits": 3,
1190
+ "group_size": 64
1191
+ },
1192
+ "language_model.model.layers.22.router.proj": {
1193
+ "bits": 3,
1194
+ "group_size": 64
1195
+ },
1196
+ "language_model.model.layers.22.self_attn.k_proj": {
1197
+ "bits": 8,
1198
+ "group_size": 64
1199
+ },
1200
+ "language_model.model.layers.22.self_attn.o_proj": {
1201
+ "bits": 8,
1202
+ "group_size": 64
1203
+ },
1204
+ "language_model.model.layers.22.self_attn.q_proj": {
1205
+ "bits": 8,
1206
+ "group_size": 64
1207
+ },
1208
+ "language_model.model.layers.22.self_attn.v_proj": {
1209
+ "bits": 8,
1210
+ "group_size": 64
1211
+ },
1212
+ "language_model.model.layers.23.experts.switch_glu.down_proj": {
1213
+ "bits": 3,
1214
+ "group_size": 64
1215
+ },
1216
+ "language_model.model.layers.23.experts.switch_glu.gate_proj": {
1217
+ "bits": 3,
1218
+ "group_size": 64
1219
+ },
1220
+ "language_model.model.layers.23.experts.switch_glu.up_proj": {
1221
+ "bits": 3,
1222
+ "group_size": 64
1223
+ },
1224
+ "language_model.model.layers.23.mlp.down_proj": {
1225
+ "bits": 3,
1226
+ "group_size": 64
1227
+ },
1228
+ "language_model.model.layers.23.mlp.gate_proj": {
1229
+ "bits": 3,
1230
+ "group_size": 64
1231
+ },
1232
+ "language_model.model.layers.23.mlp.up_proj": {
1233
+ "bits": 3,
1234
+ "group_size": 64
1235
+ },
1236
+ "language_model.model.layers.23.router.proj": {
1237
+ "bits": 3,
1238
+ "group_size": 64
1239
+ },
1240
+ "language_model.model.layers.23.self_attn.k_proj": {
1241
+ "bits": 8,
1242
+ "group_size": 64
1243
+ },
1244
+ "language_model.model.layers.23.self_attn.o_proj": {
1245
+ "bits": 8,
1246
+ "group_size": 64
1247
+ },
1248
+ "language_model.model.layers.23.self_attn.q_proj": {
1249
+ "bits": 8,
1250
+ "group_size": 64
1251
+ },
1252
+ "language_model.model.layers.24.experts.switch_glu.down_proj": {
1253
+ "bits": 3,
1254
+ "group_size": 64
1255
+ },
1256
+ "language_model.model.layers.24.experts.switch_glu.gate_proj": {
1257
+ "bits": 3,
1258
+ "group_size": 64
1259
+ },
1260
+ "language_model.model.layers.24.experts.switch_glu.up_proj": {
1261
+ "bits": 3,
1262
+ "group_size": 64
1263
+ },
1264
+ "language_model.model.layers.24.mlp.down_proj": {
1265
+ "bits": 3,
1266
+ "group_size": 64
1267
+ },
1268
+ "language_model.model.layers.24.mlp.gate_proj": {
1269
+ "bits": 3,
1270
+ "group_size": 64
1271
+ },
1272
+ "language_model.model.layers.24.mlp.up_proj": {
1273
+ "bits": 3,
1274
+ "group_size": 64
1275
+ },
1276
+ "language_model.model.layers.24.router.proj": {
1277
+ "bits": 3,
1278
+ "group_size": 64
1279
+ },
1280
+ "language_model.model.layers.24.self_attn.k_proj": {
1281
+ "bits": 8,
1282
+ "group_size": 64
1283
+ },
1284
+ "language_model.model.layers.24.self_attn.o_proj": {
1285
+ "bits": 8,
1286
+ "group_size": 64
1287
+ },
1288
+ "language_model.model.layers.24.self_attn.q_proj": {
1289
+ "bits": 8,
1290
+ "group_size": 64
1291
+ },
1292
+ "language_model.model.layers.24.self_attn.v_proj": {
1293
+ "bits": 8,
1294
+ "group_size": 64
1295
+ },
1296
+ "language_model.model.layers.25.experts.switch_glu.down_proj": {
1297
+ "bits": 3,
1298
+ "group_size": 64
1299
+ },
1300
+ "language_model.model.layers.25.experts.switch_glu.gate_proj": {
1301
+ "bits": 3,
1302
+ "group_size": 64
1303
+ },
1304
+ "language_model.model.layers.25.experts.switch_glu.up_proj": {
1305
+ "bits": 3,
1306
+ "group_size": 64
1307
+ },
1308
+ "language_model.model.layers.25.mlp.down_proj": {
1309
+ "bits": 3,
1310
+ "group_size": 64
1311
+ },
1312
+ "language_model.model.layers.25.mlp.gate_proj": {
1313
+ "bits": 3,
1314
+ "group_size": 64
1315
+ },
1316
+ "language_model.model.layers.25.mlp.up_proj": {
1317
+ "bits": 3,
1318
+ "group_size": 64
1319
+ },
1320
+ "language_model.model.layers.25.router.proj": {
1321
+ "bits": 3,
1322
+ "group_size": 64
1323
+ },
1324
+ "language_model.model.layers.25.self_attn.k_proj": {
1325
+ "bits": 8,
1326
+ "group_size": 64
1327
+ },
1328
+ "language_model.model.layers.25.self_attn.o_proj": {
1329
+ "bits": 8,
1330
+ "group_size": 64
1331
+ },
1332
+ "language_model.model.layers.25.self_attn.q_proj": {
1333
+ "bits": 8,
1334
+ "group_size": 64
1335
+ },
1336
+ "language_model.model.layers.25.self_attn.v_proj": {
1337
+ "bits": 8,
1338
+ "group_size": 64
1339
+ },
1340
+ "language_model.model.layers.26.experts.switch_glu.down_proj": {
1341
+ "bits": 3,
1342
+ "group_size": 64
1343
+ },
1344
+ "language_model.model.layers.26.experts.switch_glu.gate_proj": {
1345
+ "bits": 3,
1346
+ "group_size": 64
1347
+ },
1348
+ "language_model.model.layers.26.experts.switch_glu.up_proj": {
1349
+ "bits": 3,
1350
+ "group_size": 64
1351
+ },
1352
+ "language_model.model.layers.26.mlp.down_proj": {
1353
+ "bits": 4,
1354
+ "group_size": 64
1355
+ },
1356
+ "language_model.model.layers.26.mlp.gate_proj": {
1357
+ "bits": 4,
1358
+ "group_size": 64
1359
+ },
1360
+ "language_model.model.layers.26.mlp.up_proj": {
1361
+ "bits": 4,
1362
+ "group_size": 64
1363
+ },
1364
+ "language_model.model.layers.26.router.proj": {
1365
+ "bits": 3,
1366
+ "group_size": 64
1367
+ },
1368
+ "language_model.model.layers.26.self_attn.k_proj": {
1369
+ "bits": 8,
1370
+ "group_size": 64
1371
+ },
1372
+ "language_model.model.layers.26.self_attn.o_proj": {
1373
+ "bits": 8,
1374
+ "group_size": 64
1375
+ },
1376
+ "language_model.model.layers.26.self_attn.q_proj": {
1377
+ "bits": 8,
1378
+ "group_size": 64
1379
+ },
1380
+ "language_model.model.layers.26.self_attn.v_proj": {
1381
+ "bits": 8,
1382
+ "group_size": 64
1383
+ },
1384
+ "language_model.model.layers.27.experts.switch_glu.down_proj": {
1385
+ "bits": 3,
1386
+ "group_size": 64
1387
+ },
1388
+ "language_model.model.layers.27.experts.switch_glu.gate_proj": {
1389
+ "bits": 3,
1390
+ "group_size": 64
1391
+ },
1392
+ "language_model.model.layers.27.experts.switch_glu.up_proj": {
1393
+ "bits": 3,
1394
+ "group_size": 64
1395
+ },
1396
+ "language_model.model.layers.27.mlp.down_proj": {
1397
+ "bits": 4,
1398
+ "group_size": 64
1399
+ },
1400
+ "language_model.model.layers.27.mlp.gate_proj": {
1401
+ "bits": 4,
1402
+ "group_size": 64
1403
+ },
1404
+ "language_model.model.layers.27.mlp.up_proj": {
1405
+ "bits": 4,
1406
+ "group_size": 64
1407
+ },
1408
+ "language_model.model.layers.27.router.proj": {
1409
+ "bits": 3,
1410
+ "group_size": 64
1411
+ },
1412
+ "language_model.model.layers.27.self_attn.k_proj": {
1413
+ "bits": 8,
1414
+ "group_size": 64
1415
+ },
1416
+ "language_model.model.layers.27.self_attn.o_proj": {
1417
+ "bits": 8,
1418
+ "group_size": 64
1419
+ },
1420
+ "language_model.model.layers.27.self_attn.q_proj": {
1421
+ "bits": 8,
1422
+ "group_size": 64
1423
+ },
1424
+ "language_model.model.layers.27.self_attn.v_proj": {
1425
+ "bits": 8,
1426
+ "group_size": 64
1427
+ },
1428
+ "language_model.model.layers.28.experts.switch_glu.down_proj": {
1429
+ "bits": 3,
1430
+ "group_size": 64
1431
+ },
1432
+ "language_model.model.layers.28.experts.switch_glu.gate_proj": {
1433
+ "bits": 3,
1434
+ "group_size": 64
1435
+ },
1436
+ "language_model.model.layers.28.experts.switch_glu.up_proj": {
1437
+ "bits": 3,
1438
+ "group_size": 64
1439
+ },
1440
+ "language_model.model.layers.28.mlp.down_proj": {
1441
+ "bits": 4,
1442
+ "group_size": 64
1443
+ },
1444
+ "language_model.model.layers.28.mlp.gate_proj": {
1445
+ "bits": 4,
1446
+ "group_size": 64
1447
+ },
1448
+ "language_model.model.layers.28.mlp.up_proj": {
1449
+ "bits": 4,
1450
+ "group_size": 64
1451
+ },
1452
+ "language_model.model.layers.28.router.proj": {
1453
+ "bits": 3,
1454
+ "group_size": 64
1455
+ },
1456
+ "language_model.model.layers.28.self_attn.k_proj": {
1457
+ "bits": 8,
1458
+ "group_size": 64
1459
+ },
1460
+ "language_model.model.layers.28.self_attn.o_proj": {
1461
+ "bits": 8,
1462
+ "group_size": 64
1463
+ },
1464
+ "language_model.model.layers.28.self_attn.q_proj": {
1465
+ "bits": 8,
1466
+ "group_size": 64
1467
+ },
1468
+ "language_model.model.layers.28.self_attn.v_proj": {
1469
+ "bits": 8,
1470
+ "group_size": 64
1471
+ },
1472
+ "language_model.model.layers.29.experts.switch_glu.down_proj": {
1473
+ "bits": 3,
1474
+ "group_size": 64
1475
+ },
1476
+ "language_model.model.layers.29.experts.switch_glu.gate_proj": {
1477
+ "bits": 3,
1478
+ "group_size": 64
1479
+ },
1480
+ "language_model.model.layers.29.experts.switch_glu.up_proj": {
1481
+ "bits": 3,
1482
+ "group_size": 64
1483
+ },
1484
+ "language_model.model.layers.29.mlp.down_proj": {
1485
+ "bits": 5,
1486
+ "group_size": 64
1487
+ },
1488
+ "language_model.model.layers.29.mlp.gate_proj": {
1489
+ "bits": 5,
1490
+ "group_size": 64
1491
+ },
1492
+ "language_model.model.layers.29.mlp.up_proj": {
1493
+ "bits": 5,
1494
+ "group_size": 64
1495
+ },
1496
+ "language_model.model.layers.29.router.proj": {
1497
+ "bits": 3,
1498
+ "group_size": 64
1499
+ },
1500
+ "language_model.model.layers.29.self_attn.k_proj": {
1501
+ "bits": 8,
1502
+ "group_size": 64
1503
+ },
1504
+ "language_model.model.layers.29.self_attn.o_proj": {
1505
+ "bits": 8,
1506
+ "group_size": 64
1507
+ },
1508
+ "language_model.model.layers.29.self_attn.q_proj": {
1509
+ "bits": 8,
1510
+ "group_size": 64
1511
+ },
1512
+ "language_model.model.layers.3.experts.switch_glu.down_proj": {
1513
+ "bits": 3,
1514
+ "group_size": 64
1515
+ },
1516
+ "language_model.model.layers.3.experts.switch_glu.gate_proj": {
1517
+ "bits": 3,
1518
+ "group_size": 64
1519
+ },
1520
+ "language_model.model.layers.3.experts.switch_glu.up_proj": {
1521
+ "bits": 3,
1522
+ "group_size": 64
1523
+ },
1524
+ "language_model.model.layers.3.mlp.down_proj": {
1525
+ "bits": 4,
1526
+ "group_size": 64
1527
+ },
1528
+ "language_model.model.layers.3.mlp.gate_proj": {
1529
+ "bits": 4,
1530
+ "group_size": 64
1531
+ },
1532
+ "language_model.model.layers.3.mlp.up_proj": {
1533
+ "bits": 4,
1534
+ "group_size": 64
1535
+ },
1536
+ "language_model.model.layers.3.router.proj": {
1537
+ "bits": 3,
1538
+ "group_size": 64
1539
+ },
1540
+ "language_model.model.layers.3.self_attn.k_proj": {
1541
+ "bits": 8,
1542
+ "group_size": 64
1543
+ },
1544
+ "language_model.model.layers.3.self_attn.o_proj": {
1545
+ "bits": 8,
1546
+ "group_size": 64
1547
+ },
1548
+ "language_model.model.layers.3.self_attn.q_proj": {
1549
+ "bits": 8,
1550
+ "group_size": 64
1551
+ },
1552
+ "language_model.model.layers.3.self_attn.v_proj": {
1553
+ "bits": 8,
1554
+ "group_size": 64
1555
+ },
1556
+ "language_model.model.layers.4.experts.switch_glu.down_proj": {
1557
+ "bits": 3,
1558
+ "group_size": 64
1559
+ },
1560
+ "language_model.model.layers.4.experts.switch_glu.gate_proj": {
1561
+ "bits": 3,
1562
+ "group_size": 64
1563
+ },
1564
+ "language_model.model.layers.4.experts.switch_glu.up_proj": {
1565
+ "bits": 3,
1566
+ "group_size": 64
1567
+ },
1568
+ "language_model.model.layers.4.mlp.down_proj": {
1569
+ "bits": 3,
1570
+ "group_size": 64
1571
+ },
1572
+ "language_model.model.layers.4.mlp.gate_proj": {
1573
+ "bits": 3,
1574
+ "group_size": 64
1575
+ },
1576
+ "language_model.model.layers.4.mlp.up_proj": {
1577
+ "bits": 3,
1578
+ "group_size": 64
1579
+ },
1580
+ "language_model.model.layers.4.router.proj": {
1581
+ "bits": 3,
1582
+ "group_size": 64
1583
+ },
1584
+ "language_model.model.layers.4.self_attn.k_proj": {
1585
+ "bits": 8,
1586
+ "group_size": 64
1587
+ },
1588
+ "language_model.model.layers.4.self_attn.o_proj": {
1589
+ "bits": 8,
1590
+ "group_size": 64
1591
+ },
1592
+ "language_model.model.layers.4.self_attn.q_proj": {
1593
+ "bits": 8,
1594
+ "group_size": 64
1595
+ },
1596
+ "language_model.model.layers.4.self_attn.v_proj": {
1597
+ "bits": 8,
1598
+ "group_size": 64
1599
+ },
1600
+ "language_model.model.layers.5.experts.switch_glu.down_proj": {
1601
+ "bits": 3,
1602
+ "group_size": 64
1603
+ },
1604
+ "language_model.model.layers.5.experts.switch_glu.gate_proj": {
1605
+ "bits": 3,
1606
+ "group_size": 64
1607
+ },
1608
+ "language_model.model.layers.5.experts.switch_glu.up_proj": {
1609
+ "bits": 3,
1610
+ "group_size": 64
1611
+ },
1612
+ "language_model.model.layers.5.mlp.down_proj": {
1613
+ "bits": 3,
1614
+ "group_size": 64
1615
+ },
1616
+ "language_model.model.layers.5.mlp.gate_proj": {
1617
+ "bits": 3,
1618
+ "group_size": 64
1619
+ },
1620
+ "language_model.model.layers.5.mlp.up_proj": {
1621
+ "bits": 3,
1622
+ "group_size": 64
1623
+ },
1624
+ "language_model.model.layers.5.router.proj": {
1625
+ "bits": 3,
1626
+ "group_size": 64
1627
+ },
1628
+ "language_model.model.layers.5.self_attn.k_proj": {
1629
+ "bits": 8,
1630
+ "group_size": 64
1631
+ },
1632
+ "language_model.model.layers.5.self_attn.o_proj": {
1633
+ "bits": 8,
1634
+ "group_size": 64
1635
+ },
1636
+ "language_model.model.layers.5.self_attn.q_proj": {
1637
+ "bits": 8,
1638
+ "group_size": 64
1639
+ },
1640
+ "language_model.model.layers.6.experts.switch_glu.down_proj": {
1641
+ "bits": 3,
1642
+ "group_size": 64
1643
+ },
1644
+ "language_model.model.layers.6.experts.switch_glu.gate_proj": {
1645
+ "bits": 3,
1646
+ "group_size": 64
1647
+ },
1648
+ "language_model.model.layers.6.experts.switch_glu.up_proj": {
1649
+ "bits": 3,
1650
+ "group_size": 64
1651
+ },
1652
+ "language_model.model.layers.6.mlp.down_proj": {
1653
+ "bits": 3,
1654
+ "group_size": 64
1655
+ },
1656
+ "language_model.model.layers.6.mlp.gate_proj": {
1657
+ "bits": 3,
1658
+ "group_size": 64
1659
+ },
1660
+ "language_model.model.layers.6.mlp.up_proj": {
1661
+ "bits": 3,
1662
+ "group_size": 64
1663
+ },
1664
+ "language_model.model.layers.6.router.proj": {
1665
+ "bits": 3,
1666
+ "group_size": 64
1667
+ },
1668
+ "language_model.model.layers.6.self_attn.k_proj": {
1669
+ "bits": 8,
1670
+ "group_size": 64
1671
+ },
1672
+ "language_model.model.layers.6.self_attn.o_proj": {
1673
+ "bits": 8,
1674
+ "group_size": 64
1675
+ },
1676
+ "language_model.model.layers.6.self_attn.q_proj": {
1677
+ "bits": 8,
1678
+ "group_size": 64
1679
+ },
1680
+ "language_model.model.layers.6.self_attn.v_proj": {
1681
+ "bits": 8,
1682
+ "group_size": 64
1683
+ },
1684
+ "language_model.model.layers.7.experts.switch_glu.down_proj": {
1685
+ "bits": 3,
1686
+ "group_size": 64
1687
+ },
1688
+ "language_model.model.layers.7.experts.switch_glu.gate_proj": {
1689
+ "bits": 3,
1690
+ "group_size": 64
1691
+ },
1692
+ "language_model.model.layers.7.experts.switch_glu.up_proj": {
1693
+ "bits": 3,
1694
+ "group_size": 64
1695
+ },
1696
+ "language_model.model.layers.7.mlp.down_proj": {
1697
+ "bits": 3,
1698
+ "group_size": 64
1699
+ },
1700
+ "language_model.model.layers.7.mlp.gate_proj": {
1701
+ "bits": 3,
1702
+ "group_size": 64
1703
+ },
1704
+ "language_model.model.layers.7.mlp.up_proj": {
1705
+ "bits": 3,
1706
+ "group_size": 64
1707
+ },
1708
+ "language_model.model.layers.7.router.proj": {
1709
+ "bits": 3,
1710
+ "group_size": 64
1711
+ },
1712
+ "language_model.model.layers.7.self_attn.k_proj": {
1713
+ "bits": 8,
1714
+ "group_size": 64
1715
+ },
1716
+ "language_model.model.layers.7.self_attn.o_proj": {
1717
+ "bits": 8,
1718
+ "group_size": 64
1719
+ },
1720
+ "language_model.model.layers.7.self_attn.q_proj": {
1721
+ "bits": 8,
1722
+ "group_size": 64
1723
+ },
1724
+ "language_model.model.layers.7.self_attn.v_proj": {
1725
+ "bits": 8,
1726
+ "group_size": 64
1727
+ },
1728
+ "language_model.model.layers.8.experts.switch_glu.down_proj": {
1729
+ "bits": 3,
1730
+ "group_size": 64
1731
+ },
1732
+ "language_model.model.layers.8.experts.switch_glu.gate_proj": {
1733
+ "bits": 3,
1734
+ "group_size": 64
1735
+ },
1736
+ "language_model.model.layers.8.experts.switch_glu.up_proj": {
1737
+ "bits": 3,
1738
+ "group_size": 64
1739
+ },
1740
+ "language_model.model.layers.8.mlp.down_proj": {
1741
+ "bits": 3,
1742
+ "group_size": 64
1743
+ },
1744
+ "language_model.model.layers.8.mlp.gate_proj": {
1745
+ "bits": 3,
1746
+ "group_size": 64
1747
+ },
1748
+ "language_model.model.layers.8.mlp.up_proj": {
1749
+ "bits": 3,
1750
+ "group_size": 64
1751
+ },
1752
+ "language_model.model.layers.8.router.proj": {
1753
+ "bits": 3,
1754
+ "group_size": 64
1755
+ },
1756
+ "language_model.model.layers.8.self_attn.k_proj": {
1757
+ "bits": 8,
1758
+ "group_size": 64
1759
+ },
1760
+ "language_model.model.layers.8.self_attn.o_proj": {
1761
+ "bits": 8,
1762
+ "group_size": 64
1763
+ },
1764
+ "language_model.model.layers.8.self_attn.q_proj": {
1765
+ "bits": 8,
1766
+ "group_size": 64
1767
+ },
1768
+ "language_model.model.layers.8.self_attn.v_proj": {
1769
+ "bits": 8,
1770
+ "group_size": 64
1771
+ },
1772
+ "language_model.model.layers.9.experts.switch_glu.down_proj": {
1773
+ "bits": 3,
1774
+ "group_size": 64
1775
+ },
1776
+ "language_model.model.layers.9.experts.switch_glu.gate_proj": {
1777
+ "bits": 3,
1778
+ "group_size": 64
1779
+ },
1780
+ "language_model.model.layers.9.experts.switch_glu.up_proj": {
1781
+ "bits": 3,
1782
+ "group_size": 64
1783
+ },
1784
+ "language_model.model.layers.9.mlp.down_proj": {
1785
+ "bits": 3,
1786
+ "group_size": 64
1787
+ },
1788
+ "language_model.model.layers.9.mlp.gate_proj": {
1789
+ "bits": 3,
1790
+ "group_size": 64
1791
+ },
1792
+ "language_model.model.layers.9.mlp.up_proj": {
1793
+ "bits": 3,
1794
+ "group_size": 64
1795
+ },
1796
+ "language_model.model.layers.9.router.proj": {
1797
+ "bits": 3,
1798
+ "group_size": 64
1799
+ },
1800
+ "language_model.model.layers.9.self_attn.k_proj": {
1801
+ "bits": 8,
1802
+ "group_size": 64
1803
+ },
1804
+ "language_model.model.layers.9.self_attn.o_proj": {
1805
+ "bits": 8,
1806
+ "group_size": 64
1807
+ },
1808
+ "language_model.model.layers.9.self_attn.q_proj": {
1809
+ "bits": 8,
1810
+ "group_size": 64
1811
+ },
1812
+ "language_model.model.layers.9.self_attn.v_proj": {
1813
+ "bits": 8,
1814
+ "group_size": 64
1815
+ }
1816
+ },
1817
+ "text_config": {
1818
+ "attention_bias": false,
1819
+ "attention_dropout": 0.0,
1820
+ "attention_k_eq_v": true,
1821
+ "bos_token_id": 2,
1822
+ "dtype": "bfloat16",
1823
+ "enable_moe_block": true,
1824
+ "eos_token_id": 1,
1825
+ "final_logit_softcapping": 30.0,
1826
+ "global_head_dim": 512,
1827
+ "head_dim": 256,
1828
+ "hidden_activation": "gelu_pytorch_tanh",
1829
+ "hidden_size": 2816,
1830
+ "hidden_size_per_layer_input": 0,
1831
+ "initializer_range": 0.02,
1832
+ "intermediate_size": 2112,
1833
+ "layer_types": [
1834
+ "sliding_attention",
1835
+ "sliding_attention",
1836
+ "sliding_attention",
1837
+ "sliding_attention",
1838
+ "sliding_attention",
1839
+ "full_attention",
1840
+ "sliding_attention",
1841
+ "sliding_attention",
1842
+ "sliding_attention",
1843
+ "sliding_attention",
1844
+ "sliding_attention",
1845
+ "full_attention",
1846
+ "sliding_attention",
1847
+ "sliding_attention",
1848
+ "sliding_attention",
1849
+ "sliding_attention",
1850
+ "sliding_attention",
1851
+ "full_attention",
1852
+ "sliding_attention",
1853
+ "sliding_attention",
1854
+ "sliding_attention",
1855
+ "sliding_attention",
1856
+ "sliding_attention",
1857
+ "full_attention",
1858
+ "sliding_attention",
1859
+ "sliding_attention",
1860
+ "sliding_attention",
1861
+ "sliding_attention",
1862
+ "sliding_attention",
1863
+ "full_attention"
1864
+ ],
1865
+ "max_position_embeddings": 262144,
1866
+ "model_type": "gemma4_text",
1867
+ "moe_intermediate_size": 704,
1868
+ "num_attention_heads": 16,
1869
+ "num_experts": 128,
1870
+ "num_global_key_value_heads": 2,
1871
+ "num_hidden_layers": 30,
1872
+ "num_key_value_heads": 8,
1873
+ "num_kv_shared_layers": 0,
1874
+ "pad_token_id": 0,
1875
+ "rms_norm_eps": 1e-06,
1876
+ "rope_parameters": {
1877
+ "full_attention": {
1878
+ "partial_rotary_factor": 0.25,
1879
+ "rope_theta": 1000000.0,
1880
+ "rope_type": "proportional"
1881
+ },
1882
+ "sliding_attention": {
1883
+ "rope_theta": 10000.0,
1884
+ "rope_type": "default"
1885
+ }
1886
+ },
1887
+ "sliding_window": 1024,
1888
+ "tie_word_embeddings": true,
1889
+ "top_k_experts": 8,
1890
+ "use_bidirectional_attention": "vision",
1891
+ "use_cache": true,
1892
+ "use_double_wide_mlp": false,
1893
+ "vocab_size": 262144,
1894
+ "vocab_size_per_layer_input": 262144
1895
+ },
1896
+ "tie_word_embeddings": true,
1897
+ "transformers_version": "5.5.0.dev0",
1898
+ "video_token_id": 258884,
1899
+ "vision_soft_tokens_per_image": 280
1900
+ }
generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 2,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 1,
6
+ 106,
7
+ 50
8
+ ],
9
+ "pad_token_id": 0,
10
+ "temperature": 1.0,
11
+ "top_k": 64,
12
+ "top_p": 0.95,
13
+ "transformers_version": "5.5.0.dev0"
14
+ }
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:558ba00e4251300ee555baacfa952472c5b1948b12f2f9303dc51ed70eff757f
3
+ size 1017052426
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcb8a815ef88aba747cc592d4d4a80b228a9f8c380b2f4033ba72e9cd935ec9f
3
+ size 1004204162
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c538217c2bb81dd3879e0cec8808acdee899d5c5a1ceda527abe702f4ef80574
3
+ size 23074568
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
packed_experts_mixed_full/layer_00.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b715a1e5501252b14b1c572ca293a6caaf2239cfda5f5726187d9fa49f6245b
3
+ size 333053952
packed_experts_mixed_full/layer_01.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d843b2f42d0984c3a4e4fb443b9128f8dc33fa7ae400ea5552f7f1fb62dfae5c
3
+ size 333053952
packed_experts_mixed_full/layer_02.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4048e6b26fa2fbc8ea3b3cb8227778508f48bdec5ff92391c5a2c5d772f4dcf
3
+ size 333053952
packed_experts_mixed_full/layer_03.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edf09d55cd0d0de7660810c2aefac56234227ee98b17face7582d45dd6e17b93
3
+ size 333053952
packed_experts_mixed_full/layer_04.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2230bd56855c594ae1c4717ba8e94fd3b6d2356ddb5e5d86b662affc40ff8dc
3
+ size 333053952
packed_experts_mixed_full/layer_05.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8805eb27c1e20129c8ddc5a89d82369656d55e68e6d19fda547e58b9c756f796
3
+ size 333053952
packed_experts_mixed_full/layer_06.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:489ff799e637128be8e2f6a92befd2a56c5f0ae07430b87d5b85db2d3161529f
3
+ size 333053952
packed_experts_mixed_full/layer_07.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75de8754bdef333c35c4caa45df1474978f19356ba1c3272bdfedaf9303f09e6
3
+ size 333053952
packed_experts_mixed_full/layer_08.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99fb7df62032593fe6741701bede920773585b18efa59ac409377abdc4bee6d7
3
+ size 333053952
packed_experts_mixed_full/layer_09.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f418b4bba9c3e3d2ceb07701321121fbc1001ac5fefb90b524015deae4ac1983
3
+ size 333053952
packed_experts_mixed_full/layer_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a2123f06c643fb3499a74e0fc7e4e1d751a2a3c92ddfbbc9343f7c72c4dba30
3
+ size 333053952
packed_experts_mixed_full/layer_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a058f803e0f3e06213bd81145c3cc850b81e271f208f86953d35a915e900d0bb
3
+ size 333053952
packed_experts_mixed_full/layer_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02c119575c4d3a3adeace97f933ef112a0d27ac740c8a9c4b6e859570593775f
3
+ size 333053952
packed_experts_mixed_full/layer_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef58bee3a1a67605b9870baf44521e0455a6a17b54af1dde7e0573485fcb8094
3
+ size 333053952
packed_experts_mixed_full/layer_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e49ade6722f21b5a6fe07e33ae5229c1f868892049157c0da723ca39ebb958ec
3
+ size 333053952
packed_experts_mixed_full/layer_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:187c5119b878b9c9573585293cb1247d3ae9b67089b7912e38a1aba827a0c824
3
+ size 333053952
packed_experts_mixed_full/layer_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:801e5a22a7e5cedeab681d3af35e3cc966a7980085841e01f4eb016094d0153a
3
+ size 333053952
packed_experts_mixed_full/layer_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71880873007c6bef065d551f78b603cefc12cc0a885eef57d31b6e3f977e9ea5
3
+ size 333053952
packed_experts_mixed_full/layer_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ab586619a0e3c0c584eb5fa6ab85dc6d589733c60140743d433333d76d4a648
3
+ size 333053952
packed_experts_mixed_full/layer_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ddf0a6f574c4ccf47735c2e8eed62bf764eb66165ffc4abca3f5c2fc55cab7d
3
+ size 333053952
packed_experts_mixed_full/layer_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a1c0d0be6c8b0525260796be21964a8bb96bf7ef298bfd1536f6d72071fb0ab
3
+ size 333053952
packed_experts_mixed_full/layer_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1a964dda18b7cb8ef2d21e20723aede28a86908f9c2623ef7f0f2c5a9422572
3
+ size 333053952
packed_experts_mixed_full/layer_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5223c11963c13260c5cfba81219a1db9a3a6b64c0103cadfd9d6194c4f32a8c
3
+ size 333053952
packed_experts_mixed_full/layer_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0c6343b19f14e09ee73b93a1e9f6da42c76d92b386ae8e907e0fb5b7d7a4bd3
3
+ size 333053952
packed_experts_mixed_full/layer_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bd6e571b1612325d8bbc608c00177f5540cb25d63eb6aceb37fb4722b401d0f
3
+ size 333053952
packed_experts_mixed_full/layer_25.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6979d4f8293c3f008e22e3b42fd9c2c498bfd1ef5d9eedf9e98fc613d5f9fe80
3
+ size 333053952
packed_experts_mixed_full/layer_26.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:619c07475bfc36b1b73713fabdbea02eefdc720d4c7e366be2711ff4dfe61ad7
3
+ size 333053952
packed_experts_mixed_full/layer_27.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70cf4823d0ea160c0f59fbe2929705487373ae4dabde17fc2c33084467c31473
3
+ size 333053952
packed_experts_mixed_full/layer_28.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e04bb12d0f2dc5dd509850b0e932b3131705bdda4f2aa758e6caa4a9b7c3b6a
3
+ size 333053952
packed_experts_mixed_full/layer_29.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4ff34ef0d14dd91c830b0be039265f557654c7e1d8f3da8717a87dc431fa2fb
3
+ size 333053952
packed_experts_mixed_full/layout.json ADDED
@@ -0,0 +1,3735 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "format": "mlx-flash-moe-mixed-sidecar-v1",
3
+ "model": "/Users/anemll/Models/gemma4/unsloth-gemma-4-26b-a4b-it-UD-MLX-3bit",
4
+ "source": "mlx-model-safetensors",
5
+ "num_layers": 30,
6
+ "num_experts": 128,
7
+ "group_size": 64,
8
+ "model_type": "gemma4_text",
9
+ "layers": [
10
+ {
11
+ "layer": 0,
12
+ "file": "layer_00.bin",
13
+ "file_size": 333053952,
14
+ "tensors": [
15
+ {
16
+ "name": "language_model.model.layers.0.experts.switch_glu.down_proj.biases",
17
+ "dtype": "bfloat16",
18
+ "shape": [
19
+ 128,
20
+ 2816,
21
+ 11
22
+ ],
23
+ "bits": 0,
24
+ "group_size": 64,
25
+ "offset": 0,
26
+ "nbytes": 7929856
27
+ },
28
+ {
29
+ "name": "language_model.model.layers.0.experts.switch_glu.down_proj.scales",
30
+ "dtype": "bfloat16",
31
+ "shape": [
32
+ 128,
33
+ 2816,
34
+ 11
35
+ ],
36
+ "bits": 0,
37
+ "group_size": 64,
38
+ "offset": 7929856,
39
+ "nbytes": 7929856
40
+ },
41
+ {
42
+ "name": "language_model.model.layers.0.experts.switch_glu.down_proj.weight",
43
+ "dtype": "uint32",
44
+ "shape": [
45
+ 128,
46
+ 2816,
47
+ 66
48
+ ],
49
+ "bits": 3,
50
+ "group_size": 64,
51
+ "offset": 15859712,
52
+ "nbytes": 95158272
53
+ },
54
+ {
55
+ "name": "language_model.model.layers.0.experts.switch_glu.gate_proj.biases",
56
+ "dtype": "bfloat16",
57
+ "shape": [
58
+ 128,
59
+ 704,
60
+ 44
61
+ ],
62
+ "bits": 0,
63
+ "group_size": 64,
64
+ "offset": 111017984,
65
+ "nbytes": 7929856
66
+ },
67
+ {
68
+ "name": "language_model.model.layers.0.experts.switch_glu.gate_proj.scales",
69
+ "dtype": "bfloat16",
70
+ "shape": [
71
+ 128,
72
+ 704,
73
+ 44
74
+ ],
75
+ "bits": 0,
76
+ "group_size": 64,
77
+ "offset": 118947840,
78
+ "nbytes": 7929856
79
+ },
80
+ {
81
+ "name": "language_model.model.layers.0.experts.switch_glu.gate_proj.weight",
82
+ "dtype": "uint32",
83
+ "shape": [
84
+ 128,
85
+ 704,
86
+ 264
87
+ ],
88
+ "bits": 3,
89
+ "group_size": 64,
90
+ "offset": 126877696,
91
+ "nbytes": 95158272
92
+ },
93
+ {
94
+ "name": "language_model.model.layers.0.experts.switch_glu.up_proj.biases",
95
+ "dtype": "bfloat16",
96
+ "shape": [
97
+ 128,
98
+ 704,
99
+ 44
100
+ ],
101
+ "bits": 0,
102
+ "group_size": 64,
103
+ "offset": 222035968,
104
+ "nbytes": 7929856
105
+ },
106
+ {
107
+ "name": "language_model.model.layers.0.experts.switch_glu.up_proj.scales",
108
+ "dtype": "bfloat16",
109
+ "shape": [
110
+ 128,
111
+ 704,
112
+ 44
113
+ ],
114
+ "bits": 0,
115
+ "group_size": 64,
116
+ "offset": 229965824,
117
+ "nbytes": 7929856
118
+ },
119
+ {
120
+ "name": "language_model.model.layers.0.experts.switch_glu.up_proj.weight",
121
+ "dtype": "uint32",
122
+ "shape": [
123
+ 128,
124
+ 704,
125
+ 264
126
+ ],
127
+ "bits": 3,
128
+ "group_size": 64,
129
+ "offset": 237895680,
130
+ "nbytes": 95158272
131
+ }
132
+ ]
133
+ },
134
+ {
135
+ "layer": 1,
136
+ "file": "layer_01.bin",
137
+ "file_size": 333053952,
138
+ "tensors": [
139
+ {
140
+ "name": "language_model.model.layers.1.experts.switch_glu.down_proj.biases",
141
+ "dtype": "bfloat16",
142
+ "shape": [
143
+ 128,
144
+ 2816,
145
+ 11
146
+ ],
147
+ "bits": 0,
148
+ "group_size": 64,
149
+ "offset": 0,
150
+ "nbytes": 7929856
151
+ },
152
+ {
153
+ "name": "language_model.model.layers.1.experts.switch_glu.down_proj.scales",
154
+ "dtype": "bfloat16",
155
+ "shape": [
156
+ 128,
157
+ 2816,
158
+ 11
159
+ ],
160
+ "bits": 0,
161
+ "group_size": 64,
162
+ "offset": 7929856,
163
+ "nbytes": 7929856
164
+ },
165
+ {
166
+ "name": "language_model.model.layers.1.experts.switch_glu.down_proj.weight",
167
+ "dtype": "uint32",
168
+ "shape": [
169
+ 128,
170
+ 2816,
171
+ 66
172
+ ],
173
+ "bits": 3,
174
+ "group_size": 64,
175
+ "offset": 15859712,
176
+ "nbytes": 95158272
177
+ },
178
+ {
179
+ "name": "language_model.model.layers.1.experts.switch_glu.gate_proj.biases",
180
+ "dtype": "bfloat16",
181
+ "shape": [
182
+ 128,
183
+ 704,
184
+ 44
185
+ ],
186
+ "bits": 0,
187
+ "group_size": 64,
188
+ "offset": 111017984,
189
+ "nbytes": 7929856
190
+ },
191
+ {
192
+ "name": "language_model.model.layers.1.experts.switch_glu.gate_proj.scales",
193
+ "dtype": "bfloat16",
194
+ "shape": [
195
+ 128,
196
+ 704,
197
+ 44
198
+ ],
199
+ "bits": 0,
200
+ "group_size": 64,
201
+ "offset": 118947840,
202
+ "nbytes": 7929856
203
+ },
204
+ {
205
+ "name": "language_model.model.layers.1.experts.switch_glu.gate_proj.weight",
206
+ "dtype": "uint32",
207
+ "shape": [
208
+ 128,
209
+ 704,
210
+ 264
211
+ ],
212
+ "bits": 3,
213
+ "group_size": 64,
214
+ "offset": 126877696,
215
+ "nbytes": 95158272
216
+ },
217
+ {
218
+ "name": "language_model.model.layers.1.experts.switch_glu.up_proj.biases",
219
+ "dtype": "bfloat16",
220
+ "shape": [
221
+ 128,
222
+ 704,
223
+ 44
224
+ ],
225
+ "bits": 0,
226
+ "group_size": 64,
227
+ "offset": 222035968,
228
+ "nbytes": 7929856
229
+ },
230
+ {
231
+ "name": "language_model.model.layers.1.experts.switch_glu.up_proj.scales",
232
+ "dtype": "bfloat16",
233
+ "shape": [
234
+ 128,
235
+ 704,
236
+ 44
237
+ ],
238
+ "bits": 0,
239
+ "group_size": 64,
240
+ "offset": 229965824,
241
+ "nbytes": 7929856
242
+ },
243
+ {
244
+ "name": "language_model.model.layers.1.experts.switch_glu.up_proj.weight",
245
+ "dtype": "uint32",
246
+ "shape": [
247
+ 128,
248
+ 704,
249
+ 264
250
+ ],
251
+ "bits": 3,
252
+ "group_size": 64,
253
+ "offset": 237895680,
254
+ "nbytes": 95158272
255
+ }
256
+ ]
257
+ },
258
+ {
259
+ "layer": 2,
260
+ "file": "layer_02.bin",
261
+ "file_size": 333053952,
262
+ "tensors": [
263
+ {
264
+ "name": "language_model.model.layers.2.experts.switch_glu.down_proj.biases",
265
+ "dtype": "bfloat16",
266
+ "shape": [
267
+ 128,
268
+ 2816,
269
+ 11
270
+ ],
271
+ "bits": 0,
272
+ "group_size": 64,
273
+ "offset": 0,
274
+ "nbytes": 7929856
275
+ },
276
+ {
277
+ "name": "language_model.model.layers.2.experts.switch_glu.down_proj.scales",
278
+ "dtype": "bfloat16",
279
+ "shape": [
280
+ 128,
281
+ 2816,
282
+ 11
283
+ ],
284
+ "bits": 0,
285
+ "group_size": 64,
286
+ "offset": 7929856,
287
+ "nbytes": 7929856
288
+ },
289
+ {
290
+ "name": "language_model.model.layers.2.experts.switch_glu.down_proj.weight",
291
+ "dtype": "uint32",
292
+ "shape": [
293
+ 128,
294
+ 2816,
295
+ 66
296
+ ],
297
+ "bits": 3,
298
+ "group_size": 64,
299
+ "offset": 15859712,
300
+ "nbytes": 95158272
301
+ },
302
+ {
303
+ "name": "language_model.model.layers.2.experts.switch_glu.gate_proj.biases",
304
+ "dtype": "bfloat16",
305
+ "shape": [
306
+ 128,
307
+ 704,
308
+ 44
309
+ ],
310
+ "bits": 0,
311
+ "group_size": 64,
312
+ "offset": 111017984,
313
+ "nbytes": 7929856
314
+ },
315
+ {
316
+ "name": "language_model.model.layers.2.experts.switch_glu.gate_proj.scales",
317
+ "dtype": "bfloat16",
318
+ "shape": [
319
+ 128,
320
+ 704,
321
+ 44
322
+ ],
323
+ "bits": 0,
324
+ "group_size": 64,
325
+ "offset": 118947840,
326
+ "nbytes": 7929856
327
+ },
328
+ {
329
+ "name": "language_model.model.layers.2.experts.switch_glu.gate_proj.weight",
330
+ "dtype": "uint32",
331
+ "shape": [
332
+ 128,
333
+ 704,
334
+ 264
335
+ ],
336
+ "bits": 3,
337
+ "group_size": 64,
338
+ "offset": 126877696,
339
+ "nbytes": 95158272
340
+ },
341
+ {
342
+ "name": "language_model.model.layers.2.experts.switch_glu.up_proj.biases",
343
+ "dtype": "bfloat16",
344
+ "shape": [
345
+ 128,
346
+ 704,
347
+ 44
348
+ ],
349
+ "bits": 0,
350
+ "group_size": 64,
351
+ "offset": 222035968,
352
+ "nbytes": 7929856
353
+ },
354
+ {
355
+ "name": "language_model.model.layers.2.experts.switch_glu.up_proj.scales",
356
+ "dtype": "bfloat16",
357
+ "shape": [
358
+ 128,
359
+ 704,
360
+ 44
361
+ ],
362
+ "bits": 0,
363
+ "group_size": 64,
364
+ "offset": 229965824,
365
+ "nbytes": 7929856
366
+ },
367
+ {
368
+ "name": "language_model.model.layers.2.experts.switch_glu.up_proj.weight",
369
+ "dtype": "uint32",
370
+ "shape": [
371
+ 128,
372
+ 704,
373
+ 264
374
+ ],
375
+ "bits": 3,
376
+ "group_size": 64,
377
+ "offset": 237895680,
378
+ "nbytes": 95158272
379
+ }
380
+ ]
381
+ },
382
+ {
383
+ "layer": 3,
384
+ "file": "layer_03.bin",
385
+ "file_size": 333053952,
386
+ "tensors": [
387
+ {
388
+ "name": "language_model.model.layers.3.experts.switch_glu.down_proj.biases",
389
+ "dtype": "bfloat16",
390
+ "shape": [
391
+ 128,
392
+ 2816,
393
+ 11
394
+ ],
395
+ "bits": 0,
396
+ "group_size": 64,
397
+ "offset": 0,
398
+ "nbytes": 7929856
399
+ },
400
+ {
401
+ "name": "language_model.model.layers.3.experts.switch_glu.down_proj.scales",
402
+ "dtype": "bfloat16",
403
+ "shape": [
404
+ 128,
405
+ 2816,
406
+ 11
407
+ ],
408
+ "bits": 0,
409
+ "group_size": 64,
410
+ "offset": 7929856,
411
+ "nbytes": 7929856
412
+ },
413
+ {
414
+ "name": "language_model.model.layers.3.experts.switch_glu.down_proj.weight",
415
+ "dtype": "uint32",
416
+ "shape": [
417
+ 128,
418
+ 2816,
419
+ 66
420
+ ],
421
+ "bits": 3,
422
+ "group_size": 64,
423
+ "offset": 15859712,
424
+ "nbytes": 95158272
425
+ },
426
+ {
427
+ "name": "language_model.model.layers.3.experts.switch_glu.gate_proj.biases",
428
+ "dtype": "bfloat16",
429
+ "shape": [
430
+ 128,
431
+ 704,
432
+ 44
433
+ ],
434
+ "bits": 0,
435
+ "group_size": 64,
436
+ "offset": 111017984,
437
+ "nbytes": 7929856
438
+ },
439
+ {
440
+ "name": "language_model.model.layers.3.experts.switch_glu.gate_proj.scales",
441
+ "dtype": "bfloat16",
442
+ "shape": [
443
+ 128,
444
+ 704,
445
+ 44
446
+ ],
447
+ "bits": 0,
448
+ "group_size": 64,
449
+ "offset": 118947840,
450
+ "nbytes": 7929856
451
+ },
452
+ {
453
+ "name": "language_model.model.layers.3.experts.switch_glu.gate_proj.weight",
454
+ "dtype": "uint32",
455
+ "shape": [
456
+ 128,
457
+ 704,
458
+ 264
459
+ ],
460
+ "bits": 3,
461
+ "group_size": 64,
462
+ "offset": 126877696,
463
+ "nbytes": 95158272
464
+ },
465
+ {
466
+ "name": "language_model.model.layers.3.experts.switch_glu.up_proj.biases",
467
+ "dtype": "bfloat16",
468
+ "shape": [
469
+ 128,
470
+ 704,
471
+ 44
472
+ ],
473
+ "bits": 0,
474
+ "group_size": 64,
475
+ "offset": 222035968,
476
+ "nbytes": 7929856
477
+ },
478
+ {
479
+ "name": "language_model.model.layers.3.experts.switch_glu.up_proj.scales",
480
+ "dtype": "bfloat16",
481
+ "shape": [
482
+ 128,
483
+ 704,
484
+ 44
485
+ ],
486
+ "bits": 0,
487
+ "group_size": 64,
488
+ "offset": 229965824,
489
+ "nbytes": 7929856
490
+ },
491
+ {
492
+ "name": "language_model.model.layers.3.experts.switch_glu.up_proj.weight",
493
+ "dtype": "uint32",
494
+ "shape": [
495
+ 128,
496
+ 704,
497
+ 264
498
+ ],
499
+ "bits": 3,
500
+ "group_size": 64,
501
+ "offset": 237895680,
502
+ "nbytes": 95158272
503
+ }
504
+ ]
505
+ },
506
+ {
507
+ "layer": 4,
508
+ "file": "layer_04.bin",
509
+ "file_size": 333053952,
510
+ "tensors": [
511
+ {
512
+ "name": "language_model.model.layers.4.experts.switch_glu.down_proj.biases",
513
+ "dtype": "bfloat16",
514
+ "shape": [
515
+ 128,
516
+ 2816,
517
+ 11
518
+ ],
519
+ "bits": 0,
520
+ "group_size": 64,
521
+ "offset": 0,
522
+ "nbytes": 7929856
523
+ },
524
+ {
525
+ "name": "language_model.model.layers.4.experts.switch_glu.down_proj.scales",
526
+ "dtype": "bfloat16",
527
+ "shape": [
528
+ 128,
529
+ 2816,
530
+ 11
531
+ ],
532
+ "bits": 0,
533
+ "group_size": 64,
534
+ "offset": 7929856,
535
+ "nbytes": 7929856
536
+ },
537
+ {
538
+ "name": "language_model.model.layers.4.experts.switch_glu.down_proj.weight",
539
+ "dtype": "uint32",
540
+ "shape": [
541
+ 128,
542
+ 2816,
543
+ 66
544
+ ],
545
+ "bits": 3,
546
+ "group_size": 64,
547
+ "offset": 15859712,
548
+ "nbytes": 95158272
549
+ },
550
+ {
551
+ "name": "language_model.model.layers.4.experts.switch_glu.gate_proj.biases",
552
+ "dtype": "bfloat16",
553
+ "shape": [
554
+ 128,
555
+ 704,
556
+ 44
557
+ ],
558
+ "bits": 0,
559
+ "group_size": 64,
560
+ "offset": 111017984,
561
+ "nbytes": 7929856
562
+ },
563
+ {
564
+ "name": "language_model.model.layers.4.experts.switch_glu.gate_proj.scales",
565
+ "dtype": "bfloat16",
566
+ "shape": [
567
+ 128,
568
+ 704,
569
+ 44
570
+ ],
571
+ "bits": 0,
572
+ "group_size": 64,
573
+ "offset": 118947840,
574
+ "nbytes": 7929856
575
+ },
576
+ {
577
+ "name": "language_model.model.layers.4.experts.switch_glu.gate_proj.weight",
578
+ "dtype": "uint32",
579
+ "shape": [
580
+ 128,
581
+ 704,
582
+ 264
583
+ ],
584
+ "bits": 3,
585
+ "group_size": 64,
586
+ "offset": 126877696,
587
+ "nbytes": 95158272
588
+ },
589
+ {
590
+ "name": "language_model.model.layers.4.experts.switch_glu.up_proj.biases",
591
+ "dtype": "bfloat16",
592
+ "shape": [
593
+ 128,
594
+ 704,
595
+ 44
596
+ ],
597
+ "bits": 0,
598
+ "group_size": 64,
599
+ "offset": 222035968,
600
+ "nbytes": 7929856
601
+ },
602
+ {
603
+ "name": "language_model.model.layers.4.experts.switch_glu.up_proj.scales",
604
+ "dtype": "bfloat16",
605
+ "shape": [
606
+ 128,
607
+ 704,
608
+ 44
609
+ ],
610
+ "bits": 0,
611
+ "group_size": 64,
612
+ "offset": 229965824,
613
+ "nbytes": 7929856
614
+ },
615
+ {
616
+ "name": "language_model.model.layers.4.experts.switch_glu.up_proj.weight",
617
+ "dtype": "uint32",
618
+ "shape": [
619
+ 128,
620
+ 704,
621
+ 264
622
+ ],
623
+ "bits": 3,
624
+ "group_size": 64,
625
+ "offset": 237895680,
626
+ "nbytes": 95158272
627
+ }
628
+ ]
629
+ },
630
+ {
631
+ "layer": 5,
632
+ "file": "layer_05.bin",
633
+ "file_size": 333053952,
634
+ "tensors": [
635
+ {
636
+ "name": "language_model.model.layers.5.experts.switch_glu.down_proj.biases",
637
+ "dtype": "bfloat16",
638
+ "shape": [
639
+ 128,
640
+ 2816,
641
+ 11
642
+ ],
643
+ "bits": 0,
644
+ "group_size": 64,
645
+ "offset": 0,
646
+ "nbytes": 7929856
647
+ },
648
+ {
649
+ "name": "language_model.model.layers.5.experts.switch_glu.down_proj.scales",
650
+ "dtype": "bfloat16",
651
+ "shape": [
652
+ 128,
653
+ 2816,
654
+ 11
655
+ ],
656
+ "bits": 0,
657
+ "group_size": 64,
658
+ "offset": 7929856,
659
+ "nbytes": 7929856
660
+ },
661
+ {
662
+ "name": "language_model.model.layers.5.experts.switch_glu.down_proj.weight",
663
+ "dtype": "uint32",
664
+ "shape": [
665
+ 128,
666
+ 2816,
667
+ 66
668
+ ],
669
+ "bits": 3,
670
+ "group_size": 64,
671
+ "offset": 15859712,
672
+ "nbytes": 95158272
673
+ },
674
+ {
675
+ "name": "language_model.model.layers.5.experts.switch_glu.gate_proj.biases",
676
+ "dtype": "bfloat16",
677
+ "shape": [
678
+ 128,
679
+ 704,
680
+ 44
681
+ ],
682
+ "bits": 0,
683
+ "group_size": 64,
684
+ "offset": 111017984,
685
+ "nbytes": 7929856
686
+ },
687
+ {
688
+ "name": "language_model.model.layers.5.experts.switch_glu.gate_proj.scales",
689
+ "dtype": "bfloat16",
690
+ "shape": [
691
+ 128,
692
+ 704,
693
+ 44
694
+ ],
695
+ "bits": 0,
696
+ "group_size": 64,
697
+ "offset": 118947840,
698
+ "nbytes": 7929856
699
+ },
700
+ {
701
+ "name": "language_model.model.layers.5.experts.switch_glu.gate_proj.weight",
702
+ "dtype": "uint32",
703
+ "shape": [
704
+ 128,
705
+ 704,
706
+ 264
707
+ ],
708
+ "bits": 3,
709
+ "group_size": 64,
710
+ "offset": 126877696,
711
+ "nbytes": 95158272
712
+ },
713
+ {
714
+ "name": "language_model.model.layers.5.experts.switch_glu.up_proj.biases",
715
+ "dtype": "bfloat16",
716
+ "shape": [
717
+ 128,
718
+ 704,
719
+ 44
720
+ ],
721
+ "bits": 0,
722
+ "group_size": 64,
723
+ "offset": 222035968,
724
+ "nbytes": 7929856
725
+ },
726
+ {
727
+ "name": "language_model.model.layers.5.experts.switch_glu.up_proj.scales",
728
+ "dtype": "bfloat16",
729
+ "shape": [
730
+ 128,
731
+ 704,
732
+ 44
733
+ ],
734
+ "bits": 0,
735
+ "group_size": 64,
736
+ "offset": 229965824,
737
+ "nbytes": 7929856
738
+ },
739
+ {
740
+ "name": "language_model.model.layers.5.experts.switch_glu.up_proj.weight",
741
+ "dtype": "uint32",
742
+ "shape": [
743
+ 128,
744
+ 704,
745
+ 264
746
+ ],
747
+ "bits": 3,
748
+ "group_size": 64,
749
+ "offset": 237895680,
750
+ "nbytes": 95158272
751
+ }
752
+ ]
753
+ },
754
+ {
755
+ "layer": 6,
756
+ "file": "layer_06.bin",
757
+ "file_size": 333053952,
758
+ "tensors": [
759
+ {
760
+ "name": "language_model.model.layers.6.experts.switch_glu.down_proj.biases",
761
+ "dtype": "bfloat16",
762
+ "shape": [
763
+ 128,
764
+ 2816,
765
+ 11
766
+ ],
767
+ "bits": 0,
768
+ "group_size": 64,
769
+ "offset": 0,
770
+ "nbytes": 7929856
771
+ },
772
+ {
773
+ "name": "language_model.model.layers.6.experts.switch_glu.down_proj.scales",
774
+ "dtype": "bfloat16",
775
+ "shape": [
776
+ 128,
777
+ 2816,
778
+ 11
779
+ ],
780
+ "bits": 0,
781
+ "group_size": 64,
782
+ "offset": 7929856,
783
+ "nbytes": 7929856
784
+ },
785
+ {
786
+ "name": "language_model.model.layers.6.experts.switch_glu.down_proj.weight",
787
+ "dtype": "uint32",
788
+ "shape": [
789
+ 128,
790
+ 2816,
791
+ 66
792
+ ],
793
+ "bits": 3,
794
+ "group_size": 64,
795
+ "offset": 15859712,
796
+ "nbytes": 95158272
797
+ },
798
+ {
799
+ "name": "language_model.model.layers.6.experts.switch_glu.gate_proj.biases",
800
+ "dtype": "bfloat16",
801
+ "shape": [
802
+ 128,
803
+ 704,
804
+ 44
805
+ ],
806
+ "bits": 0,
807
+ "group_size": 64,
808
+ "offset": 111017984,
809
+ "nbytes": 7929856
810
+ },
811
+ {
812
+ "name": "language_model.model.layers.6.experts.switch_glu.gate_proj.scales",
813
+ "dtype": "bfloat16",
814
+ "shape": [
815
+ 128,
816
+ 704,
817
+ 44
818
+ ],
819
+ "bits": 0,
820
+ "group_size": 64,
821
+ "offset": 118947840,
822
+ "nbytes": 7929856
823
+ },
824
+ {
825
+ "name": "language_model.model.layers.6.experts.switch_glu.gate_proj.weight",
826
+ "dtype": "uint32",
827
+ "shape": [
828
+ 128,
829
+ 704,
830
+ 264
831
+ ],
832
+ "bits": 3,
833
+ "group_size": 64,
834
+ "offset": 126877696,
835
+ "nbytes": 95158272
836
+ },
837
+ {
838
+ "name": "language_model.model.layers.6.experts.switch_glu.up_proj.biases",
839
+ "dtype": "bfloat16",
840
+ "shape": [
841
+ 128,
842
+ 704,
843
+ 44
844
+ ],
845
+ "bits": 0,
846
+ "group_size": 64,
847
+ "offset": 222035968,
848
+ "nbytes": 7929856
849
+ },
850
+ {
851
+ "name": "language_model.model.layers.6.experts.switch_glu.up_proj.scales",
852
+ "dtype": "bfloat16",
853
+ "shape": [
854
+ 128,
855
+ 704,
856
+ 44
857
+ ],
858
+ "bits": 0,
859
+ "group_size": 64,
860
+ "offset": 229965824,
861
+ "nbytes": 7929856
862
+ },
863
+ {
864
+ "name": "language_model.model.layers.6.experts.switch_glu.up_proj.weight",
865
+ "dtype": "uint32",
866
+ "shape": [
867
+ 128,
868
+ 704,
869
+ 264
870
+ ],
871
+ "bits": 3,
872
+ "group_size": 64,
873
+ "offset": 237895680,
874
+ "nbytes": 95158272
875
+ }
876
+ ]
877
+ },
878
+ {
879
+ "layer": 7,
880
+ "file": "layer_07.bin",
881
+ "file_size": 333053952,
882
+ "tensors": [
883
+ {
884
+ "name": "language_model.model.layers.7.experts.switch_glu.down_proj.biases",
885
+ "dtype": "bfloat16",
886
+ "shape": [
887
+ 128,
888
+ 2816,
889
+ 11
890
+ ],
891
+ "bits": 0,
892
+ "group_size": 64,
893
+ "offset": 0,
894
+ "nbytes": 7929856
895
+ },
896
+ {
897
+ "name": "language_model.model.layers.7.experts.switch_glu.down_proj.scales",
898
+ "dtype": "bfloat16",
899
+ "shape": [
900
+ 128,
901
+ 2816,
902
+ 11
903
+ ],
904
+ "bits": 0,
905
+ "group_size": 64,
906
+ "offset": 7929856,
907
+ "nbytes": 7929856
908
+ },
909
+ {
910
+ "name": "language_model.model.layers.7.experts.switch_glu.down_proj.weight",
911
+ "dtype": "uint32",
912
+ "shape": [
913
+ 128,
914
+ 2816,
915
+ 66
916
+ ],
917
+ "bits": 3,
918
+ "group_size": 64,
919
+ "offset": 15859712,
920
+ "nbytes": 95158272
921
+ },
922
+ {
923
+ "name": "language_model.model.layers.7.experts.switch_glu.gate_proj.biases",
924
+ "dtype": "bfloat16",
925
+ "shape": [
926
+ 128,
927
+ 704,
928
+ 44
929
+ ],
930
+ "bits": 0,
931
+ "group_size": 64,
932
+ "offset": 111017984,
933
+ "nbytes": 7929856
934
+ },
935
+ {
936
+ "name": "language_model.model.layers.7.experts.switch_glu.gate_proj.scales",
937
+ "dtype": "bfloat16",
938
+ "shape": [
939
+ 128,
940
+ 704,
941
+ 44
942
+ ],
943
+ "bits": 0,
944
+ "group_size": 64,
945
+ "offset": 118947840,
946
+ "nbytes": 7929856
947
+ },
948
+ {
949
+ "name": "language_model.model.layers.7.experts.switch_glu.gate_proj.weight",
950
+ "dtype": "uint32",
951
+ "shape": [
952
+ 128,
953
+ 704,
954
+ 264
955
+ ],
956
+ "bits": 3,
957
+ "group_size": 64,
958
+ "offset": 126877696,
959
+ "nbytes": 95158272
960
+ },
961
+ {
962
+ "name": "language_model.model.layers.7.experts.switch_glu.up_proj.biases",
963
+ "dtype": "bfloat16",
964
+ "shape": [
965
+ 128,
966
+ 704,
967
+ 44
968
+ ],
969
+ "bits": 0,
970
+ "group_size": 64,
971
+ "offset": 222035968,
972
+ "nbytes": 7929856
973
+ },
974
+ {
975
+ "name": "language_model.model.layers.7.experts.switch_glu.up_proj.scales",
976
+ "dtype": "bfloat16",
977
+ "shape": [
978
+ 128,
979
+ 704,
980
+ 44
981
+ ],
982
+ "bits": 0,
983
+ "group_size": 64,
984
+ "offset": 229965824,
985
+ "nbytes": 7929856
986
+ },
987
+ {
988
+ "name": "language_model.model.layers.7.experts.switch_glu.up_proj.weight",
989
+ "dtype": "uint32",
990
+ "shape": [
991
+ 128,
992
+ 704,
993
+ 264
994
+ ],
995
+ "bits": 3,
996
+ "group_size": 64,
997
+ "offset": 237895680,
998
+ "nbytes": 95158272
999
+ }
1000
+ ]
1001
+ },
1002
+ {
1003
+ "layer": 8,
1004
+ "file": "layer_08.bin",
1005
+ "file_size": 333053952,
1006
+ "tensors": [
1007
+ {
1008
+ "name": "language_model.model.layers.8.experts.switch_glu.down_proj.biases",
1009
+ "dtype": "bfloat16",
1010
+ "shape": [
1011
+ 128,
1012
+ 2816,
1013
+ 11
1014
+ ],
1015
+ "bits": 0,
1016
+ "group_size": 64,
1017
+ "offset": 0,
1018
+ "nbytes": 7929856
1019
+ },
1020
+ {
1021
+ "name": "language_model.model.layers.8.experts.switch_glu.down_proj.scales",
1022
+ "dtype": "bfloat16",
1023
+ "shape": [
1024
+ 128,
1025
+ 2816,
1026
+ 11
1027
+ ],
1028
+ "bits": 0,
1029
+ "group_size": 64,
1030
+ "offset": 7929856,
1031
+ "nbytes": 7929856
1032
+ },
1033
+ {
1034
+ "name": "language_model.model.layers.8.experts.switch_glu.down_proj.weight",
1035
+ "dtype": "uint32",
1036
+ "shape": [
1037
+ 128,
1038
+ 2816,
1039
+ 66
1040
+ ],
1041
+ "bits": 3,
1042
+ "group_size": 64,
1043
+ "offset": 15859712,
1044
+ "nbytes": 95158272
1045
+ },
1046
+ {
1047
+ "name": "language_model.model.layers.8.experts.switch_glu.gate_proj.biases",
1048
+ "dtype": "bfloat16",
1049
+ "shape": [
1050
+ 128,
1051
+ 704,
1052
+ 44
1053
+ ],
1054
+ "bits": 0,
1055
+ "group_size": 64,
1056
+ "offset": 111017984,
1057
+ "nbytes": 7929856
1058
+ },
1059
+ {
1060
+ "name": "language_model.model.layers.8.experts.switch_glu.gate_proj.scales",
1061
+ "dtype": "bfloat16",
1062
+ "shape": [
1063
+ 128,
1064
+ 704,
1065
+ 44
1066
+ ],
1067
+ "bits": 0,
1068
+ "group_size": 64,
1069
+ "offset": 118947840,
1070
+ "nbytes": 7929856
1071
+ },
1072
+ {
1073
+ "name": "language_model.model.layers.8.experts.switch_glu.gate_proj.weight",
1074
+ "dtype": "uint32",
1075
+ "shape": [
1076
+ 128,
1077
+ 704,
1078
+ 264
1079
+ ],
1080
+ "bits": 3,
1081
+ "group_size": 64,
1082
+ "offset": 126877696,
1083
+ "nbytes": 95158272
1084
+ },
1085
+ {
1086
+ "name": "language_model.model.layers.8.experts.switch_glu.up_proj.biases",
1087
+ "dtype": "bfloat16",
1088
+ "shape": [
1089
+ 128,
1090
+ 704,
1091
+ 44
1092
+ ],
1093
+ "bits": 0,
1094
+ "group_size": 64,
1095
+ "offset": 222035968,
1096
+ "nbytes": 7929856
1097
+ },
1098
+ {
1099
+ "name": "language_model.model.layers.8.experts.switch_glu.up_proj.scales",
1100
+ "dtype": "bfloat16",
1101
+ "shape": [
1102
+ 128,
1103
+ 704,
1104
+ 44
1105
+ ],
1106
+ "bits": 0,
1107
+ "group_size": 64,
1108
+ "offset": 229965824,
1109
+ "nbytes": 7929856
1110
+ },
1111
+ {
1112
+ "name": "language_model.model.layers.8.experts.switch_glu.up_proj.weight",
1113
+ "dtype": "uint32",
1114
+ "shape": [
1115
+ 128,
1116
+ 704,
1117
+ 264
1118
+ ],
1119
+ "bits": 3,
1120
+ "group_size": 64,
1121
+ "offset": 237895680,
1122
+ "nbytes": 95158272
1123
+ }
1124
+ ]
1125
+ },
1126
+ {
1127
+ "layer": 9,
1128
+ "file": "layer_09.bin",
1129
+ "file_size": 333053952,
1130
+ "tensors": [
1131
+ {
1132
+ "name": "language_model.model.layers.9.experts.switch_glu.down_proj.biases",
1133
+ "dtype": "bfloat16",
1134
+ "shape": [
1135
+ 128,
1136
+ 2816,
1137
+ 11
1138
+ ],
1139
+ "bits": 0,
1140
+ "group_size": 64,
1141
+ "offset": 0,
1142
+ "nbytes": 7929856
1143
+ },
1144
+ {
1145
+ "name": "language_model.model.layers.9.experts.switch_glu.down_proj.scales",
1146
+ "dtype": "bfloat16",
1147
+ "shape": [
1148
+ 128,
1149
+ 2816,
1150
+ 11
1151
+ ],
1152
+ "bits": 0,
1153
+ "group_size": 64,
1154
+ "offset": 7929856,
1155
+ "nbytes": 7929856
1156
+ },
1157
+ {
1158
+ "name": "language_model.model.layers.9.experts.switch_glu.down_proj.weight",
1159
+ "dtype": "uint32",
1160
+ "shape": [
1161
+ 128,
1162
+ 2816,
1163
+ 66
1164
+ ],
1165
+ "bits": 3,
1166
+ "group_size": 64,
1167
+ "offset": 15859712,
1168
+ "nbytes": 95158272
1169
+ },
1170
+ {
1171
+ "name": "language_model.model.layers.9.experts.switch_glu.gate_proj.biases",
1172
+ "dtype": "bfloat16",
1173
+ "shape": [
1174
+ 128,
1175
+ 704,
1176
+ 44
1177
+ ],
1178
+ "bits": 0,
1179
+ "group_size": 64,
1180
+ "offset": 111017984,
1181
+ "nbytes": 7929856
1182
+ },
1183
+ {
1184
+ "name": "language_model.model.layers.9.experts.switch_glu.gate_proj.scales",
1185
+ "dtype": "bfloat16",
1186
+ "shape": [
1187
+ 128,
1188
+ 704,
1189
+ 44
1190
+ ],
1191
+ "bits": 0,
1192
+ "group_size": 64,
1193
+ "offset": 118947840,
1194
+ "nbytes": 7929856
1195
+ },
1196
+ {
1197
+ "name": "language_model.model.layers.9.experts.switch_glu.gate_proj.weight",
1198
+ "dtype": "uint32",
1199
+ "shape": [
1200
+ 128,
1201
+ 704,
1202
+ 264
1203
+ ],
1204
+ "bits": 3,
1205
+ "group_size": 64,
1206
+ "offset": 126877696,
1207
+ "nbytes": 95158272
1208
+ },
1209
+ {
1210
+ "name": "language_model.model.layers.9.experts.switch_glu.up_proj.biases",
1211
+ "dtype": "bfloat16",
1212
+ "shape": [
1213
+ 128,
1214
+ 704,
1215
+ 44
1216
+ ],
1217
+ "bits": 0,
1218
+ "group_size": 64,
1219
+ "offset": 222035968,
1220
+ "nbytes": 7929856
1221
+ },
1222
+ {
1223
+ "name": "language_model.model.layers.9.experts.switch_glu.up_proj.scales",
1224
+ "dtype": "bfloat16",
1225
+ "shape": [
1226
+ 128,
1227
+ 704,
1228
+ 44
1229
+ ],
1230
+ "bits": 0,
1231
+ "group_size": 64,
1232
+ "offset": 229965824,
1233
+ "nbytes": 7929856
1234
+ },
1235
+ {
1236
+ "name": "language_model.model.layers.9.experts.switch_glu.up_proj.weight",
1237
+ "dtype": "uint32",
1238
+ "shape": [
1239
+ 128,
1240
+ 704,
1241
+ 264
1242
+ ],
1243
+ "bits": 3,
1244
+ "group_size": 64,
1245
+ "offset": 237895680,
1246
+ "nbytes": 95158272
1247
+ }
1248
+ ]
1249
+ },
1250
+ {
1251
+ "layer": 10,
1252
+ "file": "layer_10.bin",
1253
+ "file_size": 333053952,
1254
+ "tensors": [
1255
+ {
1256
+ "name": "language_model.model.layers.10.experts.switch_glu.down_proj.biases",
1257
+ "dtype": "bfloat16",
1258
+ "shape": [
1259
+ 128,
1260
+ 2816,
1261
+ 11
1262
+ ],
1263
+ "bits": 0,
1264
+ "group_size": 64,
1265
+ "offset": 0,
1266
+ "nbytes": 7929856
1267
+ },
1268
+ {
1269
+ "name": "language_model.model.layers.10.experts.switch_glu.down_proj.scales",
1270
+ "dtype": "bfloat16",
1271
+ "shape": [
1272
+ 128,
1273
+ 2816,
1274
+ 11
1275
+ ],
1276
+ "bits": 0,
1277
+ "group_size": 64,
1278
+ "offset": 7929856,
1279
+ "nbytes": 7929856
1280
+ },
1281
+ {
1282
+ "name": "language_model.model.layers.10.experts.switch_glu.down_proj.weight",
1283
+ "dtype": "uint32",
1284
+ "shape": [
1285
+ 128,
1286
+ 2816,
1287
+ 66
1288
+ ],
1289
+ "bits": 3,
1290
+ "group_size": 64,
1291
+ "offset": 15859712,
1292
+ "nbytes": 95158272
1293
+ },
1294
+ {
1295
+ "name": "language_model.model.layers.10.experts.switch_glu.gate_proj.biases",
1296
+ "dtype": "bfloat16",
1297
+ "shape": [
1298
+ 128,
1299
+ 704,
1300
+ 44
1301
+ ],
1302
+ "bits": 0,
1303
+ "group_size": 64,
1304
+ "offset": 111017984,
1305
+ "nbytes": 7929856
1306
+ },
1307
+ {
1308
+ "name": "language_model.model.layers.10.experts.switch_glu.gate_proj.scales",
1309
+ "dtype": "bfloat16",
1310
+ "shape": [
1311
+ 128,
1312
+ 704,
1313
+ 44
1314
+ ],
1315
+ "bits": 0,
1316
+ "group_size": 64,
1317
+ "offset": 118947840,
1318
+ "nbytes": 7929856
1319
+ },
1320
+ {
1321
+ "name": "language_model.model.layers.10.experts.switch_glu.gate_proj.weight",
1322
+ "dtype": "uint32",
1323
+ "shape": [
1324
+ 128,
1325
+ 704,
1326
+ 264
1327
+ ],
1328
+ "bits": 3,
1329
+ "group_size": 64,
1330
+ "offset": 126877696,
1331
+ "nbytes": 95158272
1332
+ },
1333
+ {
1334
+ "name": "language_model.model.layers.10.experts.switch_glu.up_proj.biases",
1335
+ "dtype": "bfloat16",
1336
+ "shape": [
1337
+ 128,
1338
+ 704,
1339
+ 44
1340
+ ],
1341
+ "bits": 0,
1342
+ "group_size": 64,
1343
+ "offset": 222035968,
1344
+ "nbytes": 7929856
1345
+ },
1346
+ {
1347
+ "name": "language_model.model.layers.10.experts.switch_glu.up_proj.scales",
1348
+ "dtype": "bfloat16",
1349
+ "shape": [
1350
+ 128,
1351
+ 704,
1352
+ 44
1353
+ ],
1354
+ "bits": 0,
1355
+ "group_size": 64,
1356
+ "offset": 229965824,
1357
+ "nbytes": 7929856
1358
+ },
1359
+ {
1360
+ "name": "language_model.model.layers.10.experts.switch_glu.up_proj.weight",
1361
+ "dtype": "uint32",
1362
+ "shape": [
1363
+ 128,
1364
+ 704,
1365
+ 264
1366
+ ],
1367
+ "bits": 3,
1368
+ "group_size": 64,
1369
+ "offset": 237895680,
1370
+ "nbytes": 95158272
1371
+ }
1372
+ ]
1373
+ },
1374
+ {
1375
+ "layer": 11,
1376
+ "file": "layer_11.bin",
1377
+ "file_size": 333053952,
1378
+ "tensors": [
1379
+ {
1380
+ "name": "language_model.model.layers.11.experts.switch_glu.down_proj.biases",
1381
+ "dtype": "bfloat16",
1382
+ "shape": [
1383
+ 128,
1384
+ 2816,
1385
+ 11
1386
+ ],
1387
+ "bits": 0,
1388
+ "group_size": 64,
1389
+ "offset": 0,
1390
+ "nbytes": 7929856
1391
+ },
1392
+ {
1393
+ "name": "language_model.model.layers.11.experts.switch_glu.down_proj.scales",
1394
+ "dtype": "bfloat16",
1395
+ "shape": [
1396
+ 128,
1397
+ 2816,
1398
+ 11
1399
+ ],
1400
+ "bits": 0,
1401
+ "group_size": 64,
1402
+ "offset": 7929856,
1403
+ "nbytes": 7929856
1404
+ },
1405
+ {
1406
+ "name": "language_model.model.layers.11.experts.switch_glu.down_proj.weight",
1407
+ "dtype": "uint32",
1408
+ "shape": [
1409
+ 128,
1410
+ 2816,
1411
+ 66
1412
+ ],
1413
+ "bits": 3,
1414
+ "group_size": 64,
1415
+ "offset": 15859712,
1416
+ "nbytes": 95158272
1417
+ },
1418
+ {
1419
+ "name": "language_model.model.layers.11.experts.switch_glu.gate_proj.biases",
1420
+ "dtype": "bfloat16",
1421
+ "shape": [
1422
+ 128,
1423
+ 704,
1424
+ 44
1425
+ ],
1426
+ "bits": 0,
1427
+ "group_size": 64,
1428
+ "offset": 111017984,
1429
+ "nbytes": 7929856
1430
+ },
1431
+ {
1432
+ "name": "language_model.model.layers.11.experts.switch_glu.gate_proj.scales",
1433
+ "dtype": "bfloat16",
1434
+ "shape": [
1435
+ 128,
1436
+ 704,
1437
+ 44
1438
+ ],
1439
+ "bits": 0,
1440
+ "group_size": 64,
1441
+ "offset": 118947840,
1442
+ "nbytes": 7929856
1443
+ },
1444
+ {
1445
+ "name": "language_model.model.layers.11.experts.switch_glu.gate_proj.weight",
1446
+ "dtype": "uint32",
1447
+ "shape": [
1448
+ 128,
1449
+ 704,
1450
+ 264
1451
+ ],
1452
+ "bits": 3,
1453
+ "group_size": 64,
1454
+ "offset": 126877696,
1455
+ "nbytes": 95158272
1456
+ },
1457
+ {
1458
+ "name": "language_model.model.layers.11.experts.switch_glu.up_proj.biases",
1459
+ "dtype": "bfloat16",
1460
+ "shape": [
1461
+ 128,
1462
+ 704,
1463
+ 44
1464
+ ],
1465
+ "bits": 0,
1466
+ "group_size": 64,
1467
+ "offset": 222035968,
1468
+ "nbytes": 7929856
1469
+ },
1470
+ {
1471
+ "name": "language_model.model.layers.11.experts.switch_glu.up_proj.scales",
1472
+ "dtype": "bfloat16",
1473
+ "shape": [
1474
+ 128,
1475
+ 704,
1476
+ 44
1477
+ ],
1478
+ "bits": 0,
1479
+ "group_size": 64,
1480
+ "offset": 229965824,
1481
+ "nbytes": 7929856
1482
+ },
1483
+ {
1484
+ "name": "language_model.model.layers.11.experts.switch_glu.up_proj.weight",
1485
+ "dtype": "uint32",
1486
+ "shape": [
1487
+ 128,
1488
+ 704,
1489
+ 264
1490
+ ],
1491
+ "bits": 3,
1492
+ "group_size": 64,
1493
+ "offset": 237895680,
1494
+ "nbytes": 95158272
1495
+ }
1496
+ ]
1497
+ },
1498
+ {
1499
+ "layer": 12,
1500
+ "file": "layer_12.bin",
1501
+ "file_size": 333053952,
1502
+ "tensors": [
1503
+ {
1504
+ "name": "language_model.model.layers.12.experts.switch_glu.down_proj.biases",
1505
+ "dtype": "bfloat16",
1506
+ "shape": [
1507
+ 128,
1508
+ 2816,
1509
+ 11
1510
+ ],
1511
+ "bits": 0,
1512
+ "group_size": 64,
1513
+ "offset": 0,
1514
+ "nbytes": 7929856
1515
+ },
1516
+ {
1517
+ "name": "language_model.model.layers.12.experts.switch_glu.down_proj.scales",
1518
+ "dtype": "bfloat16",
1519
+ "shape": [
1520
+ 128,
1521
+ 2816,
1522
+ 11
1523
+ ],
1524
+ "bits": 0,
1525
+ "group_size": 64,
1526
+ "offset": 7929856,
1527
+ "nbytes": 7929856
1528
+ },
1529
+ {
1530
+ "name": "language_model.model.layers.12.experts.switch_glu.down_proj.weight",
1531
+ "dtype": "uint32",
1532
+ "shape": [
1533
+ 128,
1534
+ 2816,
1535
+ 66
1536
+ ],
1537
+ "bits": 3,
1538
+ "group_size": 64,
1539
+ "offset": 15859712,
1540
+ "nbytes": 95158272
1541
+ },
1542
+ {
1543
+ "name": "language_model.model.layers.12.experts.switch_glu.gate_proj.biases",
1544
+ "dtype": "bfloat16",
1545
+ "shape": [
1546
+ 128,
1547
+ 704,
1548
+ 44
1549
+ ],
1550
+ "bits": 0,
1551
+ "group_size": 64,
1552
+ "offset": 111017984,
1553
+ "nbytes": 7929856
1554
+ },
1555
+ {
1556
+ "name": "language_model.model.layers.12.experts.switch_glu.gate_proj.scales",
1557
+ "dtype": "bfloat16",
1558
+ "shape": [
1559
+ 128,
1560
+ 704,
1561
+ 44
1562
+ ],
1563
+ "bits": 0,
1564
+ "group_size": 64,
1565
+ "offset": 118947840,
1566
+ "nbytes": 7929856
1567
+ },
1568
+ {
1569
+ "name": "language_model.model.layers.12.experts.switch_glu.gate_proj.weight",
1570
+ "dtype": "uint32",
1571
+ "shape": [
1572
+ 128,
1573
+ 704,
1574
+ 264
1575
+ ],
1576
+ "bits": 3,
1577
+ "group_size": 64,
1578
+ "offset": 126877696,
1579
+ "nbytes": 95158272
1580
+ },
1581
+ {
1582
+ "name": "language_model.model.layers.12.experts.switch_glu.up_proj.biases",
1583
+ "dtype": "bfloat16",
1584
+ "shape": [
1585
+ 128,
1586
+ 704,
1587
+ 44
1588
+ ],
1589
+ "bits": 0,
1590
+ "group_size": 64,
1591
+ "offset": 222035968,
1592
+ "nbytes": 7929856
1593
+ },
1594
+ {
1595
+ "name": "language_model.model.layers.12.experts.switch_glu.up_proj.scales",
1596
+ "dtype": "bfloat16",
1597
+ "shape": [
1598
+ 128,
1599
+ 704,
1600
+ 44
1601
+ ],
1602
+ "bits": 0,
1603
+ "group_size": 64,
1604
+ "offset": 229965824,
1605
+ "nbytes": 7929856
1606
+ },
1607
+ {
1608
+ "name": "language_model.model.layers.12.experts.switch_glu.up_proj.weight",
1609
+ "dtype": "uint32",
1610
+ "shape": [
1611
+ 128,
1612
+ 704,
1613
+ 264
1614
+ ],
1615
+ "bits": 3,
1616
+ "group_size": 64,
1617
+ "offset": 237895680,
1618
+ "nbytes": 95158272
1619
+ }
1620
+ ]
1621
+ },
1622
+ {
1623
+ "layer": 13,
1624
+ "file": "layer_13.bin",
1625
+ "file_size": 333053952,
1626
+ "tensors": [
1627
+ {
1628
+ "name": "language_model.model.layers.13.experts.switch_glu.down_proj.biases",
1629
+ "dtype": "bfloat16",
1630
+ "shape": [
1631
+ 128,
1632
+ 2816,
1633
+ 11
1634
+ ],
1635
+ "bits": 0,
1636
+ "group_size": 64,
1637
+ "offset": 0,
1638
+ "nbytes": 7929856
1639
+ },
1640
+ {
1641
+ "name": "language_model.model.layers.13.experts.switch_glu.down_proj.scales",
1642
+ "dtype": "bfloat16",
1643
+ "shape": [
1644
+ 128,
1645
+ 2816,
1646
+ 11
1647
+ ],
1648
+ "bits": 0,
1649
+ "group_size": 64,
1650
+ "offset": 7929856,
1651
+ "nbytes": 7929856
1652
+ },
1653
+ {
1654
+ "name": "language_model.model.layers.13.experts.switch_glu.down_proj.weight",
1655
+ "dtype": "uint32",
1656
+ "shape": [
1657
+ 128,
1658
+ 2816,
1659
+ 66
1660
+ ],
1661
+ "bits": 3,
1662
+ "group_size": 64,
1663
+ "offset": 15859712,
1664
+ "nbytes": 95158272
1665
+ },
1666
+ {
1667
+ "name": "language_model.model.layers.13.experts.switch_glu.gate_proj.biases",
1668
+ "dtype": "bfloat16",
1669
+ "shape": [
1670
+ 128,
1671
+ 704,
1672
+ 44
1673
+ ],
1674
+ "bits": 0,
1675
+ "group_size": 64,
1676
+ "offset": 111017984,
1677
+ "nbytes": 7929856
1678
+ },
1679
+ {
1680
+ "name": "language_model.model.layers.13.experts.switch_glu.gate_proj.scales",
1681
+ "dtype": "bfloat16",
1682
+ "shape": [
1683
+ 128,
1684
+ 704,
1685
+ 44
1686
+ ],
1687
+ "bits": 0,
1688
+ "group_size": 64,
1689
+ "offset": 118947840,
1690
+ "nbytes": 7929856
1691
+ },
1692
+ {
1693
+ "name": "language_model.model.layers.13.experts.switch_glu.gate_proj.weight",
1694
+ "dtype": "uint32",
1695
+ "shape": [
1696
+ 128,
1697
+ 704,
1698
+ 264
1699
+ ],
1700
+ "bits": 3,
1701
+ "group_size": 64,
1702
+ "offset": 126877696,
1703
+ "nbytes": 95158272
1704
+ },
1705
+ {
1706
+ "name": "language_model.model.layers.13.experts.switch_glu.up_proj.biases",
1707
+ "dtype": "bfloat16",
1708
+ "shape": [
1709
+ 128,
1710
+ 704,
1711
+ 44
1712
+ ],
1713
+ "bits": 0,
1714
+ "group_size": 64,
1715
+ "offset": 222035968,
1716
+ "nbytes": 7929856
1717
+ },
1718
+ {
1719
+ "name": "language_model.model.layers.13.experts.switch_glu.up_proj.scales",
1720
+ "dtype": "bfloat16",
1721
+ "shape": [
1722
+ 128,
1723
+ 704,
1724
+ 44
1725
+ ],
1726
+ "bits": 0,
1727
+ "group_size": 64,
1728
+ "offset": 229965824,
1729
+ "nbytes": 7929856
1730
+ },
1731
+ {
1732
+ "name": "language_model.model.layers.13.experts.switch_glu.up_proj.weight",
1733
+ "dtype": "uint32",
1734
+ "shape": [
1735
+ 128,
1736
+ 704,
1737
+ 264
1738
+ ],
1739
+ "bits": 3,
1740
+ "group_size": 64,
1741
+ "offset": 237895680,
1742
+ "nbytes": 95158272
1743
+ }
1744
+ ]
1745
+ },
1746
+ {
1747
+ "layer": 14,
1748
+ "file": "layer_14.bin",
1749
+ "file_size": 333053952,
1750
+ "tensors": [
1751
+ {
1752
+ "name": "language_model.model.layers.14.experts.switch_glu.down_proj.biases",
1753
+ "dtype": "bfloat16",
1754
+ "shape": [
1755
+ 128,
1756
+ 2816,
1757
+ 11
1758
+ ],
1759
+ "bits": 0,
1760
+ "group_size": 64,
1761
+ "offset": 0,
1762
+ "nbytes": 7929856
1763
+ },
1764
+ {
1765
+ "name": "language_model.model.layers.14.experts.switch_glu.down_proj.scales",
1766
+ "dtype": "bfloat16",
1767
+ "shape": [
1768
+ 128,
1769
+ 2816,
1770
+ 11
1771
+ ],
1772
+ "bits": 0,
1773
+ "group_size": 64,
1774
+ "offset": 7929856,
1775
+ "nbytes": 7929856
1776
+ },
1777
+ {
1778
+ "name": "language_model.model.layers.14.experts.switch_glu.down_proj.weight",
1779
+ "dtype": "uint32",
1780
+ "shape": [
1781
+ 128,
1782
+ 2816,
1783
+ 66
1784
+ ],
1785
+ "bits": 3,
1786
+ "group_size": 64,
1787
+ "offset": 15859712,
1788
+ "nbytes": 95158272
1789
+ },
1790
+ {
1791
+ "name": "language_model.model.layers.14.experts.switch_glu.gate_proj.biases",
1792
+ "dtype": "bfloat16",
1793
+ "shape": [
1794
+ 128,
1795
+ 704,
1796
+ 44
1797
+ ],
1798
+ "bits": 0,
1799
+ "group_size": 64,
1800
+ "offset": 111017984,
1801
+ "nbytes": 7929856
1802
+ },
1803
+ {
1804
+ "name": "language_model.model.layers.14.experts.switch_glu.gate_proj.scales",
1805
+ "dtype": "bfloat16",
1806
+ "shape": [
1807
+ 128,
1808
+ 704,
1809
+ 44
1810
+ ],
1811
+ "bits": 0,
1812
+ "group_size": 64,
1813
+ "offset": 118947840,
1814
+ "nbytes": 7929856
1815
+ },
1816
+ {
1817
+ "name": "language_model.model.layers.14.experts.switch_glu.gate_proj.weight",
1818
+ "dtype": "uint32",
1819
+ "shape": [
1820
+ 128,
1821
+ 704,
1822
+ 264
1823
+ ],
1824
+ "bits": 3,
1825
+ "group_size": 64,
1826
+ "offset": 126877696,
1827
+ "nbytes": 95158272
1828
+ },
1829
+ {
1830
+ "name": "language_model.model.layers.14.experts.switch_glu.up_proj.biases",
1831
+ "dtype": "bfloat16",
1832
+ "shape": [
1833
+ 128,
1834
+ 704,
1835
+ 44
1836
+ ],
1837
+ "bits": 0,
1838
+ "group_size": 64,
1839
+ "offset": 222035968,
1840
+ "nbytes": 7929856
1841
+ },
1842
+ {
1843
+ "name": "language_model.model.layers.14.experts.switch_glu.up_proj.scales",
1844
+ "dtype": "bfloat16",
1845
+ "shape": [
1846
+ 128,
1847
+ 704,
1848
+ 44
1849
+ ],
1850
+ "bits": 0,
1851
+ "group_size": 64,
1852
+ "offset": 229965824,
1853
+ "nbytes": 7929856
1854
+ },
1855
+ {
1856
+ "name": "language_model.model.layers.14.experts.switch_glu.up_proj.weight",
1857
+ "dtype": "uint32",
1858
+ "shape": [
1859
+ 128,
1860
+ 704,
1861
+ 264
1862
+ ],
1863
+ "bits": 3,
1864
+ "group_size": 64,
1865
+ "offset": 237895680,
1866
+ "nbytes": 95158272
1867
+ }
1868
+ ]
1869
+ },
1870
+ {
1871
+ "layer": 15,
1872
+ "file": "layer_15.bin",
1873
+ "file_size": 333053952,
1874
+ "tensors": [
1875
+ {
1876
+ "name": "language_model.model.layers.15.experts.switch_glu.down_proj.biases",
1877
+ "dtype": "bfloat16",
1878
+ "shape": [
1879
+ 128,
1880
+ 2816,
1881
+ 11
1882
+ ],
1883
+ "bits": 0,
1884
+ "group_size": 64,
1885
+ "offset": 0,
1886
+ "nbytes": 7929856
1887
+ },
1888
+ {
1889
+ "name": "language_model.model.layers.15.experts.switch_glu.down_proj.scales",
1890
+ "dtype": "bfloat16",
1891
+ "shape": [
1892
+ 128,
1893
+ 2816,
1894
+ 11
1895
+ ],
1896
+ "bits": 0,
1897
+ "group_size": 64,
1898
+ "offset": 7929856,
1899
+ "nbytes": 7929856
1900
+ },
1901
+ {
1902
+ "name": "language_model.model.layers.15.experts.switch_glu.down_proj.weight",
1903
+ "dtype": "uint32",
1904
+ "shape": [
1905
+ 128,
1906
+ 2816,
1907
+ 66
1908
+ ],
1909
+ "bits": 3,
1910
+ "group_size": 64,
1911
+ "offset": 15859712,
1912
+ "nbytes": 95158272
1913
+ },
1914
+ {
1915
+ "name": "language_model.model.layers.15.experts.switch_glu.gate_proj.biases",
1916
+ "dtype": "bfloat16",
1917
+ "shape": [
1918
+ 128,
1919
+ 704,
1920
+ 44
1921
+ ],
1922
+ "bits": 0,
1923
+ "group_size": 64,
1924
+ "offset": 111017984,
1925
+ "nbytes": 7929856
1926
+ },
1927
+ {
1928
+ "name": "language_model.model.layers.15.experts.switch_glu.gate_proj.scales",
1929
+ "dtype": "bfloat16",
1930
+ "shape": [
1931
+ 128,
1932
+ 704,
1933
+ 44
1934
+ ],
1935
+ "bits": 0,
1936
+ "group_size": 64,
1937
+ "offset": 118947840,
1938
+ "nbytes": 7929856
1939
+ },
1940
+ {
1941
+ "name": "language_model.model.layers.15.experts.switch_glu.gate_proj.weight",
1942
+ "dtype": "uint32",
1943
+ "shape": [
1944
+ 128,
1945
+ 704,
1946
+ 264
1947
+ ],
1948
+ "bits": 3,
1949
+ "group_size": 64,
1950
+ "offset": 126877696,
1951
+ "nbytes": 95158272
1952
+ },
1953
+ {
1954
+ "name": "language_model.model.layers.15.experts.switch_glu.up_proj.biases",
1955
+ "dtype": "bfloat16",
1956
+ "shape": [
1957
+ 128,
1958
+ 704,
1959
+ 44
1960
+ ],
1961
+ "bits": 0,
1962
+ "group_size": 64,
1963
+ "offset": 222035968,
1964
+ "nbytes": 7929856
1965
+ },
1966
+ {
1967
+ "name": "language_model.model.layers.15.experts.switch_glu.up_proj.scales",
1968
+ "dtype": "bfloat16",
1969
+ "shape": [
1970
+ 128,
1971
+ 704,
1972
+ 44
1973
+ ],
1974
+ "bits": 0,
1975
+ "group_size": 64,
1976
+ "offset": 229965824,
1977
+ "nbytes": 7929856
1978
+ },
1979
+ {
1980
+ "name": "language_model.model.layers.15.experts.switch_glu.up_proj.weight",
1981
+ "dtype": "uint32",
1982
+ "shape": [
1983
+ 128,
1984
+ 704,
1985
+ 264
1986
+ ],
1987
+ "bits": 3,
1988
+ "group_size": 64,
1989
+ "offset": 237895680,
1990
+ "nbytes": 95158272
1991
+ }
1992
+ ]
1993
+ },
1994
+ {
1995
+ "layer": 16,
1996
+ "file": "layer_16.bin",
1997
+ "file_size": 333053952,
1998
+ "tensors": [
1999
+ {
2000
+ "name": "language_model.model.layers.16.experts.switch_glu.down_proj.biases",
2001
+ "dtype": "bfloat16",
2002
+ "shape": [
2003
+ 128,
2004
+ 2816,
2005
+ 11
2006
+ ],
2007
+ "bits": 0,
2008
+ "group_size": 64,
2009
+ "offset": 0,
2010
+ "nbytes": 7929856
2011
+ },
2012
+ {
2013
+ "name": "language_model.model.layers.16.experts.switch_glu.down_proj.scales",
2014
+ "dtype": "bfloat16",
2015
+ "shape": [
2016
+ 128,
2017
+ 2816,
2018
+ 11
2019
+ ],
2020
+ "bits": 0,
2021
+ "group_size": 64,
2022
+ "offset": 7929856,
2023
+ "nbytes": 7929856
2024
+ },
2025
+ {
2026
+ "name": "language_model.model.layers.16.experts.switch_glu.down_proj.weight",
2027
+ "dtype": "uint32",
2028
+ "shape": [
2029
+ 128,
2030
+ 2816,
2031
+ 66
2032
+ ],
2033
+ "bits": 3,
2034
+ "group_size": 64,
2035
+ "offset": 15859712,
2036
+ "nbytes": 95158272
2037
+ },
2038
+ {
2039
+ "name": "language_model.model.layers.16.experts.switch_glu.gate_proj.biases",
2040
+ "dtype": "bfloat16",
2041
+ "shape": [
2042
+ 128,
2043
+ 704,
2044
+ 44
2045
+ ],
2046
+ "bits": 0,
2047
+ "group_size": 64,
2048
+ "offset": 111017984,
2049
+ "nbytes": 7929856
2050
+ },
2051
+ {
2052
+ "name": "language_model.model.layers.16.experts.switch_glu.gate_proj.scales",
2053
+ "dtype": "bfloat16",
2054
+ "shape": [
2055
+ 128,
2056
+ 704,
2057
+ 44
2058
+ ],
2059
+ "bits": 0,
2060
+ "group_size": 64,
2061
+ "offset": 118947840,
2062
+ "nbytes": 7929856
2063
+ },
2064
+ {
2065
+ "name": "language_model.model.layers.16.experts.switch_glu.gate_proj.weight",
2066
+ "dtype": "uint32",
2067
+ "shape": [
2068
+ 128,
2069
+ 704,
2070
+ 264
2071
+ ],
2072
+ "bits": 3,
2073
+ "group_size": 64,
2074
+ "offset": 126877696,
2075
+ "nbytes": 95158272
2076
+ },
2077
+ {
2078
+ "name": "language_model.model.layers.16.experts.switch_glu.up_proj.biases",
2079
+ "dtype": "bfloat16",
2080
+ "shape": [
2081
+ 128,
2082
+ 704,
2083
+ 44
2084
+ ],
2085
+ "bits": 0,
2086
+ "group_size": 64,
2087
+ "offset": 222035968,
2088
+ "nbytes": 7929856
2089
+ },
2090
+ {
2091
+ "name": "language_model.model.layers.16.experts.switch_glu.up_proj.scales",
2092
+ "dtype": "bfloat16",
2093
+ "shape": [
2094
+ 128,
2095
+ 704,
2096
+ 44
2097
+ ],
2098
+ "bits": 0,
2099
+ "group_size": 64,
2100
+ "offset": 229965824,
2101
+ "nbytes": 7929856
2102
+ },
2103
+ {
2104
+ "name": "language_model.model.layers.16.experts.switch_glu.up_proj.weight",
2105
+ "dtype": "uint32",
2106
+ "shape": [
2107
+ 128,
2108
+ 704,
2109
+ 264
2110
+ ],
2111
+ "bits": 3,
2112
+ "group_size": 64,
2113
+ "offset": 237895680,
2114
+ "nbytes": 95158272
2115
+ }
2116
+ ]
2117
+ },
2118
+ {
2119
+ "layer": 17,
2120
+ "file": "layer_17.bin",
2121
+ "file_size": 333053952,
2122
+ "tensors": [
2123
+ {
2124
+ "name": "language_model.model.layers.17.experts.switch_glu.down_proj.biases",
2125
+ "dtype": "bfloat16",
2126
+ "shape": [
2127
+ 128,
2128
+ 2816,
2129
+ 11
2130
+ ],
2131
+ "bits": 0,
2132
+ "group_size": 64,
2133
+ "offset": 0,
2134
+ "nbytes": 7929856
2135
+ },
2136
+ {
2137
+ "name": "language_model.model.layers.17.experts.switch_glu.down_proj.scales",
2138
+ "dtype": "bfloat16",
2139
+ "shape": [
2140
+ 128,
2141
+ 2816,
2142
+ 11
2143
+ ],
2144
+ "bits": 0,
2145
+ "group_size": 64,
2146
+ "offset": 7929856,
2147
+ "nbytes": 7929856
2148
+ },
2149
+ {
2150
+ "name": "language_model.model.layers.17.experts.switch_glu.down_proj.weight",
2151
+ "dtype": "uint32",
2152
+ "shape": [
2153
+ 128,
2154
+ 2816,
2155
+ 66
2156
+ ],
2157
+ "bits": 3,
2158
+ "group_size": 64,
2159
+ "offset": 15859712,
2160
+ "nbytes": 95158272
2161
+ },
2162
+ {
2163
+ "name": "language_model.model.layers.17.experts.switch_glu.gate_proj.biases",
2164
+ "dtype": "bfloat16",
2165
+ "shape": [
2166
+ 128,
2167
+ 704,
2168
+ 44
2169
+ ],
2170
+ "bits": 0,
2171
+ "group_size": 64,
2172
+ "offset": 111017984,
2173
+ "nbytes": 7929856
2174
+ },
2175
+ {
2176
+ "name": "language_model.model.layers.17.experts.switch_glu.gate_proj.scales",
2177
+ "dtype": "bfloat16",
2178
+ "shape": [
2179
+ 128,
2180
+ 704,
2181
+ 44
2182
+ ],
2183
+ "bits": 0,
2184
+ "group_size": 64,
2185
+ "offset": 118947840,
2186
+ "nbytes": 7929856
2187
+ },
2188
+ {
2189
+ "name": "language_model.model.layers.17.experts.switch_glu.gate_proj.weight",
2190
+ "dtype": "uint32",
2191
+ "shape": [
2192
+ 128,
2193
+ 704,
2194
+ 264
2195
+ ],
2196
+ "bits": 3,
2197
+ "group_size": 64,
2198
+ "offset": 126877696,
2199
+ "nbytes": 95158272
2200
+ },
2201
+ {
2202
+ "name": "language_model.model.layers.17.experts.switch_glu.up_proj.biases",
2203
+ "dtype": "bfloat16",
2204
+ "shape": [
2205
+ 128,
2206
+ 704,
2207
+ 44
2208
+ ],
2209
+ "bits": 0,
2210
+ "group_size": 64,
2211
+ "offset": 222035968,
2212
+ "nbytes": 7929856
2213
+ },
2214
+ {
2215
+ "name": "language_model.model.layers.17.experts.switch_glu.up_proj.scales",
2216
+ "dtype": "bfloat16",
2217
+ "shape": [
2218
+ 128,
2219
+ 704,
2220
+ 44
2221
+ ],
2222
+ "bits": 0,
2223
+ "group_size": 64,
2224
+ "offset": 229965824,
2225
+ "nbytes": 7929856
2226
+ },
2227
+ {
2228
+ "name": "language_model.model.layers.17.experts.switch_glu.up_proj.weight",
2229
+ "dtype": "uint32",
2230
+ "shape": [
2231
+ 128,
2232
+ 704,
2233
+ 264
2234
+ ],
2235
+ "bits": 3,
2236
+ "group_size": 64,
2237
+ "offset": 237895680,
2238
+ "nbytes": 95158272
2239
+ }
2240
+ ]
2241
+ },
2242
+ {
2243
+ "layer": 18,
2244
+ "file": "layer_18.bin",
2245
+ "file_size": 333053952,
2246
+ "tensors": [
2247
+ {
2248
+ "name": "language_model.model.layers.18.experts.switch_glu.down_proj.biases",
2249
+ "dtype": "bfloat16",
2250
+ "shape": [
2251
+ 128,
2252
+ 2816,
2253
+ 11
2254
+ ],
2255
+ "bits": 0,
2256
+ "group_size": 64,
2257
+ "offset": 0,
2258
+ "nbytes": 7929856
2259
+ },
2260
+ {
2261
+ "name": "language_model.model.layers.18.experts.switch_glu.down_proj.scales",
2262
+ "dtype": "bfloat16",
2263
+ "shape": [
2264
+ 128,
2265
+ 2816,
2266
+ 11
2267
+ ],
2268
+ "bits": 0,
2269
+ "group_size": 64,
2270
+ "offset": 7929856,
2271
+ "nbytes": 7929856
2272
+ },
2273
+ {
2274
+ "name": "language_model.model.layers.18.experts.switch_glu.down_proj.weight",
2275
+ "dtype": "uint32",
2276
+ "shape": [
2277
+ 128,
2278
+ 2816,
2279
+ 66
2280
+ ],
2281
+ "bits": 3,
2282
+ "group_size": 64,
2283
+ "offset": 15859712,
2284
+ "nbytes": 95158272
2285
+ },
2286
+ {
2287
+ "name": "language_model.model.layers.18.experts.switch_glu.gate_proj.biases",
2288
+ "dtype": "bfloat16",
2289
+ "shape": [
2290
+ 128,
2291
+ 704,
2292
+ 44
2293
+ ],
2294
+ "bits": 0,
2295
+ "group_size": 64,
2296
+ "offset": 111017984,
2297
+ "nbytes": 7929856
2298
+ },
2299
+ {
2300
+ "name": "language_model.model.layers.18.experts.switch_glu.gate_proj.scales",
2301
+ "dtype": "bfloat16",
2302
+ "shape": [
2303
+ 128,
2304
+ 704,
2305
+ 44
2306
+ ],
2307
+ "bits": 0,
2308
+ "group_size": 64,
2309
+ "offset": 118947840,
2310
+ "nbytes": 7929856
2311
+ },
2312
+ {
2313
+ "name": "language_model.model.layers.18.experts.switch_glu.gate_proj.weight",
2314
+ "dtype": "uint32",
2315
+ "shape": [
2316
+ 128,
2317
+ 704,
2318
+ 264
2319
+ ],
2320
+ "bits": 3,
2321
+ "group_size": 64,
2322
+ "offset": 126877696,
2323
+ "nbytes": 95158272
2324
+ },
2325
+ {
2326
+ "name": "language_model.model.layers.18.experts.switch_glu.up_proj.biases",
2327
+ "dtype": "bfloat16",
2328
+ "shape": [
2329
+ 128,
2330
+ 704,
2331
+ 44
2332
+ ],
2333
+ "bits": 0,
2334
+ "group_size": 64,
2335
+ "offset": 222035968,
2336
+ "nbytes": 7929856
2337
+ },
2338
+ {
2339
+ "name": "language_model.model.layers.18.experts.switch_glu.up_proj.scales",
2340
+ "dtype": "bfloat16",
2341
+ "shape": [
2342
+ 128,
2343
+ 704,
2344
+ 44
2345
+ ],
2346
+ "bits": 0,
2347
+ "group_size": 64,
2348
+ "offset": 229965824,
2349
+ "nbytes": 7929856
2350
+ },
2351
+ {
2352
+ "name": "language_model.model.layers.18.experts.switch_glu.up_proj.weight",
2353
+ "dtype": "uint32",
2354
+ "shape": [
2355
+ 128,
2356
+ 704,
2357
+ 264
2358
+ ],
2359
+ "bits": 3,
2360
+ "group_size": 64,
2361
+ "offset": 237895680,
2362
+ "nbytes": 95158272
2363
+ }
2364
+ ]
2365
+ },
2366
+ {
2367
+ "layer": 19,
2368
+ "file": "layer_19.bin",
2369
+ "file_size": 333053952,
2370
+ "tensors": [
2371
+ {
2372
+ "name": "language_model.model.layers.19.experts.switch_glu.down_proj.biases",
2373
+ "dtype": "bfloat16",
2374
+ "shape": [
2375
+ 128,
2376
+ 2816,
2377
+ 11
2378
+ ],
2379
+ "bits": 0,
2380
+ "group_size": 64,
2381
+ "offset": 0,
2382
+ "nbytes": 7929856
2383
+ },
2384
+ {
2385
+ "name": "language_model.model.layers.19.experts.switch_glu.down_proj.scales",
2386
+ "dtype": "bfloat16",
2387
+ "shape": [
2388
+ 128,
2389
+ 2816,
2390
+ 11
2391
+ ],
2392
+ "bits": 0,
2393
+ "group_size": 64,
2394
+ "offset": 7929856,
2395
+ "nbytes": 7929856
2396
+ },
2397
+ {
2398
+ "name": "language_model.model.layers.19.experts.switch_glu.down_proj.weight",
2399
+ "dtype": "uint32",
2400
+ "shape": [
2401
+ 128,
2402
+ 2816,
2403
+ 66
2404
+ ],
2405
+ "bits": 3,
2406
+ "group_size": 64,
2407
+ "offset": 15859712,
2408
+ "nbytes": 95158272
2409
+ },
2410
+ {
2411
+ "name": "language_model.model.layers.19.experts.switch_glu.gate_proj.biases",
2412
+ "dtype": "bfloat16",
2413
+ "shape": [
2414
+ 128,
2415
+ 704,
2416
+ 44
2417
+ ],
2418
+ "bits": 0,
2419
+ "group_size": 64,
2420
+ "offset": 111017984,
2421
+ "nbytes": 7929856
2422
+ },
2423
+ {
2424
+ "name": "language_model.model.layers.19.experts.switch_glu.gate_proj.scales",
2425
+ "dtype": "bfloat16",
2426
+ "shape": [
2427
+ 128,
2428
+ 704,
2429
+ 44
2430
+ ],
2431
+ "bits": 0,
2432
+ "group_size": 64,
2433
+ "offset": 118947840,
2434
+ "nbytes": 7929856
2435
+ },
2436
+ {
2437
+ "name": "language_model.model.layers.19.experts.switch_glu.gate_proj.weight",
2438
+ "dtype": "uint32",
2439
+ "shape": [
2440
+ 128,
2441
+ 704,
2442
+ 264
2443
+ ],
2444
+ "bits": 3,
2445
+ "group_size": 64,
2446
+ "offset": 126877696,
2447
+ "nbytes": 95158272
2448
+ },
2449
+ {
2450
+ "name": "language_model.model.layers.19.experts.switch_glu.up_proj.biases",
2451
+ "dtype": "bfloat16",
2452
+ "shape": [
2453
+ 128,
2454
+ 704,
2455
+ 44
2456
+ ],
2457
+ "bits": 0,
2458
+ "group_size": 64,
2459
+ "offset": 222035968,
2460
+ "nbytes": 7929856
2461
+ },
2462
+ {
2463
+ "name": "language_model.model.layers.19.experts.switch_glu.up_proj.scales",
2464
+ "dtype": "bfloat16",
2465
+ "shape": [
2466
+ 128,
2467
+ 704,
2468
+ 44
2469
+ ],
2470
+ "bits": 0,
2471
+ "group_size": 64,
2472
+ "offset": 229965824,
2473
+ "nbytes": 7929856
2474
+ },
2475
+ {
2476
+ "name": "language_model.model.layers.19.experts.switch_glu.up_proj.weight",
2477
+ "dtype": "uint32",
2478
+ "shape": [
2479
+ 128,
2480
+ 704,
2481
+ 264
2482
+ ],
2483
+ "bits": 3,
2484
+ "group_size": 64,
2485
+ "offset": 237895680,
2486
+ "nbytes": 95158272
2487
+ }
2488
+ ]
2489
+ },
2490
+ {
2491
+ "layer": 20,
2492
+ "file": "layer_20.bin",
2493
+ "file_size": 333053952,
2494
+ "tensors": [
2495
+ {
2496
+ "name": "language_model.model.layers.20.experts.switch_glu.down_proj.biases",
2497
+ "dtype": "bfloat16",
2498
+ "shape": [
2499
+ 128,
2500
+ 2816,
2501
+ 11
2502
+ ],
2503
+ "bits": 0,
2504
+ "group_size": 64,
2505
+ "offset": 0,
2506
+ "nbytes": 7929856
2507
+ },
2508
+ {
2509
+ "name": "language_model.model.layers.20.experts.switch_glu.down_proj.scales",
2510
+ "dtype": "bfloat16",
2511
+ "shape": [
2512
+ 128,
2513
+ 2816,
2514
+ 11
2515
+ ],
2516
+ "bits": 0,
2517
+ "group_size": 64,
2518
+ "offset": 7929856,
2519
+ "nbytes": 7929856
2520
+ },
2521
+ {
2522
+ "name": "language_model.model.layers.20.experts.switch_glu.down_proj.weight",
2523
+ "dtype": "uint32",
2524
+ "shape": [
2525
+ 128,
2526
+ 2816,
2527
+ 66
2528
+ ],
2529
+ "bits": 3,
2530
+ "group_size": 64,
2531
+ "offset": 15859712,
2532
+ "nbytes": 95158272
2533
+ },
2534
+ {
2535
+ "name": "language_model.model.layers.20.experts.switch_glu.gate_proj.biases",
2536
+ "dtype": "bfloat16",
2537
+ "shape": [
2538
+ 128,
2539
+ 704,
2540
+ 44
2541
+ ],
2542
+ "bits": 0,
2543
+ "group_size": 64,
2544
+ "offset": 111017984,
2545
+ "nbytes": 7929856
2546
+ },
2547
+ {
2548
+ "name": "language_model.model.layers.20.experts.switch_glu.gate_proj.scales",
2549
+ "dtype": "bfloat16",
2550
+ "shape": [
2551
+ 128,
2552
+ 704,
2553
+ 44
2554
+ ],
2555
+ "bits": 0,
2556
+ "group_size": 64,
2557
+ "offset": 118947840,
2558
+ "nbytes": 7929856
2559
+ },
2560
+ {
2561
+ "name": "language_model.model.layers.20.experts.switch_glu.gate_proj.weight",
2562
+ "dtype": "uint32",
2563
+ "shape": [
2564
+ 128,
2565
+ 704,
2566
+ 264
2567
+ ],
2568
+ "bits": 3,
2569
+ "group_size": 64,
2570
+ "offset": 126877696,
2571
+ "nbytes": 95158272
2572
+ },
2573
+ {
2574
+ "name": "language_model.model.layers.20.experts.switch_glu.up_proj.biases",
2575
+ "dtype": "bfloat16",
2576
+ "shape": [
2577
+ 128,
2578
+ 704,
2579
+ 44
2580
+ ],
2581
+ "bits": 0,
2582
+ "group_size": 64,
2583
+ "offset": 222035968,
2584
+ "nbytes": 7929856
2585
+ },
2586
+ {
2587
+ "name": "language_model.model.layers.20.experts.switch_glu.up_proj.scales",
2588
+ "dtype": "bfloat16",
2589
+ "shape": [
2590
+ 128,
2591
+ 704,
2592
+ 44
2593
+ ],
2594
+ "bits": 0,
2595
+ "group_size": 64,
2596
+ "offset": 229965824,
2597
+ "nbytes": 7929856
2598
+ },
2599
+ {
2600
+ "name": "language_model.model.layers.20.experts.switch_glu.up_proj.weight",
2601
+ "dtype": "uint32",
2602
+ "shape": [
2603
+ 128,
2604
+ 704,
2605
+ 264
2606
+ ],
2607
+ "bits": 3,
2608
+ "group_size": 64,
2609
+ "offset": 237895680,
2610
+ "nbytes": 95158272
2611
+ }
2612
+ ]
2613
+ },
2614
+ {
2615
+ "layer": 21,
2616
+ "file": "layer_21.bin",
2617
+ "file_size": 333053952,
2618
+ "tensors": [
2619
+ {
2620
+ "name": "language_model.model.layers.21.experts.switch_glu.down_proj.biases",
2621
+ "dtype": "bfloat16",
2622
+ "shape": [
2623
+ 128,
2624
+ 2816,
2625
+ 11
2626
+ ],
2627
+ "bits": 0,
2628
+ "group_size": 64,
2629
+ "offset": 0,
2630
+ "nbytes": 7929856
2631
+ },
2632
+ {
2633
+ "name": "language_model.model.layers.21.experts.switch_glu.down_proj.scales",
2634
+ "dtype": "bfloat16",
2635
+ "shape": [
2636
+ 128,
2637
+ 2816,
2638
+ 11
2639
+ ],
2640
+ "bits": 0,
2641
+ "group_size": 64,
2642
+ "offset": 7929856,
2643
+ "nbytes": 7929856
2644
+ },
2645
+ {
2646
+ "name": "language_model.model.layers.21.experts.switch_glu.down_proj.weight",
2647
+ "dtype": "uint32",
2648
+ "shape": [
2649
+ 128,
2650
+ 2816,
2651
+ 66
2652
+ ],
2653
+ "bits": 3,
2654
+ "group_size": 64,
2655
+ "offset": 15859712,
2656
+ "nbytes": 95158272
2657
+ },
2658
+ {
2659
+ "name": "language_model.model.layers.21.experts.switch_glu.gate_proj.biases",
2660
+ "dtype": "bfloat16",
2661
+ "shape": [
2662
+ 128,
2663
+ 704,
2664
+ 44
2665
+ ],
2666
+ "bits": 0,
2667
+ "group_size": 64,
2668
+ "offset": 111017984,
2669
+ "nbytes": 7929856
2670
+ },
2671
+ {
2672
+ "name": "language_model.model.layers.21.experts.switch_glu.gate_proj.scales",
2673
+ "dtype": "bfloat16",
2674
+ "shape": [
2675
+ 128,
2676
+ 704,
2677
+ 44
2678
+ ],
2679
+ "bits": 0,
2680
+ "group_size": 64,
2681
+ "offset": 118947840,
2682
+ "nbytes": 7929856
2683
+ },
2684
+ {
2685
+ "name": "language_model.model.layers.21.experts.switch_glu.gate_proj.weight",
2686
+ "dtype": "uint32",
2687
+ "shape": [
2688
+ 128,
2689
+ 704,
2690
+ 264
2691
+ ],
2692
+ "bits": 3,
2693
+ "group_size": 64,
2694
+ "offset": 126877696,
2695
+ "nbytes": 95158272
2696
+ },
2697
+ {
2698
+ "name": "language_model.model.layers.21.experts.switch_glu.up_proj.biases",
2699
+ "dtype": "bfloat16",
2700
+ "shape": [
2701
+ 128,
2702
+ 704,
2703
+ 44
2704
+ ],
2705
+ "bits": 0,
2706
+ "group_size": 64,
2707
+ "offset": 222035968,
2708
+ "nbytes": 7929856
2709
+ },
2710
+ {
2711
+ "name": "language_model.model.layers.21.experts.switch_glu.up_proj.scales",
2712
+ "dtype": "bfloat16",
2713
+ "shape": [
2714
+ 128,
2715
+ 704,
2716
+ 44
2717
+ ],
2718
+ "bits": 0,
2719
+ "group_size": 64,
2720
+ "offset": 229965824,
2721
+ "nbytes": 7929856
2722
+ },
2723
+ {
2724
+ "name": "language_model.model.layers.21.experts.switch_glu.up_proj.weight",
2725
+ "dtype": "uint32",
2726
+ "shape": [
2727
+ 128,
2728
+ 704,
2729
+ 264
2730
+ ],
2731
+ "bits": 3,
2732
+ "group_size": 64,
2733
+ "offset": 237895680,
2734
+ "nbytes": 95158272
2735
+ }
2736
+ ]
2737
+ },
2738
+ {
2739
+ "layer": 22,
2740
+ "file": "layer_22.bin",
2741
+ "file_size": 333053952,
2742
+ "tensors": [
2743
+ {
2744
+ "name": "language_model.model.layers.22.experts.switch_glu.down_proj.biases",
2745
+ "dtype": "bfloat16",
2746
+ "shape": [
2747
+ 128,
2748
+ 2816,
2749
+ 11
2750
+ ],
2751
+ "bits": 0,
2752
+ "group_size": 64,
2753
+ "offset": 0,
2754
+ "nbytes": 7929856
2755
+ },
2756
+ {
2757
+ "name": "language_model.model.layers.22.experts.switch_glu.down_proj.scales",
2758
+ "dtype": "bfloat16",
2759
+ "shape": [
2760
+ 128,
2761
+ 2816,
2762
+ 11
2763
+ ],
2764
+ "bits": 0,
2765
+ "group_size": 64,
2766
+ "offset": 7929856,
2767
+ "nbytes": 7929856
2768
+ },
2769
+ {
2770
+ "name": "language_model.model.layers.22.experts.switch_glu.down_proj.weight",
2771
+ "dtype": "uint32",
2772
+ "shape": [
2773
+ 128,
2774
+ 2816,
2775
+ 66
2776
+ ],
2777
+ "bits": 3,
2778
+ "group_size": 64,
2779
+ "offset": 15859712,
2780
+ "nbytes": 95158272
2781
+ },
2782
+ {
2783
+ "name": "language_model.model.layers.22.experts.switch_glu.gate_proj.biases",
2784
+ "dtype": "bfloat16",
2785
+ "shape": [
2786
+ 128,
2787
+ 704,
2788
+ 44
2789
+ ],
2790
+ "bits": 0,
2791
+ "group_size": 64,
2792
+ "offset": 111017984,
2793
+ "nbytes": 7929856
2794
+ },
2795
+ {
2796
+ "name": "language_model.model.layers.22.experts.switch_glu.gate_proj.scales",
2797
+ "dtype": "bfloat16",
2798
+ "shape": [
2799
+ 128,
2800
+ 704,
2801
+ 44
2802
+ ],
2803
+ "bits": 0,
2804
+ "group_size": 64,
2805
+ "offset": 118947840,
2806
+ "nbytes": 7929856
2807
+ },
2808
+ {
2809
+ "name": "language_model.model.layers.22.experts.switch_glu.gate_proj.weight",
2810
+ "dtype": "uint32",
2811
+ "shape": [
2812
+ 128,
2813
+ 704,
2814
+ 264
2815
+ ],
2816
+ "bits": 3,
2817
+ "group_size": 64,
2818
+ "offset": 126877696,
2819
+ "nbytes": 95158272
2820
+ },
2821
+ {
2822
+ "name": "language_model.model.layers.22.experts.switch_glu.up_proj.biases",
2823
+ "dtype": "bfloat16",
2824
+ "shape": [
2825
+ 128,
2826
+ 704,
2827
+ 44
2828
+ ],
2829
+ "bits": 0,
2830
+ "group_size": 64,
2831
+ "offset": 222035968,
2832
+ "nbytes": 7929856
2833
+ },
2834
+ {
2835
+ "name": "language_model.model.layers.22.experts.switch_glu.up_proj.scales",
2836
+ "dtype": "bfloat16",
2837
+ "shape": [
2838
+ 128,
2839
+ 704,
2840
+ 44
2841
+ ],
2842
+ "bits": 0,
2843
+ "group_size": 64,
2844
+ "offset": 229965824,
2845
+ "nbytes": 7929856
2846
+ },
2847
+ {
2848
+ "name": "language_model.model.layers.22.experts.switch_glu.up_proj.weight",
2849
+ "dtype": "uint32",
2850
+ "shape": [
2851
+ 128,
2852
+ 704,
2853
+ 264
2854
+ ],
2855
+ "bits": 3,
2856
+ "group_size": 64,
2857
+ "offset": 237895680,
2858
+ "nbytes": 95158272
2859
+ }
2860
+ ]
2861
+ },
2862
+ {
2863
+ "layer": 23,
2864
+ "file": "layer_23.bin",
2865
+ "file_size": 333053952,
2866
+ "tensors": [
2867
+ {
2868
+ "name": "language_model.model.layers.23.experts.switch_glu.down_proj.biases",
2869
+ "dtype": "bfloat16",
2870
+ "shape": [
2871
+ 128,
2872
+ 2816,
2873
+ 11
2874
+ ],
2875
+ "bits": 0,
2876
+ "group_size": 64,
2877
+ "offset": 0,
2878
+ "nbytes": 7929856
2879
+ },
2880
+ {
2881
+ "name": "language_model.model.layers.23.experts.switch_glu.down_proj.scales",
2882
+ "dtype": "bfloat16",
2883
+ "shape": [
2884
+ 128,
2885
+ 2816,
2886
+ 11
2887
+ ],
2888
+ "bits": 0,
2889
+ "group_size": 64,
2890
+ "offset": 7929856,
2891
+ "nbytes": 7929856
2892
+ },
2893
+ {
2894
+ "name": "language_model.model.layers.23.experts.switch_glu.down_proj.weight",
2895
+ "dtype": "uint32",
2896
+ "shape": [
2897
+ 128,
2898
+ 2816,
2899
+ 66
2900
+ ],
2901
+ "bits": 3,
2902
+ "group_size": 64,
2903
+ "offset": 15859712,
2904
+ "nbytes": 95158272
2905
+ },
2906
+ {
2907
+ "name": "language_model.model.layers.23.experts.switch_glu.gate_proj.biases",
2908
+ "dtype": "bfloat16",
2909
+ "shape": [
2910
+ 128,
2911
+ 704,
2912
+ 44
2913
+ ],
2914
+ "bits": 0,
2915
+ "group_size": 64,
2916
+ "offset": 111017984,
2917
+ "nbytes": 7929856
2918
+ },
2919
+ {
2920
+ "name": "language_model.model.layers.23.experts.switch_glu.gate_proj.scales",
2921
+ "dtype": "bfloat16",
2922
+ "shape": [
2923
+ 128,
2924
+ 704,
2925
+ 44
2926
+ ],
2927
+ "bits": 0,
2928
+ "group_size": 64,
2929
+ "offset": 118947840,
2930
+ "nbytes": 7929856
2931
+ },
2932
+ {
2933
+ "name": "language_model.model.layers.23.experts.switch_glu.gate_proj.weight",
2934
+ "dtype": "uint32",
2935
+ "shape": [
2936
+ 128,
2937
+ 704,
2938
+ 264
2939
+ ],
2940
+ "bits": 3,
2941
+ "group_size": 64,
2942
+ "offset": 126877696,
2943
+ "nbytes": 95158272
2944
+ },
2945
+ {
2946
+ "name": "language_model.model.layers.23.experts.switch_glu.up_proj.biases",
2947
+ "dtype": "bfloat16",
2948
+ "shape": [
2949
+ 128,
2950
+ 704,
2951
+ 44
2952
+ ],
2953
+ "bits": 0,
2954
+ "group_size": 64,
2955
+ "offset": 222035968,
2956
+ "nbytes": 7929856
2957
+ },
2958
+ {
2959
+ "name": "language_model.model.layers.23.experts.switch_glu.up_proj.scales",
2960
+ "dtype": "bfloat16",
2961
+ "shape": [
2962
+ 128,
2963
+ 704,
2964
+ 44
2965
+ ],
2966
+ "bits": 0,
2967
+ "group_size": 64,
2968
+ "offset": 229965824,
2969
+ "nbytes": 7929856
2970
+ },
2971
+ {
2972
+ "name": "language_model.model.layers.23.experts.switch_glu.up_proj.weight",
2973
+ "dtype": "uint32",
2974
+ "shape": [
2975
+ 128,
2976
+ 704,
2977
+ 264
2978
+ ],
2979
+ "bits": 3,
2980
+ "group_size": 64,
2981
+ "offset": 237895680,
2982
+ "nbytes": 95158272
2983
+ }
2984
+ ]
2985
+ },
2986
+ {
2987
+ "layer": 24,
2988
+ "file": "layer_24.bin",
2989
+ "file_size": 333053952,
2990
+ "tensors": [
2991
+ {
2992
+ "name": "language_model.model.layers.24.experts.switch_glu.down_proj.biases",
2993
+ "dtype": "bfloat16",
2994
+ "shape": [
2995
+ 128,
2996
+ 2816,
2997
+ 11
2998
+ ],
2999
+ "bits": 0,
3000
+ "group_size": 64,
3001
+ "offset": 0,
3002
+ "nbytes": 7929856
3003
+ },
3004
+ {
3005
+ "name": "language_model.model.layers.24.experts.switch_glu.down_proj.scales",
3006
+ "dtype": "bfloat16",
3007
+ "shape": [
3008
+ 128,
3009
+ 2816,
3010
+ 11
3011
+ ],
3012
+ "bits": 0,
3013
+ "group_size": 64,
3014
+ "offset": 7929856,
3015
+ "nbytes": 7929856
3016
+ },
3017
+ {
3018
+ "name": "language_model.model.layers.24.experts.switch_glu.down_proj.weight",
3019
+ "dtype": "uint32",
3020
+ "shape": [
3021
+ 128,
3022
+ 2816,
3023
+ 66
3024
+ ],
3025
+ "bits": 3,
3026
+ "group_size": 64,
3027
+ "offset": 15859712,
3028
+ "nbytes": 95158272
3029
+ },
3030
+ {
3031
+ "name": "language_model.model.layers.24.experts.switch_glu.gate_proj.biases",
3032
+ "dtype": "bfloat16",
3033
+ "shape": [
3034
+ 128,
3035
+ 704,
3036
+ 44
3037
+ ],
3038
+ "bits": 0,
3039
+ "group_size": 64,
3040
+ "offset": 111017984,
3041
+ "nbytes": 7929856
3042
+ },
3043
+ {
3044
+ "name": "language_model.model.layers.24.experts.switch_glu.gate_proj.scales",
3045
+ "dtype": "bfloat16",
3046
+ "shape": [
3047
+ 128,
3048
+ 704,
3049
+ 44
3050
+ ],
3051
+ "bits": 0,
3052
+ "group_size": 64,
3053
+ "offset": 118947840,
3054
+ "nbytes": 7929856
3055
+ },
3056
+ {
3057
+ "name": "language_model.model.layers.24.experts.switch_glu.gate_proj.weight",
3058
+ "dtype": "uint32",
3059
+ "shape": [
3060
+ 128,
3061
+ 704,
3062
+ 264
3063
+ ],
3064
+ "bits": 3,
3065
+ "group_size": 64,
3066
+ "offset": 126877696,
3067
+ "nbytes": 95158272
3068
+ },
3069
+ {
3070
+ "name": "language_model.model.layers.24.experts.switch_glu.up_proj.biases",
3071
+ "dtype": "bfloat16",
3072
+ "shape": [
3073
+ 128,
3074
+ 704,
3075
+ 44
3076
+ ],
3077
+ "bits": 0,
3078
+ "group_size": 64,
3079
+ "offset": 222035968,
3080
+ "nbytes": 7929856
3081
+ },
3082
+ {
3083
+ "name": "language_model.model.layers.24.experts.switch_glu.up_proj.scales",
3084
+ "dtype": "bfloat16",
3085
+ "shape": [
3086
+ 128,
3087
+ 704,
3088
+ 44
3089
+ ],
3090
+ "bits": 0,
3091
+ "group_size": 64,
3092
+ "offset": 229965824,
3093
+ "nbytes": 7929856
3094
+ },
3095
+ {
3096
+ "name": "language_model.model.layers.24.experts.switch_glu.up_proj.weight",
3097
+ "dtype": "uint32",
3098
+ "shape": [
3099
+ 128,
3100
+ 704,
3101
+ 264
3102
+ ],
3103
+ "bits": 3,
3104
+ "group_size": 64,
3105
+ "offset": 237895680,
3106
+ "nbytes": 95158272
3107
+ }
3108
+ ]
3109
+ },
3110
+ {
3111
+ "layer": 25,
3112
+ "file": "layer_25.bin",
3113
+ "file_size": 333053952,
3114
+ "tensors": [
3115
+ {
3116
+ "name": "language_model.model.layers.25.experts.switch_glu.down_proj.biases",
3117
+ "dtype": "bfloat16",
3118
+ "shape": [
3119
+ 128,
3120
+ 2816,
3121
+ 11
3122
+ ],
3123
+ "bits": 0,
3124
+ "group_size": 64,
3125
+ "offset": 0,
3126
+ "nbytes": 7929856
3127
+ },
3128
+ {
3129
+ "name": "language_model.model.layers.25.experts.switch_glu.down_proj.scales",
3130
+ "dtype": "bfloat16",
3131
+ "shape": [
3132
+ 128,
3133
+ 2816,
3134
+ 11
3135
+ ],
3136
+ "bits": 0,
3137
+ "group_size": 64,
3138
+ "offset": 7929856,
3139
+ "nbytes": 7929856
3140
+ },
3141
+ {
3142
+ "name": "language_model.model.layers.25.experts.switch_glu.down_proj.weight",
3143
+ "dtype": "uint32",
3144
+ "shape": [
3145
+ 128,
3146
+ 2816,
3147
+ 66
3148
+ ],
3149
+ "bits": 3,
3150
+ "group_size": 64,
3151
+ "offset": 15859712,
3152
+ "nbytes": 95158272
3153
+ },
3154
+ {
3155
+ "name": "language_model.model.layers.25.experts.switch_glu.gate_proj.biases",
3156
+ "dtype": "bfloat16",
3157
+ "shape": [
3158
+ 128,
3159
+ 704,
3160
+ 44
3161
+ ],
3162
+ "bits": 0,
3163
+ "group_size": 64,
3164
+ "offset": 111017984,
3165
+ "nbytes": 7929856
3166
+ },
3167
+ {
3168
+ "name": "language_model.model.layers.25.experts.switch_glu.gate_proj.scales",
3169
+ "dtype": "bfloat16",
3170
+ "shape": [
3171
+ 128,
3172
+ 704,
3173
+ 44
3174
+ ],
3175
+ "bits": 0,
3176
+ "group_size": 64,
3177
+ "offset": 118947840,
3178
+ "nbytes": 7929856
3179
+ },
3180
+ {
3181
+ "name": "language_model.model.layers.25.experts.switch_glu.gate_proj.weight",
3182
+ "dtype": "uint32",
3183
+ "shape": [
3184
+ 128,
3185
+ 704,
3186
+ 264
3187
+ ],
3188
+ "bits": 3,
3189
+ "group_size": 64,
3190
+ "offset": 126877696,
3191
+ "nbytes": 95158272
3192
+ },
3193
+ {
3194
+ "name": "language_model.model.layers.25.experts.switch_glu.up_proj.biases",
3195
+ "dtype": "bfloat16",
3196
+ "shape": [
3197
+ 128,
3198
+ 704,
3199
+ 44
3200
+ ],
3201
+ "bits": 0,
3202
+ "group_size": 64,
3203
+ "offset": 222035968,
3204
+ "nbytes": 7929856
3205
+ },
3206
+ {
3207
+ "name": "language_model.model.layers.25.experts.switch_glu.up_proj.scales",
3208
+ "dtype": "bfloat16",
3209
+ "shape": [
3210
+ 128,
3211
+ 704,
3212
+ 44
3213
+ ],
3214
+ "bits": 0,
3215
+ "group_size": 64,
3216
+ "offset": 229965824,
3217
+ "nbytes": 7929856
3218
+ },
3219
+ {
3220
+ "name": "language_model.model.layers.25.experts.switch_glu.up_proj.weight",
3221
+ "dtype": "uint32",
3222
+ "shape": [
3223
+ 128,
3224
+ 704,
3225
+ 264
3226
+ ],
3227
+ "bits": 3,
3228
+ "group_size": 64,
3229
+ "offset": 237895680,
3230
+ "nbytes": 95158272
3231
+ }
3232
+ ]
3233
+ },
3234
+ {
3235
+ "layer": 26,
3236
+ "file": "layer_26.bin",
3237
+ "file_size": 333053952,
3238
+ "tensors": [
3239
+ {
3240
+ "name": "language_model.model.layers.26.experts.switch_glu.down_proj.biases",
3241
+ "dtype": "bfloat16",
3242
+ "shape": [
3243
+ 128,
3244
+ 2816,
3245
+ 11
3246
+ ],
3247
+ "bits": 0,
3248
+ "group_size": 64,
3249
+ "offset": 0,
3250
+ "nbytes": 7929856
3251
+ },
3252
+ {
3253
+ "name": "language_model.model.layers.26.experts.switch_glu.down_proj.scales",
3254
+ "dtype": "bfloat16",
3255
+ "shape": [
3256
+ 128,
3257
+ 2816,
3258
+ 11
3259
+ ],
3260
+ "bits": 0,
3261
+ "group_size": 64,
3262
+ "offset": 7929856,
3263
+ "nbytes": 7929856
3264
+ },
3265
+ {
3266
+ "name": "language_model.model.layers.26.experts.switch_glu.down_proj.weight",
3267
+ "dtype": "uint32",
3268
+ "shape": [
3269
+ 128,
3270
+ 2816,
3271
+ 66
3272
+ ],
3273
+ "bits": 3,
3274
+ "group_size": 64,
3275
+ "offset": 15859712,
3276
+ "nbytes": 95158272
3277
+ },
3278
+ {
3279
+ "name": "language_model.model.layers.26.experts.switch_glu.gate_proj.biases",
3280
+ "dtype": "bfloat16",
3281
+ "shape": [
3282
+ 128,
3283
+ 704,
3284
+ 44
3285
+ ],
3286
+ "bits": 0,
3287
+ "group_size": 64,
3288
+ "offset": 111017984,
3289
+ "nbytes": 7929856
3290
+ },
3291
+ {
3292
+ "name": "language_model.model.layers.26.experts.switch_glu.gate_proj.scales",
3293
+ "dtype": "bfloat16",
3294
+ "shape": [
3295
+ 128,
3296
+ 704,
3297
+ 44
3298
+ ],
3299
+ "bits": 0,
3300
+ "group_size": 64,
3301
+ "offset": 118947840,
3302
+ "nbytes": 7929856
3303
+ },
3304
+ {
3305
+ "name": "language_model.model.layers.26.experts.switch_glu.gate_proj.weight",
3306
+ "dtype": "uint32",
3307
+ "shape": [
3308
+ 128,
3309
+ 704,
3310
+ 264
3311
+ ],
3312
+ "bits": 3,
3313
+ "group_size": 64,
3314
+ "offset": 126877696,
3315
+ "nbytes": 95158272
3316
+ },
3317
+ {
3318
+ "name": "language_model.model.layers.26.experts.switch_glu.up_proj.biases",
3319
+ "dtype": "bfloat16",
3320
+ "shape": [
3321
+ 128,
3322
+ 704,
3323
+ 44
3324
+ ],
3325
+ "bits": 0,
3326
+ "group_size": 64,
3327
+ "offset": 222035968,
3328
+ "nbytes": 7929856
3329
+ },
3330
+ {
3331
+ "name": "language_model.model.layers.26.experts.switch_glu.up_proj.scales",
3332
+ "dtype": "bfloat16",
3333
+ "shape": [
3334
+ 128,
3335
+ 704,
3336
+ 44
3337
+ ],
3338
+ "bits": 0,
3339
+ "group_size": 64,
3340
+ "offset": 229965824,
3341
+ "nbytes": 7929856
3342
+ },
3343
+ {
3344
+ "name": "language_model.model.layers.26.experts.switch_glu.up_proj.weight",
3345
+ "dtype": "uint32",
3346
+ "shape": [
3347
+ 128,
3348
+ 704,
3349
+ 264
3350
+ ],
3351
+ "bits": 3,
3352
+ "group_size": 64,
3353
+ "offset": 237895680,
3354
+ "nbytes": 95158272
3355
+ }
3356
+ ]
3357
+ },
3358
+ {
3359
+ "layer": 27,
3360
+ "file": "layer_27.bin",
3361
+ "file_size": 333053952,
3362
+ "tensors": [
3363
+ {
3364
+ "name": "language_model.model.layers.27.experts.switch_glu.down_proj.biases",
3365
+ "dtype": "bfloat16",
3366
+ "shape": [
3367
+ 128,
3368
+ 2816,
3369
+ 11
3370
+ ],
3371
+ "bits": 0,
3372
+ "group_size": 64,
3373
+ "offset": 0,
3374
+ "nbytes": 7929856
3375
+ },
3376
+ {
3377
+ "name": "language_model.model.layers.27.experts.switch_glu.down_proj.scales",
3378
+ "dtype": "bfloat16",
3379
+ "shape": [
3380
+ 128,
3381
+ 2816,
3382
+ 11
3383
+ ],
3384
+ "bits": 0,
3385
+ "group_size": 64,
3386
+ "offset": 7929856,
3387
+ "nbytes": 7929856
3388
+ },
3389
+ {
3390
+ "name": "language_model.model.layers.27.experts.switch_glu.down_proj.weight",
3391
+ "dtype": "uint32",
3392
+ "shape": [
3393
+ 128,
3394
+ 2816,
3395
+ 66
3396
+ ],
3397
+ "bits": 3,
3398
+ "group_size": 64,
3399
+ "offset": 15859712,
3400
+ "nbytes": 95158272
3401
+ },
3402
+ {
3403
+ "name": "language_model.model.layers.27.experts.switch_glu.gate_proj.biases",
3404
+ "dtype": "bfloat16",
3405
+ "shape": [
3406
+ 128,
3407
+ 704,
3408
+ 44
3409
+ ],
3410
+ "bits": 0,
3411
+ "group_size": 64,
3412
+ "offset": 111017984,
3413
+ "nbytes": 7929856
3414
+ },
3415
+ {
3416
+ "name": "language_model.model.layers.27.experts.switch_glu.gate_proj.scales",
3417
+ "dtype": "bfloat16",
3418
+ "shape": [
3419
+ 128,
3420
+ 704,
3421
+ 44
3422
+ ],
3423
+ "bits": 0,
3424
+ "group_size": 64,
3425
+ "offset": 118947840,
3426
+ "nbytes": 7929856
3427
+ },
3428
+ {
3429
+ "name": "language_model.model.layers.27.experts.switch_glu.gate_proj.weight",
3430
+ "dtype": "uint32",
3431
+ "shape": [
3432
+ 128,
3433
+ 704,
3434
+ 264
3435
+ ],
3436
+ "bits": 3,
3437
+ "group_size": 64,
3438
+ "offset": 126877696,
3439
+ "nbytes": 95158272
3440
+ },
3441
+ {
3442
+ "name": "language_model.model.layers.27.experts.switch_glu.up_proj.biases",
3443
+ "dtype": "bfloat16",
3444
+ "shape": [
3445
+ 128,
3446
+ 704,
3447
+ 44
3448
+ ],
3449
+ "bits": 0,
3450
+ "group_size": 64,
3451
+ "offset": 222035968,
3452
+ "nbytes": 7929856
3453
+ },
3454
+ {
3455
+ "name": "language_model.model.layers.27.experts.switch_glu.up_proj.scales",
3456
+ "dtype": "bfloat16",
3457
+ "shape": [
3458
+ 128,
3459
+ 704,
3460
+ 44
3461
+ ],
3462
+ "bits": 0,
3463
+ "group_size": 64,
3464
+ "offset": 229965824,
3465
+ "nbytes": 7929856
3466
+ },
3467
+ {
3468
+ "name": "language_model.model.layers.27.experts.switch_glu.up_proj.weight",
3469
+ "dtype": "uint32",
3470
+ "shape": [
3471
+ 128,
3472
+ 704,
3473
+ 264
3474
+ ],
3475
+ "bits": 3,
3476
+ "group_size": 64,
3477
+ "offset": 237895680,
3478
+ "nbytes": 95158272
3479
+ }
3480
+ ]
3481
+ },
3482
+ {
3483
+ "layer": 28,
3484
+ "file": "layer_28.bin",
3485
+ "file_size": 333053952,
3486
+ "tensors": [
3487
+ {
3488
+ "name": "language_model.model.layers.28.experts.switch_glu.down_proj.biases",
3489
+ "dtype": "bfloat16",
3490
+ "shape": [
3491
+ 128,
3492
+ 2816,
3493
+ 11
3494
+ ],
3495
+ "bits": 0,
3496
+ "group_size": 64,
3497
+ "offset": 0,
3498
+ "nbytes": 7929856
3499
+ },
3500
+ {
3501
+ "name": "language_model.model.layers.28.experts.switch_glu.down_proj.scales",
3502
+ "dtype": "bfloat16",
3503
+ "shape": [
3504
+ 128,
3505
+ 2816,
3506
+ 11
3507
+ ],
3508
+ "bits": 0,
3509
+ "group_size": 64,
3510
+ "offset": 7929856,
3511
+ "nbytes": 7929856
3512
+ },
3513
+ {
3514
+ "name": "language_model.model.layers.28.experts.switch_glu.down_proj.weight",
3515
+ "dtype": "uint32",
3516
+ "shape": [
3517
+ 128,
3518
+ 2816,
3519
+ 66
3520
+ ],
3521
+ "bits": 3,
3522
+ "group_size": 64,
3523
+ "offset": 15859712,
3524
+ "nbytes": 95158272
3525
+ },
3526
+ {
3527
+ "name": "language_model.model.layers.28.experts.switch_glu.gate_proj.biases",
3528
+ "dtype": "bfloat16",
3529
+ "shape": [
3530
+ 128,
3531
+ 704,
3532
+ 44
3533
+ ],
3534
+ "bits": 0,
3535
+ "group_size": 64,
3536
+ "offset": 111017984,
3537
+ "nbytes": 7929856
3538
+ },
3539
+ {
3540
+ "name": "language_model.model.layers.28.experts.switch_glu.gate_proj.scales",
3541
+ "dtype": "bfloat16",
3542
+ "shape": [
3543
+ 128,
3544
+ 704,
3545
+ 44
3546
+ ],
3547
+ "bits": 0,
3548
+ "group_size": 64,
3549
+ "offset": 118947840,
3550
+ "nbytes": 7929856
3551
+ },
3552
+ {
3553
+ "name": "language_model.model.layers.28.experts.switch_glu.gate_proj.weight",
3554
+ "dtype": "uint32",
3555
+ "shape": [
3556
+ 128,
3557
+ 704,
3558
+ 264
3559
+ ],
3560
+ "bits": 3,
3561
+ "group_size": 64,
3562
+ "offset": 126877696,
3563
+ "nbytes": 95158272
3564
+ },
3565
+ {
3566
+ "name": "language_model.model.layers.28.experts.switch_glu.up_proj.biases",
3567
+ "dtype": "bfloat16",
3568
+ "shape": [
3569
+ 128,
3570
+ 704,
3571
+ 44
3572
+ ],
3573
+ "bits": 0,
3574
+ "group_size": 64,
3575
+ "offset": 222035968,
3576
+ "nbytes": 7929856
3577
+ },
3578
+ {
3579
+ "name": "language_model.model.layers.28.experts.switch_glu.up_proj.scales",
3580
+ "dtype": "bfloat16",
3581
+ "shape": [
3582
+ 128,
3583
+ 704,
3584
+ 44
3585
+ ],
3586
+ "bits": 0,
3587
+ "group_size": 64,
3588
+ "offset": 229965824,
3589
+ "nbytes": 7929856
3590
+ },
3591
+ {
3592
+ "name": "language_model.model.layers.28.experts.switch_glu.up_proj.weight",
3593
+ "dtype": "uint32",
3594
+ "shape": [
3595
+ 128,
3596
+ 704,
3597
+ 264
3598
+ ],
3599
+ "bits": 3,
3600
+ "group_size": 64,
3601
+ "offset": 237895680,
3602
+ "nbytes": 95158272
3603
+ }
3604
+ ]
3605
+ },
3606
+ {
3607
+ "layer": 29,
3608
+ "file": "layer_29.bin",
3609
+ "file_size": 333053952,
3610
+ "tensors": [
3611
+ {
3612
+ "name": "language_model.model.layers.29.experts.switch_glu.down_proj.biases",
3613
+ "dtype": "bfloat16",
3614
+ "shape": [
3615
+ 128,
3616
+ 2816,
3617
+ 11
3618
+ ],
3619
+ "bits": 0,
3620
+ "group_size": 64,
3621
+ "offset": 0,
3622
+ "nbytes": 7929856
3623
+ },
3624
+ {
3625
+ "name": "language_model.model.layers.29.experts.switch_glu.down_proj.scales",
3626
+ "dtype": "bfloat16",
3627
+ "shape": [
3628
+ 128,
3629
+ 2816,
3630
+ 11
3631
+ ],
3632
+ "bits": 0,
3633
+ "group_size": 64,
3634
+ "offset": 7929856,
3635
+ "nbytes": 7929856
3636
+ },
3637
+ {
3638
+ "name": "language_model.model.layers.29.experts.switch_glu.down_proj.weight",
3639
+ "dtype": "uint32",
3640
+ "shape": [
3641
+ 128,
3642
+ 2816,
3643
+ 66
3644
+ ],
3645
+ "bits": 3,
3646
+ "group_size": 64,
3647
+ "offset": 15859712,
3648
+ "nbytes": 95158272
3649
+ },
3650
+ {
3651
+ "name": "language_model.model.layers.29.experts.switch_glu.gate_proj.biases",
3652
+ "dtype": "bfloat16",
3653
+ "shape": [
3654
+ 128,
3655
+ 704,
3656
+ 44
3657
+ ],
3658
+ "bits": 0,
3659
+ "group_size": 64,
3660
+ "offset": 111017984,
3661
+ "nbytes": 7929856
3662
+ },
3663
+ {
3664
+ "name": "language_model.model.layers.29.experts.switch_glu.gate_proj.scales",
3665
+ "dtype": "bfloat16",
3666
+ "shape": [
3667
+ 128,
3668
+ 704,
3669
+ 44
3670
+ ],
3671
+ "bits": 0,
3672
+ "group_size": 64,
3673
+ "offset": 118947840,
3674
+ "nbytes": 7929856
3675
+ },
3676
+ {
3677
+ "name": "language_model.model.layers.29.experts.switch_glu.gate_proj.weight",
3678
+ "dtype": "uint32",
3679
+ "shape": [
3680
+ 128,
3681
+ 704,
3682
+ 264
3683
+ ],
3684
+ "bits": 3,
3685
+ "group_size": 64,
3686
+ "offset": 126877696,
3687
+ "nbytes": 95158272
3688
+ },
3689
+ {
3690
+ "name": "language_model.model.layers.29.experts.switch_glu.up_proj.biases",
3691
+ "dtype": "bfloat16",
3692
+ "shape": [
3693
+ 128,
3694
+ 704,
3695
+ 44
3696
+ ],
3697
+ "bits": 0,
3698
+ "group_size": 64,
3699
+ "offset": 222035968,
3700
+ "nbytes": 7929856
3701
+ },
3702
+ {
3703
+ "name": "language_model.model.layers.29.experts.switch_glu.up_proj.scales",
3704
+ "dtype": "bfloat16",
3705
+ "shape": [
3706
+ 128,
3707
+ 704,
3708
+ 44
3709
+ ],
3710
+ "bits": 0,
3711
+ "group_size": 64,
3712
+ "offset": 229965824,
3713
+ "nbytes": 7929856
3714
+ },
3715
+ {
3716
+ "name": "language_model.model.layers.29.experts.switch_glu.up_proj.weight",
3717
+ "dtype": "uint32",
3718
+ "shape": [
3719
+ 128,
3720
+ 704,
3721
+ 264
3722
+ ],
3723
+ "bits": 3,
3724
+ "group_size": 64,
3725
+ "offset": 237895680,
3726
+ "nbytes": 95158272
3727
+ }
3728
+ ]
3729
+ }
3730
+ ],
3731
+ "notes": [
3732
+ "Preserves quantized tensors exactly as stored in the MLX checkpoint.",
3733
+ "This is a mixed-precision expert export; no re-quantization is performed."
3734
+ ]
3735
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
3
+ size 32169626
tokenizer_config.json ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_token": "<|audio|>",
3
+ "backend": "tokenizers",
4
+ "boa_token": "<|audio>",
5
+ "boi_token": "<|image>",
6
+ "bos_token": "<bos>",
7
+ "eoa_token": "<audio|>",
8
+ "eoc_token": "<channel|>",
9
+ "eoi_token": "<image|>",
10
+ "eos_token": "<eos>",
11
+ "eot_token": "<turn|>",
12
+ "escape_token": "<|\"|>",
13
+ "etc_token": "<tool_call|>",
14
+ "etd_token": "<tool|>",
15
+ "etr_token": "<tool_response|>",
16
+ "image_token": "<|image|>",
17
+ "is_local": true,
18
+ "mask_token": "<mask>",
19
+ "model_max_length": 1000000000000000019884624838656,
20
+ "model_specific_special_tokens": {
21
+ "audio_token": "<|audio|>",
22
+ "boa_token": "<|audio>",
23
+ "boi_token": "<|image>",
24
+ "eoa_token": "<audio|>",
25
+ "eoc_token": "<channel|>",
26
+ "eoi_token": "<image|>",
27
+ "eot_token": "<turn|>",
28
+ "escape_token": "<|\"|>",
29
+ "etc_token": "<tool_call|>",
30
+ "etd_token": "<tool|>",
31
+ "etr_token": "<tool_response|>",
32
+ "image_token": "<|image|>",
33
+ "soc_token": "<|channel>",
34
+ "sot_token": "<|turn>",
35
+ "stc_token": "<|tool_call>",
36
+ "std_token": "<|tool>",
37
+ "str_token": "<|tool_response>",
38
+ "think_token": "<|think|>",
39
+ "video_token": "<|video|>"
40
+ },
41
+ "pad_token": "<pad>",
42
+ "padding_side": "left",
43
+ "processor_class": "Gemma4Processor",
44
+ "response_schema": {
45
+ "properties": {
46
+ "content": {
47
+ "type": "string"
48
+ },
49
+ "role": {
50
+ "const": "assistant"
51
+ },
52
+ "thinking": {
53
+ "type": "string"
54
+ },
55
+ "tool_calls": {
56
+ "items": {
57
+ "properties": {
58
+ "function": {
59
+ "properties": {
60
+ "arguments": {
61
+ "additionalProperties": {},
62
+ "type": "object",
63
+ "x-parser": "gemma4-tool-call"
64
+ },
65
+ "name": {
66
+ "type": "string"
67
+ }
68
+ },
69
+ "type": "object",
70
+ "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})"
71
+ },
72
+ "type": {
73
+ "const": "function"
74
+ }
75
+ },
76
+ "type": "object"
77
+ },
78
+ "type": "array",
79
+ "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>"
80
+ }
81
+ },
82
+ "type": "object",
83
+ "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<content>(?:(?!\\<\\|tool_call\\>)(?!\\<turn\\|\\>).)+)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?:\\<turn\\|\\>)?"
84
+ },
85
+ "soc_token": "<|channel>",
86
+ "sot_token": "<|turn>",
87
+ "stc_token": "<|tool_call>",
88
+ "std_token": "<|tool>",
89
+ "str_token": "<|tool_response>",
90
+ "think_token": "<|think|>",
91
+ "tokenizer_class": "GemmaTokenizer",
92
+ "unk_token": "<unk>",
93
+ "video_token": "<|video|>"
94
+ }