more log files
Browse files- main.log +0 -0
- zephyr_int8.txt +651 -0
main.log
ADDED
The diff for this file is too large to render.
See raw diff
|
|
zephyr_int8.txt
ADDED
@@ -0,0 +1,651 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
(dream) tb@IBM-PF38WZKF:~/funstreams/AI$ python llama.cpp/convert.py zephyr-7b-beta --outfile zephyr_int8.gguf --outtype q8_0
|
2 |
+
Loading model file zephyr-7b-beta/model-00001-of-00008.safetensors
|
3 |
+
Loading model file zephyr-7b-beta/model-00001-of-00008.safetensors
|
4 |
+
Loading model file zephyr-7b-beta/model-00002-of-00008.safetensors
|
5 |
+
Loading model file zephyr-7b-beta/model-00003-of-00008.safetensors
|
6 |
+
Loading model file zephyr-7b-beta/model-00004-of-00008.safetensors
|
7 |
+
Loading model file zephyr-7b-beta/model-00005-of-00008.safetensors
|
8 |
+
Loading model file zephyr-7b-beta/model-00006-of-00008.safetensors
|
9 |
+
Loading model file zephyr-7b-beta/model-00007-of-00008.safetensors
|
10 |
+
Loading model file zephyr-7b-beta/model-00008-of-00008.safetensors
|
11 |
+
params = Params(n_vocab=32000, n_embd=4096, n_layer=32, n_ctx=32768, n_ff=14336, n_head=32, n_head_kv=8, n_experts=None, n_experts_used=None, f_norm_eps=1e-05, rope_scaling_type=None, f_rope_freq_base=10000.0, f_rope_scale=None, n_orig_ctx=None, rope_finetuned=None, ftype=<GGMLFileType.MostlyQ8_0: 7>, path_model=PosixPath('zephyr-7b-beta'))
|
12 |
+
32000 32000
|
13 |
+
Vocab info: <VocabLoader with 32000 base tokens and 0 added tokens>
|
14 |
+
Special vocab info: <SpecialVocab with 58980 merges, special tokens {'bos': 1, 'eos': 2, 'unk': 0, 'pad': 2}, add special tokens unset>
|
15 |
+
Permuting layer 0
|
16 |
+
Permuting layer 1
|
17 |
+
Permuting layer 2
|
18 |
+
Permuting layer 3
|
19 |
+
Permuting layer 4
|
20 |
+
Permuting layer 5
|
21 |
+
Permuting layer 6
|
22 |
+
Permuting layer 7
|
23 |
+
Permuting layer 8
|
24 |
+
Permuting layer 9
|
25 |
+
Permuting layer 10
|
26 |
+
Permuting layer 11
|
27 |
+
Permuting layer 12
|
28 |
+
Permuting layer 13
|
29 |
+
Permuting layer 14
|
30 |
+
Permuting layer 15
|
31 |
+
Permuting layer 16
|
32 |
+
Permuting layer 17
|
33 |
+
Permuting layer 18
|
34 |
+
Permuting layer 19
|
35 |
+
Permuting layer 20
|
36 |
+
Permuting layer 21
|
37 |
+
Permuting layer 22
|
38 |
+
Permuting layer 23
|
39 |
+
Permuting layer 24
|
40 |
+
Permuting layer 25
|
41 |
+
Permuting layer 26
|
42 |
+
Permuting layer 27
|
43 |
+
Permuting layer 28
|
44 |
+
Permuting layer 29
|
45 |
+
Permuting layer 30
|
46 |
+
Permuting layer 31
|
47 |
+
model.embed_tokens.weight -> token_embd.weight | BF16 | [32000, 4096]
|
48 |
+
model.layers.0.input_layernorm.weight -> blk.0.attn_norm.weight | BF16 | [4096]
|
49 |
+
model.layers.0.mlp.down_proj.weight -> blk.0.ffn_down.weight | BF16 | [4096, 14336]
|
50 |
+
model.layers.0.mlp.gate_proj.weight -> blk.0.ffn_gate.weight | BF16 | [14336, 4096]
|
51 |
+
model.layers.0.mlp.up_proj.weight -> blk.0.ffn_up.weight | BF16 | [14336, 4096]
|
52 |
+
model.layers.0.post_attention_layernorm.weight -> blk.0.ffn_norm.weight | BF16 | [4096]
|
53 |
+
model.layers.0.self_attn.k_proj.weight -> blk.0.attn_k.weight | BF16 | [1024, 4096]
|
54 |
+
model.layers.0.self_attn.o_proj.weight -> blk.0.attn_output.weight | BF16 | [4096, 4096]
|
55 |
+
model.layers.0.self_attn.q_proj.weight -> blk.0.attn_q.weight | BF16 | [4096, 4096]
|
56 |
+
model.layers.0.self_attn.v_proj.weight -> blk.0.attn_v.weight | BF16 | [1024, 4096]
|
57 |
+
model.layers.1.input_layernorm.weight -> blk.1.attn_norm.weight | BF16 | [4096]
|
58 |
+
model.layers.1.mlp.down_proj.weight -> blk.1.ffn_down.weight | BF16 | [4096, 14336]
|
59 |
+
model.layers.1.mlp.gate_proj.weight -> blk.1.ffn_gate.weight | BF16 | [14336, 4096]
|
60 |
+
model.layers.1.mlp.up_proj.weight -> blk.1.ffn_up.weight | BF16 | [14336, 4096]
|
61 |
+
model.layers.1.post_attention_layernorm.weight -> blk.1.ffn_norm.weight | BF16 | [4096]
|
62 |
+
model.layers.1.self_attn.k_proj.weight -> blk.1.attn_k.weight | BF16 | [1024, 4096]
|
63 |
+
model.layers.1.self_attn.o_proj.weight -> blk.1.attn_output.weight | BF16 | [4096, 4096]
|
64 |
+
model.layers.1.self_attn.q_proj.weight -> blk.1.attn_q.weight | BF16 | [4096, 4096]
|
65 |
+
model.layers.1.self_attn.v_proj.weight -> blk.1.attn_v.weight | BF16 | [1024, 4096]
|
66 |
+
model.layers.2.input_layernorm.weight -> blk.2.attn_norm.weight | BF16 | [4096]
|
67 |
+
model.layers.2.mlp.down_proj.weight -> blk.2.ffn_down.weight | BF16 | [4096, 14336]
|
68 |
+
model.layers.2.mlp.gate_proj.weight -> blk.2.ffn_gate.weight | BF16 | [14336, 4096]
|
69 |
+
model.layers.2.mlp.up_proj.weight -> blk.2.ffn_up.weight | BF16 | [14336, 4096]
|
70 |
+
model.layers.2.post_attention_layernorm.weight -> blk.2.ffn_norm.weight | BF16 | [4096]
|
71 |
+
model.layers.2.self_attn.k_proj.weight -> blk.2.attn_k.weight | BF16 | [1024, 4096]
|
72 |
+
model.layers.2.self_attn.o_proj.weight -> blk.2.attn_output.weight | BF16 | [4096, 4096]
|
73 |
+
model.layers.2.self_attn.q_proj.weight -> blk.2.attn_q.weight | BF16 | [4096, 4096]
|
74 |
+
model.layers.2.self_attn.v_proj.weight -> blk.2.attn_v.weight | BF16 | [1024, 4096]
|
75 |
+
model.layers.3.mlp.gate_proj.weight -> blk.3.ffn_gate.weight | BF16 | [14336, 4096]
|
76 |
+
model.layers.3.mlp.up_proj.weight -> blk.3.ffn_up.weight | BF16 | [14336, 4096]
|
77 |
+
model.layers.3.self_attn.k_proj.weight -> blk.3.attn_k.weight | BF16 | [1024, 4096]
|
78 |
+
model.layers.3.self_attn.o_proj.weight -> blk.3.attn_output.weight | BF16 | [4096, 4096]
|
79 |
+
model.layers.3.self_attn.q_proj.weight -> blk.3.attn_q.weight | BF16 | [4096, 4096]
|
80 |
+
model.layers.3.self_attn.v_proj.weight -> blk.3.attn_v.weight | BF16 | [1024, 4096]
|
81 |
+
model.layers.3.input_layernorm.weight -> blk.3.attn_norm.weight | BF16 | [4096]
|
82 |
+
model.layers.3.mlp.down_proj.weight -> blk.3.ffn_down.weight | BF16 | [4096, 14336]
|
83 |
+
model.layers.3.post_attention_layernorm.weight -> blk.3.ffn_norm.weight | BF16 | [4096]
|
84 |
+
model.layers.4.input_layernorm.weight -> blk.4.attn_norm.weight | BF16 | [4096]
|
85 |
+
model.layers.4.mlp.down_proj.weight -> blk.4.ffn_down.weight | BF16 | [4096, 14336]
|
86 |
+
model.layers.4.mlp.gate_proj.weight -> blk.4.ffn_gate.weight | BF16 | [14336, 4096]
|
87 |
+
model.layers.4.mlp.up_proj.weight -> blk.4.ffn_up.weight | BF16 | [14336, 4096]
|
88 |
+
model.layers.4.post_attention_layernorm.weight -> blk.4.ffn_norm.weight | BF16 | [4096]
|
89 |
+
model.layers.4.self_attn.k_proj.weight -> blk.4.attn_k.weight | BF16 | [1024, 4096]
|
90 |
+
model.layers.4.self_attn.o_proj.weight -> blk.4.attn_output.weight | BF16 | [4096, 4096]
|
91 |
+
model.layers.4.self_attn.q_proj.weight -> blk.4.attn_q.weight | BF16 | [4096, 4096]
|
92 |
+
model.layers.4.self_attn.v_proj.weight -> blk.4.attn_v.weight | BF16 | [1024, 4096]
|
93 |
+
model.layers.5.input_layernorm.weight -> blk.5.attn_norm.weight | BF16 | [4096]
|
94 |
+
model.layers.5.mlp.down_proj.weight -> blk.5.ffn_down.weight | BF16 | [4096, 14336]
|
95 |
+
model.layers.5.mlp.gate_proj.weight -> blk.5.ffn_gate.weight | BF16 | [14336, 4096]
|
96 |
+
model.layers.5.mlp.up_proj.weight -> blk.5.ffn_up.weight | BF16 | [14336, 4096]
|
97 |
+
model.layers.5.post_attention_layernorm.weight -> blk.5.ffn_norm.weight | BF16 | [4096]
|
98 |
+
model.layers.5.self_attn.k_proj.weight -> blk.5.attn_k.weight | BF16 | [1024, 4096]
|
99 |
+
model.layers.5.self_attn.o_proj.weight -> blk.5.attn_output.weight | BF16 | [4096, 4096]
|
100 |
+
model.layers.5.self_attn.q_proj.weight -> blk.5.attn_q.weight | BF16 | [4096, 4096]
|
101 |
+
model.layers.5.self_attn.v_proj.weight -> blk.5.attn_v.weight | BF16 | [1024, 4096]
|
102 |
+
model.layers.6.input_layernorm.weight -> blk.6.attn_norm.weight | BF16 | [4096]
|
103 |
+
model.layers.6.mlp.down_proj.weight -> blk.6.ffn_down.weight | BF16 | [4096, 14336]
|
104 |
+
model.layers.6.mlp.gate_proj.weight -> blk.6.ffn_gate.weight | BF16 | [14336, 4096]
|
105 |
+
model.layers.6.mlp.up_proj.weight -> blk.6.ffn_up.weight | BF16 | [14336, 4096]
|
106 |
+
model.layers.6.post_attention_layernorm.weight -> blk.6.ffn_norm.weight | BF16 | [4096]
|
107 |
+
model.layers.6.self_attn.k_proj.weight -> blk.6.attn_k.weight | BF16 | [1024, 4096]
|
108 |
+
model.layers.6.self_attn.o_proj.weight -> blk.6.attn_output.weight | BF16 | [4096, 4096]
|
109 |
+
model.layers.6.self_attn.q_proj.weight -> blk.6.attn_q.weight | BF16 | [4096, 4096]
|
110 |
+
model.layers.6.self_attn.v_proj.weight -> blk.6.attn_v.weight | BF16 | [1024, 4096]
|
111 |
+
model.layers.7.input_layernorm.weight -> blk.7.attn_norm.weight | BF16 | [4096]
|
112 |
+
model.layers.7.mlp.down_proj.weight -> blk.7.ffn_down.weight | BF16 | [4096, 14336]
|
113 |
+
model.layers.7.mlp.gate_proj.weight -> blk.7.ffn_gate.weight | BF16 | [14336, 4096]
|
114 |
+
model.layers.7.mlp.up_proj.weight -> blk.7.ffn_up.weight | BF16 | [14336, 4096]
|
115 |
+
model.layers.7.post_attention_layernorm.weight -> blk.7.ffn_norm.weight | BF16 | [4096]
|
116 |
+
model.layers.7.self_attn.k_proj.weight -> blk.7.attn_k.weight | BF16 | [1024, 4096]
|
117 |
+
model.layers.7.self_attn.o_proj.weight -> blk.7.attn_output.weight | BF16 | [4096, 4096]
|
118 |
+
model.layers.7.self_attn.q_proj.weight -> blk.7.attn_q.weight | BF16 | [4096, 4096]
|
119 |
+
model.layers.7.self_attn.v_proj.weight -> blk.7.attn_v.weight | BF16 | [1024, 4096]
|
120 |
+
model.layers.8.self_attn.k_proj.weight -> blk.8.attn_k.weight | BF16 | [1024, 4096]
|
121 |
+
model.layers.8.self_attn.o_proj.weight -> blk.8.attn_output.weight | BF16 | [4096, 4096]
|
122 |
+
model.layers.8.self_attn.q_proj.weight -> blk.8.attn_q.weight | BF16 | [4096, 4096]
|
123 |
+
model.layers.8.self_attn.v_proj.weight -> blk.8.attn_v.weight | BF16 | [1024, 4096]
|
124 |
+
model.layers.10.input_layernorm.weight -> blk.10.attn_norm.weight | BF16 | [4096]
|
125 |
+
model.layers.10.mlp.down_proj.weight -> blk.10.ffn_down.weight | BF16 | [4096, 14336]
|
126 |
+
model.layers.10.mlp.gate_proj.weight -> blk.10.ffn_gate.weight | BF16 | [14336, 4096]
|
127 |
+
model.layers.10.mlp.up_proj.weight -> blk.10.ffn_up.weight | BF16 | [14336, 4096]
|
128 |
+
model.layers.10.post_attention_layernorm.weight -> blk.10.ffn_norm.weight | BF16 | [4096]
|
129 |
+
model.layers.10.self_attn.k_proj.weight -> blk.10.attn_k.weight | BF16 | [1024, 4096]
|
130 |
+
model.layers.10.self_attn.o_proj.weight -> blk.10.attn_output.weight | BF16 | [4096, 4096]
|
131 |
+
model.layers.10.self_attn.q_proj.weight -> blk.10.attn_q.weight | BF16 | [4096, 4096]
|
132 |
+
model.layers.10.self_attn.v_proj.weight -> blk.10.attn_v.weight | BF16 | [1024, 4096]
|
133 |
+
model.layers.11.input_layernorm.weight -> blk.11.attn_norm.weight | BF16 | [4096]
|
134 |
+
model.layers.11.mlp.down_proj.weight -> blk.11.ffn_down.weight | BF16 | [4096, 14336]
|
135 |
+
model.layers.11.mlp.gate_proj.weight -> blk.11.ffn_gate.weight | BF16 | [14336, 4096]
|
136 |
+
model.layers.11.mlp.up_proj.weight -> blk.11.ffn_up.weight | BF16 | [14336, 4096]
|
137 |
+
model.layers.11.post_attention_layernorm.weight -> blk.11.ffn_norm.weight | BF16 | [4096]
|
138 |
+
model.layers.11.self_attn.k_proj.weight -> blk.11.attn_k.weight | BF16 | [1024, 4096]
|
139 |
+
model.layers.11.self_attn.o_proj.weight -> blk.11.attn_output.weight | BF16 | [4096, 4096]
|
140 |
+
model.layers.11.self_attn.q_proj.weight -> blk.11.attn_q.weight | BF16 | [4096, 4096]
|
141 |
+
model.layers.11.self_attn.v_proj.weight -> blk.11.attn_v.weight | BF16 | [1024, 4096]
|
142 |
+
model.layers.12.mlp.gate_proj.weight -> blk.12.ffn_gate.weight | BF16 | [14336, 4096]
|
143 |
+
model.layers.12.mlp.up_proj.weight -> blk.12.ffn_up.weight | BF16 | [14336, 4096]
|
144 |
+
model.layers.12.self_attn.k_proj.weight -> blk.12.attn_k.weight | BF16 | [1024, 4096]
|
145 |
+
model.layers.12.self_attn.o_proj.weight -> blk.12.attn_output.weight | BF16 | [4096, 4096]
|
146 |
+
model.layers.12.self_attn.q_proj.weight -> blk.12.attn_q.weight | BF16 | [4096, 4096]
|
147 |
+
model.layers.12.self_attn.v_proj.weight -> blk.12.attn_v.weight | BF16 | [1024, 4096]
|
148 |
+
model.layers.8.input_layernorm.weight -> blk.8.attn_norm.weight | BF16 | [4096]
|
149 |
+
model.layers.8.mlp.down_proj.weight -> blk.8.ffn_down.weight | BF16 | [4096, 14336]
|
150 |
+
model.layers.8.mlp.gate_proj.weight -> blk.8.ffn_gate.weight | BF16 | [14336, 4096]
|
151 |
+
model.layers.8.mlp.up_proj.weight -> blk.8.ffn_up.weight | BF16 | [14336, 4096]
|
152 |
+
model.layers.8.post_attention_layernorm.weight -> blk.8.ffn_norm.weight | BF16 | [4096]
|
153 |
+
model.layers.9.input_layernorm.weight -> blk.9.attn_norm.weight | BF16 | [4096]
|
154 |
+
model.layers.9.mlp.down_proj.weight -> blk.9.ffn_down.weight | BF16 | [4096, 14336]
|
155 |
+
model.layers.9.mlp.gate_proj.weight -> blk.9.ffn_gate.weight | BF16 | [14336, 4096]
|
156 |
+
model.layers.9.mlp.up_proj.weight -> blk.9.ffn_up.weight | BF16 | [14336, 4096]
|
157 |
+
model.layers.9.post_attention_layernorm.weight -> blk.9.ffn_norm.weight | BF16 | [4096]
|
158 |
+
model.layers.9.self_attn.k_proj.weight -> blk.9.attn_k.weight | BF16 | [1024, 4096]
|
159 |
+
model.layers.9.self_attn.o_proj.weight -> blk.9.attn_output.weight | BF16 | [4096, 4096]
|
160 |
+
model.layers.9.self_attn.q_proj.weight -> blk.9.attn_q.weight | BF16 | [4096, 4096]
|
161 |
+
model.layers.9.self_attn.v_proj.weight -> blk.9.attn_v.weight | BF16 | [1024, 4096]
|
162 |
+
model.layers.12.input_layernorm.weight -> blk.12.attn_norm.weight | BF16 | [4096]
|
163 |
+
model.layers.12.mlp.down_proj.weight -> blk.12.ffn_down.weight | BF16 | [4096, 14336]
|
164 |
+
model.layers.12.post_attention_layernorm.weight -> blk.12.ffn_norm.weight | BF16 | [4096]
|
165 |
+
model.layers.13.input_layernorm.weight -> blk.13.attn_norm.weight | BF16 | [4096]
|
166 |
+
model.layers.13.mlp.down_proj.weight -> blk.13.ffn_down.weight | BF16 | [4096, 14336]
|
167 |
+
model.layers.13.mlp.gate_proj.weight -> blk.13.ffn_gate.weight | BF16 | [14336, 4096]
|
168 |
+
model.layers.13.mlp.up_proj.weight -> blk.13.ffn_up.weight | BF16 | [14336, 4096]
|
169 |
+
model.layers.13.post_attention_layernorm.weight -> blk.13.ffn_norm.weight | BF16 | [4096]
|
170 |
+
model.layers.13.self_attn.k_proj.weight -> blk.13.attn_k.weight | BF16 | [1024, 4096]
|
171 |
+
model.layers.13.self_attn.o_proj.weight -> blk.13.attn_output.weight | BF16 | [4096, 4096]
|
172 |
+
model.layers.13.self_attn.q_proj.weight -> blk.13.attn_q.weight | BF16 | [4096, 4096]
|
173 |
+
model.layers.13.self_attn.v_proj.weight -> blk.13.attn_v.weight | BF16 | [1024, 4096]
|
174 |
+
model.layers.14.input_layernorm.weight -> blk.14.attn_norm.weight | BF16 | [4096]
|
175 |
+
model.layers.14.mlp.down_proj.weight -> blk.14.ffn_down.weight | BF16 | [4096, 14336]
|
176 |
+
model.layers.14.mlp.gate_proj.weight -> blk.14.ffn_gate.weight | BF16 | [14336, 4096]
|
177 |
+
model.layers.14.mlp.up_proj.weight -> blk.14.ffn_up.weight | BF16 | [14336, 4096]
|
178 |
+
model.layers.14.post_attention_layernorm.weight -> blk.14.ffn_norm.weight | BF16 | [4096]
|
179 |
+
model.layers.14.self_attn.k_proj.weight -> blk.14.attn_k.weight | BF16 | [1024, 4096]
|
180 |
+
model.layers.14.self_attn.o_proj.weight -> blk.14.attn_output.weight | BF16 | [4096, 4096]
|
181 |
+
model.layers.14.self_attn.q_proj.weight -> blk.14.attn_q.weight | BF16 | [4096, 4096]
|
182 |
+
model.layers.14.self_attn.v_proj.weight -> blk.14.attn_v.weight | BF16 | [1024, 4096]
|
183 |
+
model.layers.15.input_layernorm.weight -> blk.15.attn_norm.weight | BF16 | [4096]
|
184 |
+
model.layers.15.mlp.down_proj.weight -> blk.15.ffn_down.weight | BF16 | [4096, 14336]
|
185 |
+
model.layers.15.mlp.gate_proj.weight -> blk.15.ffn_gate.weight | BF16 | [14336, 4096]
|
186 |
+
model.layers.15.mlp.up_proj.weight -> blk.15.ffn_up.weight | BF16 | [14336, 4096]
|
187 |
+
model.layers.15.post_attention_layernorm.weight -> blk.15.ffn_norm.weight | BF16 | [4096]
|
188 |
+
model.layers.15.self_attn.k_proj.weight -> blk.15.attn_k.weight | BF16 | [1024, 4096]
|
189 |
+
model.layers.15.self_attn.o_proj.weight -> blk.15.attn_output.weight | BF16 | [4096, 4096]
|
190 |
+
model.layers.15.self_attn.q_proj.weight -> blk.15.attn_q.weight | BF16 | [4096, 4096]
|
191 |
+
model.layers.15.self_attn.v_proj.weight -> blk.15.attn_v.weight | BF16 | [1024, 4096]
|
192 |
+
model.layers.16.input_layernorm.weight -> blk.16.attn_norm.weight | BF16 | [4096]
|
193 |
+
model.layers.16.mlp.down_proj.weight -> blk.16.ffn_down.weight | BF16 | [4096, 14336]
|
194 |
+
model.layers.16.mlp.gate_proj.weight -> blk.16.ffn_gate.weight | BF16 | [14336, 4096]
|
195 |
+
model.layers.16.mlp.up_proj.weight -> blk.16.ffn_up.weight | BF16 | [14336, 4096]
|
196 |
+
model.layers.16.post_attention_layernorm.weight -> blk.16.ffn_norm.weight | BF16 | [4096]
|
197 |
+
model.layers.16.self_attn.k_proj.weight -> blk.16.attn_k.weight | BF16 | [1024, 4096]
|
198 |
+
model.layers.16.self_attn.o_proj.weight -> blk.16.attn_output.weight | BF16 | [4096, 4096]
|
199 |
+
model.layers.16.self_attn.q_proj.weight -> blk.16.attn_q.weight | BF16 | [4096, 4096]
|
200 |
+
model.layers.16.self_attn.v_proj.weight -> blk.16.attn_v.weight | BF16 | [1024, 4096]
|
201 |
+
model.layers.17.self_attn.k_proj.weight -> blk.17.attn_k.weight | BF16 | [1024, 4096]
|
202 |
+
model.layers.17.self_attn.o_proj.weight -> blk.17.attn_output.weight | BF16 | [4096, 4096]
|
203 |
+
model.layers.17.self_attn.q_proj.weight -> blk.17.attn_q.weight | BF16 | [4096, 4096]
|
204 |
+
model.layers.17.self_attn.v_proj.weight -> blk.17.attn_v.weight | BF16 | [1024, 4096]
|
205 |
+
model.layers.17.input_layernorm.weight -> blk.17.attn_norm.weight | BF16 | [4096]
|
206 |
+
model.layers.17.mlp.down_proj.weight -> blk.17.ffn_down.weight | BF16 | [4096, 14336]
|
207 |
+
model.layers.17.mlp.gate_proj.weight -> blk.17.ffn_gate.weight | BF16 | [14336, 4096]
|
208 |
+
model.layers.17.mlp.up_proj.weight -> blk.17.ffn_up.weight | BF16 | [14336, 4096]
|
209 |
+
model.layers.17.post_attention_layernorm.weight -> blk.17.ffn_norm.weight | BF16 | [4096]
|
210 |
+
model.layers.18.input_layernorm.weight -> blk.18.attn_norm.weight | BF16 | [4096]
|
211 |
+
model.layers.18.mlp.down_proj.weight -> blk.18.ffn_down.weight | BF16 | [4096, 14336]
|
212 |
+
model.layers.18.mlp.gate_proj.weight -> blk.18.ffn_gate.weight | BF16 | [14336, 4096]
|
213 |
+
model.layers.18.mlp.up_proj.weight -> blk.18.ffn_up.weight | BF16 | [14336, 4096]
|
214 |
+
model.layers.18.post_attention_layernorm.weight -> blk.18.ffn_norm.weight | BF16 | [4096]
|
215 |
+
model.layers.18.self_attn.k_proj.weight -> blk.18.attn_k.weight | BF16 | [1024, 4096]
|
216 |
+
model.layers.18.self_attn.o_proj.weight -> blk.18.attn_output.weight | BF16 | [4096, 4096]
|
217 |
+
model.layers.18.self_attn.q_proj.weight -> blk.18.attn_q.weight | BF16 | [4096, 4096]
|
218 |
+
model.layers.18.self_attn.v_proj.weight -> blk.18.attn_v.weight | BF16 | [1024, 4096]
|
219 |
+
model.layers.19.input_layernorm.weight -> blk.19.attn_norm.weight | BF16 | [4096]
|
220 |
+
model.layers.19.mlp.down_proj.weight -> blk.19.ffn_down.weight | BF16 | [4096, 14336]
|
221 |
+
model.layers.19.mlp.gate_proj.weight -> blk.19.ffn_gate.weight | BF16 | [14336, 4096]
|
222 |
+
model.layers.19.mlp.up_proj.weight -> blk.19.ffn_up.weight | BF16 | [14336, 4096]
|
223 |
+
model.layers.19.post_attention_layernorm.weight -> blk.19.ffn_norm.weight | BF16 | [4096]
|
224 |
+
model.layers.19.self_attn.k_proj.weight -> blk.19.attn_k.weight | BF16 | [1024, 4096]
|
225 |
+
model.layers.19.self_attn.o_proj.weight -> blk.19.attn_output.weight | BF16 | [4096, 4096]
|
226 |
+
model.layers.19.self_attn.q_proj.weight -> blk.19.attn_q.weight | BF16 | [4096, 4096]
|
227 |
+
model.layers.19.self_attn.v_proj.weight -> blk.19.attn_v.weight | BF16 | [1024, 4096]
|
228 |
+
model.layers.20.input_layernorm.weight -> blk.20.attn_norm.weight | BF16 | [4096]
|
229 |
+
model.layers.20.mlp.down_proj.weight -> blk.20.ffn_down.weight | BF16 | [4096, 14336]
|
230 |
+
model.layers.20.mlp.gate_proj.weight -> blk.20.ffn_gate.weight | BF16 | [14336, 4096]
|
231 |
+
model.layers.20.mlp.up_proj.weight -> blk.20.ffn_up.weight | BF16 | [14336, 4096]
|
232 |
+
model.layers.20.post_attention_layernorm.weight -> blk.20.ffn_norm.weight | BF16 | [4096]
|
233 |
+
model.layers.20.self_attn.k_proj.weight -> blk.20.attn_k.weight | BF16 | [1024, 4096]
|
234 |
+
model.layers.20.self_attn.o_proj.weight -> blk.20.attn_output.weight | BF16 | [4096, 4096]
|
235 |
+
model.layers.20.self_attn.q_proj.weight -> blk.20.attn_q.weight | BF16 | [4096, 4096]
|
236 |
+
model.layers.20.self_attn.v_proj.weight -> blk.20.attn_v.weight | BF16 | [1024, 4096]
|
237 |
+
model.layers.21.mlp.gate_proj.weight -> blk.21.ffn_gate.weight | BF16 | [14336, 4096]
|
238 |
+
model.layers.21.mlp.up_proj.weight -> blk.21.ffn_up.weight | BF16 | [14336, 4096]
|
239 |
+
model.layers.21.self_attn.k_proj.weight -> blk.21.attn_k.weight | BF16 | [1024, 4096]
|
240 |
+
model.layers.21.self_attn.o_proj.weight -> blk.21.attn_output.weight | BF16 | [4096, 4096]
|
241 |
+
model.layers.21.self_attn.q_proj.weight -> blk.21.attn_q.weight | BF16 | [4096, 4096]
|
242 |
+
model.layers.21.self_attn.v_proj.weight -> blk.21.attn_v.weight | BF16 | [1024, 4096]
|
243 |
+
model.layers.21.input_layernorm.weight -> blk.21.attn_norm.weight | BF16 | [4096]
|
244 |
+
model.layers.21.mlp.down_proj.weight -> blk.21.ffn_down.weight | BF16 | [4096, 14336]
|
245 |
+
model.layers.21.post_attention_layernorm.weight -> blk.21.ffn_norm.weight | BF16 | [4096]
|
246 |
+
model.layers.22.input_layernorm.weight -> blk.22.attn_norm.weight | BF16 | [4096]
|
247 |
+
model.layers.22.mlp.down_proj.weight -> blk.22.ffn_down.weight | BF16 | [4096, 14336]
|
248 |
+
model.layers.22.mlp.gate_proj.weight -> blk.22.ffn_gate.weight | BF16 | [14336, 4096]
|
249 |
+
model.layers.22.mlp.up_proj.weight -> blk.22.ffn_up.weight | BF16 | [14336, 4096]
|
250 |
+
model.layers.22.post_attention_layernorm.weight -> blk.22.ffn_norm.weight | BF16 | [4096]
|
251 |
+
model.layers.22.self_attn.k_proj.weight -> blk.22.attn_k.weight | BF16 | [1024, 4096]
|
252 |
+
model.layers.22.self_attn.o_proj.weight -> blk.22.attn_output.weight | BF16 | [4096, 4096]
|
253 |
+
model.layers.22.self_attn.q_proj.weight -> blk.22.attn_q.weight | BF16 | [4096, 4096]
|
254 |
+
model.layers.22.self_attn.v_proj.weight -> blk.22.attn_v.weight | BF16 | [1024, 4096]
|
255 |
+
model.layers.23.input_layernorm.weight -> blk.23.attn_norm.weight | BF16 | [4096]
|
256 |
+
model.layers.23.mlp.down_proj.weight -> blk.23.ffn_down.weight | BF16 | [4096, 14336]
|
257 |
+
model.layers.23.mlp.gate_proj.weight -> blk.23.ffn_gate.weight | BF16 | [14336, 4096]
|
258 |
+
model.layers.23.mlp.up_proj.weight -> blk.23.ffn_up.weight | BF16 | [14336, 4096]
|
259 |
+
model.layers.23.post_attention_layernorm.weight -> blk.23.ffn_norm.weight | BF16 | [4096]
|
260 |
+
model.layers.23.self_attn.k_proj.weight -> blk.23.attn_k.weight | BF16 | [1024, 4096]
|
261 |
+
model.layers.23.self_attn.o_proj.weight -> blk.23.attn_output.weight | BF16 | [4096, 4096]
|
262 |
+
model.layers.23.self_attn.q_proj.weight -> blk.23.attn_q.weight | BF16 | [4096, 4096]
|
263 |
+
model.layers.23.self_attn.v_proj.weight -> blk.23.attn_v.weight | BF16 | [1024, 4096]
|
264 |
+
model.layers.24.input_layernorm.weight -> blk.24.attn_norm.weight | BF16 | [4096]
|
265 |
+
model.layers.24.mlp.down_proj.weight -> blk.24.ffn_down.weight | BF16 | [4096, 14336]
|
266 |
+
model.layers.24.mlp.gate_proj.weight -> blk.24.ffn_gate.weight | BF16 | [14336, 4096]
|
267 |
+
model.layers.24.mlp.up_proj.weight -> blk.24.ffn_up.weight | BF16 | [14336, 4096]
|
268 |
+
model.layers.24.post_attention_layernorm.weight -> blk.24.ffn_norm.weight | BF16 | [4096]
|
269 |
+
model.layers.24.self_attn.k_proj.weight -> blk.24.attn_k.weight | BF16 | [1024, 4096]
|
270 |
+
model.layers.24.self_attn.o_proj.weight -> blk.24.attn_output.weight | BF16 | [4096, 4096]
|
271 |
+
model.layers.24.self_attn.q_proj.weight -> blk.24.attn_q.weight | BF16 | [4096, 4096]
|
272 |
+
model.layers.24.self_attn.v_proj.weight -> blk.24.attn_v.weight | BF16 | [1024, 4096]
|
273 |
+
model.layers.25.input_layernorm.weight -> blk.25.attn_norm.weight | BF16 | [4096]
|
274 |
+
model.layers.25.mlp.down_proj.weight -> blk.25.ffn_down.weight | BF16 | [4096, 14336]
|
275 |
+
model.layers.25.mlp.gate_proj.weight -> blk.25.ffn_gate.weight | BF16 | [14336, 4096]
|
276 |
+
model.layers.25.mlp.up_proj.weight -> blk.25.ffn_up.weight | BF16 | [14336, 4096]
|
277 |
+
model.layers.25.post_attention_layernorm.weight -> blk.25.ffn_norm.weight | BF16 | [4096]
|
278 |
+
model.layers.25.self_attn.k_proj.weight -> blk.25.attn_k.weight | BF16 | [1024, 4096]
|
279 |
+
model.layers.25.self_attn.o_proj.weight -> blk.25.attn_output.weight | BF16 | [4096, 4096]
|
280 |
+
model.layers.25.self_attn.q_proj.weight -> blk.25.attn_q.weight | BF16 | [4096, 4096]
|
281 |
+
model.layers.25.self_attn.v_proj.weight -> blk.25.attn_v.weight | BF16 | [1024, 4096]
|
282 |
+
model.layers.26.self_attn.k_proj.weight -> blk.26.attn_k.weight | BF16 | [1024, 4096]
|
283 |
+
model.layers.26.self_attn.o_proj.weight -> blk.26.attn_output.weight | BF16 | [4096, 4096]
|
284 |
+
model.layers.26.self_attn.q_proj.weight -> blk.26.attn_q.weight | BF16 | [4096, 4096]
|
285 |
+
model.layers.26.self_attn.v_proj.weight -> blk.26.attn_v.weight | BF16 | [1024, 4096]
|
286 |
+
model.layers.26.input_layernorm.weight -> blk.26.attn_norm.weight | BF16 | [4096]
|
287 |
+
model.layers.26.mlp.down_proj.weight -> blk.26.ffn_down.weight | BF16 | [4096, 14336]
|
288 |
+
model.layers.26.mlp.gate_proj.weight -> blk.26.ffn_gate.weight | BF16 | [14336, 4096]
|
289 |
+
model.layers.26.mlp.up_proj.weight -> blk.26.ffn_up.weight | BF16 | [14336, 4096]
|
290 |
+
model.layers.26.post_attention_layernorm.weight -> blk.26.ffn_norm.weight | BF16 | [4096]
|
291 |
+
model.layers.27.input_layernorm.weight -> blk.27.attn_norm.weight | BF16 | [4096]
|
292 |
+
model.layers.27.mlp.down_proj.weight -> blk.27.ffn_down.weight | BF16 | [4096, 14336]
|
293 |
+
model.layers.27.mlp.gate_proj.weight -> blk.27.ffn_gate.weight | BF16 | [14336, 4096]
|
294 |
+
model.layers.27.mlp.up_proj.weight -> blk.27.ffn_up.weight | BF16 | [14336, 4096]
|
295 |
+
model.layers.27.post_attention_layernorm.weight -> blk.27.ffn_norm.weight | BF16 | [4096]
|
296 |
+
model.layers.27.self_attn.k_proj.weight -> blk.27.attn_k.weight | BF16 | [1024, 4096]
|
297 |
+
model.layers.27.self_attn.o_proj.weight -> blk.27.attn_output.weight | BF16 | [4096, 4096]
|
298 |
+
model.layers.27.self_attn.q_proj.weight -> blk.27.attn_q.weight | BF16 | [4096, 4096]
|
299 |
+
model.layers.27.self_attn.v_proj.weight -> blk.27.attn_v.weight | BF16 | [1024, 4096]
|
300 |
+
model.layers.28.input_layernorm.weight -> blk.28.attn_norm.weight | BF16 | [4096]
|
301 |
+
model.layers.28.mlp.down_proj.weight -> blk.28.ffn_down.weight | BF16 | [4096, 14336]
|
302 |
+
model.layers.28.mlp.gate_proj.weight -> blk.28.ffn_gate.weight | BF16 | [14336, 4096]
|
303 |
+
model.layers.28.mlp.up_proj.weight -> blk.28.ffn_up.weight | BF16 | [14336, 4096]
|
304 |
+
model.layers.28.post_attention_layernorm.weight -> blk.28.ffn_norm.weight | BF16 | [4096]
|
305 |
+
model.layers.28.self_attn.k_proj.weight -> blk.28.attn_k.weight | BF16 | [1024, 4096]
|
306 |
+
model.layers.28.self_attn.o_proj.weight -> blk.28.attn_output.weight | BF16 | [4096, 4096]
|
307 |
+
model.layers.28.self_attn.q_proj.weight -> blk.28.attn_q.weight | BF16 | [4096, 4096]
|
308 |
+
model.layers.28.self_attn.v_proj.weight -> blk.28.attn_v.weight | BF16 | [1024, 4096]
|
309 |
+
model.layers.29.input_layernorm.weight -> blk.29.attn_norm.weight | BF16 | [4096]
|
310 |
+
model.layers.29.mlp.down_proj.weight -> blk.29.ffn_down.weight | BF16 | [4096, 14336]
|
311 |
+
model.layers.29.mlp.gate_proj.weight -> blk.29.ffn_gate.weight | BF16 | [14336, 4096]
|
312 |
+
model.layers.29.mlp.up_proj.weight -> blk.29.ffn_up.weight | BF16 | [14336, 4096]
|
313 |
+
model.layers.29.post_attention_layernorm.weight -> blk.29.ffn_norm.weight | BF16 | [4096]
|
314 |
+
model.layers.29.self_attn.k_proj.weight -> blk.29.attn_k.weight | BF16 | [1024, 4096]
|
315 |
+
model.layers.29.self_attn.o_proj.weight -> blk.29.attn_output.weight | BF16 | [4096, 4096]
|
316 |
+
model.layers.29.self_attn.q_proj.weight -> blk.29.attn_q.weight | BF16 | [4096, 4096]
|
317 |
+
model.layers.29.self_attn.v_proj.weight -> blk.29.attn_v.weight | BF16 | [1024, 4096]
|
318 |
+
model.layers.30.mlp.gate_proj.weight -> blk.30.ffn_gate.weight | BF16 | [14336, 4096]
|
319 |
+
model.layers.30.mlp.up_proj.weight -> blk.30.ffn_up.weight | BF16 | [14336, 4096]
|
320 |
+
model.layers.30.self_attn.k_proj.weight -> blk.30.attn_k.weight | BF16 | [1024, 4096]
|
321 |
+
model.layers.30.self_attn.o_proj.weight -> blk.30.attn_output.weight | BF16 | [4096, 4096]
|
322 |
+
model.layers.30.self_attn.q_proj.weight -> blk.30.attn_q.weight | BF16 | [4096, 4096]
|
323 |
+
model.layers.30.self_attn.v_proj.weight -> blk.30.attn_v.weight | BF16 | [1024, 4096]
|
324 |
+
lm_head.weight -> output.weight | BF16 | [32000, 4096]
|
325 |
+
model.layers.30.input_layernorm.weight -> blk.30.attn_norm.weight | BF16 | [4096]
|
326 |
+
model.layers.30.mlp.down_proj.weight -> blk.30.ffn_down.weight | BF16 | [4096, 14336]
|
327 |
+
model.layers.30.post_attention_layernorm.weight -> blk.30.ffn_norm.weight | BF16 | [4096]
|
328 |
+
model.layers.31.input_layernorm.weight -> blk.31.attn_norm.weight | BF16 | [4096]
|
329 |
+
model.layers.31.mlp.down_proj.weight -> blk.31.ffn_down.weight | BF16 | [4096, 14336]
|
330 |
+
model.layers.31.mlp.gate_proj.weight -> blk.31.ffn_gate.weight | BF16 | [14336, 4096]
|
331 |
+
model.layers.31.mlp.up_proj.weight -> blk.31.ffn_up.weight | BF16 | [14336, 4096]
|
332 |
+
model.layers.31.post_attention_layernorm.weight -> blk.31.ffn_norm.weight | BF16 | [4096]
|
333 |
+
model.layers.31.self_attn.k_proj.weight -> blk.31.attn_k.weight | BF16 | [1024, 4096]
|
334 |
+
model.layers.31.self_attn.o_proj.weight -> blk.31.attn_output.weight | BF16 | [4096, 4096]
|
335 |
+
model.layers.31.self_attn.q_proj.weight -> blk.31.attn_q.weight | BF16 | [4096, 4096]
|
336 |
+
model.layers.31.self_attn.v_proj.weight -> blk.31.attn_v.weight | BF16 | [1024, 4096]
|
337 |
+
model.norm.weight -> output_norm.weight | BF16 | [4096]
|
338 |
+
Writing zephyr_f32.gguf, format 7
|
339 |
+
gguf: This GGUF file is for Little Endian only
|
340 |
+
gguf: Adding 58980 merge(s).
|
341 |
+
gguf: Setting special token type bos to 1
|
342 |
+
gguf: Setting special token type eos to 2
|
343 |
+
gguf: Setting special token type unk to 0
|
344 |
+
gguf: Setting special token type pad to 2
|
345 |
+
gguf: Setting chat_template to {% for message in messages %}
|
346 |
+
{% if message['role'] == 'user' %}
|
347 |
+
{{ '<|user|>
|
348 |
+
' + message['content'] + eos_token }}
|
349 |
+
{% elif message['role'] == 'system' %}
|
350 |
+
{{ '<|system|>
|
351 |
+
' + message['content'] + eos_token }}
|
352 |
+
{% elif message['role'] == 'assistant' %}
|
353 |
+
{{ '<|assistant|>
|
354 |
+
' + message['content'] + eos_token }}
|
355 |
+
{% endif %}
|
356 |
+
{% if loop.last and add_generation_prompt %}
|
357 |
+
{{ '<|assistant|>' }}
|
358 |
+
{% endif %}
|
359 |
+
{% endfor %}
|
360 |
+
[ 1/291] Writing tensor token_embd.weight | size 32000 x 4096 | type Q8_0 | T+ 19
|
361 |
+
[ 2/291] Writing tensor blk.0.attn_norm.weight | size 4096 | type F32 | T+ 19
|
362 |
+
[ 3/291] Writing tensor blk.0.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 19
|
363 |
+
[ 4/291] Writing tensor blk.0.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 19
|
364 |
+
[ 5/291] Writing tensor blk.0.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 19
|
365 |
+
[ 6/291] Writing tensor blk.0.ffn_norm.weight | size 4096 | type F32 | T+ 19
|
366 |
+
[ 7/291] Writing tensor blk.0.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 19
|
367 |
+
[ 8/291] Writing tensor blk.0.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 19
|
368 |
+
[ 9/291] Writing tensor blk.0.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 21
|
369 |
+
[ 10/291] Writing tensor blk.0.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 22
|
370 |
+
[ 11/291] Writing tensor blk.1.attn_norm.weight | size 4096 | type F32 | T+ 22
|
371 |
+
[ 12/291] Writing tensor blk.1.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 23
|
372 |
+
[ 13/291] Writing tensor blk.1.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 27
|
373 |
+
[ 14/291] Writing tensor blk.1.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 27
|
374 |
+
[ 15/291] Writing tensor blk.1.ffn_norm.weight | size 4096 | type F32 | T+ 27
|
375 |
+
[ 16/291] Writing tensor blk.1.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 27
|
376 |
+
[ 17/291] Writing tensor blk.1.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 27
|
377 |
+
[ 18/291] Writing tensor blk.1.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 27
|
378 |
+
[ 19/291] Writing tensor blk.1.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 27
|
379 |
+
[ 20/291] Writing tensor blk.2.attn_norm.weight | size 4096 | type F32 | T+ 27
|
380 |
+
[ 21/291] Writing tensor blk.2.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 34
|
381 |
+
[ 22/291] Writing tensor blk.2.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 35
|
382 |
+
[ 23/291] Writing tensor blk.2.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 35
|
383 |
+
[ 24/291] Writing tensor blk.2.ffn_norm.weight | size 4096 | type F32 | T+ 35
|
384 |
+
[ 25/291] Writing tensor blk.2.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 35
|
385 |
+
[ 26/291] Writing tensor blk.2.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 35
|
386 |
+
[ 27/291] Writing tensor blk.2.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 35
|
387 |
+
[ 28/291] Writing tensor blk.2.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 35
|
388 |
+
[ 29/291] Writing tensor blk.3.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 38
|
389 |
+
[ 30/291] Writing tensor blk.3.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 38
|
390 |
+
[ 31/291] Writing tensor blk.3.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 42
|
391 |
+
[ 32/291] Writing tensor blk.3.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 42
|
392 |
+
[ 33/291] Writing tensor blk.3.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 42
|
393 |
+
[ 34/291] Writing tensor blk.3.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 42
|
394 |
+
[ 35/291] Writing tensor blk.3.attn_norm.weight | size 4096 | type F32 | T+ 42
|
395 |
+
[ 36/291] Writing tensor blk.3.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 42
|
396 |
+
[ 37/291] Writing tensor blk.3.ffn_norm.weight | size 4096 | type F32 | T+ 42
|
397 |
+
[ 38/291] Writing tensor blk.4.attn_norm.weight | size 4096 | type F32 | T+ 42
|
398 |
+
[ 39/291] Writing tensor blk.4.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 45
|
399 |
+
[ 40/291] Writing tensor blk.4.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 49
|
400 |
+
[ 41/291] Writing tensor blk.4.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 49
|
401 |
+
[ 42/291] Writing tensor blk.4.ffn_norm.weight | size 4096 | type F32 | T+ 49
|
402 |
+
[ 43/291] Writing tensor blk.4.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 49
|
403 |
+
[ 44/291] Writing tensor blk.4.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 49
|
404 |
+
[ 45/291] Writing tensor blk.4.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 49
|
405 |
+
[ 46/291] Writing tensor blk.4.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 49
|
406 |
+
[ 47/291] Writing tensor blk.5.attn_norm.weight | size 4096 | type F32 | T+ 49
|
407 |
+
[ 48/291] Writing tensor blk.5.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 53
|
408 |
+
[ 49/291] Writing tensor blk.5.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 57
|
409 |
+
[ 50/291] Writing tensor blk.5.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 57
|
410 |
+
[ 51/291] Writing tensor blk.5.ffn_norm.weight | size 4096 | type F32 | T+ 57
|
411 |
+
[ 52/291] Writing tensor blk.5.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 57
|
412 |
+
[ 53/291] Writing tensor blk.5.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 57
|
413 |
+
[ 54/291] Writing tensor blk.5.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 57
|
414 |
+
[ 55/291] Writing tensor blk.5.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 57
|
415 |
+
[ 56/291] Writing tensor blk.6.attn_norm.weight | size 4096 | type F32 | T+ 57
|
416 |
+
[ 57/291] Writing tensor blk.6.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 61
|
417 |
+
[ 58/291] Writing tensor blk.6.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 64
|
418 |
+
[ 59/291] Writing tensor blk.6.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 65
|
419 |
+
[ 60/291] Writing tensor blk.6.ffn_norm.weight | size 4096 | type F32 | T+ 65
|
420 |
+
[ 61/291] Writing tensor blk.6.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 65
|
421 |
+
[ 62/291] Writing tensor blk.6.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 65
|
422 |
+
[ 63/291] Writing tensor blk.6.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 65
|
423 |
+
[ 64/291] Writing tensor blk.6.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 65
|
424 |
+
[ 65/291] Writing tensor blk.7.attn_norm.weight | size 4096 | type F32 | T+ 65
|
425 |
+
[ 66/291] Writing tensor blk.7.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 68
|
426 |
+
[ 67/291] Writing tensor blk.7.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 70
|
427 |
+
[ 68/291] Writing tensor blk.7.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 70
|
428 |
+
[ 69/291] Writing tensor blk.7.ffn_norm.weight | size 4096 | type F32 | T+ 70
|
429 |
+
[ 70/291] Writing tensor blk.7.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 70
|
430 |
+
[ 71/291] Writing tensor blk.7.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 74
|
431 |
+
[ 72/291] Writing tensor blk.7.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 74
|
432 |
+
[ 73/291] Writing tensor blk.7.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 74
|
433 |
+
[ 74/291] Writing tensor blk.8.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 74
|
434 |
+
[ 75/291] Writing tensor blk.8.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 74
|
435 |
+
[ 76/291] Writing tensor blk.8.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 74
|
436 |
+
[ 77/291] Writing tensor blk.8.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 74
|
437 |
+
[ 78/291] Writing tensor blk.10.attn_norm.weight | size 4096 | type F32 | T+ 74
|
438 |
+
[ 79/291] Writing tensor blk.10.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 78
|
439 |
+
[ 80/291] Writing tensor blk.10.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 82
|
440 |
+
[ 81/291] Writing tensor blk.10.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 82
|
441 |
+
[ 82/291] Writing tensor blk.10.ffn_norm.weight | size 4096 | type F32 | T+ 82
|
442 |
+
[ 83/291] Writing tensor blk.10.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 82
|
443 |
+
[ 84/291] Writing tensor blk.10.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 82
|
444 |
+
[ 85/291] Writing tensor blk.10.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 82
|
445 |
+
[ 86/291] Writing tensor blk.10.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 82
|
446 |
+
[ 87/291] Writing tensor blk.11.attn_norm.weight | size 4096 | type F32 | T+ 82
|
447 |
+
[ 88/291] Writing tensor blk.11.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 90
|
448 |
+
[ 89/291] Writing tensor blk.11.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 90
|
449 |
+
[ 90/291] Writing tensor blk.11.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 90
|
450 |
+
[ 91/291] Writing tensor blk.11.ffn_norm.weight | size 4096 | type F32 | T+ 90
|
451 |
+
[ 92/291] Writing tensor blk.11.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 90
|
452 |
+
[ 93/291] Writing tensor blk.11.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 90
|
453 |
+
[ 94/291] Writing tensor blk.11.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 91
|
454 |
+
[ 95/291] Writing tensor blk.11.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 91
|
455 |
+
[ 96/291] Writing tensor blk.12.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 99
|
456 |
+
[ 97/291] Writing tensor blk.12.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 99
|
457 |
+
[ 98/291] Writing tensor blk.12.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 99
|
458 |
+
[ 99/291] Writing tensor blk.12.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 99
|
459 |
+
[100/291] Writing tensor blk.12.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 99
|
460 |
+
[101/291] Writing tensor blk.12.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 100
|
461 |
+
[102/291] Writing tensor blk.8.attn_norm.weight | size 4096 | type F32 | T+ 100
|
462 |
+
[103/291] Writing tensor blk.8.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 100
|
463 |
+
[104/291] Writing tensor blk.8.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 104
|
464 |
+
[105/291] Writing tensor blk.8.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 104
|
465 |
+
[106/291] Writing tensor blk.8.ffn_norm.weight | size 4096 | type F32 | T+ 104
|
466 |
+
[107/291] Writing tensor blk.9.attn_norm.weight | size 4096 | type F32 | T+ 104
|
467 |
+
[108/291] Writing tensor blk.9.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 105
|
468 |
+
[109/291] Writing tensor blk.9.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 108
|
469 |
+
[110/291] Writing tensor blk.9.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 108
|
470 |
+
[111/291] Writing tensor blk.9.ffn_norm.weight | size 4096 | type F32 | T+ 108
|
471 |
+
[112/291] Writing tensor blk.9.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 112
|
472 |
+
[113/291] Writing tensor blk.9.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 112
|
473 |
+
[114/291] Writing tensor blk.9.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 112
|
474 |
+
[115/291] Writing tensor blk.9.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 112
|
475 |
+
[116/291] Writing tensor blk.12.attn_norm.weight | size 4096 | type F32 | T+ 112
|
476 |
+
[117/291] Writing tensor blk.12.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 112
|
477 |
+
[118/291] Writing tensor blk.12.ffn_norm.weight | size 4096 | type F32 | T+ 112
|
478 |
+
[119/291] Writing tensor blk.13.attn_norm.weight | size 4096 | type F32 | T+ 112
|
479 |
+
[120/291] Writing tensor blk.13.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 117
|
480 |
+
[121/291] Writing tensor blk.13.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 120
|
481 |
+
[122/291] Writing tensor blk.13.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 120
|
482 |
+
[123/291] Writing tensor blk.13.ffn_norm.weight | size 4096 | type F32 | T+ 120
|
483 |
+
[124/291] Writing tensor blk.13.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 120
|
484 |
+
[125/291] Writing tensor blk.13.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 120
|
485 |
+
[126/291] Writing tensor blk.13.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 120
|
486 |
+
[127/291] Writing tensor blk.13.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 120
|
487 |
+
[128/291] Writing tensor blk.14.attn_norm.weight | size 4096 | type F32 | T+ 120
|
488 |
+
[129/291] Writing tensor blk.14.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 124
|
489 |
+
[130/291] Writing tensor blk.14.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 128
|
490 |
+
[131/291] Writing tensor blk.14.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 129
|
491 |
+
[132/291] Writing tensor blk.14.ffn_norm.weight | size 4096 | type F32 | T+ 129
|
492 |
+
[133/291] Writing tensor blk.14.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 129
|
493 |
+
[134/291] Writing tensor blk.14.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 129
|
494 |
+
[135/291] Writing tensor blk.14.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 129
|
495 |
+
[136/291] Writing tensor blk.14.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 129
|
496 |
+
[137/291] Writing tensor blk.15.attn_norm.weight | size 4096 | type F32 | T+ 129
|
497 |
+
[138/291] Writing tensor blk.15.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 132
|
498 |
+
[139/291] Writing tensor blk.15.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 136
|
499 |
+
[140/291] Writing tensor blk.15.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 137
|
500 |
+
[141/291] Writing tensor blk.15.ffn_norm.weight | size 4096 | type F32 | T+ 137
|
501 |
+
[142/291] Writing tensor blk.15.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 137
|
502 |
+
[143/291] Writing tensor blk.15.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 137
|
503 |
+
[144/291] Writing tensor blk.15.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 137
|
504 |
+
[145/291] Writing tensor blk.15.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 137
|
505 |
+
[146/291] Writing tensor blk.16.attn_norm.weight | size 4096 | type F32 | T+ 137
|
506 |
+
[147/291] Writing tensor blk.16.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 140
|
507 |
+
[148/291] Writing tensor blk.16.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 142
|
508 |
+
[149/291] Writing tensor blk.16.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 142
|
509 |
+
[150/291] Writing tensor blk.16.ffn_norm.weight | size 4096 | type F32 | T+ 143
|
510 |
+
[151/291] Writing tensor blk.16.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 143
|
511 |
+
[152/291] Writing tensor blk.16.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 146
|
512 |
+
[153/291] Writing tensor blk.16.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 146
|
513 |
+
[154/291] Writing tensor blk.16.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 146
|
514 |
+
[155/291] Writing tensor blk.17.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 146
|
515 |
+
[156/291] Writing tensor blk.17.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 146
|
516 |
+
[157/291] Writing tensor blk.17.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 152
|
517 |
+
[158/291] Writing tensor blk.17.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 152
|
518 |
+
[159/291] Writing tensor blk.17.attn_norm.weight | size 4096 | type F32 | T+ 152
|
519 |
+
[160/291] Writing tensor blk.17.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 152
|
520 |
+
[161/291] Writing tensor blk.17.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 152
|
521 |
+
[162/291] Writing tensor blk.17.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 152
|
522 |
+
[163/291] Writing tensor blk.17.ffn_norm.weight | size 4096 | type F32 | T+ 152
|
523 |
+
[164/291] Writing tensor blk.18.attn_norm.weight | size 4096 | type F32 | T+ 152
|
524 |
+
[165/291] Writing tensor blk.18.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 156
|
525 |
+
[166/291] Writing tensor blk.18.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 160
|
526 |
+
[167/291] Writing tensor blk.18.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 160
|
527 |
+
[168/291] Writing tensor blk.18.ffn_norm.weight | size 4096 | type F32 | T+ 160
|
528 |
+
[169/291] Writing tensor blk.18.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 160
|
529 |
+
[170/291] Writing tensor blk.18.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 160
|
530 |
+
[171/291] Writing tensor blk.18.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 160
|
531 |
+
[172/291] Writing tensor blk.18.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 160
|
532 |
+
[173/291] Writing tensor blk.19.attn_norm.weight | size 4096 | type F32 | T+ 160
|
533 |
+
[174/291] Writing tensor blk.19.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 163
|
534 |
+
[175/291] Writing tensor blk.19.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 167
|
535 |
+
[176/291] Writing tensor blk.19.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 168
|
536 |
+
[177/291] Writing tensor blk.19.ffn_norm.weight | size 4096 | type F32 | T+ 168
|
537 |
+
[178/291] Writing tensor blk.19.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 168
|
538 |
+
[179/291] Writing tensor blk.19.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 168
|
539 |
+
[180/291] Writing tensor blk.19.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 168
|
540 |
+
[181/291] Writing tensor blk.19.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 168
|
541 |
+
[182/291] Writing tensor blk.20.attn_norm.weight | size 4096 | type F32 | T+ 168
|
542 |
+
[183/291] Writing tensor blk.20.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 175
|
543 |
+
[184/291] Writing tensor blk.20.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 175
|
544 |
+
[185/291] Writing tensor blk.20.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 175
|
545 |
+
[186/291] Writing tensor blk.20.ffn_norm.weight | size 4096 | type F32 | T+ 176
|
546 |
+
[187/291] Writing tensor blk.20.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 176
|
547 |
+
[188/291] Writing tensor blk.20.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 176
|
548 |
+
[189/291] Writing tensor blk.20.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 176
|
549 |
+
[190/291] Writing tensor blk.20.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 176
|
550 |
+
[191/291] Writing tensor blk.21.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 179
|
551 |
+
[192/291] Writing tensor blk.21.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 179
|
552 |
+
[193/291] Writing tensor blk.21.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 183
|
553 |
+
[194/291] Writing tensor blk.21.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 184
|
554 |
+
[195/291] Writing tensor blk.21.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 184
|
555 |
+
[196/291] Writing tensor blk.21.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 184
|
556 |
+
[197/291] Writing tensor blk.21.attn_norm.weight | size 4096 | type F32 | T+ 184
|
557 |
+
[198/291] Writing tensor blk.21.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 184
|
558 |
+
[199/291] Writing tensor blk.21.ffn_norm.weight | size 4096 | type F32 | T+ 184
|
559 |
+
[200/291] Writing tensor blk.22.attn_norm.weight | size 4096 | type F32 | T+ 184
|
560 |
+
[201/291] Writing tensor blk.22.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 188
|
561 |
+
[202/291] Writing tensor blk.22.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 192
|
562 |
+
[203/291] Writing tensor blk.22.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 192
|
563 |
+
[204/291] Writing tensor blk.22.ffn_norm.weight | size 4096 | type F32 | T+ 192
|
564 |
+
[205/291] Writing tensor blk.22.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 192
|
565 |
+
[206/291] Writing tensor blk.22.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 192
|
566 |
+
[207/291] Writing tensor blk.22.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 192
|
567 |
+
[208/291] Writing tensor blk.22.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 192
|
568 |
+
[209/291] Writing tensor blk.23.attn_norm.weight | size 4096 | type F32 | T+ 192
|
569 |
+
[210/291] Writing tensor blk.23.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 196
|
570 |
+
[211/291] Writing tensor blk.23.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 200
|
571 |
+
[212/291] Writing tensor blk.23.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 200
|
572 |
+
[213/291] Writing tensor blk.23.ffn_norm.weight | size 4096 | type F32 | T+ 200
|
573 |
+
[214/291] Writing tensor blk.23.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 200
|
574 |
+
[215/291] Writing tensor blk.23.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 200
|
575 |
+
[216/291] Writing tensor blk.23.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 200
|
576 |
+
[217/291] Writing tensor blk.23.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 200
|
577 |
+
[218/291] Writing tensor blk.24.attn_norm.weight | size 4096 | type F32 | T+ 200
|
578 |
+
[219/291] Writing tensor blk.24.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 204
|
579 |
+
[220/291] Writing tensor blk.24.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 208
|
580 |
+
[221/291] Writing tensor blk.24.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 208
|
581 |
+
[222/291] Writing tensor blk.24.ffn_norm.weight | size 4096 | type F32 | T+ 208
|
582 |
+
[223/291] Writing tensor blk.24.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 208
|
583 |
+
[224/291] Writing tensor blk.24.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 208
|
584 |
+
[225/291] Writing tensor blk.24.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 208
|
585 |
+
[226/291] Writing tensor blk.24.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 208
|
586 |
+
[227/291] Writing tensor blk.25.attn_norm.weight | size 4096 | type F32 | T+ 208
|
587 |
+
[228/291] Writing tensor blk.25.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 212
|
588 |
+
[229/291] Writing tensor blk.25.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 213
|
589 |
+
[230/291] Writing tensor blk.25.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 214
|
590 |
+
[231/291] Writing tensor blk.25.ffn_norm.weight | size 4096 | type F32 | T+ 214
|
591 |
+
[232/291] Writing tensor blk.25.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 214
|
592 |
+
[233/291] Writing tensor blk.25.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 217
|
593 |
+
[234/291] Writing tensor blk.25.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 217
|
594 |
+
[235/291] Writing tensor blk.25.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 217
|
595 |
+
[236/291] Writing tensor blk.26.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 217
|
596 |
+
[237/291] Writing tensor blk.26.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 217
|
597 |
+
[238/291] Writing tensor blk.26.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 223
|
598 |
+
[239/291] Writing tensor blk.26.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 223
|
599 |
+
[240/291] Writing tensor blk.26.attn_norm.weight | size 4096 | type F32 | T+ 223
|
600 |
+
[241/291] Writing tensor blk.26.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 223
|
601 |
+
[242/291] Writing tensor blk.26.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 223
|
602 |
+
[243/291] Writing tensor blk.26.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 223
|
603 |
+
[244/291] Writing tensor blk.26.ffn_norm.weight | size 4096 | type F32 | T+ 223
|
604 |
+
[245/291] Writing tensor blk.27.attn_norm.weight | size 4096 | type F32 | T+ 223
|
605 |
+
[246/291] Writing tensor blk.27.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 227
|
606 |
+
[247/291] Writing tensor blk.27.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 231
|
607 |
+
[248/291] Writing tensor blk.27.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 231
|
608 |
+
[249/291] Writing tensor blk.27.ffn_norm.weight | size 4096 | type F32 | T+ 231
|
609 |
+
[250/291] Writing tensor blk.27.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 231
|
610 |
+
[251/291] Writing tensor blk.27.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 231
|
611 |
+
[252/291] Writing tensor blk.27.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 231
|
612 |
+
[253/291] Writing tensor blk.27.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 231
|
613 |
+
[254/291] Writing tensor blk.28.attn_norm.weight | size 4096 | type F32 | T+ 231
|
614 |
+
[255/291] Writing tensor blk.28.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 235
|
615 |
+
[256/291] Writing tensor blk.28.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 240
|
616 |
+
[257/291] Writing tensor blk.28.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 240
|
617 |
+
[258/291] Writing tensor blk.28.ffn_norm.weight | size 4096 | type F32 | T+ 240
|
618 |
+
[259/291] Writing tensor blk.28.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 240
|
619 |
+
[260/291] Writing tensor blk.28.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 240
|
620 |
+
[261/291] Writing tensor blk.28.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 240
|
621 |
+
[262/291] Writing tensor blk.28.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 240
|
622 |
+
[263/291] Writing tensor blk.29.attn_norm.weight | size 4096 | type F32 | T+ 240
|
623 |
+
[264/291] Writing tensor blk.29.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 247
|
624 |
+
[265/291] Writing tensor blk.29.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 248
|
625 |
+
[266/291] Writing tensor blk.29.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 248
|
626 |
+
[267/291] Writing tensor blk.29.ffn_norm.weight | size 4096 | type F32 | T+ 248
|
627 |
+
[268/291] Writing tensor blk.29.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 248
|
628 |
+
[269/291] Writing tensor blk.29.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 248
|
629 |
+
[270/291] Writing tensor blk.29.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 259
|
630 |
+
[271/291] Writing tensor blk.29.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 259
|
631 |
+
[272/291] Writing tensor blk.30.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 259
|
632 |
+
[273/291] Writing tensor blk.30.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 259
|
633 |
+
[274/291] Writing tensor blk.30.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 259
|
634 |
+
[275/291] Writing tensor blk.30.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 259
|
635 |
+
[276/291] Writing tensor blk.30.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 259
|
636 |
+
[277/291] Writing tensor blk.30.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 259
|
637 |
+
[278/291] Writing tensor output.weight | size 32000 x 4096 | type Q8_0 | T+ 266
|
638 |
+
[279/291] Writing tensor blk.30.attn_norm.weight | size 4096 | type F32 | T+ 266
|
639 |
+
[280/291] Writing tensor blk.30.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 266
|
640 |
+
[281/291] Writing tensor blk.30.ffn_norm.weight | size 4096 | type F32 | T+ 266
|
641 |
+
[282/291] Writing tensor blk.31.attn_norm.weight | size 4096 | type F32 | T+ 266
|
642 |
+
[283/291] Writing tensor blk.31.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 266
|
643 |
+
[284/291] Writing tensor blk.31.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 266
|
644 |
+
[285/291] Writing tensor blk.31.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 266
|
645 |
+
[286/291] Writing tensor blk.31.ffn_norm.weight | size 4096 | type F32 | T+ 267
|
646 |
+
[287/291] Writing tensor blk.31.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 267
|
647 |
+
[288/291] Writing tensor blk.31.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 267
|
648 |
+
[289/291] Writing tensor blk.31.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 267
|
649 |
+
[290/291] Writing tensor blk.31.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 267
|
650 |
+
[291/291] Writing tensor output_norm.weight | size 4096 | type F32 | T+ 267
|
651 |
+
Wrote zephyr_int8.gguf
|