|
(dream) tb@IBM-PF38WZKF:~/funstreams/AI$ python llama.cpp/convert.py zephyr-7b-beta --outfile zephyr_int8.gguf --outtype q8_0
|
|
Loading model file zephyr-7b-beta/model-00001-of-00008.safetensors
|
|
Loading model file zephyr-7b-beta/model-00001-of-00008.safetensors
|
|
Loading model file zephyr-7b-beta/model-00002-of-00008.safetensors
|
|
Loading model file zephyr-7b-beta/model-00003-of-00008.safetensors
|
|
Loading model file zephyr-7b-beta/model-00004-of-00008.safetensors
|
|
Loading model file zephyr-7b-beta/model-00005-of-00008.safetensors
|
|
Loading model file zephyr-7b-beta/model-00006-of-00008.safetensors
|
|
Loading model file zephyr-7b-beta/model-00007-of-00008.safetensors
|
|
Loading model file zephyr-7b-beta/model-00008-of-00008.safetensors
|
|
params = Params(n_vocab=32000, n_embd=4096, n_layer=32, n_ctx=32768, n_ff=14336, n_head=32, n_head_kv=8, n_experts=None, n_experts_used=None, f_norm_eps=1e-05, rope_scaling_type=None, f_rope_freq_base=10000.0, f_rope_scale=None, n_orig_ctx=None, rope_finetuned=None, ftype=<GGMLFileType.MostlyQ8_0: 7>, path_model=PosixPath('zephyr-7b-beta'))
|
|
32000 32000
|
|
Vocab info: <VocabLoader with 32000 base tokens and 0 added tokens>
|
|
Special vocab info: <SpecialVocab with 58980 merges, special tokens {'bos': 1, 'eos': 2, 'unk': 0, 'pad': 2}, add special tokens unset>
|
|
Permuting layer 0
|
|
Permuting layer 1
|
|
Permuting layer 2
|
|
Permuting layer 3
|
|
Permuting layer 4
|
|
Permuting layer 5
|
|
Permuting layer 6
|
|
Permuting layer 7
|
|
Permuting layer 8
|
|
Permuting layer 9
|
|
Permuting layer 10
|
|
Permuting layer 11
|
|
Permuting layer 12
|
|
Permuting layer 13
|
|
Permuting layer 14
|
|
Permuting layer 15
|
|
Permuting layer 16
|
|
Permuting layer 17
|
|
Permuting layer 18
|
|
Permuting layer 19
|
|
Permuting layer 20
|
|
Permuting layer 21
|
|
Permuting layer 22
|
|
Permuting layer 23
|
|
Permuting layer 24
|
|
Permuting layer 25
|
|
Permuting layer 26
|
|
Permuting layer 27
|
|
Permuting layer 28
|
|
Permuting layer 29
|
|
Permuting layer 30
|
|
Permuting layer 31
|
|
model.embed_tokens.weight -> token_embd.weight | BF16 | [32000, 4096]
|
|
model.layers.0.input_layernorm.weight -> blk.0.attn_norm.weight | BF16 | [4096]
|
|
model.layers.0.mlp.down_proj.weight -> blk.0.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.0.mlp.gate_proj.weight -> blk.0.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.0.mlp.up_proj.weight -> blk.0.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.0.post_attention_layernorm.weight -> blk.0.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.0.self_attn.k_proj.weight -> blk.0.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.0.self_attn.o_proj.weight -> blk.0.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.0.self_attn.q_proj.weight -> blk.0.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.0.self_attn.v_proj.weight -> blk.0.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.1.input_layernorm.weight -> blk.1.attn_norm.weight | BF16 | [4096]
|
|
model.layers.1.mlp.down_proj.weight -> blk.1.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.1.mlp.gate_proj.weight -> blk.1.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.1.mlp.up_proj.weight -> blk.1.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.1.post_attention_layernorm.weight -> blk.1.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.1.self_attn.k_proj.weight -> blk.1.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.1.self_attn.o_proj.weight -> blk.1.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.1.self_attn.q_proj.weight -> blk.1.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.1.self_attn.v_proj.weight -> blk.1.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.2.input_layernorm.weight -> blk.2.attn_norm.weight | BF16 | [4096]
|
|
model.layers.2.mlp.down_proj.weight -> blk.2.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.2.mlp.gate_proj.weight -> blk.2.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.2.mlp.up_proj.weight -> blk.2.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.2.post_attention_layernorm.weight -> blk.2.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.2.self_attn.k_proj.weight -> blk.2.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.2.self_attn.o_proj.weight -> blk.2.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.2.self_attn.q_proj.weight -> blk.2.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.2.self_attn.v_proj.weight -> blk.2.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.3.mlp.gate_proj.weight -> blk.3.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.3.mlp.up_proj.weight -> blk.3.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.3.self_attn.k_proj.weight -> blk.3.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.3.self_attn.o_proj.weight -> blk.3.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.3.self_attn.q_proj.weight -> blk.3.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.3.self_attn.v_proj.weight -> blk.3.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.3.input_layernorm.weight -> blk.3.attn_norm.weight | BF16 | [4096]
|
|
model.layers.3.mlp.down_proj.weight -> blk.3.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.3.post_attention_layernorm.weight -> blk.3.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.4.input_layernorm.weight -> blk.4.attn_norm.weight | BF16 | [4096]
|
|
model.layers.4.mlp.down_proj.weight -> blk.4.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.4.mlp.gate_proj.weight -> blk.4.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.4.mlp.up_proj.weight -> blk.4.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.4.post_attention_layernorm.weight -> blk.4.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.4.self_attn.k_proj.weight -> blk.4.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.4.self_attn.o_proj.weight -> blk.4.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.4.self_attn.q_proj.weight -> blk.4.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.4.self_attn.v_proj.weight -> blk.4.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.5.input_layernorm.weight -> blk.5.attn_norm.weight | BF16 | [4096]
|
|
model.layers.5.mlp.down_proj.weight -> blk.5.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.5.mlp.gate_proj.weight -> blk.5.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.5.mlp.up_proj.weight -> blk.5.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.5.post_attention_layernorm.weight -> blk.5.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.5.self_attn.k_proj.weight -> blk.5.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.5.self_attn.o_proj.weight -> blk.5.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.5.self_attn.q_proj.weight -> blk.5.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.5.self_attn.v_proj.weight -> blk.5.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.6.input_layernorm.weight -> blk.6.attn_norm.weight | BF16 | [4096]
|
|
model.layers.6.mlp.down_proj.weight -> blk.6.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.6.mlp.gate_proj.weight -> blk.6.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.6.mlp.up_proj.weight -> blk.6.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.6.post_attention_layernorm.weight -> blk.6.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.6.self_attn.k_proj.weight -> blk.6.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.6.self_attn.o_proj.weight -> blk.6.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.6.self_attn.q_proj.weight -> blk.6.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.6.self_attn.v_proj.weight -> blk.6.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.7.input_layernorm.weight -> blk.7.attn_norm.weight | BF16 | [4096]
|
|
model.layers.7.mlp.down_proj.weight -> blk.7.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.7.mlp.gate_proj.weight -> blk.7.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.7.mlp.up_proj.weight -> blk.7.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.7.post_attention_layernorm.weight -> blk.7.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.7.self_attn.k_proj.weight -> blk.7.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.7.self_attn.o_proj.weight -> blk.7.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.7.self_attn.q_proj.weight -> blk.7.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.7.self_attn.v_proj.weight -> blk.7.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.8.self_attn.k_proj.weight -> blk.8.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.8.self_attn.o_proj.weight -> blk.8.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.8.self_attn.q_proj.weight -> blk.8.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.8.self_attn.v_proj.weight -> blk.8.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.10.input_layernorm.weight -> blk.10.attn_norm.weight | BF16 | [4096]
|
|
model.layers.10.mlp.down_proj.weight -> blk.10.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.10.mlp.gate_proj.weight -> blk.10.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.10.mlp.up_proj.weight -> blk.10.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.10.post_attention_layernorm.weight -> blk.10.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.10.self_attn.k_proj.weight -> blk.10.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.10.self_attn.o_proj.weight -> blk.10.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.10.self_attn.q_proj.weight -> blk.10.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.10.self_attn.v_proj.weight -> blk.10.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.11.input_layernorm.weight -> blk.11.attn_norm.weight | BF16 | [4096]
|
|
model.layers.11.mlp.down_proj.weight -> blk.11.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.11.mlp.gate_proj.weight -> blk.11.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.11.mlp.up_proj.weight -> blk.11.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.11.post_attention_layernorm.weight -> blk.11.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.11.self_attn.k_proj.weight -> blk.11.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.11.self_attn.o_proj.weight -> blk.11.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.11.self_attn.q_proj.weight -> blk.11.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.11.self_attn.v_proj.weight -> blk.11.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.12.mlp.gate_proj.weight -> blk.12.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.12.mlp.up_proj.weight -> blk.12.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.12.self_attn.k_proj.weight -> blk.12.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.12.self_attn.o_proj.weight -> blk.12.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.12.self_attn.q_proj.weight -> blk.12.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.12.self_attn.v_proj.weight -> blk.12.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.8.input_layernorm.weight -> blk.8.attn_norm.weight | BF16 | [4096]
|
|
model.layers.8.mlp.down_proj.weight -> blk.8.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.8.mlp.gate_proj.weight -> blk.8.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.8.mlp.up_proj.weight -> blk.8.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.8.post_attention_layernorm.weight -> blk.8.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.9.input_layernorm.weight -> blk.9.attn_norm.weight | BF16 | [4096]
|
|
model.layers.9.mlp.down_proj.weight -> blk.9.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.9.mlp.gate_proj.weight -> blk.9.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.9.mlp.up_proj.weight -> blk.9.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.9.post_attention_layernorm.weight -> blk.9.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.9.self_attn.k_proj.weight -> blk.9.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.9.self_attn.o_proj.weight -> blk.9.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.9.self_attn.q_proj.weight -> blk.9.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.9.self_attn.v_proj.weight -> blk.9.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.12.input_layernorm.weight -> blk.12.attn_norm.weight | BF16 | [4096]
|
|
model.layers.12.mlp.down_proj.weight -> blk.12.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.12.post_attention_layernorm.weight -> blk.12.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.13.input_layernorm.weight -> blk.13.attn_norm.weight | BF16 | [4096]
|
|
model.layers.13.mlp.down_proj.weight -> blk.13.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.13.mlp.gate_proj.weight -> blk.13.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.13.mlp.up_proj.weight -> blk.13.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.13.post_attention_layernorm.weight -> blk.13.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.13.self_attn.k_proj.weight -> blk.13.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.13.self_attn.o_proj.weight -> blk.13.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.13.self_attn.q_proj.weight -> blk.13.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.13.self_attn.v_proj.weight -> blk.13.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.14.input_layernorm.weight -> blk.14.attn_norm.weight | BF16 | [4096]
|
|
model.layers.14.mlp.down_proj.weight -> blk.14.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.14.mlp.gate_proj.weight -> blk.14.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.14.mlp.up_proj.weight -> blk.14.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.14.post_attention_layernorm.weight -> blk.14.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.14.self_attn.k_proj.weight -> blk.14.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.14.self_attn.o_proj.weight -> blk.14.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.14.self_attn.q_proj.weight -> blk.14.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.14.self_attn.v_proj.weight -> blk.14.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.15.input_layernorm.weight -> blk.15.attn_norm.weight | BF16 | [4096]
|
|
model.layers.15.mlp.down_proj.weight -> blk.15.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.15.mlp.gate_proj.weight -> blk.15.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.15.mlp.up_proj.weight -> blk.15.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.15.post_attention_layernorm.weight -> blk.15.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.15.self_attn.k_proj.weight -> blk.15.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.15.self_attn.o_proj.weight -> blk.15.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.15.self_attn.q_proj.weight -> blk.15.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.15.self_attn.v_proj.weight -> blk.15.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.16.input_layernorm.weight -> blk.16.attn_norm.weight | BF16 | [4096]
|
|
model.layers.16.mlp.down_proj.weight -> blk.16.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.16.mlp.gate_proj.weight -> blk.16.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.16.mlp.up_proj.weight -> blk.16.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.16.post_attention_layernorm.weight -> blk.16.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.16.self_attn.k_proj.weight -> blk.16.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.16.self_attn.o_proj.weight -> blk.16.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.16.self_attn.q_proj.weight -> blk.16.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.16.self_attn.v_proj.weight -> blk.16.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.17.self_attn.k_proj.weight -> blk.17.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.17.self_attn.o_proj.weight -> blk.17.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.17.self_attn.q_proj.weight -> blk.17.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.17.self_attn.v_proj.weight -> blk.17.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.17.input_layernorm.weight -> blk.17.attn_norm.weight | BF16 | [4096]
|
|
model.layers.17.mlp.down_proj.weight -> blk.17.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.17.mlp.gate_proj.weight -> blk.17.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.17.mlp.up_proj.weight -> blk.17.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.17.post_attention_layernorm.weight -> blk.17.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.18.input_layernorm.weight -> blk.18.attn_norm.weight | BF16 | [4096]
|
|
model.layers.18.mlp.down_proj.weight -> blk.18.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.18.mlp.gate_proj.weight -> blk.18.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.18.mlp.up_proj.weight -> blk.18.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.18.post_attention_layernorm.weight -> blk.18.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.18.self_attn.k_proj.weight -> blk.18.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.18.self_attn.o_proj.weight -> blk.18.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.18.self_attn.q_proj.weight -> blk.18.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.18.self_attn.v_proj.weight -> blk.18.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.19.input_layernorm.weight -> blk.19.attn_norm.weight | BF16 | [4096]
|
|
model.layers.19.mlp.down_proj.weight -> blk.19.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.19.mlp.gate_proj.weight -> blk.19.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.19.mlp.up_proj.weight -> blk.19.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.19.post_attention_layernorm.weight -> blk.19.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.19.self_attn.k_proj.weight -> blk.19.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.19.self_attn.o_proj.weight -> blk.19.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.19.self_attn.q_proj.weight -> blk.19.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.19.self_attn.v_proj.weight -> blk.19.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.20.input_layernorm.weight -> blk.20.attn_norm.weight | BF16 | [4096]
|
|
model.layers.20.mlp.down_proj.weight -> blk.20.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.20.mlp.gate_proj.weight -> blk.20.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.20.mlp.up_proj.weight -> blk.20.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.20.post_attention_layernorm.weight -> blk.20.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.20.self_attn.k_proj.weight -> blk.20.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.20.self_attn.o_proj.weight -> blk.20.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.20.self_attn.q_proj.weight -> blk.20.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.20.self_attn.v_proj.weight -> blk.20.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.21.mlp.gate_proj.weight -> blk.21.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.21.mlp.up_proj.weight -> blk.21.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.21.self_attn.k_proj.weight -> blk.21.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.21.self_attn.o_proj.weight -> blk.21.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.21.self_attn.q_proj.weight -> blk.21.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.21.self_attn.v_proj.weight -> blk.21.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.21.input_layernorm.weight -> blk.21.attn_norm.weight | BF16 | [4096]
|
|
model.layers.21.mlp.down_proj.weight -> blk.21.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.21.post_attention_layernorm.weight -> blk.21.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.22.input_layernorm.weight -> blk.22.attn_norm.weight | BF16 | [4096]
|
|
model.layers.22.mlp.down_proj.weight -> blk.22.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.22.mlp.gate_proj.weight -> blk.22.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.22.mlp.up_proj.weight -> blk.22.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.22.post_attention_layernorm.weight -> blk.22.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.22.self_attn.k_proj.weight -> blk.22.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.22.self_attn.o_proj.weight -> blk.22.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.22.self_attn.q_proj.weight -> blk.22.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.22.self_attn.v_proj.weight -> blk.22.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.23.input_layernorm.weight -> blk.23.attn_norm.weight | BF16 | [4096]
|
|
model.layers.23.mlp.down_proj.weight -> blk.23.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.23.mlp.gate_proj.weight -> blk.23.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.23.mlp.up_proj.weight -> blk.23.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.23.post_attention_layernorm.weight -> blk.23.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.23.self_attn.k_proj.weight -> blk.23.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.23.self_attn.o_proj.weight -> blk.23.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.23.self_attn.q_proj.weight -> blk.23.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.23.self_attn.v_proj.weight -> blk.23.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.24.input_layernorm.weight -> blk.24.attn_norm.weight | BF16 | [4096]
|
|
model.layers.24.mlp.down_proj.weight -> blk.24.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.24.mlp.gate_proj.weight -> blk.24.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.24.mlp.up_proj.weight -> blk.24.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.24.post_attention_layernorm.weight -> blk.24.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.24.self_attn.k_proj.weight -> blk.24.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.24.self_attn.o_proj.weight -> blk.24.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.24.self_attn.q_proj.weight -> blk.24.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.24.self_attn.v_proj.weight -> blk.24.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.25.input_layernorm.weight -> blk.25.attn_norm.weight | BF16 | [4096]
|
|
model.layers.25.mlp.down_proj.weight -> blk.25.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.25.mlp.gate_proj.weight -> blk.25.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.25.mlp.up_proj.weight -> blk.25.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.25.post_attention_layernorm.weight -> blk.25.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.25.self_attn.k_proj.weight -> blk.25.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.25.self_attn.o_proj.weight -> blk.25.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.25.self_attn.q_proj.weight -> blk.25.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.25.self_attn.v_proj.weight -> blk.25.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.26.self_attn.k_proj.weight -> blk.26.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.26.self_attn.o_proj.weight -> blk.26.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.26.self_attn.q_proj.weight -> blk.26.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.26.self_attn.v_proj.weight -> blk.26.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.26.input_layernorm.weight -> blk.26.attn_norm.weight | BF16 | [4096]
|
|
model.layers.26.mlp.down_proj.weight -> blk.26.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.26.mlp.gate_proj.weight -> blk.26.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.26.mlp.up_proj.weight -> blk.26.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.26.post_attention_layernorm.weight -> blk.26.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.27.input_layernorm.weight -> blk.27.attn_norm.weight | BF16 | [4096]
|
|
model.layers.27.mlp.down_proj.weight -> blk.27.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.27.mlp.gate_proj.weight -> blk.27.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.27.mlp.up_proj.weight -> blk.27.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.27.post_attention_layernorm.weight -> blk.27.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.27.self_attn.k_proj.weight -> blk.27.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.27.self_attn.o_proj.weight -> blk.27.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.27.self_attn.q_proj.weight -> blk.27.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.27.self_attn.v_proj.weight -> blk.27.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.28.input_layernorm.weight -> blk.28.attn_norm.weight | BF16 | [4096]
|
|
model.layers.28.mlp.down_proj.weight -> blk.28.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.28.mlp.gate_proj.weight -> blk.28.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.28.mlp.up_proj.weight -> blk.28.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.28.post_attention_layernorm.weight -> blk.28.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.28.self_attn.k_proj.weight -> blk.28.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.28.self_attn.o_proj.weight -> blk.28.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.28.self_attn.q_proj.weight -> blk.28.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.28.self_attn.v_proj.weight -> blk.28.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.29.input_layernorm.weight -> blk.29.attn_norm.weight | BF16 | [4096]
|
|
model.layers.29.mlp.down_proj.weight -> blk.29.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.29.mlp.gate_proj.weight -> blk.29.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.29.mlp.up_proj.weight -> blk.29.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.29.post_attention_layernorm.weight -> blk.29.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.29.self_attn.k_proj.weight -> blk.29.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.29.self_attn.o_proj.weight -> blk.29.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.29.self_attn.q_proj.weight -> blk.29.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.29.self_attn.v_proj.weight -> blk.29.attn_v.weight | BF16 | [1024, 4096]
|
|
model.layers.30.mlp.gate_proj.weight -> blk.30.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.30.mlp.up_proj.weight -> blk.30.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.30.self_attn.k_proj.weight -> blk.30.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.30.self_attn.o_proj.weight -> blk.30.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.30.self_attn.q_proj.weight -> blk.30.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.30.self_attn.v_proj.weight -> blk.30.attn_v.weight | BF16 | [1024, 4096]
|
|
lm_head.weight -> output.weight | BF16 | [32000, 4096]
|
|
model.layers.30.input_layernorm.weight -> blk.30.attn_norm.weight | BF16 | [4096]
|
|
model.layers.30.mlp.down_proj.weight -> blk.30.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.30.post_attention_layernorm.weight -> blk.30.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.31.input_layernorm.weight -> blk.31.attn_norm.weight | BF16 | [4096]
|
|
model.layers.31.mlp.down_proj.weight -> blk.31.ffn_down.weight | BF16 | [4096, 14336]
|
|
model.layers.31.mlp.gate_proj.weight -> blk.31.ffn_gate.weight | BF16 | [14336, 4096]
|
|
model.layers.31.mlp.up_proj.weight -> blk.31.ffn_up.weight | BF16 | [14336, 4096]
|
|
model.layers.31.post_attention_layernorm.weight -> blk.31.ffn_norm.weight | BF16 | [4096]
|
|
model.layers.31.self_attn.k_proj.weight -> blk.31.attn_k.weight | BF16 | [1024, 4096]
|
|
model.layers.31.self_attn.o_proj.weight -> blk.31.attn_output.weight | BF16 | [4096, 4096]
|
|
model.layers.31.self_attn.q_proj.weight -> blk.31.attn_q.weight | BF16 | [4096, 4096]
|
|
model.layers.31.self_attn.v_proj.weight -> blk.31.attn_v.weight | BF16 | [1024, 4096]
|
|
model.norm.weight -> output_norm.weight | BF16 | [4096]
|
|
Writing zephyr_int8.gguf, format 7
|
|
gguf: This GGUF file is for Little Endian only
|
|
gguf: Adding 58980 merge(s).
|
|
gguf: Setting special token type bos to 1
|
|
gguf: Setting special token type eos to 2
|
|
gguf: Setting special token type unk to 0
|
|
gguf: Setting special token type pad to 2
|
|
gguf: Setting chat_template to {% for message in messages %}
|
|
{% if message['role'] == 'user' %}
|
|
{{ '<|user|>
|
|
' + message['content'] + eos_token }}
|
|
{% elif message['role'] == 'system' %}
|
|
{{ '<|system|>
|
|
' + message['content'] + eos_token }}
|
|
{% elif message['role'] == 'assistant' %}
|
|
{{ '<|assistant|>
|
|
' + message['content'] + eos_token }}
|
|
{% endif %}
|
|
{% if loop.last and add_generation_prompt %}
|
|
{{ '<|assistant|>' }}
|
|
{% endif %}
|
|
{% endfor %}
|
|
[ 1/291] Writing tensor token_embd.weight | size 32000 x 4096 | type Q8_0 | T+ 19
|
|
[ 2/291] Writing tensor blk.0.attn_norm.weight | size 4096 | type F32 | T+ 19
|
|
[ 3/291] Writing tensor blk.0.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 19
|
|
[ 4/291] Writing tensor blk.0.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 19
|
|
[ 5/291] Writing tensor blk.0.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 19
|
|
[ 6/291] Writing tensor blk.0.ffn_norm.weight | size 4096 | type F32 | T+ 19
|
|
[ 7/291] Writing tensor blk.0.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 19
|
|
[ 8/291] Writing tensor blk.0.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 19
|
|
[ 9/291] Writing tensor blk.0.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 21
|
|
[ 10/291] Writing tensor blk.0.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 22
|
|
[ 11/291] Writing tensor blk.1.attn_norm.weight | size 4096 | type F32 | T+ 22
|
|
[ 12/291] Writing tensor blk.1.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 23
|
|
[ 13/291] Writing tensor blk.1.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 27
|
|
[ 14/291] Writing tensor blk.1.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 27
|
|
[ 15/291] Writing tensor blk.1.ffn_norm.weight | size 4096 | type F32 | T+ 27
|
|
[ 16/291] Writing tensor blk.1.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 27
|
|
[ 17/291] Writing tensor blk.1.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 27
|
|
[ 18/291] Writing tensor blk.1.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 27
|
|
[ 19/291] Writing tensor blk.1.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 27
|
|
[ 20/291] Writing tensor blk.2.attn_norm.weight | size 4096 | type F32 | T+ 27
|
|
[ 21/291] Writing tensor blk.2.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 34
|
|
[ 22/291] Writing tensor blk.2.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 35
|
|
[ 23/291] Writing tensor blk.2.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 35
|
|
[ 24/291] Writing tensor blk.2.ffn_norm.weight | size 4096 | type F32 | T+ 35
|
|
[ 25/291] Writing tensor blk.2.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 35
|
|
[ 26/291] Writing tensor blk.2.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 35
|
|
[ 27/291] Writing tensor blk.2.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 35
|
|
[ 28/291] Writing tensor blk.2.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 35
|
|
[ 29/291] Writing tensor blk.3.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 38
|
|
[ 30/291] Writing tensor blk.3.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 38
|
|
[ 31/291] Writing tensor blk.3.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 42
|
|
[ 32/291] Writing tensor blk.3.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 42
|
|
[ 33/291] Writing tensor blk.3.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 42
|
|
[ 34/291] Writing tensor blk.3.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 42
|
|
[ 35/291] Writing tensor blk.3.attn_norm.weight | size 4096 | type F32 | T+ 42
|
|
[ 36/291] Writing tensor blk.3.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 42
|
|
[ 37/291] Writing tensor blk.3.ffn_norm.weight | size 4096 | type F32 | T+ 42
|
|
[ 38/291] Writing tensor blk.4.attn_norm.weight | size 4096 | type F32 | T+ 42
|
|
[ 39/291] Writing tensor blk.4.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 45
|
|
[ 40/291] Writing tensor blk.4.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 49
|
|
[ 41/291] Writing tensor blk.4.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 49
|
|
[ 42/291] Writing tensor blk.4.ffn_norm.weight | size 4096 | type F32 | T+ 49
|
|
[ 43/291] Writing tensor blk.4.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 49
|
|
[ 44/291] Writing tensor blk.4.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 49
|
|
[ 45/291] Writing tensor blk.4.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 49
|
|
[ 46/291] Writing tensor blk.4.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 49
|
|
[ 47/291] Writing tensor blk.5.attn_norm.weight | size 4096 | type F32 | T+ 49
|
|
[ 48/291] Writing tensor blk.5.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 53
|
|
[ 49/291] Writing tensor blk.5.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 57
|
|
[ 50/291] Writing tensor blk.5.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 57
|
|
[ 51/291] Writing tensor blk.5.ffn_norm.weight | size 4096 | type F32 | T+ 57
|
|
[ 52/291] Writing tensor blk.5.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 57
|
|
[ 53/291] Writing tensor blk.5.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 57
|
|
[ 54/291] Writing tensor blk.5.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 57
|
|
[ 55/291] Writing tensor blk.5.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 57
|
|
[ 56/291] Writing tensor blk.6.attn_norm.weight | size 4096 | type F32 | T+ 57
|
|
[ 57/291] Writing tensor blk.6.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 61
|
|
[ 58/291] Writing tensor blk.6.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 64
|
|
[ 59/291] Writing tensor blk.6.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 65
|
|
[ 60/291] Writing tensor blk.6.ffn_norm.weight | size 4096 | type F32 | T+ 65
|
|
[ 61/291] Writing tensor blk.6.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 65
|
|
[ 62/291] Writing tensor blk.6.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 65
|
|
[ 63/291] Writing tensor blk.6.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 65
|
|
[ 64/291] Writing tensor blk.6.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 65
|
|
[ 65/291] Writing tensor blk.7.attn_norm.weight | size 4096 | type F32 | T+ 65
|
|
[ 66/291] Writing tensor blk.7.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 68
|
|
[ 67/291] Writing tensor blk.7.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 70
|
|
[ 68/291] Writing tensor blk.7.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 70
|
|
[ 69/291] Writing tensor blk.7.ffn_norm.weight | size 4096 | type F32 | T+ 70
|
|
[ 70/291] Writing tensor blk.7.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 70
|
|
[ 71/291] Writing tensor blk.7.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 74
|
|
[ 72/291] Writing tensor blk.7.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 74
|
|
[ 73/291] Writing tensor blk.7.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 74
|
|
[ 74/291] Writing tensor blk.8.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 74
|
|
[ 75/291] Writing tensor blk.8.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 74
|
|
[ 76/291] Writing tensor blk.8.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 74
|
|
[ 77/291] Writing tensor blk.8.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 74
|
|
[ 78/291] Writing tensor blk.10.attn_norm.weight | size 4096 | type F32 | T+ 74
|
|
[ 79/291] Writing tensor blk.10.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 78
|
|
[ 80/291] Writing tensor blk.10.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 82
|
|
[ 81/291] Writing tensor blk.10.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 82
|
|
[ 82/291] Writing tensor blk.10.ffn_norm.weight | size 4096 | type F32 | T+ 82
|
|
[ 83/291] Writing tensor blk.10.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 82
|
|
[ 84/291] Writing tensor blk.10.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 82
|
|
[ 85/291] Writing tensor blk.10.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 82
|
|
[ 86/291] Writing tensor blk.10.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 82
|
|
[ 87/291] Writing tensor blk.11.attn_norm.weight | size 4096 | type F32 | T+ 82
|
|
[ 88/291] Writing tensor blk.11.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 90
|
|
[ 89/291] Writing tensor blk.11.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 90
|
|
[ 90/291] Writing tensor blk.11.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 90
|
|
[ 91/291] Writing tensor blk.11.ffn_norm.weight | size 4096 | type F32 | T+ 90
|
|
[ 92/291] Writing tensor blk.11.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 90
|
|
[ 93/291] Writing tensor blk.11.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 90
|
|
[ 94/291] Writing tensor blk.11.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 91
|
|
[ 95/291] Writing tensor blk.11.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 91
|
|
[ 96/291] Writing tensor blk.12.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 99
|
|
[ 97/291] Writing tensor blk.12.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 99
|
|
[ 98/291] Writing tensor blk.12.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 99
|
|
[ 99/291] Writing tensor blk.12.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 99
|
|
[100/291] Writing tensor blk.12.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 99
|
|
[101/291] Writing tensor blk.12.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 100
|
|
[102/291] Writing tensor blk.8.attn_norm.weight | size 4096 | type F32 | T+ 100
|
|
[103/291] Writing tensor blk.8.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 100
|
|
[104/291] Writing tensor blk.8.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 104
|
|
[105/291] Writing tensor blk.8.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 104
|
|
[106/291] Writing tensor blk.8.ffn_norm.weight | size 4096 | type F32 | T+ 104
|
|
[107/291] Writing tensor blk.9.attn_norm.weight | size 4096 | type F32 | T+ 104
|
|
[108/291] Writing tensor blk.9.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 105
|
|
[109/291] Writing tensor blk.9.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 108
|
|
[110/291] Writing tensor blk.9.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 108
|
|
[111/291] Writing tensor blk.9.ffn_norm.weight | size 4096 | type F32 | T+ 108
|
|
[112/291] Writing tensor blk.9.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 112
|
|
[113/291] Writing tensor blk.9.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 112
|
|
[114/291] Writing tensor blk.9.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 112
|
|
[115/291] Writing tensor blk.9.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 112
|
|
[116/291] Writing tensor blk.12.attn_norm.weight | size 4096 | type F32 | T+ 112
|
|
[117/291] Writing tensor blk.12.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 112
|
|
[118/291] Writing tensor blk.12.ffn_norm.weight | size 4096 | type F32 | T+ 112
|
|
[119/291] Writing tensor blk.13.attn_norm.weight | size 4096 | type F32 | T+ 112
|
|
[120/291] Writing tensor blk.13.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 117
|
|
[121/291] Writing tensor blk.13.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 120
|
|
[122/291] Writing tensor blk.13.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 120
|
|
[123/291] Writing tensor blk.13.ffn_norm.weight | size 4096 | type F32 | T+ 120
|
|
[124/291] Writing tensor blk.13.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 120
|
|
[125/291] Writing tensor blk.13.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 120
|
|
[126/291] Writing tensor blk.13.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 120
|
|
[127/291] Writing tensor blk.13.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 120
|
|
[128/291] Writing tensor blk.14.attn_norm.weight | size 4096 | type F32 | T+ 120
|
|
[129/291] Writing tensor blk.14.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 124
|
|
[130/291] Writing tensor blk.14.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 128
|
|
[131/291] Writing tensor blk.14.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 129
|
|
[132/291] Writing tensor blk.14.ffn_norm.weight | size 4096 | type F32 | T+ 129
|
|
[133/291] Writing tensor blk.14.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 129
|
|
[134/291] Writing tensor blk.14.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 129
|
|
[135/291] Writing tensor blk.14.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 129
|
|
[136/291] Writing tensor blk.14.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 129
|
|
[137/291] Writing tensor blk.15.attn_norm.weight | size 4096 | type F32 | T+ 129
|
|
[138/291] Writing tensor blk.15.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 132
|
|
[139/291] Writing tensor blk.15.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 136
|
|
[140/291] Writing tensor blk.15.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 137
|
|
[141/291] Writing tensor blk.15.ffn_norm.weight | size 4096 | type F32 | T+ 137
|
|
[142/291] Writing tensor blk.15.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 137
|
|
[143/291] Writing tensor blk.15.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 137
|
|
[144/291] Writing tensor blk.15.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 137
|
|
[145/291] Writing tensor blk.15.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 137
|
|
[146/291] Writing tensor blk.16.attn_norm.weight | size 4096 | type F32 | T+ 137
|
|
[147/291] Writing tensor blk.16.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 140
|
|
[148/291] Writing tensor blk.16.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 142
|
|
[149/291] Writing tensor blk.16.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 142
|
|
[150/291] Writing tensor blk.16.ffn_norm.weight | size 4096 | type F32 | T+ 143
|
|
[151/291] Writing tensor blk.16.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 143
|
|
[152/291] Writing tensor blk.16.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 146
|
|
[153/291] Writing tensor blk.16.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 146
|
|
[154/291] Writing tensor blk.16.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 146
|
|
[155/291] Writing tensor blk.17.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 146
|
|
[156/291] Writing tensor blk.17.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 146
|
|
[157/291] Writing tensor blk.17.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 152
|
|
[158/291] Writing tensor blk.17.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 152
|
|
[159/291] Writing tensor blk.17.attn_norm.weight | size 4096 | type F32 | T+ 152
|
|
[160/291] Writing tensor blk.17.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 152
|
|
[161/291] Writing tensor blk.17.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 152
|
|
[162/291] Writing tensor blk.17.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 152
|
|
[163/291] Writing tensor blk.17.ffn_norm.weight | size 4096 | type F32 | T+ 152
|
|
[164/291] Writing tensor blk.18.attn_norm.weight | size 4096 | type F32 | T+ 152
|
|
[165/291] Writing tensor blk.18.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 156
|
|
[166/291] Writing tensor blk.18.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 160
|
|
[167/291] Writing tensor blk.18.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 160
|
|
[168/291] Writing tensor blk.18.ffn_norm.weight | size 4096 | type F32 | T+ 160
|
|
[169/291] Writing tensor blk.18.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 160
|
|
[170/291] Writing tensor blk.18.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 160
|
|
[171/291] Writing tensor blk.18.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 160
|
|
[172/291] Writing tensor blk.18.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 160
|
|
[173/291] Writing tensor blk.19.attn_norm.weight | size 4096 | type F32 | T+ 160
|
|
[174/291] Writing tensor blk.19.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 163
|
|
[175/291] Writing tensor blk.19.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 167
|
|
[176/291] Writing tensor blk.19.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 168
|
|
[177/291] Writing tensor blk.19.ffn_norm.weight | size 4096 | type F32 | T+ 168
|
|
[178/291] Writing tensor blk.19.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 168
|
|
[179/291] Writing tensor blk.19.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 168
|
|
[180/291] Writing tensor blk.19.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 168
|
|
[181/291] Writing tensor blk.19.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 168
|
|
[182/291] Writing tensor blk.20.attn_norm.weight | size 4096 | type F32 | T+ 168
|
|
[183/291] Writing tensor blk.20.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 175
|
|
[184/291] Writing tensor blk.20.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 175
|
|
[185/291] Writing tensor blk.20.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 175
|
|
[186/291] Writing tensor blk.20.ffn_norm.weight | size 4096 | type F32 | T+ 176
|
|
[187/291] Writing tensor blk.20.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 176
|
|
[188/291] Writing tensor blk.20.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 176
|
|
[189/291] Writing tensor blk.20.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 176
|
|
[190/291] Writing tensor blk.20.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 176
|
|
[191/291] Writing tensor blk.21.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 179
|
|
[192/291] Writing tensor blk.21.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 179
|
|
[193/291] Writing tensor blk.21.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 183
|
|
[194/291] Writing tensor blk.21.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 184
|
|
[195/291] Writing tensor blk.21.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 184
|
|
[196/291] Writing tensor blk.21.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 184
|
|
[197/291] Writing tensor blk.21.attn_norm.weight | size 4096 | type F32 | T+ 184
|
|
[198/291] Writing tensor blk.21.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 184
|
|
[199/291] Writing tensor blk.21.ffn_norm.weight | size 4096 | type F32 | T+ 184
|
|
[200/291] Writing tensor blk.22.attn_norm.weight | size 4096 | type F32 | T+ 184
|
|
[201/291] Writing tensor blk.22.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 188
|
|
[202/291] Writing tensor blk.22.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 192
|
|
[203/291] Writing tensor blk.22.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 192
|
|
[204/291] Writing tensor blk.22.ffn_norm.weight | size 4096 | type F32 | T+ 192
|
|
[205/291] Writing tensor blk.22.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 192
|
|
[206/291] Writing tensor blk.22.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 192
|
|
[207/291] Writing tensor blk.22.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 192
|
|
[208/291] Writing tensor blk.22.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 192
|
|
[209/291] Writing tensor blk.23.attn_norm.weight | size 4096 | type F32 | T+ 192
|
|
[210/291] Writing tensor blk.23.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 196
|
|
[211/291] Writing tensor blk.23.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 200
|
|
[212/291] Writing tensor blk.23.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 200
|
|
[213/291] Writing tensor blk.23.ffn_norm.weight | size 4096 | type F32 | T+ 200
|
|
[214/291] Writing tensor blk.23.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 200
|
|
[215/291] Writing tensor blk.23.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 200
|
|
[216/291] Writing tensor blk.23.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 200
|
|
[217/291] Writing tensor blk.23.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 200
|
|
[218/291] Writing tensor blk.24.attn_norm.weight | size 4096 | type F32 | T+ 200
|
|
[219/291] Writing tensor blk.24.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 204
|
|
[220/291] Writing tensor blk.24.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 208
|
|
[221/291] Writing tensor blk.24.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 208
|
|
[222/291] Writing tensor blk.24.ffn_norm.weight | size 4096 | type F32 | T+ 208
|
|
[223/291] Writing tensor blk.24.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 208
|
|
[224/291] Writing tensor blk.24.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 208
|
|
[225/291] Writing tensor blk.24.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 208
|
|
[226/291] Writing tensor blk.24.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 208
|
|
[227/291] Writing tensor blk.25.attn_norm.weight | size 4096 | type F32 | T+ 208
|
|
[228/291] Writing tensor blk.25.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 212
|
|
[229/291] Writing tensor blk.25.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 213
|
|
[230/291] Writing tensor blk.25.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 214
|
|
[231/291] Writing tensor blk.25.ffn_norm.weight | size 4096 | type F32 | T+ 214
|
|
[232/291] Writing tensor blk.25.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 214
|
|
[233/291] Writing tensor blk.25.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 217
|
|
[234/291] Writing tensor blk.25.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 217
|
|
[235/291] Writing tensor blk.25.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 217
|
|
[236/291] Writing tensor blk.26.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 217
|
|
[237/291] Writing tensor blk.26.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 217
|
|
[238/291] Writing tensor blk.26.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 223
|
|
[239/291] Writing tensor blk.26.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 223
|
|
[240/291] Writing tensor blk.26.attn_norm.weight | size 4096 | type F32 | T+ 223
|
|
[241/291] Writing tensor blk.26.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 223
|
|
[242/291] Writing tensor blk.26.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 223
|
|
[243/291] Writing tensor blk.26.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 223
|
|
[244/291] Writing tensor blk.26.ffn_norm.weight | size 4096 | type F32 | T+ 223
|
|
[245/291] Writing tensor blk.27.attn_norm.weight | size 4096 | type F32 | T+ 223
|
|
[246/291] Writing tensor blk.27.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 227
|
|
[247/291] Writing tensor blk.27.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 231
|
|
[248/291] Writing tensor blk.27.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 231
|
|
[249/291] Writing tensor blk.27.ffn_norm.weight | size 4096 | type F32 | T+ 231
|
|
[250/291] Writing tensor blk.27.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 231
|
|
[251/291] Writing tensor blk.27.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 231
|
|
[252/291] Writing tensor blk.27.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 231
|
|
[253/291] Writing tensor blk.27.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 231
|
|
[254/291] Writing tensor blk.28.attn_norm.weight | size 4096 | type F32 | T+ 231
|
|
[255/291] Writing tensor blk.28.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 235
|
|
[256/291] Writing tensor blk.28.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 240
|
|
[257/291] Writing tensor blk.28.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 240
|
|
[258/291] Writing tensor blk.28.ffn_norm.weight | size 4096 | type F32 | T+ 240
|
|
[259/291] Writing tensor blk.28.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 240
|
|
[260/291] Writing tensor blk.28.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 240
|
|
[261/291] Writing tensor blk.28.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 240
|
|
[262/291] Writing tensor blk.28.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 240
|
|
[263/291] Writing tensor blk.29.attn_norm.weight | size 4096 | type F32 | T+ 240
|
|
[264/291] Writing tensor blk.29.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 247
|
|
[265/291] Writing tensor blk.29.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 248
|
|
[266/291] Writing tensor blk.29.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 248
|
|
[267/291] Writing tensor blk.29.ffn_norm.weight | size 4096 | type F32 | T+ 248
|
|
[268/291] Writing tensor blk.29.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 248
|
|
[269/291] Writing tensor blk.29.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 248
|
|
[270/291] Writing tensor blk.29.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 259
|
|
[271/291] Writing tensor blk.29.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 259
|
|
[272/291] Writing tensor blk.30.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 259
|
|
[273/291] Writing tensor blk.30.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 259
|
|
[274/291] Writing tensor blk.30.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 259
|
|
[275/291] Writing tensor blk.30.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 259
|
|
[276/291] Writing tensor blk.30.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 259
|
|
[277/291] Writing tensor blk.30.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 259
|
|
[278/291] Writing tensor output.weight | size 32000 x 4096 | type Q8_0 | T+ 266
|
|
[279/291] Writing tensor blk.30.attn_norm.weight | size 4096 | type F32 | T+ 266
|
|
[280/291] Writing tensor blk.30.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 266
|
|
[281/291] Writing tensor blk.30.ffn_norm.weight | size 4096 | type F32 | T+ 266
|
|
[282/291] Writing tensor blk.31.attn_norm.weight | size 4096 | type F32 | T+ 266
|
|
[283/291] Writing tensor blk.31.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 266
|
|
[284/291] Writing tensor blk.31.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 266
|
|
[285/291] Writing tensor blk.31.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 266
|
|
[286/291] Writing tensor blk.31.ffn_norm.weight | size 4096 | type F32 | T+ 267
|
|
[287/291] Writing tensor blk.31.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 267
|
|
[288/291] Writing tensor blk.31.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 267
|
|
[289/291] Writing tensor blk.31.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 267
|
|
[290/291] Writing tensor blk.31.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 267
|
|
[291/291] Writing tensor output_norm.weight | size 4096 | type F32 | T+ 267
|
|
Wrote zephyr_int8.gguf |