(dream) tb@IBM-PF38WZKF:~/funstreams/AI$ python llama.cpp/convert.py zephyr-7b-beta --outfile zephyr_int8.gguf --outtype q8_0
Loading model file zephyr-7b-beta/model-00001-of-00008.safetensors
Loading model file zephyr-7b-beta/model-00001-of-00008.safetensors
Loading model file zephyr-7b-beta/model-00002-of-00008.safetensors
Loading model file zephyr-7b-beta/model-00003-of-00008.safetensors
Loading model file zephyr-7b-beta/model-00004-of-00008.safetensors
Loading model file zephyr-7b-beta/model-00005-of-00008.safetensors
Loading model file zephyr-7b-beta/model-00006-of-00008.safetensors
Loading model file zephyr-7b-beta/model-00007-of-00008.safetensors
Loading model file zephyr-7b-beta/model-00008-of-00008.safetensors
params = Params(n_vocab=32000, n_embd=4096, n_layer=32, n_ctx=32768, n_ff=14336, n_head=32, n_head_kv=8, n_experts=None, n_experts_used=None, f_norm_eps=1e-05, rope_scaling_type=None, f_rope_freq_base=10000.0, f_rope_scale=None, n_orig_ctx=None, rope_finetuned=None, ftype=, path_model=PosixPath('zephyr-7b-beta'))
32000 32000
Vocab info:
Special vocab info:
Permuting layer 0
Permuting layer 1
Permuting layer 2
Permuting layer 3
Permuting layer 4
Permuting layer 5
Permuting layer 6
Permuting layer 7
Permuting layer 8
Permuting layer 9
Permuting layer 10
Permuting layer 11
Permuting layer 12
Permuting layer 13
Permuting layer 14
Permuting layer 15
Permuting layer 16
Permuting layer 17
Permuting layer 18
Permuting layer 19
Permuting layer 20
Permuting layer 21
Permuting layer 22
Permuting layer 23
Permuting layer 24
Permuting layer 25
Permuting layer 26
Permuting layer 27
Permuting layer 28
Permuting layer 29
Permuting layer 30
Permuting layer 31
model.embed_tokens.weight -> token_embd.weight | BF16 | [32000, 4096]
model.layers.0.input_layernorm.weight -> blk.0.attn_norm.weight | BF16 | [4096]
model.layers.0.mlp.down_proj.weight -> blk.0.ffn_down.weight | BF16 | [4096, 14336]
model.layers.0.mlp.gate_proj.weight -> blk.0.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.0.mlp.up_proj.weight -> blk.0.ffn_up.weight | BF16 | [14336, 4096]
model.layers.0.post_attention_layernorm.weight -> blk.0.ffn_norm.weight | BF16 | [4096]
model.layers.0.self_attn.k_proj.weight -> blk.0.attn_k.weight | BF16 | [1024, 4096]
model.layers.0.self_attn.o_proj.weight -> blk.0.attn_output.weight | BF16 | [4096, 4096]
model.layers.0.self_attn.q_proj.weight -> blk.0.attn_q.weight | BF16 | [4096, 4096]
model.layers.0.self_attn.v_proj.weight -> blk.0.attn_v.weight | BF16 | [1024, 4096]
model.layers.1.input_layernorm.weight -> blk.1.attn_norm.weight | BF16 | [4096]
model.layers.1.mlp.down_proj.weight -> blk.1.ffn_down.weight | BF16 | [4096, 14336]
model.layers.1.mlp.gate_proj.weight -> blk.1.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.1.mlp.up_proj.weight -> blk.1.ffn_up.weight | BF16 | [14336, 4096]
model.layers.1.post_attention_layernorm.weight -> blk.1.ffn_norm.weight | BF16 | [4096]
model.layers.1.self_attn.k_proj.weight -> blk.1.attn_k.weight | BF16 | [1024, 4096]
model.layers.1.self_attn.o_proj.weight -> blk.1.attn_output.weight | BF16 | [4096, 4096]
model.layers.1.self_attn.q_proj.weight -> blk.1.attn_q.weight | BF16 | [4096, 4096]
model.layers.1.self_attn.v_proj.weight -> blk.1.attn_v.weight | BF16 | [1024, 4096]
model.layers.2.input_layernorm.weight -> blk.2.attn_norm.weight | BF16 | [4096]
model.layers.2.mlp.down_proj.weight -> blk.2.ffn_down.weight | BF16 | [4096, 14336]
model.layers.2.mlp.gate_proj.weight -> blk.2.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.2.mlp.up_proj.weight -> blk.2.ffn_up.weight | BF16 | [14336, 4096]
model.layers.2.post_attention_layernorm.weight -> blk.2.ffn_norm.weight | BF16 | [4096]
model.layers.2.self_attn.k_proj.weight -> blk.2.attn_k.weight | BF16 | [1024, 4096]
model.layers.2.self_attn.o_proj.weight -> blk.2.attn_output.weight | BF16 | [4096, 4096]
model.layers.2.self_attn.q_proj.weight -> blk.2.attn_q.weight | BF16 | [4096, 4096]
model.layers.2.self_attn.v_proj.weight -> blk.2.attn_v.weight | BF16 | [1024, 4096]
model.layers.3.mlp.gate_proj.weight -> blk.3.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.3.mlp.up_proj.weight -> blk.3.ffn_up.weight | BF16 | [14336, 4096]
model.layers.3.self_attn.k_proj.weight -> blk.3.attn_k.weight | BF16 | [1024, 4096]
model.layers.3.self_attn.o_proj.weight -> blk.3.attn_output.weight | BF16 | [4096, 4096]
model.layers.3.self_attn.q_proj.weight -> blk.3.attn_q.weight | BF16 | [4096, 4096]
model.layers.3.self_attn.v_proj.weight -> blk.3.attn_v.weight | BF16 | [1024, 4096]
model.layers.3.input_layernorm.weight -> blk.3.attn_norm.weight | BF16 | [4096]
model.layers.3.mlp.down_proj.weight -> blk.3.ffn_down.weight | BF16 | [4096, 14336]
model.layers.3.post_attention_layernorm.weight -> blk.3.ffn_norm.weight | BF16 | [4096]
model.layers.4.input_layernorm.weight -> blk.4.attn_norm.weight | BF16 | [4096]
model.layers.4.mlp.down_proj.weight -> blk.4.ffn_down.weight | BF16 | [4096, 14336]
model.layers.4.mlp.gate_proj.weight -> blk.4.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.4.mlp.up_proj.weight -> blk.4.ffn_up.weight | BF16 | [14336, 4096]
model.layers.4.post_attention_layernorm.weight -> blk.4.ffn_norm.weight | BF16 | [4096]
model.layers.4.self_attn.k_proj.weight -> blk.4.attn_k.weight | BF16 | [1024, 4096]
model.layers.4.self_attn.o_proj.weight -> blk.4.attn_output.weight | BF16 | [4096, 4096]
model.layers.4.self_attn.q_proj.weight -> blk.4.attn_q.weight | BF16 | [4096, 4096]
model.layers.4.self_attn.v_proj.weight -> blk.4.attn_v.weight | BF16 | [1024, 4096]
model.layers.5.input_layernorm.weight -> blk.5.attn_norm.weight | BF16 | [4096]
model.layers.5.mlp.down_proj.weight -> blk.5.ffn_down.weight | BF16 | [4096, 14336]
model.layers.5.mlp.gate_proj.weight -> blk.5.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.5.mlp.up_proj.weight -> blk.5.ffn_up.weight | BF16 | [14336, 4096]
model.layers.5.post_attention_layernorm.weight -> blk.5.ffn_norm.weight | BF16 | [4096]
model.layers.5.self_attn.k_proj.weight -> blk.5.attn_k.weight | BF16 | [1024, 4096]
model.layers.5.self_attn.o_proj.weight -> blk.5.attn_output.weight | BF16 | [4096, 4096]
model.layers.5.self_attn.q_proj.weight -> blk.5.attn_q.weight | BF16 | [4096, 4096]
model.layers.5.self_attn.v_proj.weight -> blk.5.attn_v.weight | BF16 | [1024, 4096]
model.layers.6.input_layernorm.weight -> blk.6.attn_norm.weight | BF16 | [4096]
model.layers.6.mlp.down_proj.weight -> blk.6.ffn_down.weight | BF16 | [4096, 14336]
model.layers.6.mlp.gate_proj.weight -> blk.6.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.6.mlp.up_proj.weight -> blk.6.ffn_up.weight | BF16 | [14336, 4096]
model.layers.6.post_attention_layernorm.weight -> blk.6.ffn_norm.weight | BF16 | [4096]
model.layers.6.self_attn.k_proj.weight -> blk.6.attn_k.weight | BF16 | [1024, 4096]
model.layers.6.self_attn.o_proj.weight -> blk.6.attn_output.weight | BF16 | [4096, 4096]
model.layers.6.self_attn.q_proj.weight -> blk.6.attn_q.weight | BF16 | [4096, 4096]
model.layers.6.self_attn.v_proj.weight -> blk.6.attn_v.weight | BF16 | [1024, 4096]
model.layers.7.input_layernorm.weight -> blk.7.attn_norm.weight | BF16 | [4096]
model.layers.7.mlp.down_proj.weight -> blk.7.ffn_down.weight | BF16 | [4096, 14336]
model.layers.7.mlp.gate_proj.weight -> blk.7.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.7.mlp.up_proj.weight -> blk.7.ffn_up.weight | BF16 | [14336, 4096]
model.layers.7.post_attention_layernorm.weight -> blk.7.ffn_norm.weight | BF16 | [4096]
model.layers.7.self_attn.k_proj.weight -> blk.7.attn_k.weight | BF16 | [1024, 4096]
model.layers.7.self_attn.o_proj.weight -> blk.7.attn_output.weight | BF16 | [4096, 4096]
model.layers.7.self_attn.q_proj.weight -> blk.7.attn_q.weight | BF16 | [4096, 4096]
model.layers.7.self_attn.v_proj.weight -> blk.7.attn_v.weight | BF16 | [1024, 4096]
model.layers.8.self_attn.k_proj.weight -> blk.8.attn_k.weight | BF16 | [1024, 4096]
model.layers.8.self_attn.o_proj.weight -> blk.8.attn_output.weight | BF16 | [4096, 4096]
model.layers.8.self_attn.q_proj.weight -> blk.8.attn_q.weight | BF16 | [4096, 4096]
model.layers.8.self_attn.v_proj.weight -> blk.8.attn_v.weight | BF16 | [1024, 4096]
model.layers.10.input_layernorm.weight -> blk.10.attn_norm.weight | BF16 | [4096]
model.layers.10.mlp.down_proj.weight -> blk.10.ffn_down.weight | BF16 | [4096, 14336]
model.layers.10.mlp.gate_proj.weight -> blk.10.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.10.mlp.up_proj.weight -> blk.10.ffn_up.weight | BF16 | [14336, 4096]
model.layers.10.post_attention_layernorm.weight -> blk.10.ffn_norm.weight | BF16 | [4096]
model.layers.10.self_attn.k_proj.weight -> blk.10.attn_k.weight | BF16 | [1024, 4096]
model.layers.10.self_attn.o_proj.weight -> blk.10.attn_output.weight | BF16 | [4096, 4096]
model.layers.10.self_attn.q_proj.weight -> blk.10.attn_q.weight | BF16 | [4096, 4096]
model.layers.10.self_attn.v_proj.weight -> blk.10.attn_v.weight | BF16 | [1024, 4096]
model.layers.11.input_layernorm.weight -> blk.11.attn_norm.weight | BF16 | [4096]
model.layers.11.mlp.down_proj.weight -> blk.11.ffn_down.weight | BF16 | [4096, 14336]
model.layers.11.mlp.gate_proj.weight -> blk.11.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.11.mlp.up_proj.weight -> blk.11.ffn_up.weight | BF16 | [14336, 4096]
model.layers.11.post_attention_layernorm.weight -> blk.11.ffn_norm.weight | BF16 | [4096]
model.layers.11.self_attn.k_proj.weight -> blk.11.attn_k.weight | BF16 | [1024, 4096]
model.layers.11.self_attn.o_proj.weight -> blk.11.attn_output.weight | BF16 | [4096, 4096]
model.layers.11.self_attn.q_proj.weight -> blk.11.attn_q.weight | BF16 | [4096, 4096]
model.layers.11.self_attn.v_proj.weight -> blk.11.attn_v.weight | BF16 | [1024, 4096]
model.layers.12.mlp.gate_proj.weight -> blk.12.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.12.mlp.up_proj.weight -> blk.12.ffn_up.weight | BF16 | [14336, 4096]
model.layers.12.self_attn.k_proj.weight -> blk.12.attn_k.weight | BF16 | [1024, 4096]
model.layers.12.self_attn.o_proj.weight -> blk.12.attn_output.weight | BF16 | [4096, 4096]
model.layers.12.self_attn.q_proj.weight -> blk.12.attn_q.weight | BF16 | [4096, 4096]
model.layers.12.self_attn.v_proj.weight -> blk.12.attn_v.weight | BF16 | [1024, 4096]
model.layers.8.input_layernorm.weight -> blk.8.attn_norm.weight | BF16 | [4096]
model.layers.8.mlp.down_proj.weight -> blk.8.ffn_down.weight | BF16 | [4096, 14336]
model.layers.8.mlp.gate_proj.weight -> blk.8.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.8.mlp.up_proj.weight -> blk.8.ffn_up.weight | BF16 | [14336, 4096]
model.layers.8.post_attention_layernorm.weight -> blk.8.ffn_norm.weight | BF16 | [4096]
model.layers.9.input_layernorm.weight -> blk.9.attn_norm.weight | BF16 | [4096]
model.layers.9.mlp.down_proj.weight -> blk.9.ffn_down.weight | BF16 | [4096, 14336]
model.layers.9.mlp.gate_proj.weight -> blk.9.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.9.mlp.up_proj.weight -> blk.9.ffn_up.weight | BF16 | [14336, 4096]
model.layers.9.post_attention_layernorm.weight -> blk.9.ffn_norm.weight | BF16 | [4096]
model.layers.9.self_attn.k_proj.weight -> blk.9.attn_k.weight | BF16 | [1024, 4096]
model.layers.9.self_attn.o_proj.weight -> blk.9.attn_output.weight | BF16 | [4096, 4096]
model.layers.9.self_attn.q_proj.weight -> blk.9.attn_q.weight | BF16 | [4096, 4096]
model.layers.9.self_attn.v_proj.weight -> blk.9.attn_v.weight | BF16 | [1024, 4096]
model.layers.12.input_layernorm.weight -> blk.12.attn_norm.weight | BF16 | [4096]
model.layers.12.mlp.down_proj.weight -> blk.12.ffn_down.weight | BF16 | [4096, 14336]
model.layers.12.post_attention_layernorm.weight -> blk.12.ffn_norm.weight | BF16 | [4096]
model.layers.13.input_layernorm.weight -> blk.13.attn_norm.weight | BF16 | [4096]
model.layers.13.mlp.down_proj.weight -> blk.13.ffn_down.weight | BF16 | [4096, 14336]
model.layers.13.mlp.gate_proj.weight -> blk.13.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.13.mlp.up_proj.weight -> blk.13.ffn_up.weight | BF16 | [14336, 4096]
model.layers.13.post_attention_layernorm.weight -> blk.13.ffn_norm.weight | BF16 | [4096]
model.layers.13.self_attn.k_proj.weight -> blk.13.attn_k.weight | BF16 | [1024, 4096]
model.layers.13.self_attn.o_proj.weight -> blk.13.attn_output.weight | BF16 | [4096, 4096]
model.layers.13.self_attn.q_proj.weight -> blk.13.attn_q.weight | BF16 | [4096, 4096]
model.layers.13.self_attn.v_proj.weight -> blk.13.attn_v.weight | BF16 | [1024, 4096]
model.layers.14.input_layernorm.weight -> blk.14.attn_norm.weight | BF16 | [4096]
model.layers.14.mlp.down_proj.weight -> blk.14.ffn_down.weight | BF16 | [4096, 14336]
model.layers.14.mlp.gate_proj.weight -> blk.14.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.14.mlp.up_proj.weight -> blk.14.ffn_up.weight | BF16 | [14336, 4096]
model.layers.14.post_attention_layernorm.weight -> blk.14.ffn_norm.weight | BF16 | [4096]
model.layers.14.self_attn.k_proj.weight -> blk.14.attn_k.weight | BF16 | [1024, 4096]
model.layers.14.self_attn.o_proj.weight -> blk.14.attn_output.weight | BF16 | [4096, 4096]
model.layers.14.self_attn.q_proj.weight -> blk.14.attn_q.weight | BF16 | [4096, 4096]
model.layers.14.self_attn.v_proj.weight -> blk.14.attn_v.weight | BF16 | [1024, 4096]
model.layers.15.input_layernorm.weight -> blk.15.attn_norm.weight | BF16 | [4096]
model.layers.15.mlp.down_proj.weight -> blk.15.ffn_down.weight | BF16 | [4096, 14336]
model.layers.15.mlp.gate_proj.weight -> blk.15.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.15.mlp.up_proj.weight -> blk.15.ffn_up.weight | BF16 | [14336, 4096]
model.layers.15.post_attention_layernorm.weight -> blk.15.ffn_norm.weight | BF16 | [4096]
model.layers.15.self_attn.k_proj.weight -> blk.15.attn_k.weight | BF16 | [1024, 4096]
model.layers.15.self_attn.o_proj.weight -> blk.15.attn_output.weight | BF16 | [4096, 4096]
model.layers.15.self_attn.q_proj.weight -> blk.15.attn_q.weight | BF16 | [4096, 4096]
model.layers.15.self_attn.v_proj.weight -> blk.15.attn_v.weight | BF16 | [1024, 4096]
model.layers.16.input_layernorm.weight -> blk.16.attn_norm.weight | BF16 | [4096]
model.layers.16.mlp.down_proj.weight -> blk.16.ffn_down.weight | BF16 | [4096, 14336]
model.layers.16.mlp.gate_proj.weight -> blk.16.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.16.mlp.up_proj.weight -> blk.16.ffn_up.weight | BF16 | [14336, 4096]
model.layers.16.post_attention_layernorm.weight -> blk.16.ffn_norm.weight | BF16 | [4096]
model.layers.16.self_attn.k_proj.weight -> blk.16.attn_k.weight | BF16 | [1024, 4096]
model.layers.16.self_attn.o_proj.weight -> blk.16.attn_output.weight | BF16 | [4096, 4096]
model.layers.16.self_attn.q_proj.weight -> blk.16.attn_q.weight | BF16 | [4096, 4096]
model.layers.16.self_attn.v_proj.weight -> blk.16.attn_v.weight | BF16 | [1024, 4096]
model.layers.17.self_attn.k_proj.weight -> blk.17.attn_k.weight | BF16 | [1024, 4096]
model.layers.17.self_attn.o_proj.weight -> blk.17.attn_output.weight | BF16 | [4096, 4096]
model.layers.17.self_attn.q_proj.weight -> blk.17.attn_q.weight | BF16 | [4096, 4096]
model.layers.17.self_attn.v_proj.weight -> blk.17.attn_v.weight | BF16 | [1024, 4096]
model.layers.17.input_layernorm.weight -> blk.17.attn_norm.weight | BF16 | [4096]
model.layers.17.mlp.down_proj.weight -> blk.17.ffn_down.weight | BF16 | [4096, 14336]
model.layers.17.mlp.gate_proj.weight -> blk.17.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.17.mlp.up_proj.weight -> blk.17.ffn_up.weight | BF16 | [14336, 4096]
model.layers.17.post_attention_layernorm.weight -> blk.17.ffn_norm.weight | BF16 | [4096]
model.layers.18.input_layernorm.weight -> blk.18.attn_norm.weight | BF16 | [4096]
model.layers.18.mlp.down_proj.weight -> blk.18.ffn_down.weight | BF16 | [4096, 14336]
model.layers.18.mlp.gate_proj.weight -> blk.18.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.18.mlp.up_proj.weight -> blk.18.ffn_up.weight | BF16 | [14336, 4096]
model.layers.18.post_attention_layernorm.weight -> blk.18.ffn_norm.weight | BF16 | [4096]
model.layers.18.self_attn.k_proj.weight -> blk.18.attn_k.weight | BF16 | [1024, 4096]
model.layers.18.self_attn.o_proj.weight -> blk.18.attn_output.weight | BF16 | [4096, 4096]
model.layers.18.self_attn.q_proj.weight -> blk.18.attn_q.weight | BF16 | [4096, 4096]
model.layers.18.self_attn.v_proj.weight -> blk.18.attn_v.weight | BF16 | [1024, 4096]
model.layers.19.input_layernorm.weight -> blk.19.attn_norm.weight | BF16 | [4096]
model.layers.19.mlp.down_proj.weight -> blk.19.ffn_down.weight | BF16 | [4096, 14336]
model.layers.19.mlp.gate_proj.weight -> blk.19.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.19.mlp.up_proj.weight -> blk.19.ffn_up.weight | BF16 | [14336, 4096]
model.layers.19.post_attention_layernorm.weight -> blk.19.ffn_norm.weight | BF16 | [4096]
model.layers.19.self_attn.k_proj.weight -> blk.19.attn_k.weight | BF16 | [1024, 4096]
model.layers.19.self_attn.o_proj.weight -> blk.19.attn_output.weight | BF16 | [4096, 4096]
model.layers.19.self_attn.q_proj.weight -> blk.19.attn_q.weight | BF16 | [4096, 4096]
model.layers.19.self_attn.v_proj.weight -> blk.19.attn_v.weight | BF16 | [1024, 4096]
model.layers.20.input_layernorm.weight -> blk.20.attn_norm.weight | BF16 | [4096]
model.layers.20.mlp.down_proj.weight -> blk.20.ffn_down.weight | BF16 | [4096, 14336]
model.layers.20.mlp.gate_proj.weight -> blk.20.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.20.mlp.up_proj.weight -> blk.20.ffn_up.weight | BF16 | [14336, 4096]
model.layers.20.post_attention_layernorm.weight -> blk.20.ffn_norm.weight | BF16 | [4096]
model.layers.20.self_attn.k_proj.weight -> blk.20.attn_k.weight | BF16 | [1024, 4096]
model.layers.20.self_attn.o_proj.weight -> blk.20.attn_output.weight | BF16 | [4096, 4096]
model.layers.20.self_attn.q_proj.weight -> blk.20.attn_q.weight | BF16 | [4096, 4096]
model.layers.20.self_attn.v_proj.weight -> blk.20.attn_v.weight | BF16 | [1024, 4096]
model.layers.21.mlp.gate_proj.weight -> blk.21.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.21.mlp.up_proj.weight -> blk.21.ffn_up.weight | BF16 | [14336, 4096]
model.layers.21.self_attn.k_proj.weight -> blk.21.attn_k.weight | BF16 | [1024, 4096]
model.layers.21.self_attn.o_proj.weight -> blk.21.attn_output.weight | BF16 | [4096, 4096]
model.layers.21.self_attn.q_proj.weight -> blk.21.attn_q.weight | BF16 | [4096, 4096]
model.layers.21.self_attn.v_proj.weight -> blk.21.attn_v.weight | BF16 | [1024, 4096]
model.layers.21.input_layernorm.weight -> blk.21.attn_norm.weight | BF16 | [4096]
model.layers.21.mlp.down_proj.weight -> blk.21.ffn_down.weight | BF16 | [4096, 14336]
model.layers.21.post_attention_layernorm.weight -> blk.21.ffn_norm.weight | BF16 | [4096]
model.layers.22.input_layernorm.weight -> blk.22.attn_norm.weight | BF16 | [4096]
model.layers.22.mlp.down_proj.weight -> blk.22.ffn_down.weight | BF16 | [4096, 14336]
model.layers.22.mlp.gate_proj.weight -> blk.22.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.22.mlp.up_proj.weight -> blk.22.ffn_up.weight | BF16 | [14336, 4096]
model.layers.22.post_attention_layernorm.weight -> blk.22.ffn_norm.weight | BF16 | [4096]
model.layers.22.self_attn.k_proj.weight -> blk.22.attn_k.weight | BF16 | [1024, 4096]
model.layers.22.self_attn.o_proj.weight -> blk.22.attn_output.weight | BF16 | [4096, 4096]
model.layers.22.self_attn.q_proj.weight -> blk.22.attn_q.weight | BF16 | [4096, 4096]
model.layers.22.self_attn.v_proj.weight -> blk.22.attn_v.weight | BF16 | [1024, 4096]
model.layers.23.input_layernorm.weight -> blk.23.attn_norm.weight | BF16 | [4096]
model.layers.23.mlp.down_proj.weight -> blk.23.ffn_down.weight | BF16 | [4096, 14336]
model.layers.23.mlp.gate_proj.weight -> blk.23.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.23.mlp.up_proj.weight -> blk.23.ffn_up.weight | BF16 | [14336, 4096]
model.layers.23.post_attention_layernorm.weight -> blk.23.ffn_norm.weight | BF16 | [4096]
model.layers.23.self_attn.k_proj.weight -> blk.23.attn_k.weight | BF16 | [1024, 4096]
model.layers.23.self_attn.o_proj.weight -> blk.23.attn_output.weight | BF16 | [4096, 4096]
model.layers.23.self_attn.q_proj.weight -> blk.23.attn_q.weight | BF16 | [4096, 4096]
model.layers.23.self_attn.v_proj.weight -> blk.23.attn_v.weight | BF16 | [1024, 4096]
model.layers.24.input_layernorm.weight -> blk.24.attn_norm.weight | BF16 | [4096]
model.layers.24.mlp.down_proj.weight -> blk.24.ffn_down.weight | BF16 | [4096, 14336]
model.layers.24.mlp.gate_proj.weight -> blk.24.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.24.mlp.up_proj.weight -> blk.24.ffn_up.weight | BF16 | [14336, 4096]
model.layers.24.post_attention_layernorm.weight -> blk.24.ffn_norm.weight | BF16 | [4096]
model.layers.24.self_attn.k_proj.weight -> blk.24.attn_k.weight | BF16 | [1024, 4096]
model.layers.24.self_attn.o_proj.weight -> blk.24.attn_output.weight | BF16 | [4096, 4096]
model.layers.24.self_attn.q_proj.weight -> blk.24.attn_q.weight | BF16 | [4096, 4096]
model.layers.24.self_attn.v_proj.weight -> blk.24.attn_v.weight | BF16 | [1024, 4096]
model.layers.25.input_layernorm.weight -> blk.25.attn_norm.weight | BF16 | [4096]
model.layers.25.mlp.down_proj.weight -> blk.25.ffn_down.weight | BF16 | [4096, 14336]
model.layers.25.mlp.gate_proj.weight -> blk.25.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.25.mlp.up_proj.weight -> blk.25.ffn_up.weight | BF16 | [14336, 4096]
model.layers.25.post_attention_layernorm.weight -> blk.25.ffn_norm.weight | BF16 | [4096]
model.layers.25.self_attn.k_proj.weight -> blk.25.attn_k.weight | BF16 | [1024, 4096]
model.layers.25.self_attn.o_proj.weight -> blk.25.attn_output.weight | BF16 | [4096, 4096]
model.layers.25.self_attn.q_proj.weight -> blk.25.attn_q.weight | BF16 | [4096, 4096]
model.layers.25.self_attn.v_proj.weight -> blk.25.attn_v.weight | BF16 | [1024, 4096]
model.layers.26.self_attn.k_proj.weight -> blk.26.attn_k.weight | BF16 | [1024, 4096]
model.layers.26.self_attn.o_proj.weight -> blk.26.attn_output.weight | BF16 | [4096, 4096]
model.layers.26.self_attn.q_proj.weight -> blk.26.attn_q.weight | BF16 | [4096, 4096]
model.layers.26.self_attn.v_proj.weight -> blk.26.attn_v.weight | BF16 | [1024, 4096]
model.layers.26.input_layernorm.weight -> blk.26.attn_norm.weight | BF16 | [4096]
model.layers.26.mlp.down_proj.weight -> blk.26.ffn_down.weight | BF16 | [4096, 14336]
model.layers.26.mlp.gate_proj.weight -> blk.26.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.26.mlp.up_proj.weight -> blk.26.ffn_up.weight | BF16 | [14336, 4096]
model.layers.26.post_attention_layernorm.weight -> blk.26.ffn_norm.weight | BF16 | [4096]
model.layers.27.input_layernorm.weight -> blk.27.attn_norm.weight | BF16 | [4096]
model.layers.27.mlp.down_proj.weight -> blk.27.ffn_down.weight | BF16 | [4096, 14336]
model.layers.27.mlp.gate_proj.weight -> blk.27.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.27.mlp.up_proj.weight -> blk.27.ffn_up.weight | BF16 | [14336, 4096]
model.layers.27.post_attention_layernorm.weight -> blk.27.ffn_norm.weight | BF16 | [4096]
model.layers.27.self_attn.k_proj.weight -> blk.27.attn_k.weight | BF16 | [1024, 4096]
model.layers.27.self_attn.o_proj.weight -> blk.27.attn_output.weight | BF16 | [4096, 4096]
model.layers.27.self_attn.q_proj.weight -> blk.27.attn_q.weight | BF16 | [4096, 4096]
model.layers.27.self_attn.v_proj.weight -> blk.27.attn_v.weight | BF16 | [1024, 4096]
model.layers.28.input_layernorm.weight -> blk.28.attn_norm.weight | BF16 | [4096]
model.layers.28.mlp.down_proj.weight -> blk.28.ffn_down.weight | BF16 | [4096, 14336]
model.layers.28.mlp.gate_proj.weight -> blk.28.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.28.mlp.up_proj.weight -> blk.28.ffn_up.weight | BF16 | [14336, 4096]
model.layers.28.post_attention_layernorm.weight -> blk.28.ffn_norm.weight | BF16 | [4096]
model.layers.28.self_attn.k_proj.weight -> blk.28.attn_k.weight | BF16 | [1024, 4096]
model.layers.28.self_attn.o_proj.weight -> blk.28.attn_output.weight | BF16 | [4096, 4096]
model.layers.28.self_attn.q_proj.weight -> blk.28.attn_q.weight | BF16 | [4096, 4096]
model.layers.28.self_attn.v_proj.weight -> blk.28.attn_v.weight | BF16 | [1024, 4096]
model.layers.29.input_layernorm.weight -> blk.29.attn_norm.weight | BF16 | [4096]
model.layers.29.mlp.down_proj.weight -> blk.29.ffn_down.weight | BF16 | [4096, 14336]
model.layers.29.mlp.gate_proj.weight -> blk.29.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.29.mlp.up_proj.weight -> blk.29.ffn_up.weight | BF16 | [14336, 4096]
model.layers.29.post_attention_layernorm.weight -> blk.29.ffn_norm.weight | BF16 | [4096]
model.layers.29.self_attn.k_proj.weight -> blk.29.attn_k.weight | BF16 | [1024, 4096]
model.layers.29.self_attn.o_proj.weight -> blk.29.attn_output.weight | BF16 | [4096, 4096]
model.layers.29.self_attn.q_proj.weight -> blk.29.attn_q.weight | BF16 | [4096, 4096]
model.layers.29.self_attn.v_proj.weight -> blk.29.attn_v.weight | BF16 | [1024, 4096]
model.layers.30.mlp.gate_proj.weight -> blk.30.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.30.mlp.up_proj.weight -> blk.30.ffn_up.weight | BF16 | [14336, 4096]
model.layers.30.self_attn.k_proj.weight -> blk.30.attn_k.weight | BF16 | [1024, 4096]
model.layers.30.self_attn.o_proj.weight -> blk.30.attn_output.weight | BF16 | [4096, 4096]
model.layers.30.self_attn.q_proj.weight -> blk.30.attn_q.weight | BF16 | [4096, 4096]
model.layers.30.self_attn.v_proj.weight -> blk.30.attn_v.weight | BF16 | [1024, 4096]
lm_head.weight -> output.weight | BF16 | [32000, 4096]
model.layers.30.input_layernorm.weight -> blk.30.attn_norm.weight | BF16 | [4096]
model.layers.30.mlp.down_proj.weight -> blk.30.ffn_down.weight | BF16 | [4096, 14336]
model.layers.30.post_attention_layernorm.weight -> blk.30.ffn_norm.weight | BF16 | [4096]
model.layers.31.input_layernorm.weight -> blk.31.attn_norm.weight | BF16 | [4096]
model.layers.31.mlp.down_proj.weight -> blk.31.ffn_down.weight | BF16 | [4096, 14336]
model.layers.31.mlp.gate_proj.weight -> blk.31.ffn_gate.weight | BF16 | [14336, 4096]
model.layers.31.mlp.up_proj.weight -> blk.31.ffn_up.weight | BF16 | [14336, 4096]
model.layers.31.post_attention_layernorm.weight -> blk.31.ffn_norm.weight | BF16 | [4096]
model.layers.31.self_attn.k_proj.weight -> blk.31.attn_k.weight | BF16 | [1024, 4096]
model.layers.31.self_attn.o_proj.weight -> blk.31.attn_output.weight | BF16 | [4096, 4096]
model.layers.31.self_attn.q_proj.weight -> blk.31.attn_q.weight | BF16 | [4096, 4096]
model.layers.31.self_attn.v_proj.weight -> blk.31.attn_v.weight | BF16 | [1024, 4096]
model.norm.weight -> output_norm.weight | BF16 | [4096]
Writing zephyr_int8.gguf, format 7
gguf: This GGUF file is for Little Endian only
gguf: Adding 58980 merge(s).
gguf: Setting special token type bos to 1
gguf: Setting special token type eos to 2
gguf: Setting special token type unk to 0
gguf: Setting special token type pad to 2
gguf: Setting chat_template to {% for message in messages %} {% if message['role'] == 'user' %} {{ '<|user|> ' + message['content'] + eos_token }} {% elif message['role'] == 'system' %} {{ '<|system|> ' + message['content'] + eos_token }} {% elif message['role'] == 'assistant' %} {{ '<|assistant|> ' + message['content'] + eos_token }} {% endif %} {% if loop.last and add_generation_prompt %} {{ '<|assistant|>' }} {% endif %} {% endfor %}
[ 1/291] Writing tensor token_embd.weight | size 32000 x 4096 | type Q8_0 | T+ 19
[ 2/291] Writing tensor blk.0.attn_norm.weight | size 4096 | type F32 | T+ 19
[ 3/291] Writing tensor blk.0.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 19
[ 4/291] Writing tensor blk.0.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 19
[ 5/291] Writing tensor blk.0.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 19
[ 6/291] Writing tensor blk.0.ffn_norm.weight | size 4096 | type F32 | T+ 19
[ 7/291] Writing tensor blk.0.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 19
[ 8/291] Writing tensor blk.0.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 19
[ 9/291] Writing tensor blk.0.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 21
[ 10/291] Writing tensor blk.0.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 22
[ 11/291] Writing tensor blk.1.attn_norm.weight | size 4096 | type F32 | T+ 22
[ 12/291] Writing tensor blk.1.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 23
[ 13/291] Writing tensor blk.1.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 27
[ 14/291] Writing tensor blk.1.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 27
[ 15/291] Writing tensor blk.1.ffn_norm.weight | size 4096 | type F32 | T+ 27
[ 16/291] Writing tensor blk.1.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 27
[ 17/291] Writing tensor blk.1.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 27
[ 18/291] Writing tensor blk.1.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 27
[ 19/291] Writing tensor blk.1.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 27
[ 20/291] Writing tensor blk.2.attn_norm.weight | size 4096 | type F32 | T+ 27
[ 21/291] Writing tensor blk.2.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 34
[ 22/291] Writing tensor blk.2.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 35
[ 23/291] Writing tensor blk.2.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 35
[ 24/291] Writing tensor blk.2.ffn_norm.weight | size 4096 | type F32 | T+ 35
[ 25/291] Writing tensor blk.2.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 35
[ 26/291] Writing tensor blk.2.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 35
[ 27/291] Writing tensor blk.2.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 35
[ 28/291] Writing tensor blk.2.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 35
[ 29/291] Writing tensor blk.3.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 38
[ 30/291] Writing tensor blk.3.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 38
[ 31/291] Writing tensor blk.3.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 42
[ 32/291] Writing tensor blk.3.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 42
[ 33/291] Writing tensor blk.3.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 42
[ 34/291] Writing tensor blk.3.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 42
[ 35/291] Writing tensor blk.3.attn_norm.weight | size 4096 | type F32 | T+ 42
[ 36/291] Writing tensor blk.3.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 42
[ 37/291] Writing tensor blk.3.ffn_norm.weight | size 4096 | type F32 | T+ 42
[ 38/291] Writing tensor blk.4.attn_norm.weight | size 4096 | type F32 | T+ 42
[ 39/291] Writing tensor blk.4.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 45
[ 40/291] Writing tensor blk.4.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 49
[ 41/291] Writing tensor blk.4.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 49
[ 42/291] Writing tensor blk.4.ffn_norm.weight | size 4096 | type F32 | T+ 49
[ 43/291] Writing tensor blk.4.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 49
[ 44/291] Writing tensor blk.4.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 49
[ 45/291] Writing tensor blk.4.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 49
[ 46/291] Writing tensor blk.4.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 49
[ 47/291] Writing tensor blk.5.attn_norm.weight | size 4096 | type F32 | T+ 49
[ 48/291] Writing tensor blk.5.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 53
[ 49/291] Writing tensor blk.5.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 57
[ 50/291] Writing tensor blk.5.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 57
[ 51/291] Writing tensor blk.5.ffn_norm.weight | size 4096 | type F32 | T+ 57
[ 52/291] Writing tensor blk.5.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 57
[ 53/291] Writing tensor blk.5.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 57
[ 54/291] Writing tensor blk.5.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 57
[ 55/291] Writing tensor blk.5.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 57
[ 56/291] Writing tensor blk.6.attn_norm.weight | size 4096 | type F32 | T+ 57
[ 57/291] Writing tensor blk.6.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 61
[ 58/291] Writing tensor blk.6.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 64
[ 59/291] Writing tensor blk.6.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 65
[ 60/291] Writing tensor blk.6.ffn_norm.weight | size 4096 | type F32 | T+ 65
[ 61/291] Writing tensor blk.6.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 65
[ 62/291] Writing tensor blk.6.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 65
[ 63/291] Writing tensor blk.6.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 65
[ 64/291] Writing tensor blk.6.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 65
[ 65/291] Writing tensor blk.7.attn_norm.weight | size 4096 | type F32 | T+ 65
[ 66/291] Writing tensor blk.7.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 68
[ 67/291] Writing tensor blk.7.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 70
[ 68/291] Writing tensor blk.7.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 70
[ 69/291] Writing tensor blk.7.ffn_norm.weight | size 4096 | type F32 | T+ 70
[ 70/291] Writing tensor blk.7.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 70
[ 71/291] Writing tensor blk.7.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 74
[ 72/291] Writing tensor blk.7.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 74
[ 73/291] Writing tensor blk.7.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 74
[ 74/291] Writing tensor blk.8.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 74
[ 75/291] Writing tensor blk.8.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 74
[ 76/291] Writing tensor blk.8.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 74
[ 77/291] Writing tensor blk.8.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 74
[ 78/291] Writing tensor blk.10.attn_norm.weight | size 4096 | type F32 | T+ 74
[ 79/291] Writing tensor blk.10.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 78
[ 80/291] Writing tensor blk.10.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 82
[ 81/291] Writing tensor blk.10.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 82
[ 82/291] Writing tensor blk.10.ffn_norm.weight | size 4096 | type F32 | T+ 82
[ 83/291] Writing tensor blk.10.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 82
[ 84/291] Writing tensor blk.10.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 82
[ 85/291] Writing tensor blk.10.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 82
[ 86/291] Writing tensor blk.10.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 82
[ 87/291] Writing tensor blk.11.attn_norm.weight | size 4096 | type F32 | T+ 82
[ 88/291] Writing tensor blk.11.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 90
[ 89/291] Writing tensor blk.11.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 90
[ 90/291] Writing tensor blk.11.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 90
[ 91/291] Writing tensor blk.11.ffn_norm.weight | size 4096 | type F32 | T+ 90
[ 92/291] Writing tensor blk.11.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 90
[ 93/291] Writing tensor blk.11.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 90
[ 94/291] Writing tensor blk.11.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 91
[ 95/291] Writing tensor blk.11.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 91
[ 96/291] Writing tensor blk.12.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 99
[ 97/291] Writing tensor blk.12.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 99
[ 98/291] Writing tensor blk.12.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 99
[ 99/291] Writing tensor blk.12.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 99
[100/291] Writing tensor blk.12.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 99
[101/291] Writing tensor blk.12.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 100
[102/291] Writing tensor blk.8.attn_norm.weight | size 4096 | type F32 | T+ 100
[103/291] Writing tensor blk.8.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 100
[104/291] Writing tensor blk.8.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 104
[105/291] Writing tensor blk.8.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 104
[106/291] Writing tensor blk.8.ffn_norm.weight | size 4096 | type F32 | T+ 104
[107/291] Writing tensor blk.9.attn_norm.weight | size 4096 | type F32 | T+ 104
[108/291] Writing tensor blk.9.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 105
[109/291] Writing tensor blk.9.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 108
[110/291] Writing tensor blk.9.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 108
[111/291] Writing tensor blk.9.ffn_norm.weight | size 4096 | type F32 | T+ 108
[112/291] Writing tensor blk.9.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 112
[113/291] Writing tensor blk.9.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 112
[114/291] Writing tensor blk.9.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 112
[115/291] Writing tensor blk.9.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 112
[116/291] Writing tensor blk.12.attn_norm.weight | size 4096 | type F32 | T+ 112
[117/291] Writing tensor blk.12.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 112
[118/291] Writing tensor blk.12.ffn_norm.weight | size 4096 | type F32 | T+ 112
[119/291] Writing tensor blk.13.attn_norm.weight | size 4096 | type F32 | T+ 112
[120/291] Writing tensor blk.13.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 117
[121/291] Writing tensor blk.13.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 120
[122/291] Writing tensor blk.13.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 120
[123/291] Writing tensor blk.13.ffn_norm.weight | size 4096 | type F32 | T+ 120
[124/291] Writing tensor blk.13.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 120
[125/291] Writing tensor blk.13.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 120
[126/291] Writing tensor blk.13.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 120
[127/291] Writing tensor blk.13.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 120
[128/291] Writing tensor blk.14.attn_norm.weight | size 4096 | type F32 | T+ 120
[129/291] Writing tensor blk.14.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 124
[130/291] Writing tensor blk.14.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 128
[131/291] Writing tensor blk.14.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 129
[132/291] Writing tensor blk.14.ffn_norm.weight | size 4096 | type F32 | T+ 129
[133/291] Writing tensor blk.14.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 129
[134/291] Writing tensor blk.14.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 129
[135/291] Writing tensor blk.14.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 129
[136/291] Writing tensor blk.14.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 129
[137/291] Writing tensor blk.15.attn_norm.weight | size 4096 | type F32 | T+ 129
[138/291] Writing tensor blk.15.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 132
[139/291] Writing tensor blk.15.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 136
[140/291] Writing tensor blk.15.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 137
[141/291] Writing tensor blk.15.ffn_norm.weight | size 4096 | type F32 | T+ 137
[142/291] Writing tensor blk.15.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 137
[143/291] Writing tensor blk.15.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 137
[144/291] Writing tensor blk.15.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 137
[145/291] Writing tensor blk.15.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 137
[146/291] Writing tensor blk.16.attn_norm.weight | size 4096 | type F32 | T+ 137
[147/291] Writing tensor blk.16.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 140
[148/291] Writing tensor blk.16.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 142
[149/291] Writing tensor blk.16.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 142
[150/291] Writing tensor blk.16.ffn_norm.weight | size 4096 | type F32 | T+ 143
[151/291] Writing tensor blk.16.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 143
[152/291] Writing tensor blk.16.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 146
[153/291] Writing tensor blk.16.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 146
[154/291] Writing tensor blk.16.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 146
[155/291] Writing tensor blk.17.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 146
[156/291] Writing tensor blk.17.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 146
[157/291] Writing tensor blk.17.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 152
[158/291] Writing tensor blk.17.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 152
[159/291] Writing tensor blk.17.attn_norm.weight | size 4096 | type F32 | T+ 152
[160/291] Writing tensor blk.17.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 152
[161/291] Writing tensor blk.17.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 152
[162/291] Writing tensor blk.17.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 152
[163/291] Writing tensor blk.17.ffn_norm.weight | size 4096 | type F32 | T+ 152
[164/291] Writing tensor blk.18.attn_norm.weight | size 4096 | type F32 | T+ 152
[165/291] Writing tensor blk.18.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 156
[166/291] Writing tensor blk.18.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 160
[167/291] Writing tensor blk.18.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 160
[168/291] Writing tensor blk.18.ffn_norm.weight | size 4096 | type F32 | T+ 160
[169/291] Writing tensor blk.18.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 160
[170/291] Writing tensor blk.18.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 160
[171/291] Writing tensor blk.18.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 160
[172/291] Writing tensor blk.18.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 160
[173/291] Writing tensor blk.19.attn_norm.weight | size 4096 | type F32 | T+ 160
[174/291] Writing tensor blk.19.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 163
[175/291] Writing tensor blk.19.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 167
[176/291] Writing tensor blk.19.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 168
[177/291] Writing tensor blk.19.ffn_norm.weight | size 4096 | type F32 | T+ 168
[178/291] Writing tensor blk.19.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 168
[179/291] Writing tensor blk.19.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 168
[180/291] Writing tensor blk.19.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 168
[181/291] Writing tensor blk.19.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 168
[182/291] Writing tensor blk.20.attn_norm.weight | size 4096 | type F32 | T+ 168
[183/291] Writing tensor blk.20.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 175
[184/291] Writing tensor blk.20.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 175
[185/291] Writing tensor blk.20.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 175
[186/291] Writing tensor blk.20.ffn_norm.weight | size 4096 | type F32 | T+ 176
[187/291] Writing tensor blk.20.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 176
[188/291] Writing tensor blk.20.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 176
[189/291] Writing tensor blk.20.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 176
[190/291] Writing tensor blk.20.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 176
[191/291] Writing tensor blk.21.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 179
[192/291] Writing tensor blk.21.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 179
[193/291] Writing tensor blk.21.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 183
[194/291] Writing tensor blk.21.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 184
[195/291] Writing tensor blk.21.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 184
[196/291] Writing tensor blk.21.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 184
[197/291] Writing tensor blk.21.attn_norm.weight | size 4096 | type F32 | T+ 184
[198/291] Writing tensor blk.21.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 184
[199/291] Writing tensor blk.21.ffn_norm.weight | size 4096 | type F32 | T+ 184
[200/291] Writing tensor blk.22.attn_norm.weight | size 4096 | type F32 | T+ 184
[201/291] Writing tensor blk.22.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 188
[202/291] Writing tensor blk.22.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 192
[203/291] Writing tensor blk.22.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 192
[204/291] Writing tensor blk.22.ffn_norm.weight | size 4096 | type F32 | T+ 192
[205/291] Writing tensor blk.22.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 192
[206/291] Writing tensor blk.22.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 192
[207/291] Writing tensor blk.22.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 192
[208/291] Writing tensor blk.22.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 192
[209/291] Writing tensor blk.23.attn_norm.weight | size 4096 | type F32 | T+ 192
[210/291] Writing tensor blk.23.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 196
[211/291] Writing tensor blk.23.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 200
[212/291] Writing tensor blk.23.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 200
[213/291] Writing tensor blk.23.ffn_norm.weight | size 4096 | type F32 | T+ 200
[214/291] Writing tensor blk.23.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 200
[215/291] Writing tensor blk.23.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 200
[216/291] Writing tensor blk.23.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 200
[217/291] Writing tensor blk.23.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 200
[218/291] Writing tensor blk.24.attn_norm.weight | size 4096 | type F32 | T+ 200
[219/291] Writing tensor blk.24.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 204
[220/291] Writing tensor blk.24.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 208
[221/291] Writing tensor blk.24.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 208
[222/291] Writing tensor blk.24.ffn_norm.weight | size 4096 | type F32 | T+ 208
[223/291] Writing tensor blk.24.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 208
[224/291] Writing tensor blk.24.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 208
[225/291] Writing tensor blk.24.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 208
[226/291] Writing tensor blk.24.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 208
[227/291] Writing tensor blk.25.attn_norm.weight | size 4096 | type F32 | T+ 208
[228/291] Writing tensor blk.25.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 212
[229/291] Writing tensor blk.25.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 213
[230/291] Writing tensor blk.25.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 214
[231/291] Writing tensor blk.25.ffn_norm.weight | size 4096 | type F32 | T+ 214
[232/291] Writing tensor blk.25.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 214
[233/291] Writing tensor blk.25.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 217
[234/291] Writing tensor blk.25.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 217
[235/291] Writing tensor blk.25.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 217
[236/291] Writing tensor blk.26.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 217
[237/291] Writing tensor blk.26.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 217
[238/291] Writing tensor blk.26.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 223
[239/291] Writing tensor blk.26.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 223
[240/291] Writing tensor blk.26.attn_norm.weight | size 4096 | type F32 | T+ 223
[241/291] Writing tensor blk.26.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 223
[242/291] Writing tensor blk.26.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 223
[243/291] Writing tensor blk.26.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 223
[244/291] Writing tensor blk.26.ffn_norm.weight | size 4096 | type F32 | T+ 223
[245/291] Writing tensor blk.27.attn_norm.weight | size 4096 | type F32 | T+ 223
[246/291] Writing tensor blk.27.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 227
[247/291] Writing tensor blk.27.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 231
[248/291] Writing tensor blk.27.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 231
[249/291] Writing tensor blk.27.ffn_norm.weight | size 4096 | type F32 | T+ 231
[250/291] Writing tensor blk.27.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 231
[251/291] Writing tensor blk.27.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 231
[252/291] Writing tensor blk.27.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 231
[253/291] Writing tensor blk.27.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 231
[254/291] Writing tensor blk.28.attn_norm.weight | size 4096 | type F32 | T+ 231
[255/291] Writing tensor blk.28.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 235
[256/291] Writing tensor blk.28.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 240
[257/291] Writing tensor blk.28.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 240
[258/291] Writing tensor blk.28.ffn_norm.weight | size 4096 | type F32 | T+ 240
[259/291] Writing tensor blk.28.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 240
[260/291] Writing tensor blk.28.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 240
[261/291] Writing tensor blk.28.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 240
[262/291] Writing tensor blk.28.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 240
[263/291] Writing tensor blk.29.attn_norm.weight | size 4096 | type F32 | T+ 240
[264/291] Writing tensor blk.29.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 247
[265/291] Writing tensor blk.29.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 248
[266/291] Writing tensor blk.29.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 248
[267/291] Writing tensor blk.29.ffn_norm.weight | size 4096 | type F32 | T+ 248
[268/291] Writing tensor blk.29.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 248
[269/291] Writing tensor blk.29.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 248
[270/291] Writing tensor blk.29.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 259
[271/291] Writing tensor blk.29.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 259
[272/291] Writing tensor blk.30.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 259
[273/291] Writing tensor blk.30.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 259
[274/291] Writing tensor blk.30.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 259
[275/291] Writing tensor blk.30.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 259
[276/291] Writing tensor blk.30.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 259
[277/291] Writing tensor blk.30.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 259
[278/291] Writing tensor output.weight | size 32000 x 4096 | type Q8_0 | T+ 266
[279/291] Writing tensor blk.30.attn_norm.weight | size 4096 | type F32 | T+ 266
[280/291] Writing tensor blk.30.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 266
[281/291] Writing tensor blk.30.ffn_norm.weight | size 4096 | type F32 | T+ 266
[282/291] Writing tensor blk.31.attn_norm.weight | size 4096 | type F32 | T+ 266
[283/291] Writing tensor blk.31.ffn_down.weight | size 4096 x 14336 | type Q8_0 | T+ 266
[284/291] Writing tensor blk.31.ffn_gate.weight | size 14336 x 4096 | type Q8_0 | T+ 266
[285/291] Writing tensor blk.31.ffn_up.weight | size 14336 x 4096 | type Q8_0 | T+ 266
[286/291] Writing tensor blk.31.ffn_norm.weight | size 4096 | type F32 | T+ 267
[287/291] Writing tensor blk.31.attn_k.weight | size 1024 x 4096 | type Q8_0 | T+ 267
[288/291] Writing tensor blk.31.attn_output.weight | size 4096 x 4096 | type Q8_0 | T+ 267
[289/291] Writing tensor blk.31.attn_q.weight | size 4096 x 4096 | type Q8_0 | T+ 267
[290/291] Writing tensor blk.31.attn_v.weight | size 1024 x 4096 | type Q8_0 | T+ 267
[291/291] Writing tensor output_norm.weight | size 4096 | type F32 | T+ 267
Wrote zephyr_int8.gguf
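The resulting zephyr_int8.gguf loads in any GGUF-aware runtime. As a quick smoke test, here is a minimal sketch using the llama-cpp-python bindings (an assumption on my part; the transcript above doesn't use them, and `pip install llama-cpp-python` is required). The prompt is hand-formatted in the <|system|>/<|user|>/<|assistant|> style that the embedded chat template encodes, with each turn closed by the EOS token (</s>, id 2 in the log above):

```python
# Minimal smoke test for the converted model. llama-cpp-python is an
# assumption, not something the transcript above installs or uses.
from llama_cpp import Llama

# Load the Q8_0 GGUF written by convert.py. n_ctx is kept far below the
# model's 32768-token training context (n_ctx in the params line) to
# keep memory use modest for a quick test.
llm = Llama(model_path="zephyr_int8.gguf", n_ctx=2048)

# Zephyr-style chat formatting: role markers, each turn ended by </s>.
prompt = (
    "<|system|>\nYou are a helpful assistant.</s>\n"
    "<|user|>\nWhat does Q8_0 quantization trade off?</s>\n"
    "<|assistant|>\n"
)

out = llm(prompt, max_tokens=128, stop=["</s>"])
print(out["choices"][0]["text"])
```

The same file also runs directly with llama.cpp's own CLI, e.g. ./main -m zephyr_int8.gguf -p "..." -n 128, which uses no Python at all.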