diff --git a/Llama-3.3-70B-Instruct/ll_4bit/README.md b/Llama-3.3-70B-Instruct/ll_4bit/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..69b948e165edf9328192beee7bbd9b0f12b987f6
--- /dev/null
+++ b/Llama-3.3-70B-Instruct/ll_4bit/README.md
@@ -0,0 +1,7 @@
+# Quantized Model Checkpoint
+
+**Base model:** meta-llama/Llama-3.3-70B-Instruct
+
+**Average bitwidth:** 4
+
+See `humming_online_quant_config.json` for full configuration details.
diff --git a/Llama-3.3-70B-Instruct/ll_4bit/chat_template.jinja b/Llama-3.3-70B-Instruct/ll_4bit/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090
--- /dev/null
+++ b/Llama-3.3-70B-Instruct/ll_4bit/chat_template.jinja
@@ -0,0 +1,109 @@
+{{- bos_token }}
+{%- if custom_tools is defined %}
+ {%- set tools = custom_tools %}
+{%- endif %}
+{%- if not tools_in_user_message is defined %}
+ {%- set tools_in_user_message = true %}
+{%- endif %}
+{%- if not date_string is defined %}
+ {%- set date_string = "26 Jul 2024" %}
+{%- endif %}
+{%- if not tools is defined %}
+ {%- set tools = none %}
+{%- endif %}
+
+{#- This block extracts the system message, so we can slot it into the right place. #}
+{%- if messages[0]['role'] == 'system' %}
+ {%- set system_message = messages[0]['content']|trim %}
+ {%- set messages = messages[1:] %}
+{%- else %}
+ {%- set system_message = "" %}
+{%- endif %}
+
+{#- System message + builtin tools #}
+{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
+{%- if builtin_tools is defined or tools is not none %}
+ {{- "Environment: ipython\n" }}
+{%- endif %}
+{%- if builtin_tools is defined %}
+ {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}}
+{%- endif %}
+{{- "Cutting Knowledge Date: December 2023\n" }}
+{{- "Today Date: " + date_string + "\n\n" }}
+{%- if tools is not none and not tools_in_user_message %}
+ {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
+ {{- "Do not use variables.\n\n" }}
+ {%- for t in tools %}
+ {{- t | tojson(indent=4) }}
+ {{- "\n\n" }}
+ {%- endfor %}
+{%- endif %}
+{{- system_message }}
+{{- "<|eot_id|>" }}
+
+{#- Custom tools are passed in a user message with some extra guidance #}
+{%- if tools_in_user_message and not tools is none %}
+ {#- Extract the first user message so we can plug it in here #}
+ {%- if messages | length != 0 %}
+ {%- set first_user_message = messages[0]['content']|trim %}
+ {%- set messages = messages[1:] %}
+ {%- else %}
+ {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
+    {%- endif %}
+ {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
+ {{- "Given the following functions, please respond with a JSON for a function call " }}
+ {{- "with its proper arguments that best answers the given prompt.\n\n" }}
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
+ {{- "Do not use variables.\n\n" }}
+ {%- for t in tools %}
+ {{- t | tojson(indent=4) }}
+ {{- "\n\n" }}
+ {%- endfor %}
+ {{- first_user_message + "<|eot_id|>"}}
+{%- endif %}
+
+{%- for message in messages %}
+ {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
+ {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
+ {%- elif 'tool_calls' in message %}
+ {%- if not message.tool_calls|length == 1 %}
+ {{- raise_exception("This model only supports single tool-calls at once!") }}
+ {%- endif %}
+ {%- set tool_call = message.tool_calls[0].function %}
+ {%- if builtin_tools is defined and tool_call.name in builtin_tools %}
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
+ {{- "<|python_tag|>" + tool_call.name + ".call(" }}
+ {%- for arg_name, arg_val in tool_call.arguments | items %}
+ {{- arg_name + '="' + arg_val + '"' }}
+ {%- if not loop.last %}
+ {{- ", " }}
+ {%- endif %}
+ {%- endfor %}
+ {{- ")" }}
+ {%- else %}
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
+ {{- '{"name": "' + tool_call.name + '", ' }}
+ {{- '"parameters": ' }}
+ {{- tool_call.arguments | tojson }}
+ {{- "}" }}
+ {%- endif %}
+ {%- if builtin_tools is defined %}
+ {#- This means we're in ipython mode #}
+ {{- "<|eom_id|>" }}
+ {%- else %}
+ {{- "<|eot_id|>" }}
+ {%- endif %}
+ {%- elif message.role == "tool" or message.role == "ipython" %}
+ {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
+ {%- if message.content is mapping or message.content is iterable %}
+ {{- message.content | tojson }}
+ {%- else %}
+ {{- message.content }}
+ {%- endif %}
+ {{- "<|eot_id|>" }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
+{%- endif %}
diff --git a/Llama-3.3-70B-Instruct/ll_4bit/config.json b/Llama-3.3-70B-Instruct/ll_4bit/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..e63a7035337dcb4b48fd88eaa1ae72ee7ddcf27a
--- /dev/null
+++ b/Llama-3.3-70B-Instruct/ll_4bit/config.json
@@ -0,0 +1,40 @@
+{
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "dtype": "float16",
+ "eos_token_id": [
+ 128001,
+ 128008,
+ 128009
+ ],
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 8192,
+ "initializer_range": 0.02,
+ "intermediate_size": 28672,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 64,
+ "num_hidden_layers": 80,
+ "num_key_value_heads": 8,
+ "pad_token_id": null,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_parameters": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_theta": 500000.0,
+ "rope_type": "llama3"
+ },
+ "tie_word_embeddings": false,
+ "transformers_version": "5.3.0",
+ "use_cache": false,
+ "vocab_size": 128256
+}
diff --git a/Llama-3.3-70B-Instruct/ll_4bit/generation_config.json b/Llama-3.3-70B-Instruct/ll_4bit/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..46631145bc4c440dabc4b8b3ae4bad43f1f7d99d
--- /dev/null
+++ b/Llama-3.3-70B-Instruct/ll_4bit/generation_config.json
@@ -0,0 +1,12 @@
+{
+ "bos_token_id": 128000,
+ "do_sample": true,
+ "eos_token_id": [
+ 128001,
+ 128008,
+ 128009
+ ],
+ "temperature": 0.6,
+ "top_p": 0.9,
+ "transformers_version": "5.3.0"
+}
diff --git a/Llama-3.3-70B-Instruct/ll_4bit/humming_online_quant_config.json b/Llama-3.3-70B-Instruct/ll_4bit/humming_online_quant_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..71f9c5cc25ef014276e683aa28267a8997891b97
--- /dev/null
+++ b/Llama-3.3-70B-Instruct/ll_4bit/humming_online_quant_config.json
@@ -0,0 +1,5 @@
+{
+ "quant_method": "gptq",
+ "bits": 4,
+ "group_size": 128
+}
diff --git a/Llama-3.3-70B-Instruct/ll_4bit/model.safetensors.index.json b/Llama-3.3-70B-Instruct/ll_4bit/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..1a01e223969ce58bab528610d7b8078cd1f49cd1
--- /dev/null
+++ b/Llama-3.3-70B-Instruct/ll_4bit/model.safetensors.index.json
@@ -0,0 +1,731 @@
+{
+ "metadata": {
+ "total_parameters": 70553706496,
+ "total_size": 141107412992
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00001-of-00003.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.10.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.11.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.11.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.12.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.12.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.13.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.13.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.14.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.14.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.15.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.15.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.16.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.16.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.17.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.17.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.18.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.18.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.19.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.19.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.20.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.20.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.21.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.21.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.21.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.22.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.22.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.22.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.23.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.23.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.23.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.24.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.24.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.24.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.25.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.25.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.25.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.26.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.26.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.26.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.27.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.27.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.28.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.28.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.28.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.28.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.28.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.28.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.29.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.29.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.29.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.30.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.30.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.30.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.31.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.31.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.31.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.32.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.32.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.32.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.32.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.32.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.32.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.32.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.32.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.32.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.33.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.33.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.33.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.33.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.33.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.33.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.33.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.33.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.33.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.34.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.34.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.34.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.34.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.34.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.34.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.34.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.34.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.34.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.35.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.35.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.35.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.35.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.35.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.35.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.35.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.35.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.35.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.36.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.36.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.36.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.36.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.36.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.36.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.36.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.36.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.36.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.37.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.37.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.37.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.37.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.37.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.37.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.37.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.37.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.37.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.38.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.38.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.38.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.38.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.38.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.38.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.38.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.38.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.38.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.39.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.39.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.39.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.39.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.39.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.39.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.39.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.39.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.39.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.40.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.40.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.40.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.40.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.40.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.40.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.40.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.40.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.40.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.41.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.41.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.41.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.41.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.41.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.41.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.41.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.41.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.41.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.42.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.42.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.42.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.42.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.42.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.42.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.42.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.42.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.42.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.43.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.43.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.43.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.43.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.43.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.43.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.43.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.43.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.43.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.44.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.44.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.44.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.44.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.44.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.44.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.44.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.44.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.44.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.45.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.45.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.45.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.45.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.45.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.45.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.45.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.45.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.45.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.46.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.46.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.46.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.46.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.46.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.46.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.46.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.46.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.46.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.47.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.47.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.47.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.47.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.47.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.47.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.47.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.47.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.47.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.48.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.48.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.48.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.48.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.48.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.48.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.48.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.48.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.48.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.49.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.49.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.49.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.49.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.49.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.49.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.49.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.49.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.49.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.50.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.50.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.50.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.50.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.50.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.50.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.50.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.50.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.50.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.51.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.51.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.51.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.51.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.51.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.51.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.51.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.51.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.51.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.52.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.52.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.52.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.52.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.52.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.52.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.52.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.52.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.52.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.53.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.53.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.53.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.53.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.53.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.53.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.53.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.53.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.53.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.54.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.54.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.54.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.54.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.54.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.54.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.54.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.54.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.54.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.55.input_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.55.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.55.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.55.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.55.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.55.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.55.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.55.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.55.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.56.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.56.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.56.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.56.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.56.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.56.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.56.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.56.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.56.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.57.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.57.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.57.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.57.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.57.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.57.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.57.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.57.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.57.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.58.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.58.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.58.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.58.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.58.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.58.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.58.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.58.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.58.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.59.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.59.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.59.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.59.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.59.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.59.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.59.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.59.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.59.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.60.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.60.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.60.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.60.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.60.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.60.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.60.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.60.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.60.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.61.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.61.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.61.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.61.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.61.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.61.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.61.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.61.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.61.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.62.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.62.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.62.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.62.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.62.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.62.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.62.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.62.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.62.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.63.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.63.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.63.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.63.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.63.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.63.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.63.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.63.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.63.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.64.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.64.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.64.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.64.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.64.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.64.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.64.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.64.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.64.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.65.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.65.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.65.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.65.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.65.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.65.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.65.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.65.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.65.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.66.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.66.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.66.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.66.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.66.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.66.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.66.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.66.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.66.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.67.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.67.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.67.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.67.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.67.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.67.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.67.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.67.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.67.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.68.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.68.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.68.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.68.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.68.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.68.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.68.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.68.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.68.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.69.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.69.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.69.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.69.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.69.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.69.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.69.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.69.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.69.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.70.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.70.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.70.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.70.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.70.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.70.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.70.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.70.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.70.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.71.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.71.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.71.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.71.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.71.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.71.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.71.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.71.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.71.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.72.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.72.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.72.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.72.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.72.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.72.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.72.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.72.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.72.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.73.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.73.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.73.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.73.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.73.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.73.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.73.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.73.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.73.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.74.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.74.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.74.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.74.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.74.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.74.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.74.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.74.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.74.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.75.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.75.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.75.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.75.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.75.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.75.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.75.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.75.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.75.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.76.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.76.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.76.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.76.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.76.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.76.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.76.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.76.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.76.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.77.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.77.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.77.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.77.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.77.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.77.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.77.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.77.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.77.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.78.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.78.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.78.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.78.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.78.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.78.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.78.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.78.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.78.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.79.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.79.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.79.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.79.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.79.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.79.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.79.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.79.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.79.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.norm.weight": "model-00003-of-00003.safetensors"
+ }
+}
diff --git a/Llama-3.3-70B-Instruct/ll_4bit/quantization_config.txt b/Llama-3.3-70B-Instruct/ll_4bit/quantization_config.txt
new file mode 100644
index 0000000000000000000000000000000000000000..94e66b961c3965d5a9ca23282d8c7aa7f2c7ea57
--- /dev/null
+++ b/Llama-3.3-70B-Instruct/ll_4bit/quantization_config.txt
@@ -0,0 +1,560 @@
+model.layers.0.self_attn.q_proj: 4
+model.layers.0.self_attn.k_proj: 4
+model.layers.0.self_attn.v_proj: 4
+model.layers.0.self_attn.o_proj: 4
+model.layers.0.mlp.gate_proj: 4
+model.layers.0.mlp.up_proj: 4
+model.layers.0.mlp.down_proj: 4
+model.layers.1.self_attn.q_proj: 4
+model.layers.1.self_attn.k_proj: 4
+model.layers.1.self_attn.v_proj: 4
+model.layers.1.self_attn.o_proj: 4
+model.layers.1.mlp.gate_proj: 4
+model.layers.1.mlp.up_proj: 4
+model.layers.1.mlp.down_proj: 4
+model.layers.2.self_attn.q_proj: 4
+model.layers.2.self_attn.k_proj: 4
+model.layers.2.self_attn.v_proj: 4
+model.layers.2.self_attn.o_proj: 4
+model.layers.2.mlp.gate_proj: 4
+model.layers.2.mlp.up_proj: 4
+model.layers.2.mlp.down_proj: 4
+model.layers.3.self_attn.q_proj: 4
+model.layers.3.self_attn.k_proj: 4
+model.layers.3.self_attn.v_proj: 4
+model.layers.3.self_attn.o_proj: 4
+model.layers.3.mlp.gate_proj: 4
+model.layers.3.mlp.up_proj: 4
+model.layers.3.mlp.down_proj: 4
+model.layers.4.self_attn.q_proj: 4
+model.layers.4.self_attn.k_proj: 4
+model.layers.4.self_attn.v_proj: 4
+model.layers.4.self_attn.o_proj: 4
+model.layers.4.mlp.gate_proj: 4
+model.layers.4.mlp.up_proj: 4
+model.layers.4.mlp.down_proj: 4
+model.layers.5.self_attn.q_proj: 4
+model.layers.5.self_attn.k_proj: 4
+model.layers.5.self_attn.v_proj: 4
+model.layers.5.self_attn.o_proj: 4
+model.layers.5.mlp.gate_proj: 4
+model.layers.5.mlp.up_proj: 4
+model.layers.5.mlp.down_proj: 4
+model.layers.6.self_attn.q_proj: 4
+model.layers.6.self_attn.k_proj: 4
+model.layers.6.self_attn.v_proj: 4
+model.layers.6.self_attn.o_proj: 4
+model.layers.6.mlp.gate_proj: 4
+model.layers.6.mlp.up_proj: 4
+model.layers.6.mlp.down_proj: 4
+model.layers.7.self_attn.q_proj: 4
+model.layers.7.self_attn.k_proj: 4
+model.layers.7.self_attn.v_proj: 4
+model.layers.7.self_attn.o_proj: 4
+model.layers.7.mlp.gate_proj: 4
+model.layers.7.mlp.up_proj: 4
+model.layers.7.mlp.down_proj: 4
+model.layers.8.self_attn.q_proj: 4
+model.layers.8.self_attn.k_proj: 4
+model.layers.8.self_attn.v_proj: 4
+model.layers.8.self_attn.o_proj: 4
+model.layers.8.mlp.gate_proj: 4
+model.layers.8.mlp.up_proj: 4
+model.layers.8.mlp.down_proj: 4
+model.layers.9.self_attn.q_proj: 4
+model.layers.9.self_attn.k_proj: 4
+model.layers.9.self_attn.v_proj: 4
+model.layers.9.self_attn.o_proj: 4
+model.layers.9.mlp.gate_proj: 4
+model.layers.9.mlp.up_proj: 4
+model.layers.9.mlp.down_proj: 4
+model.layers.10.self_attn.q_proj: 4
+model.layers.10.self_attn.k_proj: 4
+model.layers.10.self_attn.v_proj: 4
+model.layers.10.self_attn.o_proj: 4
+model.layers.10.mlp.gate_proj: 4
+model.layers.10.mlp.up_proj: 4
+model.layers.10.mlp.down_proj: 4
+model.layers.11.self_attn.q_proj: 4
+model.layers.11.self_attn.k_proj: 4
+model.layers.11.self_attn.v_proj: 4
+model.layers.11.self_attn.o_proj: 4
+model.layers.11.mlp.gate_proj: 4
+model.layers.11.mlp.up_proj: 4
+model.layers.11.mlp.down_proj: 4
+model.layers.12.self_attn.q_proj: 4
+model.layers.12.self_attn.k_proj: 4
+model.layers.12.self_attn.v_proj: 4
+model.layers.12.self_attn.o_proj: 4
+model.layers.12.mlp.gate_proj: 4
+model.layers.12.mlp.up_proj: 4
+model.layers.12.mlp.down_proj: 4
+model.layers.13.self_attn.q_proj: 4
+model.layers.13.self_attn.k_proj: 4
+model.layers.13.self_attn.v_proj: 4
+model.layers.13.self_attn.o_proj: 4
+model.layers.13.mlp.gate_proj: 4
+model.layers.13.mlp.up_proj: 4
+model.layers.13.mlp.down_proj: 4
+model.layers.14.self_attn.q_proj: 4
+model.layers.14.self_attn.k_proj: 4
+model.layers.14.self_attn.v_proj: 4
+model.layers.14.self_attn.o_proj: 4
+model.layers.14.mlp.gate_proj: 4
+model.layers.14.mlp.up_proj: 4
+model.layers.14.mlp.down_proj: 4
+model.layers.15.self_attn.q_proj: 4
+model.layers.15.self_attn.k_proj: 4
+model.layers.15.self_attn.v_proj: 4
+model.layers.15.self_attn.o_proj: 4
+model.layers.15.mlp.gate_proj: 4
+model.layers.15.mlp.up_proj: 4
+model.layers.15.mlp.down_proj: 4
+model.layers.16.self_attn.q_proj: 4
+model.layers.16.self_attn.k_proj: 4
+model.layers.16.self_attn.v_proj: 4
+model.layers.16.self_attn.o_proj: 4
+model.layers.16.mlp.gate_proj: 4
+model.layers.16.mlp.up_proj: 4
+model.layers.16.mlp.down_proj: 4
+model.layers.17.self_attn.q_proj: 4
+model.layers.17.self_attn.k_proj: 4
+model.layers.17.self_attn.v_proj: 4
+model.layers.17.self_attn.o_proj: 4
+model.layers.17.mlp.gate_proj: 4
+model.layers.17.mlp.up_proj: 4
+model.layers.17.mlp.down_proj: 4
+model.layers.18.self_attn.q_proj: 4
+model.layers.18.self_attn.k_proj: 4
+model.layers.18.self_attn.v_proj: 4
+model.layers.18.self_attn.o_proj: 4
+model.layers.18.mlp.gate_proj: 4
+model.layers.18.mlp.up_proj: 4
+model.layers.18.mlp.down_proj: 4
+model.layers.19.self_attn.q_proj: 4
+model.layers.19.self_attn.k_proj: 4
+model.layers.19.self_attn.v_proj: 4
+model.layers.19.self_attn.o_proj: 4
+model.layers.19.mlp.gate_proj: 4
+model.layers.19.mlp.up_proj: 4
+model.layers.19.mlp.down_proj: 4
+model.layers.20.self_attn.q_proj: 4
+model.layers.20.self_attn.k_proj: 4
+model.layers.20.self_attn.v_proj: 4
+model.layers.20.self_attn.o_proj: 4
+model.layers.20.mlp.gate_proj: 4
+model.layers.20.mlp.up_proj: 4
+model.layers.20.mlp.down_proj: 4
+model.layers.21.self_attn.q_proj: 4
+model.layers.21.self_attn.k_proj: 4
+model.layers.21.self_attn.v_proj: 4
+model.layers.21.self_attn.o_proj: 4
+model.layers.21.mlp.gate_proj: 4
+model.layers.21.mlp.up_proj: 4
+model.layers.21.mlp.down_proj: 4
+model.layers.22.self_attn.q_proj: 4
+model.layers.22.self_attn.k_proj: 4
+model.layers.22.self_attn.v_proj: 4
+model.layers.22.self_attn.o_proj: 4
+model.layers.22.mlp.gate_proj: 4
+model.layers.22.mlp.up_proj: 4
+model.layers.22.mlp.down_proj: 4
+model.layers.23.self_attn.q_proj: 4
+model.layers.23.self_attn.k_proj: 4
+model.layers.23.self_attn.v_proj: 4
+model.layers.23.self_attn.o_proj: 4
+model.layers.23.mlp.gate_proj: 4
+model.layers.23.mlp.up_proj: 4
+model.layers.23.mlp.down_proj: 4
+model.layers.24.self_attn.q_proj: 4
+model.layers.24.self_attn.k_proj: 4
+model.layers.24.self_attn.v_proj: 4
+model.layers.24.self_attn.o_proj: 4
+model.layers.24.mlp.gate_proj: 4
+model.layers.24.mlp.up_proj: 4
+model.layers.24.mlp.down_proj: 4
+model.layers.25.self_attn.q_proj: 4
+model.layers.25.self_attn.k_proj: 4
+model.layers.25.self_attn.v_proj: 4
+model.layers.25.self_attn.o_proj: 4
+model.layers.25.mlp.gate_proj: 4
+model.layers.25.mlp.up_proj: 4
+model.layers.25.mlp.down_proj: 4
+model.layers.26.self_attn.q_proj: 4
+model.layers.26.self_attn.k_proj: 4
+model.layers.26.self_attn.v_proj: 4
+model.layers.26.self_attn.o_proj: 4
+model.layers.26.mlp.gate_proj: 4
+model.layers.26.mlp.up_proj: 4
+model.layers.26.mlp.down_proj: 4
+model.layers.27.self_attn.q_proj: 4
+model.layers.27.self_attn.k_proj: 4
+model.layers.27.self_attn.v_proj: 4
+model.layers.27.self_attn.o_proj: 4
+model.layers.27.mlp.gate_proj: 4
+model.layers.27.mlp.up_proj: 4
+model.layers.27.mlp.down_proj: 4
+model.layers.28.self_attn.q_proj: 4
+model.layers.28.self_attn.k_proj: 4
+model.layers.28.self_attn.v_proj: 4
+model.layers.28.self_attn.o_proj: 4
+model.layers.28.mlp.gate_proj: 4
+model.layers.28.mlp.up_proj: 4
+model.layers.28.mlp.down_proj: 4
+model.layers.29.self_attn.q_proj: 4
+model.layers.29.self_attn.k_proj: 4
+model.layers.29.self_attn.v_proj: 4
+model.layers.29.self_attn.o_proj: 4
+model.layers.29.mlp.gate_proj: 4
+model.layers.29.mlp.up_proj: 4
+model.layers.29.mlp.down_proj: 4
+model.layers.30.self_attn.q_proj: 4
+model.layers.30.self_attn.k_proj: 4
+model.layers.30.self_attn.v_proj: 4
+model.layers.30.self_attn.o_proj: 4
+model.layers.30.mlp.gate_proj: 4
+model.layers.30.mlp.up_proj: 4
+model.layers.30.mlp.down_proj: 4
+model.layers.31.self_attn.q_proj: 4
+model.layers.31.self_attn.k_proj: 4
+model.layers.31.self_attn.v_proj: 4
+model.layers.31.self_attn.o_proj: 4
+model.layers.31.mlp.gate_proj: 4
+model.layers.31.mlp.up_proj: 4
+model.layers.31.mlp.down_proj: 4
+model.layers.32.self_attn.q_proj: 4
+model.layers.32.self_attn.k_proj: 4
+model.layers.32.self_attn.v_proj: 4
+model.layers.32.self_attn.o_proj: 4
+model.layers.32.mlp.gate_proj: 4
+model.layers.32.mlp.up_proj: 4
+model.layers.32.mlp.down_proj: 4
+model.layers.33.self_attn.q_proj: 4
+model.layers.33.self_attn.k_proj: 4
+model.layers.33.self_attn.v_proj: 4
+model.layers.33.self_attn.o_proj: 4
+model.layers.33.mlp.gate_proj: 4
+model.layers.33.mlp.up_proj: 4
+model.layers.33.mlp.down_proj: 4
+model.layers.34.self_attn.q_proj: 4
+model.layers.34.self_attn.k_proj: 4
+model.layers.34.self_attn.v_proj: 4
+model.layers.34.self_attn.o_proj: 4
+model.layers.34.mlp.gate_proj: 4
+model.layers.34.mlp.up_proj: 4
+model.layers.34.mlp.down_proj: 4
+model.layers.35.self_attn.q_proj: 4
+model.layers.35.self_attn.k_proj: 4
+model.layers.35.self_attn.v_proj: 4
+model.layers.35.self_attn.o_proj: 4
+model.layers.35.mlp.gate_proj: 4
+model.layers.35.mlp.up_proj: 4
+model.layers.35.mlp.down_proj: 4
+model.layers.36.self_attn.q_proj: 4
+model.layers.36.self_attn.k_proj: 4
+model.layers.36.self_attn.v_proj: 4
+model.layers.36.self_attn.o_proj: 4
+model.layers.36.mlp.gate_proj: 4
+model.layers.36.mlp.up_proj: 4
+model.layers.36.mlp.down_proj: 4
+model.layers.37.self_attn.q_proj: 4
+model.layers.37.self_attn.k_proj: 4
+model.layers.37.self_attn.v_proj: 4
+model.layers.37.self_attn.o_proj: 4
+model.layers.37.mlp.gate_proj: 4
+model.layers.37.mlp.up_proj: 4
+model.layers.37.mlp.down_proj: 4
+model.layers.38.self_attn.q_proj: 4
+model.layers.38.self_attn.k_proj: 4
+model.layers.38.self_attn.v_proj: 4
+model.layers.38.self_attn.o_proj: 4
+model.layers.38.mlp.gate_proj: 4
+model.layers.38.mlp.up_proj: 4
+model.layers.38.mlp.down_proj: 4
+model.layers.39.self_attn.q_proj: 4
+model.layers.39.self_attn.k_proj: 4
+model.layers.39.self_attn.v_proj: 4
+model.layers.39.self_attn.o_proj: 4
+model.layers.39.mlp.gate_proj: 4
+model.layers.39.mlp.up_proj: 4
+model.layers.39.mlp.down_proj: 4
+model.layers.40.self_attn.q_proj: 4
+model.layers.40.self_attn.k_proj: 4
+model.layers.40.self_attn.v_proj: 4
+model.layers.40.self_attn.o_proj: 4
+model.layers.40.mlp.gate_proj: 4
+model.layers.40.mlp.up_proj: 4
+model.layers.40.mlp.down_proj: 4
+model.layers.41.self_attn.q_proj: 4
+model.layers.41.self_attn.k_proj: 4
+model.layers.41.self_attn.v_proj: 4
+model.layers.41.self_attn.o_proj: 4
+model.layers.41.mlp.gate_proj: 4
+model.layers.41.mlp.up_proj: 4
+model.layers.41.mlp.down_proj: 4
+model.layers.42.self_attn.q_proj: 4
+model.layers.42.self_attn.k_proj: 4
+model.layers.42.self_attn.v_proj: 4
+model.layers.42.self_attn.o_proj: 4
+model.layers.42.mlp.gate_proj: 4
+model.layers.42.mlp.up_proj: 4
+model.layers.42.mlp.down_proj: 4
+model.layers.43.self_attn.q_proj: 4
+model.layers.43.self_attn.k_proj: 4
+model.layers.43.self_attn.v_proj: 4
+model.layers.43.self_attn.o_proj: 4
+model.layers.43.mlp.gate_proj: 4
+model.layers.43.mlp.up_proj: 4
+model.layers.43.mlp.down_proj: 4
+model.layers.44.self_attn.q_proj: 4
+model.layers.44.self_attn.k_proj: 4
+model.layers.44.self_attn.v_proj: 4
+model.layers.44.self_attn.o_proj: 4
+model.layers.44.mlp.gate_proj: 4
+model.layers.44.mlp.up_proj: 4
+model.layers.44.mlp.down_proj: 4
+model.layers.45.self_attn.q_proj: 4
+model.layers.45.self_attn.k_proj: 4
+model.layers.45.self_attn.v_proj: 4
+model.layers.45.self_attn.o_proj: 4
+model.layers.45.mlp.gate_proj: 4
+model.layers.45.mlp.up_proj: 4
+model.layers.45.mlp.down_proj: 4
+model.layers.46.self_attn.q_proj: 4
+model.layers.46.self_attn.k_proj: 4
+model.layers.46.self_attn.v_proj: 4
+model.layers.46.self_attn.o_proj: 4
+model.layers.46.mlp.gate_proj: 4
+model.layers.46.mlp.up_proj: 4
+model.layers.46.mlp.down_proj: 4
+model.layers.47.self_attn.q_proj: 4
+model.layers.47.self_attn.k_proj: 4
+model.layers.47.self_attn.v_proj: 4
+model.layers.47.self_attn.o_proj: 4
+model.layers.47.mlp.gate_proj: 4
+model.layers.47.mlp.up_proj: 4
+model.layers.47.mlp.down_proj: 4
+model.layers.48.self_attn.q_proj: 4
+model.layers.48.self_attn.k_proj: 4
+model.layers.48.self_attn.v_proj: 4
+model.layers.48.self_attn.o_proj: 4
+model.layers.48.mlp.gate_proj: 4
+model.layers.48.mlp.up_proj: 4
+model.layers.48.mlp.down_proj: 4
+model.layers.49.self_attn.q_proj: 4
+model.layers.49.self_attn.k_proj: 4
+model.layers.49.self_attn.v_proj: 4
+model.layers.49.self_attn.o_proj: 4
+model.layers.49.mlp.gate_proj: 4
+model.layers.49.mlp.up_proj: 4
+model.layers.49.mlp.down_proj: 4
+model.layers.50.self_attn.q_proj: 4
+model.layers.50.self_attn.k_proj: 4
+model.layers.50.self_attn.v_proj: 4
+model.layers.50.self_attn.o_proj: 4
+model.layers.50.mlp.gate_proj: 4
+model.layers.50.mlp.up_proj: 4
+model.layers.50.mlp.down_proj: 4
+model.layers.51.self_attn.q_proj: 4
+model.layers.51.self_attn.k_proj: 4
+model.layers.51.self_attn.v_proj: 4
+model.layers.51.self_attn.o_proj: 4
+model.layers.51.mlp.gate_proj: 4
+model.layers.51.mlp.up_proj: 4
+model.layers.51.mlp.down_proj: 4
+model.layers.52.self_attn.q_proj: 4
+model.layers.52.self_attn.k_proj: 4
+model.layers.52.self_attn.v_proj: 4
+model.layers.52.self_attn.o_proj: 4
+model.layers.52.mlp.gate_proj: 4
+model.layers.52.mlp.up_proj: 4
+model.layers.52.mlp.down_proj: 4
+model.layers.53.self_attn.q_proj: 4
+model.layers.53.self_attn.k_proj: 4
+model.layers.53.self_attn.v_proj: 4
+model.layers.53.self_attn.o_proj: 4
+model.layers.53.mlp.gate_proj: 4
+model.layers.53.mlp.up_proj: 4
+model.layers.53.mlp.down_proj: 4
+model.layers.54.self_attn.q_proj: 4
+model.layers.54.self_attn.k_proj: 4
+model.layers.54.self_attn.v_proj: 4
+model.layers.54.self_attn.o_proj: 4
+model.layers.54.mlp.gate_proj: 4
+model.layers.54.mlp.up_proj: 4
+model.layers.54.mlp.down_proj: 4
+model.layers.55.self_attn.q_proj: 4
+model.layers.55.self_attn.k_proj: 4
+model.layers.55.self_attn.v_proj: 4
+model.layers.55.self_attn.o_proj: 4
+model.layers.55.mlp.gate_proj: 4
+model.layers.55.mlp.up_proj: 4
+model.layers.55.mlp.down_proj: 4
+model.layers.56.self_attn.q_proj: 4
+model.layers.56.self_attn.k_proj: 4
+model.layers.56.self_attn.v_proj: 4
+model.layers.56.self_attn.o_proj: 4
+model.layers.56.mlp.gate_proj: 4
+model.layers.56.mlp.up_proj: 4
+model.layers.56.mlp.down_proj: 4
+model.layers.57.self_attn.q_proj: 4
+model.layers.57.self_attn.k_proj: 4
+model.layers.57.self_attn.v_proj: 4
+model.layers.57.self_attn.o_proj: 4
+model.layers.57.mlp.gate_proj: 4
+model.layers.57.mlp.up_proj: 4
+model.layers.57.mlp.down_proj: 4
+model.layers.58.self_attn.q_proj: 4
+model.layers.58.self_attn.k_proj: 4
+model.layers.58.self_attn.v_proj: 4
+model.layers.58.self_attn.o_proj: 4
+model.layers.58.mlp.gate_proj: 4
+model.layers.58.mlp.up_proj: 4
+model.layers.58.mlp.down_proj: 4
+model.layers.59.self_attn.q_proj: 4
+model.layers.59.self_attn.k_proj: 4
+model.layers.59.self_attn.v_proj: 4
+model.layers.59.self_attn.o_proj: 4
+model.layers.59.mlp.gate_proj: 4
+model.layers.59.mlp.up_proj: 4
+model.layers.59.mlp.down_proj: 4
+model.layers.60.self_attn.q_proj: 4
+model.layers.60.self_attn.k_proj: 4
+model.layers.60.self_attn.v_proj: 4
+model.layers.60.self_attn.o_proj: 4
+model.layers.60.mlp.gate_proj: 4
+model.layers.60.mlp.up_proj: 4
+model.layers.60.mlp.down_proj: 4
+model.layers.61.self_attn.q_proj: 4
+model.layers.61.self_attn.k_proj: 4
+model.layers.61.self_attn.v_proj: 4
+model.layers.61.self_attn.o_proj: 4
+model.layers.61.mlp.gate_proj: 4
+model.layers.61.mlp.up_proj: 4
+model.layers.61.mlp.down_proj: 4
+model.layers.62.self_attn.q_proj: 4
+model.layers.62.self_attn.k_proj: 4
+model.layers.62.self_attn.v_proj: 4
+model.layers.62.self_attn.o_proj: 4
+model.layers.62.mlp.gate_proj: 4
+model.layers.62.mlp.up_proj: 4
+model.layers.62.mlp.down_proj: 4
+model.layers.63.self_attn.q_proj: 4
+model.layers.63.self_attn.k_proj: 4
+model.layers.63.self_attn.v_proj: 4
+model.layers.63.self_attn.o_proj: 4
+model.layers.63.mlp.gate_proj: 4
+model.layers.63.mlp.up_proj: 4
+model.layers.63.mlp.down_proj: 4
+model.layers.64.self_attn.q_proj: 4
+model.layers.64.self_attn.k_proj: 4
+model.layers.64.self_attn.v_proj: 4
+model.layers.64.self_attn.o_proj: 4
+model.layers.64.mlp.gate_proj: 4
+model.layers.64.mlp.up_proj: 4
+model.layers.64.mlp.down_proj: 4
+model.layers.65.self_attn.q_proj: 4
+model.layers.65.self_attn.k_proj: 4
+model.layers.65.self_attn.v_proj: 4
+model.layers.65.self_attn.o_proj: 4
+model.layers.65.mlp.gate_proj: 4
+model.layers.65.mlp.up_proj: 4
+model.layers.65.mlp.down_proj: 4
+model.layers.66.self_attn.q_proj: 4
+model.layers.66.self_attn.k_proj: 4
+model.layers.66.self_attn.v_proj: 4
+model.layers.66.self_attn.o_proj: 4
+model.layers.66.mlp.gate_proj: 4
+model.layers.66.mlp.up_proj: 4
+model.layers.66.mlp.down_proj: 4
+model.layers.67.self_attn.q_proj: 4
+model.layers.67.self_attn.k_proj: 4
+model.layers.67.self_attn.v_proj: 4
+model.layers.67.self_attn.o_proj: 4
+model.layers.67.mlp.gate_proj: 4
+model.layers.67.mlp.up_proj: 4
+model.layers.67.mlp.down_proj: 4
+model.layers.68.self_attn.q_proj: 4
+model.layers.68.self_attn.k_proj: 4
+model.layers.68.self_attn.v_proj: 4
+model.layers.68.self_attn.o_proj: 4
+model.layers.68.mlp.gate_proj: 4
+model.layers.68.mlp.up_proj: 4
+model.layers.68.mlp.down_proj: 4
+model.layers.69.self_attn.q_proj: 4
+model.layers.69.self_attn.k_proj: 4
+model.layers.69.self_attn.v_proj: 4
+model.layers.69.self_attn.o_proj: 4
+model.layers.69.mlp.gate_proj: 4
+model.layers.69.mlp.up_proj: 4
+model.layers.69.mlp.down_proj: 4
+model.layers.70.self_attn.q_proj: 4
+model.layers.70.self_attn.k_proj: 4
+model.layers.70.self_attn.v_proj: 4
+model.layers.70.self_attn.o_proj: 4
+model.layers.70.mlp.gate_proj: 4
+model.layers.70.mlp.up_proj: 4
+model.layers.70.mlp.down_proj: 4
+model.layers.71.self_attn.q_proj: 4
+model.layers.71.self_attn.k_proj: 4
+model.layers.71.self_attn.v_proj: 4
+model.layers.71.self_attn.o_proj: 4
+model.layers.71.mlp.gate_proj: 4
+model.layers.71.mlp.up_proj: 4
+model.layers.71.mlp.down_proj: 4
+model.layers.72.self_attn.q_proj: 4
+model.layers.72.self_attn.k_proj: 4
+model.layers.72.self_attn.v_proj: 4
+model.layers.72.self_attn.o_proj: 4
+model.layers.72.mlp.gate_proj: 4
+model.layers.72.mlp.up_proj: 4
+model.layers.72.mlp.down_proj: 4
+model.layers.73.self_attn.q_proj: 4
+model.layers.73.self_attn.k_proj: 4
+model.layers.73.self_attn.v_proj: 4
+model.layers.73.self_attn.o_proj: 4
+model.layers.73.mlp.gate_proj: 4
+model.layers.73.mlp.up_proj: 4
+model.layers.73.mlp.down_proj: 4
+model.layers.74.self_attn.q_proj: 4
+model.layers.74.self_attn.k_proj: 4
+model.layers.74.self_attn.v_proj: 4
+model.layers.74.self_attn.o_proj: 4
+model.layers.74.mlp.gate_proj: 4
+model.layers.74.mlp.up_proj: 4
+model.layers.74.mlp.down_proj: 4
+model.layers.75.self_attn.q_proj: 4
+model.layers.75.self_attn.k_proj: 4
+model.layers.75.self_attn.v_proj: 4
+model.layers.75.self_attn.o_proj: 4
+model.layers.75.mlp.gate_proj: 4
+model.layers.75.mlp.up_proj: 4
+model.layers.75.mlp.down_proj: 4
+model.layers.76.self_attn.q_proj: 4
+model.layers.76.self_attn.k_proj: 4
+model.layers.76.self_attn.v_proj: 4
+model.layers.76.self_attn.o_proj: 4
+model.layers.76.mlp.gate_proj: 4
+model.layers.76.mlp.up_proj: 4
+model.layers.76.mlp.down_proj: 4
+model.layers.77.self_attn.q_proj: 4
+model.layers.77.self_attn.k_proj: 4
+model.layers.77.self_attn.v_proj: 4
+model.layers.77.self_attn.o_proj: 4
+model.layers.77.mlp.gate_proj: 4
+model.layers.77.mlp.up_proj: 4
+model.layers.77.mlp.down_proj: 4
+model.layers.78.self_attn.q_proj: 4
+model.layers.78.self_attn.k_proj: 4
+model.layers.78.self_attn.v_proj: 4
+model.layers.78.self_attn.o_proj: 4
+model.layers.78.mlp.gate_proj: 4
+model.layers.78.mlp.up_proj: 4
+model.layers.78.mlp.down_proj: 4
+model.layers.79.self_attn.q_proj: 4
+model.layers.79.self_attn.k_proj: 4
+model.layers.79.self_attn.v_proj: 4
+model.layers.79.self_attn.o_proj: 4
+model.layers.79.mlp.gate_proj: 4
+model.layers.79.mlp.up_proj: 4
+model.layers.79.mlp.down_proj: 4
diff --git a/Llama-3.3-70B-Instruct/ll_4bit/tokenizer_config.json b/Llama-3.3-70B-Instruct/ll_4bit/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8213417e51fa3bca8617f3231d8e41d17e398214
--- /dev/null
+++ b/Llama-3.3-70B-Instruct/ll_4bit/tokenizer_config.json
@@ -0,0 +1,14 @@
+{
+ "backend": "tokenizers",
+ "bos_token": "<|begin_of_text|>",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|eot_id|>",
+ "is_local": false,
+ "model_input_names": [
+ "input_ids",
+ "attention_mask"
+ ],
+ "model_max_length": 131072,
+ "pad_token": "<|finetune_right_pad_id|>",
+ "tokenizer_class": "TokenizersBackend"
+}
diff --git a/Qwen3-32B/ll_4bit/README.md b/Qwen3-32B/ll_4bit/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..69b948e165edf9328192beee7bbd9b0f12b987f6
--- /dev/null
+++ b/Qwen3-32B/ll_4bit/README.md
@@ -0,0 +1,7 @@
+# Quantized Model Checkpoint
+
+**Base model:** unknown
+
+**Average bitwidth:** unknown
+
+See `quantization_config.txt` for full configuration details.
diff --git a/Qwen3-32B/ll_4bit/chat_template.jinja b/Qwen3-32B/ll_4bit/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..01be9b307daa2d425f7c168c9fb145a286e0afb4
--- /dev/null
+++ b/Qwen3-32B/ll_4bit/chat_template.jinja
@@ -0,0 +1,89 @@
+{%- if tools %}
+ {{- '<|im_start|>system\n' }}
+ {%- if messages[0].role == 'system' %}
+ {{- messages[0].content + '\n\n' }}
+ {%- endif %}
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+ {%- if message.content is string %}
+ {%- set content = message.content %}
+ {%- else %}
+ {%- set content = '' %}
+ {%- endif %}
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '</think>' in content %}
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {%- if loop.last or (not loop.last and reasoning_content) %}
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if (loop.first and content) or (not loop.first) %}
+ {{- '\n' }}
+ {%- endif %}
+ {%- if tool_call.function %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {{- '<tool_call>\n{"name": "' }}
+ {{- tool_call.name }}
+ {{- '", "arguments": ' }}
+ {%- if tool_call.arguments is string %}
+ {{- tool_call.arguments }}
+ {%- else %}
+ {{- tool_call.arguments | tojson }}
+ {%- endif %}
+ {{- '}\n</tool_call>' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n<tool_response>\n' }}
+ {{- content }}
+ {{- '\n</tool_response>' }}
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is false %}
+ {{- '<think>\n\n</think>\n\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/Qwen3-32B/ll_4bit/config.json b/Qwen3-32B/ll_4bit/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..232e9bf5c353986af38d4bf9de4683158089b93c
--- /dev/null
+++ b/Qwen3-32B/ll_4bit/config.json
@@ -0,0 +1,99 @@
+{
+ "architectures": [
+ "Qwen3ForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 151643,
+ "dtype": "float16",
+ "eos_token_id": 151645,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 5120,
+ "initializer_range": 0.02,
+ "intermediate_size": 25600,
+ "layer_types": [
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention"
+ ],
+ "max_position_embeddings": 40960,
+ "max_window_layers": 64,
+ "model_type": "qwen3",
+ "num_attention_heads": 64,
+ "num_hidden_layers": 64,
+ "num_key_value_heads": 8,
+ "pad_token_id": null,
+ "rms_norm_eps": 1e-06,
+ "rope_parameters": {
+ "rope_theta": 1000000,
+ "rope_type": "default"
+ },
+ "sliding_window": null,
+ "tie_word_embeddings": false,
+ "transformers_version": "5.3.0",
+ "use_cache": false,
+ "use_sliding_window": false,
+ "vocab_size": 151936
+}
diff --git a/Qwen3-32B/ll_4bit/generation_config.json b/Qwen3-32B/ll_4bit/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cfb3c1a14c48fda643794324f7c126c3c83bcd3e
--- /dev/null
+++ b/Qwen3-32B/ll_4bit/generation_config.json
@@ -0,0 +1,13 @@
+{
+ "bos_token_id": 151643,
+ "do_sample": true,
+ "eos_token_id": [
+ 151645,
+ 151643
+ ],
+ "pad_token_id": 151643,
+ "temperature": 0.6,
+ "top_k": 20,
+ "top_p": 0.95,
+ "transformers_version": "5.3.0"
+}
diff --git a/Qwen3-32B/ll_4bit/humming_online_quant_config.json b/Qwen3-32B/ll_4bit/humming_online_quant_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..71f9c5cc25ef014276e683aa28267a8997891b97
--- /dev/null
+++ b/Qwen3-32B/ll_4bit/humming_online_quant_config.json
@@ -0,0 +1,5 @@
+{
+ "quant_method": "gptq",
+ "bits": 4,
+ "group_size": 128
+}
\ No newline at end of file
diff --git a/Qwen3-32B/ll_4bit/model.safetensors.index.json b/Qwen3-32B/ll_4bit/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..8b2a0b459d8cf056d279cb6405e9ac0770396d01
--- /dev/null
+++ b/Qwen3-32B/ll_4bit/model.safetensors.index.json
@@ -0,0 +1,715 @@
+{
+ "metadata": {
+ "total_parameters": 32762123264,
+ "total_size": 65524246528
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00001-of-00002.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.20.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.20.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.21.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.21.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.22.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.22.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.23.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.23.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.24.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.24.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.24.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.24.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.24.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.25.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.25.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.25.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.25.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.25.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.26.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.26.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.26.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.26.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.26.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.27.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.27.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.27.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.27.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.27.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.28.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.28.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.28.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.28.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.28.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.29.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.29.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.29.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.29.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.29.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.30.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.30.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.30.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.30.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.30.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.31.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.31.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.31.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.31.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.31.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.32.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.32.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.32.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.32.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.32.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.32.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.32.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.32.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.32.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.32.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.32.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.33.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.33.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.33.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.33.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.33.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.33.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.33.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.33.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.33.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.33.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.33.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.34.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.34.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.34.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.34.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.34.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.34.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.34.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.34.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.34.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.34.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.34.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.35.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.35.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.35.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.35.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.35.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.35.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.35.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.35.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.35.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.35.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.35.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.36.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.36.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.36.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.36.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.36.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.36.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.36.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.36.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.36.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.36.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.36.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.37.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.37.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.37.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.37.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.37.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.37.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.37.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.37.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.37.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.37.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.37.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.38.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.38.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.38.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.38.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.38.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.38.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.38.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.38.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.38.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.38.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.38.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.39.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.39.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.39.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.39.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.39.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.39.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.39.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.39.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.39.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.39.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.39.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.40.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.40.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.40.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.40.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.40.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.40.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.40.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.40.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.40.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.40.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.40.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.41.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.41.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.41.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.41.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.41.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.41.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.41.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.41.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.41.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.41.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.41.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.42.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.42.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.42.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.42.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.42.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.42.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.42.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.42.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.42.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.42.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.42.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.43.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.43.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.43.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.43.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.43.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.43.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.43.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.43.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.43.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.43.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.43.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.44.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.44.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.44.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.44.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.44.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.44.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.44.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.44.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.44.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.44.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.44.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.45.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.45.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.45.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.45.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.45.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.45.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.45.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.45.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.45.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.45.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.45.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.46.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.46.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.46.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.46.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.46.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.46.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.46.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.46.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.46.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.46.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.46.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.47.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.47.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.47.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.47.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.47.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.47.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.47.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.47.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.47.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.47.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.47.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.48.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.48.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.48.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.48.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.48.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.48.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.48.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.48.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.48.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.48.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.48.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.49.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.49.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.49.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.49.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.49.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.49.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.49.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.49.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.49.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.49.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.49.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.50.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.50.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.50.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.50.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.50.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.50.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.50.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.50.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.50.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.50.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.50.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.51.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.51.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.51.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.51.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.51.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.51.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.51.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.51.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.51.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.51.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.51.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.52.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.52.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.52.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.52.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.52.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.52.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.52.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.52.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.52.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.52.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.52.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.53.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.53.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.53.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.53.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.53.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.53.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.53.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.53.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.53.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.53.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.53.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.54.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.54.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.54.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.54.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.54.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.54.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.54.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.54.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.54.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.54.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.54.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.55.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.55.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.55.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.55.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.55.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.55.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.55.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.55.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.55.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.55.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.55.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.56.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.56.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.56.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.56.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.56.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.56.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.56.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.56.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.56.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.56.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.56.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.57.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.57.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.57.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.57.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.57.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.57.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.57.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.57.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.57.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.57.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.57.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.58.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.58.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.58.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.58.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.58.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.58.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.58.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.58.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.58.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.58.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.58.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.59.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.59.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.59.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.59.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.59.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.59.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.59.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.59.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.59.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.59.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.59.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.60.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.60.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.60.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.60.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.60.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.60.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.60.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.60.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.60.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.60.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.60.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.61.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.61.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.61.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.61.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.61.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.61.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.61.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.61.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.61.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.61.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.61.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.62.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.62.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.62.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.62.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.62.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.62.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.62.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.62.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.62.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.62.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.62.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.63.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.63.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.63.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.63.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.63.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.63.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.63.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.63.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.63.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.63.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.63.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.norm.weight": "model-00002-of-00002.safetensors"
+ }
+}
diff --git a/Qwen3-32B/ll_4bit/quantization_config.txt b/Qwen3-32B/ll_4bit/quantization_config.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cb782a8e5e5dab52b3267a020d3128febc5738c8
--- /dev/null
+++ b/Qwen3-32B/ll_4bit/quantization_config.txt
@@ -0,0 +1,280 @@
+model.layers.0.mlp.gate_proj: 4
+model.layers.0.mlp.up_proj: 4
+model.layers.0.self_attn.k_proj: 4
+model.layers.0.self_attn.q_proj: 4
+model.layers.0.self_attn.v_proj: 4
+model.layers.1.mlp.gate_proj: 4
+model.layers.1.mlp.up_proj: 4
+model.layers.1.self_attn.k_proj: 4
+model.layers.1.self_attn.q_proj: 4
+model.layers.1.self_attn.v_proj: 4
+model.layers.2.mlp.gate_proj: 4
+model.layers.2.mlp.up_proj: 4
+model.layers.2.self_attn.k_proj: 4
+model.layers.2.self_attn.q_proj: 4
+model.layers.2.self_attn.v_proj: 4
+model.layers.3.mlp.gate_proj: 4
+model.layers.3.mlp.up_proj: 4
+model.layers.3.self_attn.k_proj: 4
+model.layers.3.self_attn.q_proj: 4
+model.layers.3.self_attn.v_proj: 4
+model.layers.4.mlp.gate_proj: 4
+model.layers.4.mlp.up_proj: 4
+model.layers.4.self_attn.k_proj: 4
+model.layers.4.self_attn.q_proj: 4
+model.layers.4.self_attn.v_proj: 4
+model.layers.5.mlp.gate_proj: 4
+model.layers.5.mlp.up_proj: 4
+model.layers.5.self_attn.k_proj: 4
+model.layers.5.self_attn.q_proj: 4
+model.layers.5.self_attn.v_proj: 4
+model.layers.6.mlp.gate_proj: 4
+model.layers.6.mlp.up_proj: 4
+model.layers.6.self_attn.k_proj: 4
+model.layers.6.self_attn.q_proj: 4
+model.layers.6.self_attn.v_proj: 4
+model.layers.7.mlp.gate_proj: 4
+model.layers.7.mlp.up_proj: 4
+model.layers.7.self_attn.k_proj: 4
+model.layers.7.self_attn.q_proj: 4
+model.layers.7.self_attn.v_proj: 4
+model.layers.8.mlp.gate_proj: 4
+model.layers.8.mlp.up_proj: 4
+model.layers.8.self_attn.k_proj: 4
+model.layers.8.self_attn.q_proj: 4
+model.layers.8.self_attn.v_proj: 4
+model.layers.9.mlp.gate_proj: 4
+model.layers.9.mlp.up_proj: 4
+model.layers.9.self_attn.k_proj: 4
+model.layers.9.self_attn.q_proj: 4
+model.layers.9.self_attn.v_proj: 4
+model.layers.10.mlp.gate_proj: 4
+model.layers.10.mlp.up_proj: 4
+model.layers.10.self_attn.k_proj: 4
+model.layers.10.self_attn.q_proj: 4
+model.layers.10.self_attn.v_proj: 4
+model.layers.11.mlp.gate_proj: 4
+model.layers.11.mlp.up_proj: 4
+model.layers.11.self_attn.k_proj: 4
+model.layers.11.self_attn.q_proj: 4
+model.layers.11.self_attn.v_proj: 4
+model.layers.12.mlp.gate_proj: 4
+model.layers.12.mlp.up_proj: 4
+model.layers.12.self_attn.k_proj: 4
+model.layers.12.self_attn.q_proj: 4
+model.layers.12.self_attn.v_proj: 4
+model.layers.13.mlp.gate_proj: 4
+model.layers.13.mlp.up_proj: 4
+model.layers.13.self_attn.k_proj: 4
+model.layers.13.self_attn.q_proj: 4
+model.layers.13.self_attn.v_proj: 4
+model.layers.14.mlp.gate_proj: 4
+model.layers.14.mlp.up_proj: 4
+model.layers.14.self_attn.k_proj: 4
+model.layers.14.self_attn.q_proj: 4
+model.layers.14.self_attn.v_proj: 4
+model.layers.15.mlp.gate_proj: 4
+model.layers.15.mlp.up_proj: 4
+model.layers.15.self_attn.k_proj: 4
+model.layers.15.self_attn.q_proj: 4
+model.layers.15.self_attn.v_proj: 4
+model.layers.16.mlp.gate_proj: 4
+model.layers.16.mlp.up_proj: 4
+model.layers.16.self_attn.k_proj: 4
+model.layers.16.self_attn.q_proj: 4
+model.layers.16.self_attn.v_proj: 4
+model.layers.17.mlp.gate_proj: 4
+model.layers.17.mlp.up_proj: 4
+model.layers.17.self_attn.k_proj: 4
+model.layers.17.self_attn.q_proj: 4
+model.layers.17.self_attn.v_proj: 4
+model.layers.18.mlp.gate_proj: 4
+model.layers.18.mlp.up_proj: 4
+model.layers.18.self_attn.k_proj: 4
+model.layers.18.self_attn.q_proj: 4
+model.layers.18.self_attn.v_proj: 4
+model.layers.19.mlp.gate_proj: 4
+model.layers.19.mlp.up_proj: 4
+model.layers.19.self_attn.k_proj: 4
+model.layers.19.self_attn.q_proj: 4
+model.layers.19.self_attn.v_proj: 4
+model.layers.20.mlp.gate_proj: 4
+model.layers.20.mlp.up_proj: 4
+model.layers.20.self_attn.k_proj: 4
+model.layers.20.self_attn.q_proj: 4
+model.layers.20.self_attn.v_proj: 4
+model.layers.21.mlp.gate_proj: 4
+model.layers.21.mlp.up_proj: 4
+model.layers.21.self_attn.k_proj: 4
+model.layers.21.self_attn.q_proj: 4
+model.layers.21.self_attn.v_proj: 4
+model.layers.22.mlp.gate_proj: 4
+model.layers.22.mlp.up_proj: 4
+model.layers.22.self_attn.k_proj: 4
+model.layers.22.self_attn.q_proj: 4
+model.layers.22.self_attn.v_proj: 4
+model.layers.23.mlp.gate_proj: 4
+model.layers.23.mlp.up_proj: 4
+model.layers.23.self_attn.k_proj: 4
+model.layers.23.self_attn.q_proj: 4
+model.layers.23.self_attn.v_proj: 4
+model.layers.24.mlp.gate_proj: 4
+model.layers.24.mlp.up_proj: 4
+model.layers.24.self_attn.k_proj: 4
+model.layers.24.self_attn.q_proj: 4
+model.layers.24.self_attn.v_proj: 4
+model.layers.25.mlp.gate_proj: 4
+model.layers.25.mlp.up_proj: 4
+model.layers.25.self_attn.k_proj: 4
+model.layers.25.self_attn.q_proj: 4
+model.layers.25.self_attn.v_proj: 4
+model.layers.26.mlp.gate_proj: 4
+model.layers.26.mlp.up_proj: 4
+model.layers.26.self_attn.k_proj: 4
+model.layers.26.self_attn.q_proj: 4
+model.layers.26.self_attn.v_proj: 4
+model.layers.27.mlp.gate_proj: 4
+model.layers.27.mlp.up_proj: 4
+model.layers.27.self_attn.k_proj: 4
+model.layers.27.self_attn.q_proj: 4
+model.layers.27.self_attn.v_proj: 4
+model.layers.28.mlp.gate_proj: 4
+model.layers.28.mlp.up_proj: 4
+model.layers.28.self_attn.k_proj: 4
+model.layers.28.self_attn.q_proj: 4
+model.layers.28.self_attn.v_proj: 4
+model.layers.29.mlp.gate_proj: 4
+model.layers.29.mlp.up_proj: 4
+model.layers.29.self_attn.k_proj: 4
+model.layers.29.self_attn.q_proj: 4
+model.layers.29.self_attn.v_proj: 4
+model.layers.30.mlp.gate_proj: 4
+model.layers.30.mlp.up_proj: 4
+model.layers.30.self_attn.k_proj: 4
+model.layers.30.self_attn.q_proj: 4
+model.layers.30.self_attn.v_proj: 4
+model.layers.31.mlp.gate_proj: 4
+model.layers.31.mlp.up_proj: 4
+model.layers.31.self_attn.k_proj: 4
+model.layers.31.self_attn.q_proj: 4
+model.layers.31.self_attn.v_proj: 4
+model.layers.32.mlp.gate_proj: 4
+model.layers.32.mlp.up_proj: 4
+model.layers.32.self_attn.k_proj: 4
+model.layers.32.self_attn.q_proj: 4
+model.layers.32.self_attn.v_proj: 4
+model.layers.33.mlp.gate_proj: 4
+model.layers.33.mlp.up_proj: 4
+model.layers.33.self_attn.k_proj: 4
+model.layers.33.self_attn.q_proj: 4
+model.layers.33.self_attn.v_proj: 4
+model.layers.34.mlp.gate_proj: 4
+model.layers.34.mlp.up_proj: 4
+model.layers.34.self_attn.k_proj: 4
+model.layers.34.self_attn.q_proj: 4
+model.layers.34.self_attn.v_proj: 4
+model.layers.35.mlp.gate_proj: 4
+model.layers.35.mlp.up_proj: 4
+model.layers.35.self_attn.k_proj: 4
+model.layers.35.self_attn.q_proj: 4
+model.layers.35.self_attn.v_proj: 4
+model.layers.36.mlp.gate_proj: 4
+model.layers.36.mlp.up_proj: 4
+model.layers.36.self_attn.k_proj: 4
+model.layers.36.self_attn.q_proj: 4
+model.layers.36.self_attn.v_proj: 4
+model.layers.37.mlp.gate_proj: 4
+model.layers.37.mlp.up_proj: 4
+model.layers.37.self_attn.k_proj: 4
+model.layers.37.self_attn.q_proj: 4
+model.layers.37.self_attn.v_proj: 4
+model.layers.38.mlp.gate_proj: 4
+model.layers.38.mlp.up_proj: 4
+model.layers.38.self_attn.k_proj: 4
+model.layers.38.self_attn.q_proj: 4
+model.layers.38.self_attn.v_proj: 4
+model.layers.39.mlp.gate_proj: 4
+model.layers.39.mlp.up_proj: 4
+model.layers.39.self_attn.k_proj: 4
+model.layers.39.self_attn.q_proj: 4
+model.layers.39.self_attn.v_proj: 4
+model.layers.0.mlp.down_proj: 4
+model.layers.1.mlp.down_proj: 4
+model.layers.0.self_attn.o_proj: 4
+model.layers.2.self_attn.o_proj: 4
+model.layers.1.self_attn.o_proj: 4
+model.layers.29.mlp.down_proj: 4
+model.layers.21.mlp.down_proj: 4
+model.layers.3.self_attn.o_proj: 4
+model.layers.2.mlp.down_proj: 4
+model.layers.3.mlp.down_proj: 4
+model.layers.8.mlp.down_proj: 4
+model.layers.7.mlp.down_proj: 4
+model.layers.6.mlp.down_proj: 4
+model.layers.13.mlp.down_proj: 4
+model.layers.16.mlp.down_proj: 4
+model.layers.22.self_attn.o_proj: 4
+model.layers.38.mlp.down_proj: 4
+model.layers.16.self_attn.o_proj: 4
+model.layers.15.self_attn.o_proj: 4
+model.layers.7.self_attn.o_proj: 4
+model.layers.27.self_attn.o_proj: 4
+model.layers.28.mlp.down_proj: 4
+model.layers.5.self_attn.o_proj: 4
+model.layers.6.self_attn.o_proj: 4
+model.layers.12.mlp.down_proj: 4
+model.layers.23.self_attn.o_proj: 4
+model.layers.8.self_attn.o_proj: 4
+model.layers.19.self_attn.o_proj: 4
+model.layers.4.self_attn.o_proj: 4
+model.layers.4.mlp.down_proj: 4
+model.layers.5.mlp.down_proj: 4
+model.layers.9.mlp.down_proj: 4
+model.layers.9.self_attn.o_proj: 4
+model.layers.30.mlp.down_proj: 4
+model.layers.34.mlp.down_proj: 4
+model.layers.22.mlp.down_proj: 4
+model.layers.18.self_attn.o_proj: 4
+model.layers.20.self_attn.o_proj: 4
+model.layers.21.self_attn.o_proj: 4
+model.layers.38.self_attn.o_proj: 4
+model.layers.14.mlp.down_proj: 4
+model.layers.25.self_attn.o_proj: 4
+model.layers.11.mlp.down_proj: 4
+model.layers.10.self_attn.o_proj: 4
+model.layers.17.self_attn.o_proj: 4
+model.layers.34.self_attn.o_proj: 4
+model.layers.29.self_attn.o_proj: 4
+model.layers.39.self_attn.o_proj: 4
+model.layers.14.self_attn.o_proj: 4
+model.layers.20.mlp.down_proj: 4
+model.layers.32.mlp.down_proj: 4
+model.layers.37.self_attn.o_proj: 4
+model.layers.10.mlp.down_proj: 4
+model.layers.15.mlp.down_proj: 4
+model.layers.11.self_attn.o_proj: 4
+model.layers.12.self_attn.o_proj: 4
+model.layers.13.self_attn.o_proj: 4
+model.layers.18.mlp.down_proj: 4
+model.layers.31.mlp.down_proj: 4
+model.layers.32.self_attn.o_proj: 4
+model.layers.37.mlp.down_proj: 4
+model.layers.23.mlp.down_proj: 4
+model.layers.25.mlp.down_proj: 4
+model.layers.28.self_attn.o_proj: 4
+model.layers.24.mlp.down_proj: 4
+model.layers.17.mlp.down_proj: 4
+model.layers.19.mlp.down_proj: 4
+model.layers.26.self_attn.o_proj: 4
+model.layers.26.mlp.down_proj: 4
+model.layers.27.mlp.down_proj: 4
+model.layers.31.self_attn.o_proj: 4
+model.layers.24.self_attn.o_proj: 4
+model.layers.33.self_attn.o_proj: 4
+model.layers.30.self_attn.o_proj: 4
+model.layers.33.mlp.down_proj: 4
+model.layers.36.self_attn.o_proj: 4
+model.layers.39.mlp.down_proj: 4
+model.layers.35.mlp.down_proj: 4
+model.layers.36.mlp.down_proj: 4
+model.layers.35.self_attn.o_proj: 4
diff --git a/Qwen3-32B/ll_4bit/tokenizer_config.json b/Qwen3-32B/ll_4bit/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..7d75d3bb5300d205e48769cc1999073ab5971214
--- /dev/null
+++ b/Qwen3-32B/ll_4bit/tokenizer_config.json
@@ -0,0 +1,29 @@
+{
+ "add_prefix_space": false,
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "extra_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "is_local": false,
+ "model_max_length": 131072,
+ "pad_token": "<|endoftext|>",
+ "split_special_tokens": false,
+ "tokenizer_class": "Qwen2Tokenizer",
+ "unk_token": null
+}
diff --git a/Qwen3-8B/ll_4bit/README.md b/Qwen3-8B/ll_4bit/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..69b948e165edf9328192beee7bbd9b0f12b987f6
--- /dev/null
+++ b/Qwen3-8B/ll_4bit/README.md
@@ -0,0 +1,7 @@
+# Quantized Model Checkpoint
+
+**Base model:** unknown
+
+**Average bitwidth:** unknown
+
+See `quantization_config.txt` for full configuration details.
diff --git a/Qwen3-8B/ll_4bit/chat_template.jinja b/Qwen3-8B/ll_4bit/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..01be9b307daa2d425f7c168c9fb145a286e0afb4
--- /dev/null
+++ b/Qwen3-8B/ll_4bit/chat_template.jinja
@@ -0,0 +1,89 @@
+{%- if tools %}
+ {{- '<|im_start|>system\n' }}
+ {%- if messages[0].role == 'system' %}
+ {{- messages[0].content + '\n\n' }}
+ {%- endif %}
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+ {%- if message.content is string %}
+ {%- set content = message.content %}
+ {%- else %}
+ {%- set content = '' %}
+ {%- endif %}
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '</think>' in content %}
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {%- if loop.last or (not loop.last and reasoning_content) %}
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if (loop.first and content) or (not loop.first) %}
+ {{- '\n' }}
+ {%- endif %}
+ {%- if tool_call.function %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {{- '<tool_call>\n{"name": "' }}
+ {{- tool_call.name }}
+ {{- '", "arguments": ' }}
+ {%- if tool_call.arguments is string %}
+ {{- tool_call.arguments }}
+ {%- else %}
+ {{- tool_call.arguments | tojson }}
+ {%- endif %}
+ {{- '}\n</tool_call>' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n<tool_response>\n' }}
+ {{- content }}
+ {{- '\n</tool_response>' }}
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is false %}
+ {{- '<think>\n\n</think>\n\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/Qwen3-8B/ll_4bit/config.json b/Qwen3-8B/ll_4bit/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ba589300345eda2a40e727e7bc0bdc65b087579b
--- /dev/null
+++ b/Qwen3-8B/ll_4bit/config.json
@@ -0,0 +1,71 @@
+{
+ "architectures": [
+ "Qwen3ForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 151643,
+ "dtype": "float16",
+ "eos_token_id": 151645,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12288,
+ "layer_types": [
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention"
+ ],
+ "max_position_embeddings": 40960,
+ "max_window_layers": 36,
+ "model_type": "qwen3",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 36,
+ "num_key_value_heads": 8,
+ "pad_token_id": null,
+ "rms_norm_eps": 1e-06,
+ "rope_parameters": {
+ "rope_theta": 1000000,
+ "rope_type": "default"
+ },
+ "sliding_window": null,
+ "tie_word_embeddings": false,
+ "transformers_version": "5.3.0",
+ "use_cache": false,
+ "use_sliding_window": false,
+ "vocab_size": 151936
+}
diff --git a/Qwen3-8B/ll_4bit/generation_config.json b/Qwen3-8B/ll_4bit/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cfb3c1a14c48fda643794324f7c126c3c83bcd3e
--- /dev/null
+++ b/Qwen3-8B/ll_4bit/generation_config.json
@@ -0,0 +1,13 @@
+{
+ "bos_token_id": 151643,
+ "do_sample": true,
+ "eos_token_id": [
+ 151645,
+ 151643
+ ],
+ "pad_token_id": 151643,
+ "temperature": 0.6,
+ "top_k": 20,
+ "top_p": 0.95,
+ "transformers_version": "5.3.0"
+}
diff --git a/Qwen3-8B/ll_4bit/humming_online_quant_config.json b/Qwen3-8B/ll_4bit/humming_online_quant_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..71f9c5cc25ef014276e683aa28267a8997891b97
--- /dev/null
+++ b/Qwen3-8B/ll_4bit/humming_online_quant_config.json
@@ -0,0 +1,5 @@
+{
+ "quant_method": "gptq",
+ "bits": 4,
+ "group_size": 128
+}
\ No newline at end of file
diff --git a/Qwen3-8B/ll_4bit/quantization_config.txt b/Qwen3-8B/ll_4bit/quantization_config.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4ca0894605558e83f82a42fc72e0a9cd606d0222
--- /dev/null
+++ b/Qwen3-8B/ll_4bit/quantization_config.txt
@@ -0,0 +1,252 @@
+model.layers.23.self_attn.k_proj: 4
+model.layers.22.self_attn.o_proj: 4
+model.layers.4.self_attn.o_proj: 4
+model.layers.1.mlp.down_proj: 4
+model.layers.11.mlp.up_proj: 4
+model.layers.5.self_attn.k_proj: 4
+model.layers.14.self_attn.q_proj: 4
+model.layers.17.mlp.down_proj: 4
+model.layers.33.mlp.gate_proj: 4
+model.layers.5.self_attn.v_proj: 4
+model.layers.23.self_attn.v_proj: 4
+model.layers.27.mlp.gate_proj: 4
+model.layers.2.mlp.up_proj: 4
+model.layers.15.self_attn.q_proj: 4
+model.layers.23.self_attn.o_proj: 4
+model.layers.21.mlp.up_proj: 4
+model.layers.22.self_attn.k_proj: 4
+model.layers.4.mlp.down_proj: 4
+model.layers.4.self_attn.k_proj: 4
+model.layers.5.self_attn.o_proj: 4
+model.layers.31.mlp.up_proj: 4
+model.layers.4.self_attn.v_proj: 4
+model.layers.22.self_attn.v_proj: 4
+model.layers.22.mlp.gate_proj: 4
+model.layers.8.mlp.gate_proj: 4
+model.layers.12.mlp.down_proj: 4
+model.layers.7.mlp.gate_proj: 4
+model.layers.35.self_attn.o_proj: 4
+model.layers.21.self_attn.v_proj: 4
+model.layers.34.self_attn.k_proj: 4
+model.layers.7.self_attn.v_proj: 4
+model.layers.7.self_attn.k_proj: 4
+model.layers.6.self_attn.o_proj: 4
+model.layers.34.self_attn.v_proj: 4
+model.layers.20.self_attn.o_proj: 4
+model.layers.13.mlp.up_proj: 4
+model.layers.21.self_attn.k_proj: 4
+model.layers.35.mlp.down_proj: 4
+model.layers.28.self_attn.q_proj: 4
+model.layers.11.mlp.gate_proj: 4
+model.layers.16.self_attn.q_proj: 4
+model.layers.21.mlp.down_proj: 4
+model.layers.35.self_attn.k_proj: 4
+model.layers.33.mlp.up_proj: 4
+model.layers.20.self_attn.v_proj: 4
+model.layers.34.self_attn.o_proj: 4
+model.layers.6.self_attn.v_proj: 4
+model.layers.28.mlp.gate_proj: 4
+model.layers.2.mlp.gate_proj: 4
+model.layers.18.mlp.down_proj: 4
+model.layers.17.self_attn.q_proj: 4
+model.layers.0.mlp.up_proj: 4
+model.layers.24.mlp.down_proj: 4
+model.layers.23.mlp.up_proj: 4
+model.layers.7.self_attn.o_proj: 4
+model.layers.6.self_attn.k_proj: 4
+model.layers.29.self_attn.q_proj: 4
+model.layers.20.self_attn.k_proj: 4
+model.layers.30.mlp.down_proj: 4
+model.layers.14.mlp.gate_proj: 4
+model.layers.21.self_attn.o_proj: 4
+model.layers.35.self_attn.v_proj: 4
+model.layers.6.mlp.up_proj: 4
+model.layers.9.self_attn.q_proj: 4
+model.layers.0.self_attn.k_proj: 4
+model.layers.9.mlp.up_proj: 4
+model.layers.1.self_attn.o_proj: 4
+model.layers.33.self_attn.v_proj: 4
+model.layers.27.self_attn.o_proj: 4
+model.layers.26.self_attn.k_proj: 4
+model.layers.19.self_attn.o_proj: 4
+model.layers.0.mlp.gate_proj: 4
+model.layers.11.self_attn.q_proj: 4
+model.layers.18.self_attn.k_proj: 4
+model.layers.25.mlp.up_proj: 4
+model.layers.26.mlp.down_proj: 4
+model.layers.18.self_attn.v_proj: 4
+model.layers.35.mlp.up_proj: 4
+model.layers.32.self_attn.o_proj: 4
+model.layers.26.self_attn.v_proj: 4
+model.layers.33.self_attn.k_proj: 4
+model.layers.32.mlp.down_proj: 4
+model.layers.16.mlp.gate_proj: 4
+model.layers.0.self_attn.v_proj: 4
+model.layers.15.mlp.up_proj: 4
+model.layers.19.self_attn.k_proj: 4
+model.layers.10.self_attn.q_proj: 4
+model.layers.5.mlp.gate_proj: 4
+model.layers.18.self_attn.o_proj: 4
+model.layers.0.self_attn.o_proj: 4
+model.layers.1.self_attn.k_proj: 4
+model.layers.8.self_attn.q_proj: 4
+model.layers.27.self_attn.k_proj: 4
+model.layers.26.self_attn.o_proj: 4
+model.layers.32.self_attn.v_proj: 4
+model.layers.9.mlp.down_proj: 4
+model.layers.32.self_attn.k_proj: 4
+model.layers.13.mlp.gate_proj: 4
+model.layers.27.self_attn.v_proj: 4
+model.layers.33.self_attn.o_proj: 4
+model.layers.1.self_attn.v_proj: 4
+model.layers.23.mlp.down_proj: 4
+model.layers.19.self_attn.v_proj: 4
+model.layers.2.self_attn.v_proj: 4
+model.layers.6.mlp.down_proj: 4
+model.layers.31.self_attn.k_proj: 4
+model.layers.24.self_attn.v_proj: 4
+model.layers.30.self_attn.o_proj: 4
+model.layers.24.self_attn.k_proj: 4
+model.layers.4.mlp.up_proj: 4
+model.layers.25.self_attn.o_proj: 4
+model.layers.31.self_attn.v_proj: 4
+model.layers.20.mlp.gate_proj: 4
+model.layers.3.self_attn.o_proj: 4
+model.layers.2.self_attn.k_proj: 4
+model.layers.10.mlp.down_proj: 4
+model.layers.34.mlp.gate_proj: 4
+model.layers.13.self_attn.q_proj: 4
+model.layers.28.mlp.up_proj: 4
+model.layers.27.mlp.up_proj: 4
+model.layers.3.mlp.down_proj: 4
+model.layers.3.self_attn.v_proj: 4
+model.layers.19.mlp.gate_proj: 4
+model.layers.31.self_attn.o_proj: 4
+model.layers.25.self_attn.v_proj: 4
+model.layers.30.self_attn.k_proj: 4
+model.layers.29.mlp.down_proj: 4
+model.layers.18.mlp.up_proj: 4
+model.layers.15.mlp.down_proj: 4
+model.layers.17.mlp.up_proj: 4
+model.layers.31.mlp.gate_proj: 4
+model.layers.12.self_attn.q_proj: 4
+model.layers.30.self_attn.v_proj: 4
+model.layers.24.self_attn.o_proj: 4
+model.layers.25.self_attn.k_proj: 4
+model.layers.25.mlp.gate_proj: 4
+model.layers.3.self_attn.k_proj: 4
+model.layers.2.self_attn.o_proj: 4
+model.layers.2.mlp.down_proj: 4
+model.layers.16.self_attn.o_proj: 4
+model.layers.18.mlp.gate_proj: 4
+model.layers.17.self_attn.k_proj: 4
+model.layers.6.self_attn.q_proj: 4
+model.layers.28.mlp.down_proj: 4
+model.layers.28.self_attn.o_proj: 4
+model.layers.20.self_attn.q_proj: 4
+model.layers.29.self_attn.k_proj: 4
+model.layers.29.self_attn.v_proj: 4
+model.layers.14.mlp.down_proj: 4
+model.layers.35.self_attn.q_proj: 4
+model.layers.30.mlp.gate_proj: 4
+model.layers.24.mlp.gate_proj: 4
+model.layers.14.mlp.up_proj: 4
+model.layers.17.self_attn.v_proj: 4
+model.layers.7.self_attn.q_proj: 4
+model.layers.28.self_attn.k_proj: 4
+model.layers.21.self_attn.q_proj: 4
+model.layers.29.self_attn.o_proj: 4
+model.layers.7.mlp.down_proj: 4
+model.layers.16.self_attn.k_proj: 4
+model.layers.17.self_attn.o_proj: 4
+model.layers.34.mlp.up_proj: 4
+model.layers.21.mlp.gate_proj: 4
+model.layers.16.self_attn.v_proj: 4
+model.layers.24.mlp.up_proj: 4
+model.layers.34.self_attn.q_proj: 4
+model.layers.8.mlp.up_proj: 4
+model.layers.11.mlp.down_proj: 4
+model.layers.7.mlp.up_proj: 4
+model.layers.35.mlp.gate_proj: 4
+model.layers.28.self_attn.v_proj: 4
+model.layers.4.mlp.gate_proj: 4
+model.layers.16.mlp.up_proj: 4
+model.layers.15.self_attn.v_proj: 4
+model.layers.19.mlp.up_proj: 4
+model.layers.8.mlp.down_proj: 4
+model.layers.12.mlp.gate_proj: 4
+model.layers.15.self_attn.k_proj: 4
+model.layers.14.self_attn.o_proj: 4
+model.layers.22.self_attn.q_proj: 4
+model.layers.22.mlp.down_proj: 4
+model.layers.4.self_attn.q_proj: 4
+model.layers.14.self_attn.v_proj: 4
+model.layers.26.mlp.up_proj: 4
+model.layers.29.mlp.up_proj: 4
+model.layers.5.mlp.up_proj: 4
+model.layers.1.mlp.gate_proj: 4
+model.layers.27.mlp.down_proj: 4
+model.layers.23.self_attn.q_proj: 4
+model.layers.5.self_attn.q_proj: 4
+model.layers.33.mlp.down_proj: 4
+model.layers.17.mlp.gate_proj: 4
+model.layers.15.self_attn.o_proj: 4
+model.layers.14.self_attn.k_proj: 4
+model.layers.12.self_attn.k_proj: 4
+model.layers.13.self_attn.o_proj: 4
+model.layers.29.mlp.gate_proj: 4
+model.layers.25.self_attn.q_proj: 4
+model.layers.30.mlp.up_proj: 4
+model.layers.3.mlp.gate_proj: 4
+model.layers.19.mlp.down_proj: 4
+model.layers.3.self_attn.q_proj: 4
+model.layers.20.mlp.up_proj: 4
+model.layers.30.self_attn.q_proj: 4
+model.layers.25.mlp.down_proj: 4
+model.layers.12.self_attn.v_proj: 4
+model.layers.3.mlp.up_proj: 4
+model.layers.31.mlp.down_proj: 4
+model.layers.15.mlp.gate_proj: 4
+model.layers.24.self_attn.q_proj: 4
+model.layers.2.self_attn.q_proj: 4
+model.layers.6.mlp.gate_proj: 4
+model.layers.12.self_attn.o_proj: 4
+model.layers.13.self_attn.k_proj: 4
+model.layers.13.self_attn.v_proj: 4
+model.layers.34.mlp.down_proj: 4
+model.layers.10.mlp.gate_proj: 4
+model.layers.10.mlp.up_proj: 4
+model.layers.20.mlp.down_proj: 4
+model.layers.31.self_attn.q_proj: 4
+model.layers.22.mlp.up_proj: 4
+model.layers.32.self_attn.q_proj: 4
+model.layers.8.self_attn.v_proj: 4
+model.layers.5.mlp.down_proj: 4
+model.layers.10.self_attn.v_proj: 4
+model.layers.1.mlp.up_proj: 4
+model.layers.11.self_attn.o_proj: 4
+model.layers.10.self_attn.k_proj: 4
+model.layers.19.self_attn.q_proj: 4
+model.layers.23.mlp.gate_proj: 4
+model.layers.8.self_attn.k_proj: 4
+model.layers.32.mlp.up_proj: 4
+model.layers.1.self_attn.q_proj: 4
+model.layers.9.self_attn.o_proj: 4
+model.layers.9.mlp.gate_proj: 4
+model.layers.13.mlp.down_proj: 4
+model.layers.27.self_attn.q_proj: 4
+model.layers.0.mlp.down_proj: 4
+model.layers.11.self_attn.v_proj: 4
+model.layers.33.self_attn.q_proj: 4
+model.layers.9.self_attn.v_proj: 4
+model.layers.12.mlp.up_proj: 4
+model.layers.8.self_attn.o_proj: 4
+model.layers.0.self_attn.q_proj: 4
+model.layers.9.self_attn.k_proj: 4
+model.layers.26.self_attn.q_proj: 4
+model.layers.16.mlp.down_proj: 4
+model.layers.32.mlp.gate_proj: 4
+model.layers.18.self_attn.q_proj: 4
+model.layers.11.self_attn.k_proj: 4
+model.layers.10.self_attn.o_proj: 4
+model.layers.26.mlp.gate_proj: 4
diff --git a/Qwen3-8B/ll_4bit/tokenizer_config.json b/Qwen3-8B/ll_4bit/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..7d75d3bb5300d205e48769cc1999073ab5971214
--- /dev/null
+++ b/Qwen3-8B/ll_4bit/tokenizer_config.json
@@ -0,0 +1,29 @@
+{
+ "add_prefix_space": false,
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "extra_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "is_local": false,
+ "model_max_length": 131072,
+ "pad_token": "<|endoftext|>",
+ "split_special_tokens": false,
+ "tokenizer_class": "Qwen2Tokenizer",
+ "unk_token": null
+}
diff --git a/Qwen3.5-27B/ll_4bit/README.md b/Qwen3.5-27B/ll_4bit/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..69b948e165edf9328192beee7bbd9b0f12b987f6
--- /dev/null
+++ b/Qwen3.5-27B/ll_4bit/README.md
@@ -0,0 +1,7 @@
+# Quantized Model Checkpoint
+
+**Base model:** unknown
+
+**Average bitwidth:** unknown
+
+See `quantization_config.txt` for full configuration details.
diff --git a/Qwen3.5-27B/ll_4bit/chat_template.jinja b/Qwen3.5-27B/ll_4bit/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..a585dec894e63da457d9440ec6aa7caa16d20860
--- /dev/null
+++ b/Qwen3.5-27B/ll_4bit/chat_template.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain images.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain videos.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- else %}
+ {{- raise_exception('Unexpected item type in content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- elif content is none or content is undefined %}
+ {{- '' }}
+ {%- else %}
+ {{- raise_exception('Unexpected content type.') }}
+ {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+ {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+ {{- '<|im_start|>system\n' }}
+ {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n</tools>" }}
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {%- if content %}
+ {{- '\n\n' + content }}
+ {%- endif %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false)|trim %}
+ {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+ {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+ {%- set content = render_content(message.content, true)|trim %}
+ {%- if message.role == "system" %}
+ {%- if not loop.first %}
+ {{- raise_exception('System message must be at the beginning.') }}
+ {%- endif %}
+ {%- elif message.role == "user" %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '</think>' in content %}
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- set reasoning_content = reasoning_content|trim %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {%- if loop.first %}
+ {%- if content|trim %}
+ {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+ {%- else %}
+ {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+ {%- endif %}
+ {%- if tool_call.arguments is defined %}
+ {%- for args_name, args_value in tool_call.arguments|items %}
+ {{- '<parameter=' + args_name + '>\n' }}
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n</parameter>\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '</function>\n</tool_call>' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n<tool_response>\n' }}
+ {{- content }}
+ {{- '\n</tool_response>' }}
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
+ {{- '<|im_end|>\n' }}
+ {%- elif loop.last %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Unexpected message role.') }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is false %}
+ {{- '<think>\n\n</think>\n\n' }}
+ {%- else %}
+ {{- '<think>\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/Qwen3.5-27B/ll_4bit/config.json b/Qwen3.5-27B/ll_4bit/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6c0fc6f0ed2ea5d07e1c36677cfab84cd8f27351
--- /dev/null
+++ b/Qwen3.5-27B/ll_4bit/config.json
@@ -0,0 +1,138 @@
+{
+ "vision_start_token_id": 248053,
+ "video_token_id": 248057,
+ "image_token_id": 248056,
+ "architectures": [
+ "Qwen3_5ForConditionalGeneration"
+ ],
+ "model_type": "qwen3_5",
+ "vision_end_token_id": 248054,
+ "tie_word_embeddings": false,
+ "vision_config": {
+ "deepstack_visual_indexes": [],
+ "depth": 27,
+ "hidden_act": "gelu_pytorch_tanh",
+ "hidden_size": 1152,
+ "in_channels": 3,
+ "initializer_range": 0.02,
+ "intermediate_size": 4304,
+ "model_type": "qwen3_5",
+ "num_heads": 16,
+ "num_position_embeddings": 2304,
+ "out_hidden_size": 5120,
+ "patch_size": 16,
+ "spatial_merge_size": 2,
+ "temporal_patch_size": 2
+ },
+ "transformers_version": "4.57.0.dev0",
+ "text_config": {
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attn_output_gate": true,
+ "bos_token_id": null,
+ "dtype": "float16",
+ "eos_token_id": 248044,
+ "full_attention_interval": 4,
+ "head_dim": 256,
+ "hidden_act": "silu",
+ "hidden_size": 5120,
+ "initializer_range": 0.02,
+ "intermediate_size": 17408,
+ "layer_types": [
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention"
+ ],
+ "linear_conv_kernel_dim": 4,
+ "linear_key_head_dim": 128,
+ "linear_num_key_heads": 16,
+ "linear_num_value_heads": 48,
+ "linear_value_head_dim": 128,
+ "mamba_ssm_dtype": "float32",
+ "max_position_embeddings": 262144,
+ "mlp_only_layers": [],
+ "mtp_num_hidden_layers": 1,
+ "mtp_use_dedicated_embeddings": false,
+ "num_attention_heads": 24,
+ "num_hidden_layers": 64,
+ "num_key_value_heads": 4,
+ "pad_token_id": null,
+ "partial_rotary_factor": 0.25,
+ "rms_norm_eps": 1e-06,
+ "rope_parameters": {
+ "mrope_interleaved": true,
+ "mrope_section": [
+ 11,
+ 11,
+ 10
+ ],
+ "partial_rotary_factor": 0.25,
+ "rope_theta": 10000000,
+ "rope_type": "default"
+ },
+ "use_cache": false,
+ "vocab_size": 248320,
+ "model_type": "qwen3_5_text"
+ }
+}
\ No newline at end of file
diff --git a/Qwen3.5-27B/ll_4bit/generation_config.json b/Qwen3.5-27B/ll_4bit/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1068c09fbcc050fcccf2066dda235127d9bad05e
--- /dev/null
+++ b/Qwen3.5-27B/ll_4bit/generation_config.json
@@ -0,0 +1,13 @@
+{
+ "bos_token_id": 248044,
+ "do_sample": true,
+ "eos_token_id": [
+ 248046,
+ 248044
+ ],
+ "pad_token_id": 248044,
+ "temperature": 0.6,
+ "top_k": 20,
+ "top_p": 0.95,
+ "transformers_version": "5.3.0"
+}
diff --git a/Qwen3.5-27B/ll_4bit/humming_online_quant_config.json b/Qwen3.5-27B/ll_4bit/humming_online_quant_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..71f9c5cc25ef014276e683aa28267a8997891b97
--- /dev/null
+++ b/Qwen3.5-27B/ll_4bit/humming_online_quant_config.json
@@ -0,0 +1,5 @@
+{
+ "quant_method": "gptq",
+ "bits": 4,
+ "group_size": 128
+}
\ No newline at end of file
diff --git a/Qwen3.5-27B/ll_4bit/model-00001-of-00002.safetensors b/Qwen3.5-27B/ll_4bit/model-00001-of-00002.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..aa3f7b261cfa8a98c4312a2547101fdd5d33c491
--- /dev/null
+++ b/Qwen3.5-27B/ll_4bit/model-00001-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:710d2e34d9313fe8c77e9dbb1425d9d61560271c23c509c4fa4c1cc168b859d7
+size 49825162192
diff --git a/Qwen3.5-27B/ll_4bit/model.safetensors.index.json b/Qwen3.5-27B/ll_4bit/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..70fe4d0836094f2d2b8dd7c40966b8baa44d5114
--- /dev/null
+++ b/Qwen3.5-27B/ll_4bit/model.safetensors.index.json
@@ -0,0 +1,859 @@
+{
+ "metadata": {
+ "total_parameters": 26895998464,
+ "total_size": 53791996928
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.embed_tokens.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.58.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.60.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.linear_attn.A_log": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.linear_attn.conv1d.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.linear_attn.dt_bias": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.linear_attn.in_proj_a.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.linear_attn.in_proj_b.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.linear_attn.in_proj_qkv.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.linear_attn.in_proj_z.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.linear_attn.norm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.linear_attn.out_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.linear_attn.A_log": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.linear_attn.conv1d.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.linear_attn.dt_bias": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.linear_attn.in_proj_a.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.linear_attn.in_proj_b.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.linear_attn.in_proj_qkv.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.linear_attn.in_proj_z.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.linear_attn.norm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.linear_attn.out_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.linear_attn.A_log": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.linear_attn.conv1d.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.linear_attn.dt_bias": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.linear_attn.in_proj_a.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.linear_attn.in_proj_b.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.linear_attn.in_proj_qkv.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.linear_attn.in_proj_z.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.linear_attn.norm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.linear_attn.out_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.norm.weight": "model-00002-of-00002.safetensors"
+ }
+}
diff --git a/Qwen3.5-27B/ll_4bit/preprocessor_config.json b/Qwen3.5-27B/ll_4bit/preprocessor_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2ea84a437d448ff71b08df68fdd949d5cc4ebb64
--- /dev/null
+++ b/Qwen3.5-27B/ll_4bit/preprocessor_config.json
@@ -0,0 +1,21 @@
+{
+ "size": {
+ "longest_edge": 16777216,
+ "shortest_edge": 65536
+ },
+ "patch_size": 16,
+ "temporal_patch_size": 2,
+ "merge_size": 2,
+ "image_mean": [
+ 0.5,
+ 0.5,
+ 0.5
+ ],
+ "image_std": [
+ 0.5,
+ 0.5,
+ 0.5
+ ],
+ "processor_class": "Qwen3VLProcessor",
+ "image_processor_type": "Qwen2VLImageProcessorFast"
+}
\ No newline at end of file
diff --git a/Qwen3.5-27B/ll_4bit/quantization_config.txt b/Qwen3.5-27B/ll_4bit/quantization_config.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7dc784298206b6747d4776f10db7482e6556328c
--- /dev/null
+++ b/Qwen3.5-27B/ll_4bit/quantization_config.txt
@@ -0,0 +1,432 @@
+model.layers.0.mlp.gate_proj: 4
+model.layers.0.mlp.up_proj: 4
+model.layers.1.mlp.gate_proj: 4
+model.layers.1.mlp.up_proj: 4
+model.layers.2.mlp.gate_proj: 4
+model.layers.2.mlp.up_proj: 4
+model.layers.3.mlp.gate_proj: 4
+model.layers.3.mlp.up_proj: 4
+model.layers.4.mlp.gate_proj: 4
+model.layers.4.mlp.up_proj: 4
+model.layers.5.mlp.gate_proj: 4
+model.layers.5.mlp.up_proj: 4
+model.layers.6.mlp.gate_proj: 4
+model.layers.6.mlp.up_proj: 4
+model.layers.7.mlp.gate_proj: 4
+model.layers.7.mlp.up_proj: 4
+model.layers.8.mlp.gate_proj: 4
+model.layers.8.mlp.up_proj: 4
+model.layers.9.mlp.gate_proj: 4
+model.layers.9.mlp.up_proj: 4
+model.layers.10.mlp.gate_proj: 4
+model.layers.10.mlp.up_proj: 4
+model.layers.11.mlp.gate_proj: 4
+model.layers.11.mlp.up_proj: 4
+model.layers.12.mlp.gate_proj: 4
+model.layers.12.mlp.up_proj: 4
+model.layers.13.mlp.gate_proj: 4
+model.layers.13.mlp.up_proj: 4
+model.layers.14.mlp.gate_proj: 4
+model.layers.14.mlp.up_proj: 4
+model.layers.15.mlp.gate_proj: 4
+model.layers.15.mlp.up_proj: 4
+model.layers.16.mlp.gate_proj: 4
+model.layers.16.mlp.up_proj: 4
+model.layers.17.mlp.gate_proj: 4
+model.layers.17.mlp.up_proj: 4
+model.layers.18.mlp.gate_proj: 4
+model.layers.18.mlp.up_proj: 4
+model.layers.19.mlp.gate_proj: 4
+model.layers.19.mlp.up_proj: 4
+model.layers.20.mlp.gate_proj: 4
+model.layers.20.mlp.up_proj: 4
+model.layers.21.mlp.gate_proj: 4
+model.layers.21.mlp.up_proj: 4
+model.layers.22.mlp.gate_proj: 4
+model.layers.22.mlp.up_proj: 4
+model.layers.23.mlp.gate_proj: 4
+model.layers.23.mlp.up_proj: 4
+model.layers.24.mlp.gate_proj: 4
+model.layers.24.mlp.up_proj: 4
+model.layers.25.mlp.gate_proj: 4
+model.layers.25.mlp.up_proj: 4
+model.layers.26.mlp.gate_proj: 4
+model.layers.26.mlp.up_proj: 4
+model.layers.27.mlp.gate_proj: 4
+model.layers.27.mlp.up_proj: 4
+model.layers.28.mlp.gate_proj: 4
+model.layers.28.mlp.up_proj: 4
+model.layers.29.mlp.gate_proj: 4
+model.layers.29.mlp.up_proj: 4
+model.layers.30.mlp.gate_proj: 4
+model.layers.30.mlp.up_proj: 4
+model.layers.31.mlp.gate_proj: 4
+model.layers.31.mlp.up_proj: 4
+model.layers.32.mlp.gate_proj: 4
+model.layers.32.mlp.up_proj: 4
+model.layers.33.mlp.gate_proj: 4
+model.layers.33.mlp.up_proj: 4
+model.layers.34.mlp.gate_proj: 4
+model.layers.34.mlp.up_proj: 4
+model.layers.35.mlp.gate_proj: 4
+model.layers.35.mlp.up_proj: 4
+model.layers.36.mlp.gate_proj: 4
+model.layers.36.mlp.up_proj: 4
+model.layers.37.mlp.gate_proj: 4
+model.layers.37.mlp.up_proj: 4
+model.layers.38.mlp.gate_proj: 4
+model.layers.38.mlp.up_proj: 4
+model.layers.39.mlp.gate_proj: 4
+model.layers.39.mlp.up_proj: 4
+model.layers.40.mlp.gate_proj: 4
+model.layers.40.mlp.up_proj: 4
+model.layers.41.mlp.gate_proj: 4
+model.layers.41.mlp.up_proj: 4
+model.layers.42.mlp.gate_proj: 4
+model.layers.42.mlp.up_proj: 4
+model.layers.43.mlp.gate_proj: 4
+model.layers.43.mlp.up_proj: 4
+model.layers.44.mlp.gate_proj: 4
+model.layers.44.mlp.up_proj: 4
+model.layers.45.mlp.gate_proj: 4
+model.layers.45.mlp.up_proj: 4
+model.layers.46.mlp.gate_proj: 4
+model.layers.46.mlp.up_proj: 4
+model.layers.47.mlp.gate_proj: 4
+model.layers.47.mlp.up_proj: 4
+model.layers.48.mlp.gate_proj: 4
+model.layers.48.mlp.up_proj: 4
+model.layers.49.mlp.gate_proj: 4
+model.layers.49.mlp.up_proj: 4
+model.layers.50.mlp.gate_proj: 4
+model.layers.50.mlp.up_proj: 4
+model.layers.51.mlp.gate_proj: 4
+model.layers.51.mlp.up_proj: 4
+model.layers.52.mlp.gate_proj: 4
+model.layers.52.mlp.up_proj: 4
+model.layers.53.mlp.gate_proj: 4
+model.layers.53.mlp.up_proj: 4
+model.layers.54.mlp.gate_proj: 4
+model.layers.54.mlp.up_proj: 4
+model.layers.55.mlp.gate_proj: 4
+model.layers.55.mlp.up_proj: 4
+model.layers.56.mlp.gate_proj: 4
+model.layers.56.mlp.up_proj: 4
+model.layers.57.mlp.gate_proj: 4
+model.layers.57.mlp.up_proj: 4
+model.layers.58.mlp.gate_proj: 4
+model.layers.58.mlp.up_proj: 4
+model.layers.59.mlp.gate_proj: 4
+model.layers.59.mlp.up_proj: 4
+model.layers.60.mlp.gate_proj: 4
+model.layers.60.mlp.up_proj: 4
+model.layers.61.mlp.gate_proj: 4
+model.layers.61.mlp.up_proj: 4
+model.layers.62.mlp.gate_proj: 4
+model.layers.62.mlp.up_proj: 4
+model.layers.63.mlp.gate_proj: 4
+model.layers.63.mlp.up_proj: 4
+model.layers.61.linear_attn.in_proj_b: 4
+model.layers.36.linear_attn.in_proj_qkv: 4
+model.layers.60.linear_attn.in_proj_a: 4
+model.layers.60.linear_attn.out_proj: 4
+model.layers.54.linear_attn.in_proj_z: 4
+model.layers.12.linear_attn.in_proj_qkv: 4
+model.layers.56.linear_attn.in_proj_b: 4
+model.layers.9.linear_attn.in_proj_z: 4
+model.layers.57.linear_attn.in_proj_a: 4
+model.layers.52.linear_attn.in_proj_z: 4
+model.layers.50.linear_attn.in_proj_b: 4
+model.layers.52.mlp.down_proj: 4
+model.layers.27.mlp.down_proj: 4
+model.layers.3.mlp.down_proj: 4
+model.layers.4.linear_attn.in_proj_qkv: 4
+model.layers.38.linear_attn.in_proj_b: 4
+model.layers.46.mlp.down_proj: 4
+model.layers.26.linear_attn.in_proj_qkv: 4
+model.layers.33.mlp.down_proj: 4
+model.layers.33.linear_attn.out_proj: 4
+model.layers.22.linear_attn.in_proj_qkv: 4
+model.layers.1.linear_attn.in_proj_b: 4
+model.layers.58.linear_attn.out_proj: 4
+model.layers.0.linear_attn.in_proj_qkv: 4
+model.layers.0.linear_attn.in_proj_a: 4
+model.layers.42.linear_attn.out_proj: 4
+model.layers.34.linear_attn.in_proj_b: 4
+model.layers.29.linear_attn.out_proj: 4
+model.layers.36.linear_attn.in_proj_z: 4
+model.layers.22.linear_attn.out_proj: 4
+model.layers.6.mlp.down_proj: 4
+model.layers.33.linear_attn.in_proj_a: 4
+model.layers.43.mlp.down_proj: 4
+model.layers.49.linear_attn.out_proj: 4
+model.layers.32.linear_attn.in_proj_b: 4
+model.layers.16.linear_attn.in_proj_qkv: 4
+model.layers.30.linear_attn.in_proj_z: 4
+model.layers.36.mlp.down_proj: 4
+model.layers.5.linear_attn.in_proj_z: 4
+model.layers.57.mlp.down_proj: 4
+model.layers.32.linear_attn.in_proj_qkv: 4
+model.layers.53.linear_attn.out_proj: 4
+model.layers.6.linear_attn.in_proj_a: 4
+model.layers.58.linear_attn.in_proj_z: 4
+model.layers.38.linear_attn.out_proj: 4
+model.layers.22.mlp.down_proj: 4
+model.layers.20.linear_attn.in_proj_z: 4
+model.layers.58.mlp.down_proj: 4
+model.layers.22.linear_attn.in_proj_b: 4
+model.layers.62.linear_attn.in_proj_qkv: 4
+model.layers.9.mlp.down_proj: 4
+model.layers.48.linear_attn.in_proj_z: 4
+model.layers.17.linear_attn.in_proj_z: 4
+model.layers.46.linear_attn.in_proj_qkv: 4
+model.layers.24.linear_attn.out_proj: 4
+model.layers.14.linear_attn.in_proj_a: 4
+model.layers.39.mlp.down_proj: 4
+model.layers.13.linear_attn.in_proj_b: 4
+model.layers.12.linear_attn.in_proj_a: 4
+model.layers.56.linear_attn.in_proj_qkv: 4
+model.layers.44.linear_attn.out_proj: 4
+model.layers.26.linear_attn.in_proj_z: 4
+model.layers.8.linear_attn.in_proj_qkv: 4
+model.layers.24.linear_attn.in_proj_b: 4
+model.layers.25.linear_attn.in_proj_a: 4
+model.layers.11.mlp.down_proj: 4
+model.layers.49.mlp.down_proj: 4
+model.layers.41.linear_attn.in_proj_a: 4
+model.layers.40.linear_attn.in_proj_b: 4
+model.layers.42.linear_attn.in_proj_z: 4
+model.layers.29.linear_attn.in_proj_a: 4
+model.layers.52.linear_attn.in_proj_qkv: 4
+model.layers.28.linear_attn.in_proj_b: 4
+model.layers.28.mlp.down_proj: 4
+model.layers.61.mlp.down_proj: 4
+model.layers.42.linear_attn.in_proj_qkv: 4
+model.layers.1.linear_attn.out_proj: 4
+model.layers.14.mlp.down_proj: 4
+model.layers.18.linear_attn.in_proj_a: 4
+model.layers.46.linear_attn.in_proj_b: 4
+model.layers.44.linear_attn.in_proj_z: 4
+model.layers.17.linear_attn.out_proj: 4
+model.layers.57.linear_attn.in_proj_b: 4
+model.layers.56.linear_attn.in_proj_a: 4
+model.layers.29.linear_attn.in_proj_qkv: 4
+model.layers.8.linear_attn.in_proj_z: 4
+model.layers.62.linear_attn.in_proj_z: 4
+model.layers.60.linear_attn.in_proj_b: 4
+model.layers.61.linear_attn.in_proj_a: 4
+model.layers.16.mlp.down_proj: 4
+model.layers.61.linear_attn.out_proj: 4
+model.layers.6.linear_attn.out_proj: 4
+model.layers.38.linear_attn.in_proj_a: 4
+model.layers.45.linear_attn.in_proj_qkv: 4
+model.layers.63.mlp.down_proj: 4
+model.layers.10.linear_attn.out_proj: 4
+model.layers.53.linear_attn.in_proj_z: 4
+model.layers.61.linear_attn.in_proj_qkv: 4
+model.layers.50.linear_attn.in_proj_a: 4
+model.layers.52.linear_attn.out_proj: 4
+model.layers.34.linear_attn.in_proj_a: 4
+model.layers.37.linear_attn.in_proj_z: 4
+model.layers.0.linear_attn.in_proj_b: 4
+model.layers.2.linear_attn.in_proj_z: 4
+model.layers.41.linear_attn.in_proj_qkv: 4
+model.layers.1.linear_attn.in_proj_a: 4
+model.layers.48.linear_attn.out_proj: 4
+model.layers.6.linear_attn.in_proj_b: 4
+model.layers.4.linear_attn.in_proj_z: 4
+model.layers.28.linear_attn.out_proj: 4
+model.layers.32.linear_attn.out_proj: 4
+model.layers.13.mlp.down_proj: 4
+model.layers.32.linear_attn.in_proj_a: 4
+model.layers.33.linear_attn.in_proj_b: 4
+model.layers.16.linear_attn.in_proj_z: 4
+model.layers.14.linear_attn.in_proj_b: 4
+model.layers.49.linear_attn.in_proj_z: 4
+model.layers.25.linear_attn.in_proj_qkv: 4
+model.layers.34.linear_attn.out_proj: 4
+model.layers.21.linear_attn.in_proj_z: 4
+model.layers.22.linear_attn.in_proj_a: 4
+model.layers.45.linear_attn.out_proj: 4
+model.layers.34.mlp.down_proj: 4
+model.layers.25.linear_attn.in_proj_b: 4
+model.layers.49.linear_attn.in_proj_qkv: 4
+model.layers.4.mlp.down_proj: 4
+model.layers.41.mlp.down_proj: 4
+model.layers.24.linear_attn.in_proj_a: 4
+model.layers.25.linear_attn.out_proj: 4
+model.layers.10.linear_attn.in_proj_z: 4
+model.layers.12.linear_attn.in_proj_b: 4
+model.layers.20.mlp.down_proj: 4
+model.layers.54.linear_attn.out_proj: 4
+model.layers.55.mlp.down_proj: 4
+model.layers.13.linear_attn.in_proj_a: 4
+model.layers.28.linear_attn.in_proj_a: 4
+model.layers.16.linear_attn.out_proj: 4
+model.layers.29.linear_attn.in_proj_b: 4
+model.layers.0.linear_attn.out_proj: 4
+model.layers.40.linear_attn.in_proj_a: 4
+model.layers.19.mlp.down_proj: 4
+model.layers.41.linear_attn.in_proj_b: 4
+model.layers.25.mlp.down_proj: 4
+model.layers.46.linear_attn.in_proj_a: 4
+model.layers.18.linear_attn.in_proj_b: 4
+model.layers.50.mlp.down_proj: 4
+model.layers.45.linear_attn.in_proj_z: 4
+model.layers.21.linear_attn.in_proj_qkv: 4
+model.layers.31.mlp.down_proj: 4
+model.layers.1.mlp.down_proj: 4
+model.layers.44.mlp.down_proj: 4
+model.layers.57.linear_attn.in_proj_qkv: 4
+model.layers.53.linear_attn.in_proj_a: 4
+model.layers.52.linear_attn.in_proj_b: 4
+model.layers.50.linear_attn.in_proj_z: 4
+model.layers.20.linear_attn.out_proj: 4
+model.layers.18.mlp.down_proj: 4
+model.layers.9.linear_attn.in_proj_qkv: 4
+model.layers.38.linear_attn.in_proj_z: 4
+model.layers.62.linear_attn.in_proj_a: 4
+model.layers.24.mlp.down_proj: 4
+model.layers.51.mlp.down_proj: 4
+model.layers.61.linear_attn.in_proj_z: 4
+model.layers.9.linear_attn.in_proj_b: 4
+model.layers.30.mlp.down_proj: 4
+model.layers.0.mlp.down_proj: 4
+model.layers.8.linear_attn.in_proj_a: 4
+model.layers.40.linear_attn.out_proj: 4
+model.layers.54.linear_attn.in_proj_b: 4
+model.layers.45.mlp.down_proj: 4
+model.layers.56.linear_attn.in_proj_z: 4
+model.layers.32.linear_attn.in_proj_z: 4
+model.layers.30.linear_attn.in_proj_b: 4
+model.layers.4.linear_attn.in_proj_a: 4
+model.layers.18.linear_attn.out_proj: 4
+model.layers.58.linear_attn.in_proj_b: 4
+model.layers.5.linear_attn.in_proj_b: 4
+model.layers.2.linear_attn.in_proj_a: 4
+model.layers.35.mlp.down_proj: 4
+model.layers.13.linear_attn.out_proj: 4
+model.layers.5.mlp.down_proj: 4
+model.layers.1.linear_attn.in_proj_z: 4
+model.layers.40.mlp.down_proj: 4
+model.layers.5.linear_attn.out_proj: 4
+model.layers.34.linear_attn.in_proj_z: 4
+model.layers.21.mlp.down_proj: 4
+model.layers.36.linear_attn.in_proj_b: 4
+model.layers.37.linear_attn.in_proj_a: 4
+model.layers.54.mlp.down_proj: 4
+model.layers.53.linear_attn.in_proj_qkv: 4
+model.layers.62.linear_attn.out_proj: 4
+model.layers.10.linear_attn.in_proj_a: 4
+model.layers.13.linear_attn.in_proj_z: 4
+model.layers.26.linear_attn.in_proj_b: 4
+model.layers.24.linear_attn.in_proj_z: 4
+model.layers.5.linear_attn.in_proj_qkv: 4
+model.layers.37.linear_attn.in_proj_qkv: 4
+model.layers.21.linear_attn.in_proj_a: 4
+model.layers.20.linear_attn.in_proj_b: 4
+model.layers.22.linear_attn.in_proj_z: 4
+model.layers.49.linear_attn.in_proj_a: 4
+model.layers.8.linear_attn.out_proj: 4
+model.layers.16.linear_attn.in_proj_a: 4
+model.layers.12.mlp.down_proj: 4
+model.layers.48.linear_attn.in_proj_b: 4
+model.layers.17.linear_attn.in_proj_b: 4
+model.layers.13.linear_attn.in_proj_qkv: 4
+model.layers.46.linear_attn.out_proj: 4
+model.layers.17.linear_attn.in_proj_qkv: 4
+model.layers.46.linear_attn.in_proj_z: 4
+model.layers.44.linear_attn.in_proj_b: 4
+model.layers.37.linear_attn.out_proj: 4
+model.layers.45.linear_attn.in_proj_a: 4
+model.layers.33.linear_attn.in_proj_qkv: 4
+model.layers.57.linear_attn.out_proj: 4
+model.layers.40.linear_attn.in_proj_z: 4
+model.layers.1.linear_attn.in_proj_qkv: 4
+model.layers.17.mlp.down_proj: 4
+model.layers.42.linear_attn.in_proj_b: 4
+model.layers.62.mlp.down_proj: 4
+model.layers.28.linear_attn.in_proj_z: 4
+model.layers.26.linear_attn.out_proj: 4
+model.layers.48.linear_attn.in_proj_qkv: 4
+model.layers.48.mlp.down_proj: 4
+model.layers.41.linear_attn.out_proj: 4
+model.layers.10.linear_attn.in_proj_qkv: 4
+model.layers.34.linear_attn.in_proj_qkv: 4
+model.layers.52.linear_attn.in_proj_a: 4
+model.layers.30.linear_attn.out_proj: 4
+model.layers.53.linear_attn.in_proj_b: 4
+model.layers.29.mlp.down_proj: 4
+model.layers.24.linear_attn.in_proj_qkv: 4
+model.layers.54.linear_attn.in_proj_a: 4
+model.layers.60.mlp.down_proj: 4
+model.layers.8.linear_attn.in_proj_b: 4
+model.layers.6.linear_attn.in_proj_qkv: 4
+model.layers.57.linear_attn.in_proj_z: 4
+model.layers.9.linear_attn.in_proj_a: 4
+model.layers.50.linear_attn.out_proj: 4
+model.layers.15.mlp.down_proj: 4
+model.layers.58.linear_attn.in_proj_qkv: 4
+model.layers.62.linear_attn.in_proj_b: 4
+model.layers.21.linear_attn.out_proj: 4
+model.layers.60.linear_attn.in_proj_z: 4
+model.layers.59.mlp.down_proj: 4
+model.layers.5.linear_attn.in_proj_a: 4
+model.layers.4.linear_attn.out_proj: 4
+model.layers.58.linear_attn.in_proj_a: 4
+model.layers.6.linear_attn.in_proj_z: 4
+model.layers.4.linear_attn.in_proj_b: 4
+model.layers.8.mlp.down_proj: 4
+model.layers.33.linear_attn.in_proj_z: 4
+model.layers.2.linear_attn.in_proj_qkv: 4
+model.layers.30.linear_attn.in_proj_a: 4
+model.layers.38.mlp.down_proj: 4
+model.layers.20.linear_attn.in_proj_qkv: 4
+model.layers.12.linear_attn.out_proj: 4
+model.layers.37.linear_attn.in_proj_b: 4
+model.layers.36.linear_attn.in_proj_a: 4
+model.layers.30.linear_attn.in_proj_qkv: 4
+model.layers.14.linear_attn.in_proj_qkv: 4
+model.layers.0.linear_attn.in_proj_z: 4
+model.layers.2.linear_attn.in_proj_b: 4
+model.layers.10.mlp.down_proj: 4
+model.layers.44.linear_attn.in_proj_qkv: 4
+model.layers.9.linear_attn.out_proj: 4
+model.layers.26.linear_attn.in_proj_a: 4
+model.layers.25.linear_attn.in_proj_z: 4
+model.layers.38.linear_attn.in_proj_qkv: 4
+model.layers.10.linear_attn.in_proj_b: 4
+model.layers.60.linear_attn.in_proj_qkv: 4
+model.layers.12.linear_attn.in_proj_z: 4
+model.layers.7.mlp.down_proj: 4
+model.layers.28.linear_attn.in_proj_qkv: 4
+model.layers.17.linear_attn.in_proj_a: 4
+model.layers.48.linear_attn.in_proj_a: 4
+model.layers.42.mlp.down_proj: 4
+model.layers.14.linear_attn.out_proj: 4
+model.layers.16.linear_attn.in_proj_b: 4
+model.layers.14.linear_attn.in_proj_z: 4
+model.layers.37.mlp.down_proj: 4
+model.layers.49.linear_attn.in_proj_b: 4
+model.layers.20.linear_attn.in_proj_a: 4
+model.layers.54.linear_attn.in_proj_qkv: 4
+model.layers.56.mlp.down_proj: 4
+model.layers.21.linear_attn.in_proj_b: 4
+model.layers.23.mlp.down_proj: 4
+model.layers.2.linear_attn.out_proj: 4
+model.layers.18.linear_attn.in_proj_z: 4
+model.layers.45.linear_attn.in_proj_b: 4
+model.layers.50.linear_attn.in_proj_qkv: 4
+model.layers.44.linear_attn.in_proj_a: 4
+model.layers.56.linear_attn.out_proj: 4
+model.layers.36.linear_attn.out_proj: 4
+model.layers.53.mlp.down_proj: 4
+model.layers.29.linear_attn.in_proj_z: 4
+model.layers.26.mlp.down_proj: 4
+model.layers.2.mlp.down_proj: 4
+model.layers.41.linear_attn.in_proj_z: 4
+model.layers.18.linear_attn.in_proj_qkv: 4
+model.layers.47.mlp.down_proj: 4
+model.layers.42.linear_attn.in_proj_a: 4
+model.layers.40.linear_attn.in_proj_qkv: 4
+model.layers.32.mlp.down_proj: 4
diff --git a/Qwen3.5-27B/ll_4bit/tokenizer_config.json b/Qwen3.5-27B/ll_4bit/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6be6ce1780cf43bd47577fbb76e74aee6db89f21
--- /dev/null
+++ b/Qwen3.5-27B/ll_4bit/tokenizer_config.json
@@ -0,0 +1,31 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "model_max_length": 262144,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/Qwen3.5-27B/ll_4bit/video_preprocessor_config.json b/Qwen3.5-27B/ll_4bit/video_preprocessor_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..3ba673a5ad7d4d13f54155ecd38b2a94a6dac8fe
--- /dev/null
+++ b/Qwen3.5-27B/ll_4bit/video_preprocessor_config.json
@@ -0,0 +1,21 @@
+{
+ "size": {
+ "longest_edge": 25165824,
+ "shortest_edge": 4096
+ },
+ "patch_size": 16,
+ "temporal_patch_size": 2,
+ "merge_size": 2,
+ "image_mean": [
+ 0.5,
+ 0.5,
+ 0.5
+ ],
+ "image_std": [
+ 0.5,
+ 0.5,
+ 0.5
+ ],
+ "processor_class": "Qwen3VLProcessor",
+ "video_processor_type": "Qwen3VLVideoProcessor"
+}
\ No newline at end of file
diff --git a/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/README.md b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e6daf3807e2876ad9583a97e7c2d585088f85267
--- /dev/null
+++ b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/README.md
@@ -0,0 +1,18 @@
+# Quantized Model Checkpoint
+
+**Base model:** Qwen/Qwen3.5-27B
+
+**Average bitwidth:** 5.1871
+
+**Sensitivity method:** linear
+
+**Constraints:**
+- max_kl: 0.005
+- min_eap: 0.985
+
+**Metrics:**
+- final_kl: 0.001887
+- final_eap: 0.985100
+- final_etl: 0.014900
+
+See `quantization_config.txt` for full configuration details.
diff --git a/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/chat_template.jinja b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..a585dec894e63da457d9440ec6aa7caa16d20860
--- /dev/null
+++ b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/chat_template.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain images.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain videos.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- else %}
+ {{- raise_exception('Unexpected item type in content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- elif content is none or content is undefined %}
+ {{- '' }}
+ {%- else %}
+ {{- raise_exception('Unexpected content type.') }}
+ {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+ {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+ {{- '<|im_start|>system\n' }}
+ {{- "# Tools\n\nYou have access to the following functions:\n\n" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n" }}
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {%- if content %}
+ {{- '\n\n' + content }}
+ {%- endif %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false)|trim %}
+ {%- if not(content.startswith('') and content.endswith('')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+ {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+ {%- set content = render_content(message.content, true)|trim %}
+ {%- if message.role == "system" %}
+ {%- if not loop.first %}
+ {{- raise_exception('System message must be at the beginning.') }}
+ {%- endif %}
+ {%- elif message.role == "user" %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '' in content %}
+ {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %}
+ {%- set content = content.split('')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- set reasoning_content = reasoning_content|trim %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {%- if loop.first %}
+ {%- if content|trim %}
+ {{- '\n\n\n\n' }}
+ {%- else %}
+ {{- '\n\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- '\n\n\n' }}
+ {%- endif %}
+ {%- if tool_call.arguments is defined %}
+ {%- for args_name, args_value in tool_call.arguments|items %}
+ {{- '\n' }}
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n\n' }}
+ {{- content }}
+ {{- '\n' }}
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
+ {{- '<|im_end|>\n' }}
+ {%- elif loop.last %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Unexpected message role.') }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is false %}
+ {{- '\n\n\n\n' }}
+ {%- else %}
+ {{- '\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/config.json b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6c0fc6f0ed2ea5d07e1c36677cfab84cd8f27351
--- /dev/null
+++ b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/config.json
@@ -0,0 +1,138 @@
+{
+ "vision_start_token_id": 248053,
+ "video_token_id": 248057,
+ "image_token_id": 248056,
+ "architectures": [
+ "Qwen3_5ForConditionalGeneration"
+ ],
+ "model_type": "qwen3_5",
+ "vision_end_token_id": 248054,
+ "tie_word_embeddings": false,
+ "vision_config": {
+ "deepstack_visual_indexes": [],
+ "depth": 27,
+ "hidden_act": "gelu_pytorch_tanh",
+ "hidden_size": 1152,
+ "in_channels": 3,
+ "initializer_range": 0.02,
+ "intermediate_size": 4304,
+ "model_type": "qwen3_5",
+ "num_heads": 16,
+ "num_position_embeddings": 2304,
+ "out_hidden_size": 5120,
+ "patch_size": 16,
+ "spatial_merge_size": 2,
+ "temporal_patch_size": 2
+ },
+ "transformers_version": "4.57.0.dev0",
+ "text_config": {
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attn_output_gate": true,
+ "bos_token_id": null,
+ "dtype": "float16",
+ "eos_token_id": 248044,
+ "full_attention_interval": 4,
+ "head_dim": 256,
+ "hidden_act": "silu",
+ "hidden_size": 5120,
+ "initializer_range": 0.02,
+ "intermediate_size": 17408,
+ "layer_types": [
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention",
+ "linear_attention",
+ "linear_attention",
+ "linear_attention",
+ "full_attention"
+ ],
+ "linear_conv_kernel_dim": 4,
+ "linear_key_head_dim": 128,
+ "linear_num_key_heads": 16,
+ "linear_num_value_heads": 48,
+ "linear_value_head_dim": 128,
+ "mamba_ssm_dtype": "float32",
+ "max_position_embeddings": 262144,
+ "mlp_only_layers": [],
+ "mtp_num_hidden_layers": 1,
+ "mtp_use_dedicated_embeddings": false,
+ "num_attention_heads": 24,
+ "num_hidden_layers": 64,
+ "num_key_value_heads": 4,
+ "pad_token_id": null,
+ "partial_rotary_factor": 0.25,
+ "rms_norm_eps": 1e-06,
+ "rope_parameters": {
+ "mrope_interleaved": true,
+ "mrope_section": [
+ 11,
+ 11,
+ 10
+ ],
+ "partial_rotary_factor": 0.25,
+ "rope_theta": 10000000,
+ "rope_type": "default"
+ },
+ "use_cache": false,
+ "vocab_size": 248320,
+ "model_type": "qwen3_5_text"
+ }
+}
\ No newline at end of file
diff --git a/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/generation_config.json b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1068c09fbcc050fcccf2066dda235127d9bad05e
--- /dev/null
+++ b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/generation_config.json
@@ -0,0 +1,13 @@
+{
+ "bos_token_id": 248044,
+ "do_sample": true,
+ "eos_token_id": [
+ 248046,
+ 248044
+ ],
+ "pad_token_id": 248044,
+ "temperature": 0.6,
+ "top_k": 20,
+ "top_p": 0.95,
+ "transformers_version": "5.3.0"
+}
diff --git a/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/humming_online_quant_config.json b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/humming_online_quant_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..7880677abaf367bd1f16b6a3d6bf81a5f46eb278
--- /dev/null
+++ b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/humming_online_quant_config.json
@@ -0,0 +1,457 @@
+{
+ "quant_method": "gptq",
+ "bits": 5,
+ "group_size": 128,
+ "dynamic": {
+ "+:model\\.layers\\.60\\.mlp\\.gate_proj": {
+ "bits": 4
+ },
+ "+:model\\.layers\\.60\\.mlp\\.up_proj": {
+ "bits": 4
+ },
+ "+:model\\.layers\\.61\\.mlp\\.gate_proj": {
+ "bits": 4
+ },
+ "+:model\\.layers\\.61\\.mlp\\.up_proj": {
+ "bits": 4
+ },
+ "+:model\\.layers\\.61\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.60\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.56\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.9\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.57\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.50\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.38\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.1\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.0\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.34\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.33\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.32\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.30\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.5\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.57\\.mlp\\.down_proj": {
+ "bits": 4
+ },
+ "+:model\\.layers\\.6\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.20\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.22\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.17\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.14\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.13\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.12\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.26\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.24\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.25\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.41\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.40\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.29\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.28\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.61\\.mlp\\.down_proj": {
+ "bits": 4
+ },
+ "+:model\\.layers\\.1\\.linear_attn\\.out_proj": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.18\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.46\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.17\\.linear_attn\\.out_proj": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.57\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.56\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.8\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.60\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.61\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.6\\.linear_attn\\.out_proj": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.38\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.10\\.linear_attn\\.out_proj": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.50\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.34\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.0\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.2\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.1\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.6\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.4\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.32\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.33\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.16\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.14\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.21\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.22\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.25\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.24\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.10\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.12\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.13\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.28\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.16\\.linear_attn\\.out_proj": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.29\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.0\\.linear_attn\\.out_proj": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.40\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.41\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.46\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.18\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.53\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.52\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.20\\.linear_attn\\.out_proj": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.62\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.9\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.8\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.54\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.32\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.30\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.4\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.18\\.linear_attn\\.out_proj": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.58\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.5\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.2\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.13\\.linear_attn\\.out_proj": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.1\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.5\\.linear_attn\\.out_proj": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.34\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.36\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.37\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.10\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.13\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.26\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.24\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.21\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.20\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.22\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.49\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.8\\.linear_attn\\.out_proj": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.16\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.48\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.17\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.44\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.45\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.1\\.linear_attn\\.in_proj_qkv": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.42\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.62\\.mlp\\.down_proj": {
+ "bits": 4
+ },
+ "+:model\\.layers\\.28\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.52\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.53\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.54\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.60\\.mlp\\.down_proj": {
+ "bits": 4
+ },
+ "+:model\\.layers\\.8\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.9\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.62\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.21\\.linear_attn\\.out_proj": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.59\\.mlp\\.down_proj": {
+ "bits": 4
+ },
+ "+:model\\.layers\\.5\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.4\\.linear_attn\\.out_proj": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.58\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.6\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.4\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.33\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.30\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.12\\.linear_attn\\.out_proj": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.37\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.36\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.0\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.2\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.9\\.linear_attn\\.out_proj": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.26\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.25\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.10\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.12\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.17\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.48\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.14\\.linear_attn\\.out_proj": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.16\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.14\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.49\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.20\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.21\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.2\\.linear_attn\\.out_proj": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.18\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.45\\.linear_attn\\.in_proj_b": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.44\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ },
+ "+:model\\.layers\\.29\\.linear_attn\\.in_proj_z": {
+ "bits": 6
+ },
+ "+:model\\.layers\\.42\\.linear_attn\\.in_proj_a": {
+ "bits": 8
+ }
+ }
+}
\ No newline at end of file
diff --git a/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/model.safetensors.index.json b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..70fe4d0836094f2d2b8dd7c40966b8baa44d5114
--- /dev/null
+++ b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/model.safetensors.index.json
@@ -0,0 +1,859 @@
+{
+ "metadata": {
+ "total_parameters": 26895998464,
+ "total_size": 53791996928
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.embed_tokens.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.25.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.26.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.28.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.29.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.30.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.32.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.33.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.34.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.35.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.36.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.37.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.38.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.39.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.40.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.41.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.42.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.43.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.44.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.45.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.46.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.47.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.48.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.49.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.50.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.51.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.52.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.53.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.54.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.55.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.56.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.57.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.58.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.58.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.59.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.60.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.linear_attn.A_log": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.linear_attn.conv1d.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.linear_attn.dt_bias": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.linear_attn.in_proj_a.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.linear_attn.in_proj_b.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.linear_attn.in_proj_qkv.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.linear_attn.in_proj_z.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.linear_attn.norm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.linear_attn.out_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.60.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.linear_attn.A_log": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.linear_attn.conv1d.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.linear_attn.dt_bias": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.linear_attn.in_proj_a.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.linear_attn.in_proj_b.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.linear_attn.in_proj_qkv.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.linear_attn.in_proj_z.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.linear_attn.norm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.linear_attn.out_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.61.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.linear_attn.A_log": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.linear_attn.conv1d.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.linear_attn.dt_bias": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.linear_attn.in_proj_a.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.linear_attn.in_proj_b.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.linear_attn.in_proj_qkv.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.linear_attn.in_proj_z.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.linear_attn.norm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.linear_attn.out_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.62.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.63.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.language_model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.linear_attn.A_log": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.linear_attn.conv1d.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.linear_attn.dt_bias": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.linear_attn.in_proj_a.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.linear_attn.in_proj_b.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.linear_attn.in_proj_qkv.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.linear_attn.in_proj_z.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.linear_attn.norm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.linear_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.language_model.norm.weight": "model-00002-of-00002.safetensors"
+ }
+}
diff --git a/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/preprocessor_config.json b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/preprocessor_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2ea84a437d448ff71b08df68fdd949d5cc4ebb64
--- /dev/null
+++ b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/preprocessor_config.json
@@ -0,0 +1,21 @@
+{
+ "size": {
+ "longest_edge": 16777216,
+ "shortest_edge": 65536
+ },
+ "patch_size": 16,
+ "temporal_patch_size": 2,
+ "merge_size": 2,
+ "image_mean": [
+ 0.5,
+ 0.5,
+ 0.5
+ ],
+ "image_std": [
+ 0.5,
+ 0.5,
+ 0.5
+ ],
+ "processor_class": "Qwen3VLProcessor",
+ "image_processor_type": "Qwen2VLImageProcessorFast"
+}
\ No newline at end of file
diff --git a/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/quantization_config.txt b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/quantization_config.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e2d60c0295877ca3d87de949eab131c1c624b964
--- /dev/null
+++ b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/quantization_config.txt
@@ -0,0 +1,878 @@
+# Model: Qwen/Qwen3.5-27B
+# Layer directory: /nfs/scistore19/alistgrp/mhelcig/data/search/4_5_6_7_8bit_asym_g128/Qwen3.5-27B/4bit/
+# Sensitivity method: linear
+# Estimation method: linear
+# Available bitwidths: [4, 5, 6, 7, 8]
+# Bitwidth map: {4: 4.156, 5: 5.156, 6: 6.156, 7: 7.156, 8: 8.156}
+#
+# Layer groups: 368 groups (fused layers share bitwidth)
+# block_0:mlp.gate_proj,mlp.up_proj.block_0:mlp.gate_proj,mlp.up_proj: group 0, 2 layers
+# block_10:mlp.gate_proj,mlp.up_proj.block_10:mlp.gate_proj,mlp.up_proj: group 10, 2 layers
+# block_11:mlp.gate_proj,mlp.up_proj.block_11:mlp.gate_proj,mlp.up_proj: group 11, 2 layers
+# block_12:mlp.gate_proj,mlp.up_proj.block_12:mlp.gate_proj,mlp.up_proj: group 12, 2 layers
+# block_13:mlp.gate_proj,mlp.up_proj.block_13:mlp.gate_proj,mlp.up_proj: group 13, 2 layers
+# block_14:mlp.gate_proj,mlp.up_proj.block_14:mlp.gate_proj,mlp.up_proj: group 14, 2 layers
+# block_15:mlp.gate_proj,mlp.up_proj.block_15:mlp.gate_proj,mlp.up_proj: group 15, 2 layers
+# block_16:mlp.gate_proj,mlp.up_proj.block_16:mlp.gate_proj,mlp.up_proj: group 16, 2 layers
+# block_17:mlp.gate_proj,mlp.up_proj.block_17:mlp.gate_proj,mlp.up_proj: group 17, 2 layers
+# block_18:mlp.gate_proj,mlp.up_proj.block_18:mlp.gate_proj,mlp.up_proj: group 18, 2 layers
+# block_19:mlp.gate_proj,mlp.up_proj.block_19:mlp.gate_proj,mlp.up_proj: group 19, 2 layers
+# block_1:mlp.gate_proj,mlp.up_proj.block_1:mlp.gate_proj,mlp.up_proj: group 1, 2 layers
+# block_20:mlp.gate_proj,mlp.up_proj.block_20:mlp.gate_proj,mlp.up_proj: group 20, 2 layers
+# block_21:mlp.gate_proj,mlp.up_proj.block_21:mlp.gate_proj,mlp.up_proj: group 21, 2 layers
+# block_22:mlp.gate_proj,mlp.up_proj.block_22:mlp.gate_proj,mlp.up_proj: group 22, 2 layers
+# block_23:mlp.gate_proj,mlp.up_proj.block_23:mlp.gate_proj,mlp.up_proj: group 23, 2 layers
+# block_24:mlp.gate_proj,mlp.up_proj.block_24:mlp.gate_proj,mlp.up_proj: group 24, 2 layers
+# block_25:mlp.gate_proj,mlp.up_proj.block_25:mlp.gate_proj,mlp.up_proj: group 25, 2 layers
+# block_26:mlp.gate_proj,mlp.up_proj.block_26:mlp.gate_proj,mlp.up_proj: group 26, 2 layers
+# block_27:mlp.gate_proj,mlp.up_proj.block_27:mlp.gate_proj,mlp.up_proj: group 27, 2 layers
+# block_28:mlp.gate_proj,mlp.up_proj.block_28:mlp.gate_proj,mlp.up_proj: group 28, 2 layers
+# block_29:mlp.gate_proj,mlp.up_proj.block_29:mlp.gate_proj,mlp.up_proj: group 29, 2 layers
+# block_2:mlp.gate_proj,mlp.up_proj.block_2:mlp.gate_proj,mlp.up_proj: group 2, 2 layers
+# block_30:mlp.gate_proj,mlp.up_proj.block_30:mlp.gate_proj,mlp.up_proj: group 30, 2 layers
+# block_31:mlp.gate_proj,mlp.up_proj.block_31:mlp.gate_proj,mlp.up_proj: group 31, 2 layers
+# block_32:mlp.gate_proj,mlp.up_proj.block_32:mlp.gate_proj,mlp.up_proj: group 32, 2 layers
+# block_33:mlp.gate_proj,mlp.up_proj.block_33:mlp.gate_proj,mlp.up_proj: group 33, 2 layers
+# block_34:mlp.gate_proj,mlp.up_proj.block_34:mlp.gate_proj,mlp.up_proj: group 34, 2 layers
+# block_35:mlp.gate_proj,mlp.up_proj.block_35:mlp.gate_proj,mlp.up_proj: group 35, 2 layers
+# block_36:mlp.gate_proj,mlp.up_proj.block_36:mlp.gate_proj,mlp.up_proj: group 36, 2 layers
+# block_37:mlp.gate_proj,mlp.up_proj.block_37:mlp.gate_proj,mlp.up_proj: group 37, 2 layers
+# block_38:mlp.gate_proj,mlp.up_proj.block_38:mlp.gate_proj,mlp.up_proj: group 38, 2 layers
+# block_39:mlp.gate_proj,mlp.up_proj.block_39:mlp.gate_proj,mlp.up_proj: group 39, 2 layers
+# block_3:mlp.gate_proj,mlp.up_proj.block_3:mlp.gate_proj,mlp.up_proj: group 3, 2 layers
+# block_40:mlp.gate_proj,mlp.up_proj.block_40:mlp.gate_proj,mlp.up_proj: group 40, 2 layers
+# block_41:mlp.gate_proj,mlp.up_proj.block_41:mlp.gate_proj,mlp.up_proj: group 41, 2 layers
+# block_42:mlp.gate_proj,mlp.up_proj.block_42:mlp.gate_proj,mlp.up_proj: group 42, 2 layers
+# block_43:mlp.gate_proj,mlp.up_proj.block_43:mlp.gate_proj,mlp.up_proj: group 43, 2 layers
+# block_44:mlp.gate_proj,mlp.up_proj.block_44:mlp.gate_proj,mlp.up_proj: group 44, 2 layers
+# block_45:mlp.gate_proj,mlp.up_proj.block_45:mlp.gate_proj,mlp.up_proj: group 45, 2 layers
+# block_46:mlp.gate_proj,mlp.up_proj.block_46:mlp.gate_proj,mlp.up_proj: group 46, 2 layers
+# block_47:mlp.gate_proj,mlp.up_proj.block_47:mlp.gate_proj,mlp.up_proj: group 47, 2 layers
+# block_48:mlp.gate_proj,mlp.up_proj.block_48:mlp.gate_proj,mlp.up_proj: group 48, 2 layers
+# block_49:mlp.gate_proj,mlp.up_proj.block_49:mlp.gate_proj,mlp.up_proj: group 49, 2 layers
+# block_4:mlp.gate_proj,mlp.up_proj.block_4:mlp.gate_proj,mlp.up_proj: group 4, 2 layers
+# block_50:mlp.gate_proj,mlp.up_proj.block_50:mlp.gate_proj,mlp.up_proj: group 50, 2 layers
+# block_51:mlp.gate_proj,mlp.up_proj.block_51:mlp.gate_proj,mlp.up_proj: group 51, 2 layers
+# block_52:mlp.gate_proj,mlp.up_proj.block_52:mlp.gate_proj,mlp.up_proj: group 52, 2 layers
+# block_53:mlp.gate_proj,mlp.up_proj.block_53:mlp.gate_proj,mlp.up_proj: group 53, 2 layers
+# block_54:mlp.gate_proj,mlp.up_proj.block_54:mlp.gate_proj,mlp.up_proj: group 54, 2 layers
+# block_55:mlp.gate_proj,mlp.up_proj.block_55:mlp.gate_proj,mlp.up_proj: group 55, 2 layers
+# block_56:mlp.gate_proj,mlp.up_proj.block_56:mlp.gate_proj,mlp.up_proj: group 56, 2 layers
+# block_57:mlp.gate_proj,mlp.up_proj.block_57:mlp.gate_proj,mlp.up_proj: group 57, 2 layers
+# block_58:mlp.gate_proj,mlp.up_proj.block_58:mlp.gate_proj,mlp.up_proj: group 58, 2 layers
+# block_59:mlp.gate_proj,mlp.up_proj.block_59:mlp.gate_proj,mlp.up_proj: group 59, 2 layers
+# block_5:mlp.gate_proj,mlp.up_proj.block_5:mlp.gate_proj,mlp.up_proj: group 5, 2 layers
+# block_60:mlp.gate_proj,mlp.up_proj.block_60:mlp.gate_proj,mlp.up_proj: group 60, 2 layers
+# block_61:mlp.gate_proj,mlp.up_proj.block_61:mlp.gate_proj,mlp.up_proj: group 61, 2 layers
+# block_62:mlp.gate_proj,mlp.up_proj.block_62:mlp.gate_proj,mlp.up_proj: group 62, 2 layers
+# block_63:mlp.gate_proj,mlp.up_proj.block_63:mlp.gate_proj,mlp.up_proj: group 63, 2 layers
+# block_6:mlp.gate_proj,mlp.up_proj.block_6:mlp.gate_proj,mlp.up_proj: group 6, 2 layers
+# block_7:mlp.gate_proj,mlp.up_proj.block_7:mlp.gate_proj,mlp.up_proj: group 7, 2 layers
+# block_8:mlp.gate_proj,mlp.up_proj.block_8:mlp.gate_proj,mlp.up_proj: group 8, 2 layers
+# block_9:mlp.gate_proj,mlp.up_proj.block_9:mlp.gate_proj,mlp.up_proj: group 9, 2 layers
+# Fused in model.layers.0:
+# - linear_attn.in_proj_qkv (group 87, 1 layers)
+# - linear_attn.in_proj_a (group 88, 1 layers)
+# - linear_attn.in_proj_b (group 166, 1 layers)
+# - linear_attn.out_proj (group 202, 1 layers)
+# - mlp.down_proj (group 229, 1 layers)
+# - linear_attn.in_proj_z (group 325, 1 layers)
+# Fused in model.layers.1:
+# - linear_attn.in_proj_b (group 85, 1 layers)
+# - linear_attn.out_proj (group 140, 1 layers)
+# - linear_attn.in_proj_a (group 169, 1 layers)
+# - mlp.down_proj (group 213, 1 layers)
+# - linear_attn.in_proj_z (group 245, 1 layers)
+# - linear_attn.in_proj_qkv (group 280, 1 layers)
+# Fused in model.layers.10:
+# - linear_attn.out_proj (group 159, 1 layers)
+# - linear_attn.in_proj_z (group 193, 1 layers)
+# - linear_attn.in_proj_a (group 255, 1 layers)
+# - linear_attn.in_proj_qkv (group 289, 1 layers)
+# - mlp.down_proj (group 327, 1 layers)
+# - linear_attn.in_proj_b (group 333, 1 layers)
+# model.layers.11.mlp.down_proj: group 129, 1 layers
+# Fused in model.layers.12:
+# - linear_attn.in_proj_qkv (group 69, 1 layers)
+# - linear_attn.in_proj_a (group 122, 1 layers)
+# - linear_attn.in_proj_b (group 194, 1 layers)
+# - mlp.down_proj (group 267, 1 layers)
+# - linear_attn.out_proj (group 320, 1 layers)
+# - linear_attn.in_proj_z (group 335, 1 layers)
+# Fused in model.layers.13:
+# - linear_attn.in_proj_b (group 121, 1 layers)
+# - mlp.down_proj (group 175, 1 layers)
+# - linear_attn.in_proj_a (group 198, 1 layers)
+# - linear_attn.out_proj (group 243, 1 layers)
+# - linear_attn.in_proj_z (group 256, 1 layers)
+# - linear_attn.in_proj_qkv (group 270, 1 layers)
+# Fused in model.layers.14:
+# - linear_attn.in_proj_a (group 119, 1 layers)
+# - mlp.down_proj (group 141, 1 layers)
+# - linear_attn.in_proj_b (group 179, 1 layers)
+# - linear_attn.in_proj_qkv (group 324, 1 layers)
+# - linear_attn.out_proj (group 341, 1 layers)
+# - linear_attn.in_proj_z (group 343, 1 layers)
+# model.layers.15.mlp.down_proj: group 303, 1 layers
+# Fused in model.layers.16:
+# - linear_attn.in_proj_qkv (group 99, 1 layers)
+# - mlp.down_proj (group 153, 1 layers)
+# - linear_attn.in_proj_z (group 178, 1 layers)
+# - linear_attn.out_proj (group 200, 1 layers)
+# - linear_attn.in_proj_a (group 266, 1 layers)
+# - linear_attn.in_proj_b (group 342, 1 layers)
+# Fused in model.layers.17:
+# - linear_attn.in_proj_z (group 116, 1 layers)
+# - linear_attn.out_proj (group 145, 1 layers)
+# - linear_attn.in_proj_b (group 269, 1 layers)
+# - linear_attn.in_proj_qkv (group 272, 1 layers)
+# - mlp.down_proj (group 281, 1 layers)
+# - linear_attn.in_proj_a (group 338, 1 layers)
+# Fused in model.layers.18:
+# - linear_attn.in_proj_a (group 142, 1 layers)
+# - linear_attn.in_proj_b (group 208, 1 layers)
+# - mlp.down_proj (group 220, 1 layers)
+# - linear_attn.out_proj (group 238, 1 layers)
+# - linear_attn.in_proj_z (group 352, 1 layers)
+# - linear_attn.in_proj_qkv (group 363, 1 layers)
+# model.layers.19.mlp.down_proj: group 204, 1 layers
+# Fused in model.layers.2:
+# - linear_attn.in_proj_z (group 167, 1 layers)
+# - linear_attn.in_proj_a (group 241, 1 layers)
+# - linear_attn.in_proj_qkv (group 316, 1 layers)
+# - linear_attn.in_proj_b (group 326, 1 layers)
+# - linear_attn.out_proj (group 351, 1 layers)
+# - mlp.down_proj (group 361, 1 layers)
+# Fused in model.layers.20:
+# - linear_attn.in_proj_z (group 110, 1 layers)
+# - mlp.down_proj (group 195, 1 layers)
+# - linear_attn.out_proj (group 219, 1 layers)
+# - linear_attn.in_proj_b (group 262, 1 layers)
+# - linear_attn.in_proj_qkv (group 319, 1 layers)
+# - linear_attn.in_proj_a (group 346, 1 layers)
+# Fused in model.layers.21:
+# - linear_attn.in_proj_z (group 183, 1 layers)
+# - linear_attn.in_proj_qkv (group 211, 1 layers)
+# - mlp.down_proj (group 249, 1 layers)
+# - linear_attn.in_proj_a (group 261, 1 layers)
+# - linear_attn.out_proj (group 306, 1 layers)
+# - linear_attn.in_proj_b (group 349, 1 layers)
+# Fused in model.layers.22:
+# - linear_attn.in_proj_qkv (group 84, 1 layers)
+# - linear_attn.out_proj (group 93, 1 layers)
+# - mlp.down_proj (group 109, 1 layers)
+# - linear_attn.in_proj_b (group 112, 1 layers)
+# - linear_attn.in_proj_a (group 184, 1 layers)
+# - linear_attn.in_proj_z (group 263, 1 layers)
+# model.layers.23.mlp.down_proj: group 350, 1 layers
+# Fused in model.layers.24:
+# - linear_attn.out_proj (group 118, 1 layers)
+# - linear_attn.in_proj_b (group 127, 1 layers)
+# - linear_attn.in_proj_a (group 191, 1 layers)
+# - mlp.down_proj (group 224, 1 layers)
+# - linear_attn.in_proj_z (group 258, 1 layers)
+# - linear_attn.in_proj_qkv (group 295, 1 layers)
+# Fused in model.layers.25:
+# - linear_attn.in_proj_a (group 128, 1 layers)
+# - linear_attn.in_proj_qkv (group 181, 1 layers)
+# - linear_attn.in_proj_b (group 187, 1 layers)
+# - linear_attn.out_proj (group 192, 1 layers)
+# - mlp.down_proj (group 206, 1 layers)
+# - linear_attn.in_proj_z (group 331, 1 layers)
+# Fused in model.layers.26:
+# - linear_attn.in_proj_qkv (group 81, 1 layers)
+# - linear_attn.in_proj_z (group 125, 1 layers)
+# - linear_attn.in_proj_b (group 257, 1 layers)
+# - linear_attn.out_proj (group 285, 1 layers)
+# - linear_attn.in_proj_a (group 330, 1 layers)
+# - mlp.down_proj (group 360, 1 layers)
+# model.layers.27.mlp.down_proj: group 76, 1 layers
+# Fused in model.layers.28:
+# - linear_attn.in_proj_b (group 136, 1 layers)
+# - mlp.down_proj (group 137, 1 layers)
+# - linear_attn.out_proj (group 173, 1 layers)
+# - linear_attn.in_proj_a (group 199, 1 layers)
+# - linear_attn.in_proj_z (group 284, 1 layers)
+# - linear_attn.in_proj_qkv (group 337, 1 layers)
+# Fused in model.layers.29:
+# - linear_attn.out_proj (group 91, 1 layers)
+# - linear_attn.in_proj_a (group 134, 1 layers)
+# - linear_attn.in_proj_qkv (group 148, 1 layers)
+# - linear_attn.in_proj_b (group 201, 1 layers)
+# - mlp.down_proj (group 294, 1 layers)
+# - linear_attn.in_proj_z (group 359, 1 layers)
+# model.layers.3.mlp.down_proj: group 77, 1 layers
+# Fused in model.layers.30:
+# - linear_attn.in_proj_z (group 100, 1 layers)
+# - mlp.down_proj (group 228, 1 layers)
+# - linear_attn.in_proj_b (group 236, 1 layers)
+# - linear_attn.out_proj (group 292, 1 layers)
+# - linear_attn.in_proj_a (group 317, 1 layers)
+# - linear_attn.in_proj_qkv (group 323, 1 layers)
+# model.layers.31.mlp.down_proj: group 212, 1 layers
+# Fused in model.layers.32:
+# - linear_attn.in_proj_b (group 98, 1 layers)
+# - linear_attn.in_proj_qkv (group 104, 1 layers)
+# - linear_attn.out_proj (group 174, 1 layers)
+# - linear_attn.in_proj_a (group 176, 1 layers)
+# - linear_attn.in_proj_z (group 235, 1 layers)
+# - mlp.down_proj (group 367, 1 layers)
+# Fused in model.layers.33:
+# - mlp.down_proj (group 82, 1 layers)
+# - linear_attn.out_proj (group 83, 1 layers)
+# - linear_attn.in_proj_a (group 95, 1 layers)
+# - linear_attn.in_proj_b (group 177, 1 layers)
+# - linear_attn.in_proj_qkv (group 277, 1 layers)
+# - linear_attn.in_proj_z (group 315, 1 layers)
+# Fused in model.layers.34:
+# - linear_attn.in_proj_b (group 90, 1 layers)
+# - linear_attn.in_proj_a (group 164, 1 layers)
+# - linear_attn.out_proj (group 182, 1 layers)
+# - mlp.down_proj (group 186, 1 layers)
+# - linear_attn.in_proj_z (group 248, 1 layers)
+# - linear_attn.in_proj_qkv (group 290, 1 layers)
+# model.layers.35.mlp.down_proj: group 242, 1 layers
+# Fused in model.layers.36:
+# - linear_attn.in_proj_qkv (group 65, 1 layers)
+# - linear_attn.in_proj_z (group 92, 1 layers)
+# - mlp.down_proj (group 101, 1 layers)
+# - linear_attn.in_proj_b (group 250, 1 layers)
+# - linear_attn.in_proj_a (group 322, 1 layers)
+# - linear_attn.out_proj (group 357, 1 layers)
+# Fused in model.layers.37:
+# - linear_attn.in_proj_z (group 165, 1 layers)
+# - linear_attn.in_proj_a (group 251, 1 layers)
+# - linear_attn.in_proj_qkv (group 260, 1 layers)
+# - linear_attn.out_proj (group 275, 1 layers)
+# - linear_attn.in_proj_b (group 321, 1 layers)
+# - mlp.down_proj (group 344, 1 layers)
+# Fused in model.layers.38:
+# - linear_attn.in_proj_b (group 79, 1 layers)
+# - linear_attn.out_proj (group 108, 1 layers)
+# - linear_attn.in_proj_a (group 156, 1 layers)
+# - linear_attn.in_proj_z (group 222, 1 layers)
+# - mlp.down_proj (group 318, 1 layers)
+# - linear_attn.in_proj_qkv (group 332, 1 layers)
+# model.layers.39.mlp.down_proj: group 120, 1 layers
+# Fused in model.layers.4:
+# - linear_attn.in_proj_qkv (group 78, 1 layers)
+# - linear_attn.in_proj_z (group 172, 1 layers)
+# - mlp.down_proj (group 189, 1 layers)
+# - linear_attn.in_proj_a (group 237, 1 layers)
+# - linear_attn.out_proj (group 310, 1 layers)
+# - linear_attn.in_proj_b (group 313, 1 layers)
+# Fused in model.layers.40:
+# - linear_attn.in_proj_b (group 132, 1 layers)
+# - linear_attn.in_proj_a (group 203, 1 layers)
+# - linear_attn.out_proj (group 231, 1 layers)
+# - mlp.down_proj (group 246, 1 layers)
+# - linear_attn.in_proj_z (group 279, 1 layers)
+# - linear_attn.in_proj_qkv (group 366, 1 layers)
+# Fused in model.layers.41:
+# - linear_attn.in_proj_a (group 131, 1 layers)
+# - linear_attn.in_proj_qkv (group 168, 1 layers)
+# - mlp.down_proj (group 190, 1 layers)
+# - linear_attn.in_proj_b (group 205, 1 layers)
+# - linear_attn.out_proj (group 288, 1 layers)
+# - linear_attn.in_proj_z (group 362, 1 layers)
+# Fused in model.layers.42:
+# - linear_attn.out_proj (group 89, 1 layers)
+# - linear_attn.in_proj_z (group 133, 1 layers)
+# - linear_attn.in_proj_qkv (group 139, 1 layers)
+# - linear_attn.in_proj_b (group 282, 1 layers)
+# - mlp.down_proj (group 340, 1 layers)
+# - linear_attn.in_proj_a (group 365, 1 layers)
+# model.layers.43.mlp.down_proj: group 96, 1 layers
+# Fused in model.layers.44:
+# - linear_attn.out_proj (group 124, 1 layers)
+# - linear_attn.in_proj_z (group 144, 1 layers)
+# - mlp.down_proj (group 214, 1 layers)
+# - linear_attn.in_proj_b (group 274, 1 layers)
+# - linear_attn.in_proj_qkv (group 328, 1 layers)
+# - linear_attn.in_proj_a (group 355, 1 layers)
+# Fused in model.layers.45:
+# - linear_attn.in_proj_qkv (group 157, 1 layers)
+# - linear_attn.out_proj (group 185, 1 layers)
+# - linear_attn.in_proj_z (group 210, 1 layers)
+# - mlp.down_proj (group 233, 1 layers)
+# - linear_attn.in_proj_a (group 276, 1 layers)
+# - linear_attn.in_proj_b (group 353, 1 layers)
+# Fused in model.layers.46:
+# - mlp.down_proj (group 80, 1 layers)
+# - linear_attn.in_proj_qkv (group 117, 1 layers)
+# - linear_attn.in_proj_b (group 143, 1 layers)
+# - linear_attn.in_proj_a (group 207, 1 layers)
+# - linear_attn.out_proj (group 271, 1 layers)
+# - linear_attn.in_proj_z (group 273, 1 layers)
+# model.layers.47.mlp.down_proj: group 364, 1 layers
+# Fused in model.layers.48:
+# - linear_attn.in_proj_z (group 115, 1 layers)
+# - linear_attn.out_proj (group 170, 1 layers)
+# - linear_attn.in_proj_b (group 268, 1 layers)
+# - linear_attn.in_proj_qkv (group 286, 1 layers)
+# - mlp.down_proj (group 287, 1 layers)
+# - linear_attn.in_proj_a (group 339, 1 layers)
+# Fused in model.layers.49:
+# - linear_attn.out_proj (group 97, 1 layers)
+# - mlp.down_proj (group 130, 1 layers)
+# - linear_attn.in_proj_z (group 180, 1 layers)
+# - linear_attn.in_proj_qkv (group 188, 1 layers)
+# - linear_attn.in_proj_a (group 264, 1 layers)
+# - linear_attn.in_proj_b (group 345, 1 layers)
+# Fused in model.layers.5:
+# - linear_attn.in_proj_z (group 102, 1 layers)
+# - linear_attn.in_proj_b (group 240, 1 layers)
+# - mlp.down_proj (group 244, 1 layers)
+# - linear_attn.out_proj (group 247, 1 layers)
+# - linear_attn.in_proj_qkv (group 259, 1 layers)
+# - linear_attn.in_proj_a (group 309, 1 layers)
+# Fused in model.layers.50:
+# - linear_attn.in_proj_b (group 74, 1 layers)
+# - linear_attn.in_proj_a (group 162, 1 layers)
+# - mlp.down_proj (group 209, 1 layers)
+# - linear_attn.in_proj_z (group 218, 1 layers)
+# - linear_attn.out_proj (group 302, 1 layers)
+# - linear_attn.in_proj_qkv (group 354, 1 layers)
+# model.layers.51.mlp.down_proj: group 225, 1 layers
+# Fused in model.layers.52:
+# - linear_attn.in_proj_z (group 73, 1 layers)
+# - mlp.down_proj (group 75, 1 layers)
+# - linear_attn.in_proj_qkv (group 135, 1 layers)
+# - linear_attn.out_proj (group 163, 1 layers)
+# - linear_attn.in_proj_b (group 217, 1 layers)
+# - linear_attn.in_proj_a (group 291, 1 layers)
+# Fused in model.layers.53:
+# - linear_attn.out_proj (group 105, 1 layers)
+# - linear_attn.in_proj_z (group 160, 1 layers)
+# - linear_attn.in_proj_a (group 216, 1 layers)
+# - linear_attn.in_proj_qkv (group 253, 1 layers)
+# - linear_attn.in_proj_b (group 293, 1 layers)
+# - mlp.down_proj (group 358, 1 layers)
+# Fused in model.layers.54:
+# - linear_attn.in_proj_z (group 68, 1 layers)
+# - linear_attn.out_proj (group 196, 1 layers)
+# - linear_attn.in_proj_b (group 232, 1 layers)
+# - mlp.down_proj (group 252, 1 layers)
+# - linear_attn.in_proj_a (group 296, 1 layers)
+# - linear_attn.in_proj_qkv (group 347, 1 layers)
+# model.layers.55.mlp.down_proj: group 197, 1 layers
+# Fused in model.layers.56:
+# - linear_attn.in_proj_b (group 70, 1 layers)
+# - linear_attn.in_proj_qkv (group 123, 1 layers)
+# - linear_attn.in_proj_a (group 147, 1 layers)
+# - linear_attn.in_proj_z (group 234, 1 layers)
+# - mlp.down_proj (group 348, 1 layers)
+# - linear_attn.out_proj (group 356, 1 layers)
+# Fused in model.layers.57:
+# - linear_attn.in_proj_a (group 72, 1 layers)
+# - mlp.down_proj (group 103, 1 layers)
+# - linear_attn.in_proj_b (group 146, 1 layers)
+# - linear_attn.in_proj_qkv (group 215, 1 layers)
+# - linear_attn.out_proj (group 278, 1 layers)
+# - linear_attn.in_proj_z (group 300, 1 layers)
+# Fused in model.layers.58:
+# - linear_attn.out_proj (group 86, 1 layers)
+# - linear_attn.in_proj_z (group 107, 1 layers)
+# - mlp.down_proj (group 111, 1 layers)
+# - linear_attn.in_proj_b (group 239, 1 layers)
+# - linear_attn.in_proj_qkv (group 304, 1 layers)
+# - linear_attn.in_proj_a (group 311, 1 layers)
+# model.layers.59.mlp.down_proj: group 308, 1 layers
+# Fused in model.layers.6:
+# - mlp.down_proj (group 94, 1 layers)
+# - linear_attn.in_proj_a (group 106, 1 layers)
+# - linear_attn.out_proj (group 155, 1 layers)
+# - linear_attn.in_proj_b (group 171, 1 layers)
+# - linear_attn.in_proj_qkv (group 299, 1 layers)
+# - linear_attn.in_proj_z (group 312, 1 layers)
+# Fused in model.layers.60:
+# - linear_attn.in_proj_a (group 66, 1 layers)
+# - linear_attn.out_proj (group 67, 1 layers)
+# - linear_attn.in_proj_b (group 151, 1 layers)
+# - mlp.down_proj (group 297, 1 layers)
+# - linear_attn.in_proj_z (group 307, 1 layers)
+# - linear_attn.in_proj_qkv (group 334, 1 layers)
+# Fused in model.layers.61:
+# - linear_attn.in_proj_b (group 64, 1 layers)
+# - mlp.down_proj (group 138, 1 layers)
+# - linear_attn.in_proj_a (group 152, 1 layers)
+# - linear_attn.out_proj (group 154, 1 layers)
+# - linear_attn.in_proj_qkv (group 161, 1 layers)
+# - linear_attn.in_proj_z (group 226, 1 layers)
+# Fused in model.layers.62:
+# - linear_attn.in_proj_qkv (group 113, 1 layers)
+# - linear_attn.in_proj_z (group 150, 1 layers)
+# - linear_attn.in_proj_a (group 223, 1 layers)
+# - linear_attn.out_proj (group 254, 1 layers)
+# - mlp.down_proj (group 283, 1 layers)
+# - linear_attn.in_proj_b (group 305, 1 layers)
+# model.layers.63.mlp.down_proj: group 158, 1 layers
+# model.layers.7.mlp.down_proj: group 336, 1 layers
+# Fused in model.layers.8:
+# - linear_attn.in_proj_qkv (group 126, 1 layers)
+# - linear_attn.in_proj_z (group 149, 1 layers)
+# - linear_attn.in_proj_a (group 230, 1 layers)
+# - linear_attn.out_proj (group 265, 1 layers)
+# - linear_attn.in_proj_b (group 298, 1 layers)
+# - mlp.down_proj (group 314, 1 layers)
+# Fused in model.layers.9:
+# - linear_attn.in_proj_z (group 71, 1 layers)
+# - mlp.down_proj (group 114, 1 layers)
+# - linear_attn.in_proj_qkv (group 221, 1 layers)
+# - linear_attn.in_proj_b (group 227, 1 layers)
+# - linear_attn.in_proj_a (group 301, 1 layers)
+# - linear_attn.out_proj (group 329, 1 layers)
+#
+# Mode: binary_search_constraint (measured)
+# Constraint max_kl: 0.005
+# Constraint min_eap: 0.985
+# Weights: nll=0.0, kl=1.0, eap=0.0
+#
+# Average bitwidth: 5.1871
+# Total params: 22672834560
+# Total bits: 117606302351
+# Final KL: 0.001887
+# Final EAP: 0.985100
+# Final ETL: 0.014900
+# Satisfies constraints: True
+# Solver calls: 9
+# Evaluations: 9
+#
+# Bitwidth distribution:
+# 8-bit: 96 layers (22.2%)
+# 6-bit: 45 layers (10.4%)
+# 5-bit: 282 layers (65.3%)
+# 4-bit: 9 layers (2.1%)
+#
+model.layers.0.mlp.gate_proj: 5
+model.layers.0.mlp.up_proj: 5
+model.layers.1.mlp.gate_proj: 5
+model.layers.1.mlp.up_proj: 5
+model.layers.2.mlp.gate_proj: 5
+model.layers.2.mlp.up_proj: 5
+model.layers.3.mlp.gate_proj: 5
+model.layers.3.mlp.up_proj: 5
+model.layers.4.mlp.gate_proj: 5
+model.layers.4.mlp.up_proj: 5
+model.layers.5.mlp.gate_proj: 5
+model.layers.5.mlp.up_proj: 5
+model.layers.6.mlp.gate_proj: 5
+model.layers.6.mlp.up_proj: 5
+model.layers.7.mlp.gate_proj: 5
+model.layers.7.mlp.up_proj: 5
+model.layers.8.mlp.gate_proj: 5
+model.layers.8.mlp.up_proj: 5
+model.layers.9.mlp.gate_proj: 5
+model.layers.9.mlp.up_proj: 5
+model.layers.10.mlp.gate_proj: 5
+model.layers.10.mlp.up_proj: 5
+model.layers.11.mlp.gate_proj: 5
+model.layers.11.mlp.up_proj: 5
+model.layers.12.mlp.gate_proj: 5
+model.layers.12.mlp.up_proj: 5
+model.layers.13.mlp.gate_proj: 5
+model.layers.13.mlp.up_proj: 5
+model.layers.14.mlp.gate_proj: 5
+model.layers.14.mlp.up_proj: 5
+model.layers.15.mlp.gate_proj: 5
+model.layers.15.mlp.up_proj: 5
+model.layers.16.mlp.gate_proj: 5
+model.layers.16.mlp.up_proj: 5
+model.layers.17.mlp.gate_proj: 5
+model.layers.17.mlp.up_proj: 5
+model.layers.18.mlp.gate_proj: 5
+model.layers.18.mlp.up_proj: 5
+model.layers.19.mlp.gate_proj: 5
+model.layers.19.mlp.up_proj: 5
+model.layers.20.mlp.gate_proj: 5
+model.layers.20.mlp.up_proj: 5
+model.layers.21.mlp.gate_proj: 5
+model.layers.21.mlp.up_proj: 5
+model.layers.22.mlp.gate_proj: 5
+model.layers.22.mlp.up_proj: 5
+model.layers.23.mlp.gate_proj: 5
+model.layers.23.mlp.up_proj: 5
+model.layers.24.mlp.gate_proj: 5
+model.layers.24.mlp.up_proj: 5
+model.layers.25.mlp.gate_proj: 5
+model.layers.25.mlp.up_proj: 5
+model.layers.26.mlp.gate_proj: 5
+model.layers.26.mlp.up_proj: 5
+model.layers.27.mlp.gate_proj: 5
+model.layers.27.mlp.up_proj: 5
+model.layers.28.mlp.gate_proj: 5
+model.layers.28.mlp.up_proj: 5
+model.layers.29.mlp.gate_proj: 5
+model.layers.29.mlp.up_proj: 5
+model.layers.30.mlp.gate_proj: 5
+model.layers.30.mlp.up_proj: 5
+model.layers.31.mlp.gate_proj: 5
+model.layers.31.mlp.up_proj: 5
+model.layers.32.mlp.gate_proj: 5
+model.layers.32.mlp.up_proj: 5
+model.layers.33.mlp.gate_proj: 5
+model.layers.33.mlp.up_proj: 5
+model.layers.34.mlp.gate_proj: 5
+model.layers.34.mlp.up_proj: 5
+model.layers.35.mlp.gate_proj: 5
+model.layers.35.mlp.up_proj: 5
+model.layers.36.mlp.gate_proj: 5
+model.layers.36.mlp.up_proj: 5
+model.layers.37.mlp.gate_proj: 5
+model.layers.37.mlp.up_proj: 5
+model.layers.38.mlp.gate_proj: 5
+model.layers.38.mlp.up_proj: 5
+model.layers.39.mlp.gate_proj: 5
+model.layers.39.mlp.up_proj: 5
+model.layers.40.mlp.gate_proj: 5
+model.layers.40.mlp.up_proj: 5
+model.layers.41.mlp.gate_proj: 5
+model.layers.41.mlp.up_proj: 5
+model.layers.42.mlp.gate_proj: 5
+model.layers.42.mlp.up_proj: 5
+model.layers.43.mlp.gate_proj: 5
+model.layers.43.mlp.up_proj: 5
+model.layers.44.mlp.gate_proj: 5
+model.layers.44.mlp.up_proj: 5
+model.layers.45.mlp.gate_proj: 5
+model.layers.45.mlp.up_proj: 5
+model.layers.46.mlp.gate_proj: 5
+model.layers.46.mlp.up_proj: 5
+model.layers.47.mlp.gate_proj: 5
+model.layers.47.mlp.up_proj: 5
+model.layers.48.mlp.gate_proj: 5
+model.layers.48.mlp.up_proj: 5
+model.layers.49.mlp.gate_proj: 5
+model.layers.49.mlp.up_proj: 5
+model.layers.50.mlp.gate_proj: 5
+model.layers.50.mlp.up_proj: 5
+model.layers.51.mlp.gate_proj: 5
+model.layers.51.mlp.up_proj: 5
+model.layers.52.mlp.gate_proj: 5
+model.layers.52.mlp.up_proj: 5
+model.layers.53.mlp.gate_proj: 5
+model.layers.53.mlp.up_proj: 5
+model.layers.54.mlp.gate_proj: 5
+model.layers.54.mlp.up_proj: 5
+model.layers.55.mlp.gate_proj: 5
+model.layers.55.mlp.up_proj: 5
+model.layers.56.mlp.gate_proj: 5
+model.layers.56.mlp.up_proj: 5
+model.layers.57.mlp.gate_proj: 5
+model.layers.57.mlp.up_proj: 5
+model.layers.58.mlp.gate_proj: 5
+model.layers.58.mlp.up_proj: 5
+model.layers.59.mlp.gate_proj: 5
+model.layers.59.mlp.up_proj: 5
+model.layers.60.mlp.gate_proj: 4
+model.layers.60.mlp.up_proj: 4
+model.layers.61.mlp.gate_proj: 4
+model.layers.61.mlp.up_proj: 4
+model.layers.62.mlp.gate_proj: 5
+model.layers.62.mlp.up_proj: 5
+model.layers.63.mlp.gate_proj: 5
+model.layers.63.mlp.up_proj: 5
+model.layers.61.linear_attn.in_proj_b: 8
+model.layers.36.linear_attn.in_proj_qkv: 5
+model.layers.60.linear_attn.in_proj_a: 8
+model.layers.60.linear_attn.out_proj: 5
+model.layers.54.linear_attn.in_proj_z: 5
+model.layers.12.linear_attn.in_proj_qkv: 5
+model.layers.56.linear_attn.in_proj_b: 8
+model.layers.9.linear_attn.in_proj_z: 6
+model.layers.57.linear_attn.in_proj_a: 8
+model.layers.52.linear_attn.in_proj_z: 5
+model.layers.50.linear_attn.in_proj_b: 8
+model.layers.52.mlp.down_proj: 5
+model.layers.27.mlp.down_proj: 5
+model.layers.3.mlp.down_proj: 5
+model.layers.4.linear_attn.in_proj_qkv: 5
+model.layers.38.linear_attn.in_proj_b: 8
+model.layers.46.mlp.down_proj: 5
+model.layers.26.linear_attn.in_proj_qkv: 5
+model.layers.33.mlp.down_proj: 5
+model.layers.33.linear_attn.out_proj: 5
+model.layers.22.linear_attn.in_proj_qkv: 5
+model.layers.1.linear_attn.in_proj_b: 8
+model.layers.58.linear_attn.out_proj: 5
+model.layers.0.linear_attn.in_proj_qkv: 5
+model.layers.0.linear_attn.in_proj_a: 8
+model.layers.42.linear_attn.out_proj: 5
+model.layers.34.linear_attn.in_proj_b: 8
+model.layers.29.linear_attn.out_proj: 5
+model.layers.36.linear_attn.in_proj_z: 5
+model.layers.22.linear_attn.out_proj: 5
+model.layers.6.mlp.down_proj: 5
+model.layers.33.linear_attn.in_proj_a: 8
+model.layers.43.mlp.down_proj: 5
+model.layers.49.linear_attn.out_proj: 5
+model.layers.32.linear_attn.in_proj_b: 8
+model.layers.16.linear_attn.in_proj_qkv: 5
+model.layers.30.linear_attn.in_proj_z: 6
+model.layers.36.mlp.down_proj: 5
+model.layers.5.linear_attn.in_proj_z: 6
+model.layers.57.mlp.down_proj: 4
+model.layers.32.linear_attn.in_proj_qkv: 5
+model.layers.53.linear_attn.out_proj: 5
+model.layers.6.linear_attn.in_proj_a: 8
+model.layers.58.linear_attn.in_proj_z: 5
+model.layers.38.linear_attn.out_proj: 5
+model.layers.22.mlp.down_proj: 5
+model.layers.20.linear_attn.in_proj_z: 6
+model.layers.58.mlp.down_proj: 5
+model.layers.22.linear_attn.in_proj_b: 8
+model.layers.62.linear_attn.in_proj_qkv: 5
+model.layers.9.mlp.down_proj: 5
+model.layers.48.linear_attn.in_proj_z: 5
+model.layers.17.linear_attn.in_proj_z: 6
+model.layers.46.linear_attn.in_proj_qkv: 5
+model.layers.24.linear_attn.out_proj: 5
+model.layers.14.linear_attn.in_proj_a: 8
+model.layers.39.mlp.down_proj: 5
+model.layers.13.linear_attn.in_proj_b: 8
+model.layers.12.linear_attn.in_proj_a: 8
+model.layers.56.linear_attn.in_proj_qkv: 5
+model.layers.44.linear_attn.out_proj: 5
+model.layers.26.linear_attn.in_proj_z: 6
+model.layers.8.linear_attn.in_proj_qkv: 5
+model.layers.24.linear_attn.in_proj_b: 8
+model.layers.25.linear_attn.in_proj_a: 8
+model.layers.11.mlp.down_proj: 5
+model.layers.49.mlp.down_proj: 5
+model.layers.41.linear_attn.in_proj_a: 8
+model.layers.40.linear_attn.in_proj_b: 8
+model.layers.42.linear_attn.in_proj_z: 5
+model.layers.29.linear_attn.in_proj_a: 8
+model.layers.52.linear_attn.in_proj_qkv: 5
+model.layers.28.linear_attn.in_proj_b: 8
+model.layers.28.mlp.down_proj: 5
+model.layers.61.mlp.down_proj: 4
+model.layers.42.linear_attn.in_proj_qkv: 5
+model.layers.1.linear_attn.out_proj: 6
+model.layers.14.mlp.down_proj: 5
+model.layers.18.linear_attn.in_proj_a: 8
+model.layers.46.linear_attn.in_proj_b: 8
+model.layers.44.linear_attn.in_proj_z: 5
+model.layers.17.linear_attn.out_proj: 6
+model.layers.57.linear_attn.in_proj_b: 8
+model.layers.56.linear_attn.in_proj_a: 8
+model.layers.29.linear_attn.in_proj_qkv: 5
+model.layers.8.linear_attn.in_proj_z: 6
+model.layers.62.linear_attn.in_proj_z: 5
+model.layers.60.linear_attn.in_proj_b: 8
+model.layers.61.linear_attn.in_proj_a: 8
+model.layers.16.mlp.down_proj: 5
+model.layers.61.linear_attn.out_proj: 5
+model.layers.6.linear_attn.out_proj: 6
+model.layers.38.linear_attn.in_proj_a: 8
+model.layers.45.linear_attn.in_proj_qkv: 5
+model.layers.63.mlp.down_proj: 5
+model.layers.10.linear_attn.out_proj: 6
+model.layers.53.linear_attn.in_proj_z: 5
+model.layers.61.linear_attn.in_proj_qkv: 5
+model.layers.50.linear_attn.in_proj_a: 8
+model.layers.52.linear_attn.out_proj: 5
+model.layers.34.linear_attn.in_proj_a: 8
+model.layers.37.linear_attn.in_proj_z: 5
+model.layers.0.linear_attn.in_proj_b: 8
+model.layers.2.linear_attn.in_proj_z: 6
+model.layers.41.linear_attn.in_proj_qkv: 5
+model.layers.1.linear_attn.in_proj_a: 8
+model.layers.48.linear_attn.out_proj: 5
+model.layers.6.linear_attn.in_proj_b: 8
+model.layers.4.linear_attn.in_proj_z: 6
+model.layers.28.linear_attn.out_proj: 5
+model.layers.32.linear_attn.out_proj: 5
+model.layers.13.mlp.down_proj: 5
+model.layers.32.linear_attn.in_proj_a: 8
+model.layers.33.linear_attn.in_proj_b: 8
+model.layers.16.linear_attn.in_proj_z: 6
+model.layers.14.linear_attn.in_proj_b: 8
+model.layers.49.linear_attn.in_proj_z: 5
+model.layers.25.linear_attn.in_proj_qkv: 5
+model.layers.34.linear_attn.out_proj: 5
+model.layers.21.linear_attn.in_proj_z: 6
+model.layers.22.linear_attn.in_proj_a: 8
+model.layers.45.linear_attn.out_proj: 5
+model.layers.34.mlp.down_proj: 5
+model.layers.25.linear_attn.in_proj_b: 8
+model.layers.49.linear_attn.in_proj_qkv: 5
+model.layers.4.mlp.down_proj: 5
+model.layers.41.mlp.down_proj: 5
+model.layers.24.linear_attn.in_proj_a: 8
+model.layers.25.linear_attn.out_proj: 5
+model.layers.10.linear_attn.in_proj_z: 6
+model.layers.12.linear_attn.in_proj_b: 8
+model.layers.20.mlp.down_proj: 5
+model.layers.54.linear_attn.out_proj: 5
+model.layers.55.mlp.down_proj: 5
+model.layers.13.linear_attn.in_proj_a: 8
+model.layers.28.linear_attn.in_proj_a: 8
+model.layers.16.linear_attn.out_proj: 6
+model.layers.29.linear_attn.in_proj_b: 8
+model.layers.0.linear_attn.out_proj: 6
+model.layers.40.linear_attn.in_proj_a: 8
+model.layers.19.mlp.down_proj: 5
+model.layers.41.linear_attn.in_proj_b: 8
+model.layers.25.mlp.down_proj: 5
+model.layers.46.linear_attn.in_proj_a: 8
+model.layers.18.linear_attn.in_proj_b: 8
+model.layers.50.mlp.down_proj: 5
+model.layers.45.linear_attn.in_proj_z: 5
+model.layers.21.linear_attn.in_proj_qkv: 5
+model.layers.31.mlp.down_proj: 5
+model.layers.1.mlp.down_proj: 5
+model.layers.44.mlp.down_proj: 5
+model.layers.57.linear_attn.in_proj_qkv: 5
+model.layers.53.linear_attn.in_proj_a: 8
+model.layers.52.linear_attn.in_proj_b: 8
+model.layers.50.linear_attn.in_proj_z: 5
+model.layers.20.linear_attn.out_proj: 6
+model.layers.18.mlp.down_proj: 5
+model.layers.9.linear_attn.in_proj_qkv: 5
+model.layers.38.linear_attn.in_proj_z: 5
+model.layers.62.linear_attn.in_proj_a: 8
+model.layers.24.mlp.down_proj: 5
+model.layers.51.mlp.down_proj: 5
+model.layers.61.linear_attn.in_proj_z: 5
+model.layers.9.linear_attn.in_proj_b: 8
+model.layers.30.mlp.down_proj: 5
+model.layers.0.mlp.down_proj: 5
+model.layers.8.linear_attn.in_proj_a: 8
+model.layers.40.linear_attn.out_proj: 5
+model.layers.54.linear_attn.in_proj_b: 8
+model.layers.45.mlp.down_proj: 5
+model.layers.56.linear_attn.in_proj_z: 5
+model.layers.32.linear_attn.in_proj_z: 6
+model.layers.30.linear_attn.in_proj_b: 8
+model.layers.4.linear_attn.in_proj_a: 8
+model.layers.18.linear_attn.out_proj: 6
+model.layers.58.linear_attn.in_proj_b: 8
+model.layers.5.linear_attn.in_proj_b: 8
+model.layers.2.linear_attn.in_proj_a: 8
+model.layers.35.mlp.down_proj: 5
+model.layers.13.linear_attn.out_proj: 6
+model.layers.5.mlp.down_proj: 5
+model.layers.1.linear_attn.in_proj_z: 6
+model.layers.40.mlp.down_proj: 5
+model.layers.5.linear_attn.out_proj: 6
+model.layers.34.linear_attn.in_proj_z: 6
+model.layers.21.mlp.down_proj: 5
+model.layers.36.linear_attn.in_proj_b: 8
+model.layers.37.linear_attn.in_proj_a: 8
+model.layers.54.mlp.down_proj: 5
+model.layers.53.linear_attn.in_proj_qkv: 5
+model.layers.62.linear_attn.out_proj: 5
+model.layers.10.linear_attn.in_proj_a: 8
+model.layers.13.linear_attn.in_proj_z: 6
+model.layers.26.linear_attn.in_proj_b: 8
+model.layers.24.linear_attn.in_proj_z: 6
+model.layers.5.linear_attn.in_proj_qkv: 5
+model.layers.37.linear_attn.in_proj_qkv: 5
+model.layers.21.linear_attn.in_proj_a: 8
+model.layers.20.linear_attn.in_proj_b: 8
+model.layers.22.linear_attn.in_proj_z: 6
+model.layers.49.linear_attn.in_proj_a: 8
+model.layers.8.linear_attn.out_proj: 6
+model.layers.16.linear_attn.in_proj_a: 8
+model.layers.12.mlp.down_proj: 5
+model.layers.48.linear_attn.in_proj_b: 8
+model.layers.17.linear_attn.in_proj_b: 8
+model.layers.13.linear_attn.in_proj_qkv: 5
+model.layers.46.linear_attn.out_proj: 5
+model.layers.17.linear_attn.in_proj_qkv: 5
+model.layers.46.linear_attn.in_proj_z: 5
+model.layers.44.linear_attn.in_proj_b: 8
+model.layers.37.linear_attn.out_proj: 5
+model.layers.45.linear_attn.in_proj_a: 8
+model.layers.33.linear_attn.in_proj_qkv: 5
+model.layers.57.linear_attn.out_proj: 5
+model.layers.40.linear_attn.in_proj_z: 5
+model.layers.1.linear_attn.in_proj_qkv: 6
+model.layers.17.mlp.down_proj: 5
+model.layers.42.linear_attn.in_proj_b: 8
+model.layers.62.mlp.down_proj: 4
+model.layers.28.linear_attn.in_proj_z: 6
+model.layers.26.linear_attn.out_proj: 5
+model.layers.48.linear_attn.in_proj_qkv: 5
+model.layers.48.mlp.down_proj: 5
+model.layers.41.linear_attn.out_proj: 5
+model.layers.10.linear_attn.in_proj_qkv: 5
+model.layers.34.linear_attn.in_proj_qkv: 5
+model.layers.52.linear_attn.in_proj_a: 8
+model.layers.30.linear_attn.out_proj: 5
+model.layers.53.linear_attn.in_proj_b: 8
+model.layers.29.mlp.down_proj: 5
+model.layers.24.linear_attn.in_proj_qkv: 5
+model.layers.54.linear_attn.in_proj_a: 8
+model.layers.60.mlp.down_proj: 4
+model.layers.8.linear_attn.in_proj_b: 8
+model.layers.6.linear_attn.in_proj_qkv: 5
+model.layers.57.linear_attn.in_proj_z: 5
+model.layers.9.linear_attn.in_proj_a: 8
+model.layers.50.linear_attn.out_proj: 5
+model.layers.15.mlp.down_proj: 5
+model.layers.58.linear_attn.in_proj_qkv: 5
+model.layers.62.linear_attn.in_proj_b: 8
+model.layers.21.linear_attn.out_proj: 6
+model.layers.60.linear_attn.in_proj_z: 5
+model.layers.59.mlp.down_proj: 4
+model.layers.5.linear_attn.in_proj_a: 8
+model.layers.4.linear_attn.out_proj: 6
+model.layers.58.linear_attn.in_proj_a: 8
+model.layers.6.linear_attn.in_proj_z: 6
+model.layers.4.linear_attn.in_proj_b: 8
+model.layers.8.mlp.down_proj: 5
+model.layers.33.linear_attn.in_proj_z: 6
+model.layers.2.linear_attn.in_proj_qkv: 5
+model.layers.30.linear_attn.in_proj_a: 8
+model.layers.38.mlp.down_proj: 5
+model.layers.20.linear_attn.in_proj_qkv: 5
+model.layers.12.linear_attn.out_proj: 6
+model.layers.37.linear_attn.in_proj_b: 8
+model.layers.36.linear_attn.in_proj_a: 8
+model.layers.30.linear_attn.in_proj_qkv: 5
+model.layers.14.linear_attn.in_proj_qkv: 5
+model.layers.0.linear_attn.in_proj_z: 6
+model.layers.2.linear_attn.in_proj_b: 8
+model.layers.10.mlp.down_proj: 5
+model.layers.44.linear_attn.in_proj_qkv: 5
+model.layers.9.linear_attn.out_proj: 6
+model.layers.26.linear_attn.in_proj_a: 8
+model.layers.25.linear_attn.in_proj_z: 6
+model.layers.38.linear_attn.in_proj_qkv: 5
+model.layers.10.linear_attn.in_proj_b: 8
+model.layers.60.linear_attn.in_proj_qkv: 5
+model.layers.12.linear_attn.in_proj_z: 6
+model.layers.7.mlp.down_proj: 5
+model.layers.28.linear_attn.in_proj_qkv: 5
+model.layers.17.linear_attn.in_proj_a: 8
+model.layers.48.linear_attn.in_proj_a: 8
+model.layers.42.mlp.down_proj: 5
+model.layers.14.linear_attn.out_proj: 6
+model.layers.16.linear_attn.in_proj_b: 8
+model.layers.14.linear_attn.in_proj_z: 6
+model.layers.37.mlp.down_proj: 5
+model.layers.49.linear_attn.in_proj_b: 8
+model.layers.20.linear_attn.in_proj_a: 8
+model.layers.54.linear_attn.in_proj_qkv: 5
+model.layers.56.mlp.down_proj: 5
+model.layers.21.linear_attn.in_proj_b: 8
+model.layers.23.mlp.down_proj: 5
+model.layers.2.linear_attn.out_proj: 6
+model.layers.18.linear_attn.in_proj_z: 6
+model.layers.45.linear_attn.in_proj_b: 8
+model.layers.50.linear_attn.in_proj_qkv: 5
+model.layers.44.linear_attn.in_proj_a: 8
+model.layers.56.linear_attn.out_proj: 5
+model.layers.36.linear_attn.out_proj: 5
+model.layers.53.mlp.down_proj: 5
+model.layers.29.linear_attn.in_proj_z: 6
+model.layers.26.mlp.down_proj: 5
+model.layers.2.mlp.down_proj: 5
+model.layers.41.linear_attn.in_proj_z: 5
+model.layers.18.linear_attn.in_proj_qkv: 5
+model.layers.47.mlp.down_proj: 5
+model.layers.42.linear_attn.in_proj_a: 8
+model.layers.40.linear_attn.in_proj_qkv: 5
+model.layers.32.mlp.down_proj: 5
diff --git a/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/tokenizer_config.json b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6be6ce1780cf43bd47577fbb76e74aee6db89f21
--- /dev/null
+++ b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/tokenizer_config.json
@@ -0,0 +1,31 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "model_max_length": 262144,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/video_preprocessor_config.json b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/video_preprocessor_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..3ba673a5ad7d4d13f54155ecd38b2a94a6dac8fe
--- /dev/null
+++ b/Qwen3.5-27B/ll_bsearch_kl0.005_eap0.985_lin_bw5.19_4-5-6-8bit_grouped_seed42/video_preprocessor_config.json
@@ -0,0 +1,21 @@
+{
+ "size": {
+ "longest_edge": 25165824,
+ "shortest_edge": 4096
+ },
+ "patch_size": 16,
+ "temporal_patch_size": 2,
+ "merge_size": 2,
+ "image_mean": [
+ 0.5,
+ 0.5,
+ 0.5
+ ],
+ "image_std": [
+ 0.5,
+ 0.5,
+ 0.5
+ ],
+ "processor_class": "Qwen3VLProcessor",
+ "video_processor_type": "Qwen3VLVideoProcessor"
+}
\ No newline at end of file
diff --git a/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00001-of-00030.safetensors b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00001-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..33498ea035c18a5c6517a22eb43dabfc4c539128
--- /dev/null
+++ b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00001-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:811f0cc20a13e264e2cf10d9f370974735ebe24e3a5780af578288aa5be49b81
+size 4584408792
diff --git a/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00006-of-00030.safetensors b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00006-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3aadcd20bab03ecc03ddcf834f86d70e56d16b4c
--- /dev/null
+++ b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00006-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a3e05127ff7055e9e5973668121d314c7c1181f9ff27edd0f8ad16c2ac716a1
+size 4664167384
diff --git a/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00010-of-00030.safetensors b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00010-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..34d4d17b45ecdc58fb7712f4b6553791f31b9d6b
--- /dev/null
+++ b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00010-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9addab7bdb80f98acc88a5e4de2f8e811e62ef4c64dfdeb0a3e0d03bc3f10bfc
+size 4664134384
diff --git a/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00011-of-00030.safetensors b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00011-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0c7956c39f909cc5e60973e80edf6163c81ff5c7
--- /dev/null
+++ b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00011-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c836f78c332014ddee87ce1c3e785ad9e745d7b812ae852a0b94797475bc386
+size 4664167384
diff --git a/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00013-of-00030.safetensors b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00013-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a4848522dc5f728d086506fcd78b4991c99259ad
--- /dev/null
+++ b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00013-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9ec46b2d4823e38e963ab3683507f33e7bcfbcf8028f2df4a1abf1f455075ef
+size 4999711704
diff --git a/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00014-of-00030.safetensors b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00014-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..05fba471617702eb8c1cacfbc98ab529c25af700
--- /dev/null
+++ b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00014-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf4b53980cb73c5d6716e5c1e9901cf3e1c25947dad98790d677792b4a38cc1d
+size 4966157032
diff --git a/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00015-of-00030.safetensors b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00015-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0df70cc384fa77bca08789c4a4be1a4ba3caa02d
--- /dev/null
+++ b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00015-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c405a59c9a022b8ea52716d168be1b1336cbeb08dc53369861558b9ff894c5d
+size 4664134384
diff --git a/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00018-of-00030.safetensors b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00018-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0d4c9c095ea50ab896f0ff99f87e9f2e85bdce46
--- /dev/null
+++ b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00018-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb637f85a83e2e1f79c9b90f6918ed08ea03ba0ee75f4fca7e1131234bf43557
+size 4999711704
diff --git a/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00019-of-00030.safetensors b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00019-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6ef2045ef64a5093bdd102797f27fd4220a67d8f
--- /dev/null
+++ b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00019-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f4e18a1dc811fa4074495356d801459e7f7920ae4aebf4453241cab819b4358
+size 4966157032
diff --git a/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00022-of-00030.safetensors b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00022-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a47e40a1a6152eee8dee25caad84862d7280beda
--- /dev/null
+++ b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00022-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e039b5cb65929ec0ad929a65476ee4a2c8ed224b1943127cff37b6416223a9d0
+size 4664167376
diff --git a/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00023-of-00030.safetensors b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00023-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3dc5ab5e9dc6518d00735dc02a73b83ce5ac4f70
--- /dev/null
+++ b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00023-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad75d44ce277e15a5f42b269f948ab5faa5fee15936497efa84d3d82136099d2
+size 4999711704
diff --git a/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00024-of-00030.safetensors b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00024-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b102a8fff6bade4a0dfa0b692dd6ae6b3f010c48
--- /dev/null
+++ b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00024-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:215dc7cab538ca5e56d7da030a01cf8a8c73107eedb42b44a9aac50e611e31d3
+size 4966157032
diff --git a/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00026-of-00030.safetensors b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00026-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ef668a974a7d4cb9affa7b288ee7cb85bd9c643e
--- /dev/null
+++ b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00026-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:145a7b63968a1fcfd0e8bd342563d9dd41a72aeb26cc1f2b80ceb79f71432b19
+size 4664167384
diff --git a/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00029-of-00030.safetensors b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00029-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8687b42550305d81adf93e3eee774539143948b9
--- /dev/null
+++ b/humming/Llama-3.3-70B-Instruct/Llama-3.3-70B-Instruct/ll_bsearch_kl0.01_eap0.985_sha_bw6.55_4-5-6-7-8bit_seed42/model-00029-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0a199692ecf760e90fd1a6f3ebcb867ff8c558303fbe7e818d46ad88f84f2df
+size 4966173512
diff --git a/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00002-of-00014.safetensors b/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00002-of-00014.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..90c8782f45b9e55b668380f0febb7b982d6b50f5
--- /dev/null
+++ b/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00002-of-00014.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89b3955bcd3767c42ddb4e830136893cb2787132d360166239cee957a5de6d23
+size 4875989640
diff --git a/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00004-of-00014.safetensors b/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00004-of-00014.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..89fb44dccc81aa315391619b768ed55a1eeb23bd
--- /dev/null
+++ b/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00004-of-00014.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b9dbedd22b1dbcb9471108e5411ccabff3418c850e0c9149ec8474bbccb174a
+size 4875989696
diff --git a/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00007-of-00014.safetensors b/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00007-of-00014.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0169aaa2ff1660a4f3c2d35a5f126ba26f9aec7e
--- /dev/null
+++ b/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00007-of-00014.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9cb9ec01b52fa85fbc37c443e439d4ed2a6124d82d6641f020b4c3e41ccc4134
+size 4875989696
diff --git a/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00010-of-00014.safetensors b/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00010-of-00014.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f6ae36eb94ed0dd7e8c528c4a44cddd97120a6d9
--- /dev/null
+++ b/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00010-of-00014.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df0e39f9035e52a8c021adef81f220433c233f17eb62ed48d6795784f0490238
+size 4875989696
diff --git a/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00011-of-00014.safetensors b/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00011-of-00014.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e8782469b620fa1df4d14da47789d6eff4121cb5
--- /dev/null
+++ b/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00011-of-00014.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fad98056554d27ef2154efff9c1f9ad89241371b02a75d03ad81c5b3cb6e2562
+size 4875989696
diff --git a/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00012-of-00014.safetensors b/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00012-of-00014.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..993667737e754245f9f73457c6ac2d84c3112249
--- /dev/null
+++ b/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00012-of-00014.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25e050568252c82d2e64d5d62264783ad888b0df355ac4673e91fbc04d99c44f
+size 4875989696
diff --git a/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00013-of-00014.safetensors b/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00013-of-00014.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b3bee82bd5110870abde06c33ff42f1aefb5a4ee
--- /dev/null
+++ b/humming/Qwen3-32B/Qwen3-32B/ll_bsearch_kl0.005_eap0.99_sha_bw4.98_4-5-6-7-8bit_seed42/model-00013-of-00014.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23b7fb013d4449db09e80adabe125418e5c2ff21efb435259055adeb9c19bd1a
+size 4875989696
diff --git a/humming/Qwen3-32B/Qwen3-32B/ll_bw8.16_8bit/model-00001-of-00014.safetensors b/humming/Qwen3-32B/Qwen3-32B/ll_bw8.16_8bit/model-00001-of-00014.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c2b0d73382aa9ee35ac1d3ef3b7f43e233394e76
--- /dev/null
+++ b/humming/Qwen3-32B/Qwen3-32B/ll_bw8.16_8bit/model-00001-of-00014.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a4d4e7c83617d9ae753a7d29eb5198e3b7f56989a692f25538d435dbab5a383
+size 4932307544
diff --git a/humming/Qwen3-32B/Qwen3-32B/ll_bw8.16_8bit/model-00002-of-00014.safetensors b/humming/Qwen3-32B/Qwen3-32B/ll_bw8.16_8bit/model-00002-of-00014.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bda3cf6b20bb390689191cbf4373bf60648a69e9
--- /dev/null
+++ b/humming/Qwen3-32B/Qwen3-32B/ll_bw8.16_8bit/model-00002-of-00014.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd82d75f6714709eca1ee19e634013a35f38ff59bfa5c0d82d60b2098093440c
+size 4875989640
diff --git a/humming/Qwen3-32B/Qwen3-32B/ll_bw8.16_8bit/model-00008-of-00014.safetensors b/humming/Qwen3-32B/Qwen3-32B/ll_bw8.16_8bit/model-00008-of-00014.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0f7fce98453bea8587be76074085f243c47a4878
--- /dev/null
+++ b/humming/Qwen3-32B/Qwen3-32B/ll_bw8.16_8bit/model-00008-of-00014.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f49b39e539d682e774efdf69e79bb30361478275dea6206b6c1841f1897c89f
+size 4875989696
diff --git a/humming/Qwen3-32B/Qwen3-32B/ll_bw8.16_8bit/model-00013-of-00014.safetensors b/humming/Qwen3-32B/Qwen3-32B/ll_bw8.16_8bit/model-00013-of-00014.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b3bee82bd5110870abde06c33ff42f1aefb5a4ee
--- /dev/null
+++ b/humming/Qwen3-32B/Qwen3-32B/ll_bw8.16_8bit/model-00013-of-00014.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23b7fb013d4449db09e80adabe125418e5c2ff21efb435259055adeb9c19bd1a
+size 4875989696
diff --git a/humming/Qwen3-8B/ll_sha_bw8.16_8bit/README.md b/humming/Qwen3-8B/ll_sha_bw8.16_8bit/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d70d302176840f0eaa956b5bf961008db5fadb16
--- /dev/null
+++ b/humming/Qwen3-8B/ll_sha_bw8.16_8bit/README.md
@@ -0,0 +1,9 @@
+# Quantized Model Checkpoint
+
+**Base model:** Qwen/Qwen3-8B
+
+**Average bitwidth:** 8.156
+
+**Sensitivity method:** shapley
+
+See `quantization_config.txt` for full configuration details.
diff --git a/humming/Qwen3-8B/ll_sha_bw8.16_8bit/added_tokens.json b/humming/Qwen3-8B/ll_sha_bw8.16_8bit/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..b54f9135e44c1e81047e8d05cb027af8bc039eed
--- /dev/null
+++ b/humming/Qwen3-8B/ll_sha_bw8.16_8bit/added_tokens.json
@@ -0,0 +1,28 @@
+{
+ "": 151668,
+ "": 151658,
+ "": 151666,
+ "": 151667,
+ "": 151657,
+ "": 151665,
+ "<|box_end|>": 151649,
+ "<|box_start|>": 151648,
+ "<|endoftext|>": 151643,
+ "<|file_sep|>": 151664,
+ "<|fim_middle|>": 151660,
+ "<|fim_pad|>": 151662,
+ "<|fim_prefix|>": 151659,
+ "<|fim_suffix|>": 151661,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644,
+ "<|image_pad|>": 151655,
+ "<|object_ref_end|>": 151647,
+ "<|object_ref_start|>": 151646,
+ "<|quad_end|>": 151651,
+ "<|quad_start|>": 151650,
+ "<|repo_name|>": 151663,
+ "<|video_pad|>": 151656,
+ "<|vision_end|>": 151653,
+ "<|vision_pad|>": 151654,
+ "<|vision_start|>": 151652
+}
diff --git a/humming/Qwen3-8B/ll_sha_bw8.16_8bit/chat_template.jinja b/humming/Qwen3-8B/ll_sha_bw8.16_8bit/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..01be9b307daa2d425f7c168c9fb145a286e0afb4
--- /dev/null
+++ b/humming/Qwen3-8B/ll_sha_bw8.16_8bit/chat_template.jinja
@@ -0,0 +1,89 @@
+{%- if tools %}
+ {{- '<|im_start|>system\n' }}
+ {%- if messages[0].role == 'system' %}
+ {{- messages[0].content + '\n\n' }}
+ {%- endif %}
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+ {%- if message.content is string %}
+ {%- set content = message.content %}
+ {%- else %}
+ {%- set content = '' %}
+ {%- endif %}
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '' in content %}
+ {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %}
+ {%- set content = content.split('')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {%- if loop.last or (not loop.last and reasoning_content) %}
+ {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if (loop.first and content) or (not loop.first) %}
+ {{- '\n' }}
+ {%- endif %}
+ {%- if tool_call.function %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {{- '\n{"name": "' }}
+ {{- tool_call.name }}
+ {{- '", "arguments": ' }}
+ {%- if tool_call.arguments is string %}
+ {{- tool_call.arguments }}
+ {%- else %}
+ {{- tool_call.arguments | tojson }}
+ {%- endif %}
+ {{- '}\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n\n' }}
+ {{- content }}
+ {{- '\n' }}
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is false %}
+ {{- '\n\n\n\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/humming/Qwen3-8B/ll_sha_bw8.16_8bit/humming_online_quant_config.json b/humming/Qwen3-8B/ll_sha_bw8.16_8bit/humming_online_quant_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..5d73fd43702e20990aee6d43cadcbe7a32521689
--- /dev/null
+++ b/humming/Qwen3-8B/ll_sha_bw8.16_8bit/humming_online_quant_config.json
@@ -0,0 +1,5 @@
+{
+ "quant_method": "gptq",
+ "bits": 8,
+ "group_size": 128
+}
\ No newline at end of file
diff --git a/humming/Qwen3-8B/ll_sha_bw8.16_8bit/model.safetensors.index.json b/humming/Qwen3-8B/ll_sha_bw8.16_8bit/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..991332e2a7dada9479948259f715fe1f6c69db54
--- /dev/null
+++ b/humming/Qwen3-8B/ll_sha_bw8.16_8bit/model.safetensors.index.json
@@ -0,0 +1,407 @@
+{
+ "metadata": {
+ "total_parameters": 8190735360,
+ "total_size": 16381470720
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00004-of-00004.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors",
+ "model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
+ "model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
+ "model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
+ "model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+ "model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
+ "model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
+ "model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
+ "model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+ "model.norm.weight": "model-00004-of-00004.safetensors"
+ }
+}
diff --git a/humming/Qwen3-8B/ll_sha_bw8.16_8bit/quantization_config.txt b/humming/Qwen3-8B/ll_sha_bw8.16_8bit/quantization_config.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a36288aaeb969f74481af6be05d56817dbe7d373
--- /dev/null
+++ b/humming/Qwen3-8B/ll_sha_bw8.16_8bit/quantization_config.txt
@@ -0,0 +1,266 @@
+# Model: Qwen/Qwen3-8B
+# Layer directory: /nfs/scistore19/alistgrp/mhelcig/local/data/search/4_5_6_7_8bit_asym_g128/Qwen3-8B/4bit
+# Sensitivity method: shapley
+# Estimation method: permutation_separate
+# Available bitwidths: [4, 5, 6, 7, 8]
+# Bitwidth map: {4: 4.156, 5: 5.156, 6: 6.156, 7: 7.156, 8: 8.156}
+#
+# Average bitwidth: 8.156
+# Total params: 8
+# Total bits: 8
+#
+# Bitwidth distribution:
+# 8-bit: 252
+#
+model.layers.23.self_attn.k_proj: 8
+model.layers.22.self_attn.o_proj: 8
+model.layers.4.self_attn.o_proj: 8
+model.layers.1.mlp.down_proj: 8
+model.layers.11.mlp.up_proj: 8
+model.layers.5.self_attn.k_proj: 8
+model.layers.14.self_attn.q_proj: 8
+model.layers.17.mlp.down_proj: 8
+model.layers.33.mlp.gate_proj: 8
+model.layers.5.self_attn.v_proj: 8
+model.layers.23.self_attn.v_proj: 8
+model.layers.27.mlp.gate_proj: 8
+model.layers.2.mlp.up_proj: 8
+model.layers.15.self_attn.q_proj: 8
+model.layers.23.self_attn.o_proj: 8
+model.layers.21.mlp.up_proj: 8
+model.layers.22.self_attn.k_proj: 8
+model.layers.4.mlp.down_proj: 8
+model.layers.4.self_attn.k_proj: 8
+model.layers.5.self_attn.o_proj: 8
+model.layers.31.mlp.up_proj: 8
+model.layers.4.self_attn.v_proj: 8
+model.layers.22.self_attn.v_proj: 8
+model.layers.22.mlp.gate_proj: 8
+model.layers.8.mlp.gate_proj: 8
+model.layers.12.mlp.down_proj: 8
+model.layers.7.mlp.gate_proj: 8
+model.layers.35.self_attn.o_proj: 8
+model.layers.21.self_attn.v_proj: 8
+model.layers.34.self_attn.k_proj: 8
+model.layers.7.self_attn.v_proj: 8
+model.layers.7.self_attn.k_proj: 8
+model.layers.6.self_attn.o_proj: 8
+model.layers.34.self_attn.v_proj: 8
+model.layers.20.self_attn.o_proj: 8
+model.layers.13.mlp.up_proj: 8
+model.layers.21.self_attn.k_proj: 8
+model.layers.35.mlp.down_proj: 8
+model.layers.28.self_attn.q_proj: 8
+model.layers.11.mlp.gate_proj: 8
+model.layers.16.self_attn.q_proj: 8
+model.layers.21.mlp.down_proj: 8
+model.layers.35.self_attn.k_proj: 8
+model.layers.33.mlp.up_proj: 8
+model.layers.20.self_attn.v_proj: 8
+model.layers.34.self_attn.o_proj: 8
+model.layers.6.self_attn.v_proj: 8
+model.layers.28.mlp.gate_proj: 8
+model.layers.2.mlp.gate_proj: 8
+model.layers.18.mlp.down_proj: 8
+model.layers.17.self_attn.q_proj: 8
+model.layers.0.mlp.up_proj: 8
+model.layers.24.mlp.down_proj: 8
+model.layers.23.mlp.up_proj: 8
+model.layers.7.self_attn.o_proj: 8
+model.layers.6.self_attn.k_proj: 8
+model.layers.29.self_attn.q_proj: 8
+model.layers.20.self_attn.k_proj: 8
+model.layers.30.mlp.down_proj: 8
+model.layers.14.mlp.gate_proj: 8
+model.layers.21.self_attn.o_proj: 8
+model.layers.35.self_attn.v_proj: 8
+model.layers.6.mlp.up_proj: 8
+model.layers.9.self_attn.q_proj: 8
+model.layers.0.self_attn.k_proj: 8
+model.layers.9.mlp.up_proj: 8
+model.layers.1.self_attn.o_proj: 8
+model.layers.33.self_attn.v_proj: 8
+model.layers.27.self_attn.o_proj: 8
+model.layers.26.self_attn.k_proj: 8
+model.layers.19.self_attn.o_proj: 8
+model.layers.0.mlp.gate_proj: 8
+model.layers.11.self_attn.q_proj: 8
+model.layers.18.self_attn.k_proj: 8
+model.layers.25.mlp.up_proj: 8
+model.layers.26.mlp.down_proj: 8
+model.layers.18.self_attn.v_proj: 8
+model.layers.35.mlp.up_proj: 8
+model.layers.32.self_attn.o_proj: 8
+model.layers.26.self_attn.v_proj: 8
+model.layers.33.self_attn.k_proj: 8
+model.layers.32.mlp.down_proj: 8
+model.layers.16.mlp.gate_proj: 8
+model.layers.0.self_attn.v_proj: 8
+model.layers.15.mlp.up_proj: 8
+model.layers.19.self_attn.k_proj: 8
+model.layers.10.self_attn.q_proj: 8
+model.layers.5.mlp.gate_proj: 8
+model.layers.18.self_attn.o_proj: 8
+model.layers.0.self_attn.o_proj: 8
+model.layers.1.self_attn.k_proj: 8
+model.layers.8.self_attn.q_proj: 8
+model.layers.27.self_attn.k_proj: 8
+model.layers.26.self_attn.o_proj: 8
+model.layers.32.self_attn.v_proj: 8
+model.layers.9.mlp.down_proj: 8
+model.layers.32.self_attn.k_proj: 8
+model.layers.13.mlp.gate_proj: 8
+model.layers.27.self_attn.v_proj: 8
+model.layers.33.self_attn.o_proj: 8
+model.layers.1.self_attn.v_proj: 8
+model.layers.23.mlp.down_proj: 8
+model.layers.19.self_attn.v_proj: 8
+model.layers.2.self_attn.v_proj: 8
+model.layers.6.mlp.down_proj: 8
+model.layers.31.self_attn.k_proj: 8
+model.layers.24.self_attn.v_proj: 8
+model.layers.30.self_attn.o_proj: 8
+model.layers.24.self_attn.k_proj: 8
+model.layers.4.mlp.up_proj: 8
+model.layers.25.self_attn.o_proj: 8
+model.layers.31.self_attn.v_proj: 8
+model.layers.20.mlp.gate_proj: 8
+model.layers.3.self_attn.o_proj: 8
+model.layers.2.self_attn.k_proj: 8
+model.layers.10.mlp.down_proj: 8
+model.layers.34.mlp.gate_proj: 8
+model.layers.13.self_attn.q_proj: 8
+model.layers.28.mlp.up_proj: 8
+model.layers.27.mlp.up_proj: 8
+model.layers.3.mlp.down_proj: 8
+model.layers.3.self_attn.v_proj: 8
+model.layers.19.mlp.gate_proj: 8
+model.layers.31.self_attn.o_proj: 8
+model.layers.25.self_attn.v_proj: 8
+model.layers.30.self_attn.k_proj: 8
+model.layers.29.mlp.down_proj: 8
+model.layers.18.mlp.up_proj: 8
+model.layers.15.mlp.down_proj: 8
+model.layers.17.mlp.up_proj: 8
+model.layers.31.mlp.gate_proj: 8
+model.layers.12.self_attn.q_proj: 8
+model.layers.30.self_attn.v_proj: 8
+model.layers.24.self_attn.o_proj: 8
+model.layers.25.self_attn.k_proj: 8
+model.layers.25.mlp.gate_proj: 8
+model.layers.3.self_attn.k_proj: 8
+model.layers.2.self_attn.o_proj: 8
+model.layers.2.mlp.down_proj: 8
+model.layers.16.self_attn.o_proj: 8
+model.layers.18.mlp.gate_proj: 8
+model.layers.17.self_attn.k_proj: 8
+model.layers.6.self_attn.q_proj: 8
+model.layers.28.mlp.down_proj: 8
+model.layers.28.self_attn.o_proj: 8
+model.layers.20.self_attn.q_proj: 8
+model.layers.29.self_attn.k_proj: 8
+model.layers.29.self_attn.v_proj: 8
+model.layers.14.mlp.down_proj: 8
+model.layers.35.self_attn.q_proj: 8
+model.layers.30.mlp.gate_proj: 8
+model.layers.24.mlp.gate_proj: 8
+model.layers.14.mlp.up_proj: 8
+model.layers.17.self_attn.v_proj: 8
+model.layers.7.self_attn.q_proj: 8
+model.layers.28.self_attn.k_proj: 8
+model.layers.21.self_attn.q_proj: 8
+model.layers.29.self_attn.o_proj: 8
+model.layers.7.mlp.down_proj: 8
+model.layers.16.self_attn.k_proj: 8
+model.layers.17.self_attn.o_proj: 8
+model.layers.34.mlp.up_proj: 8
+model.layers.21.mlp.gate_proj: 8
+model.layers.16.self_attn.v_proj: 8
+model.layers.24.mlp.up_proj: 8
+model.layers.34.self_attn.q_proj: 8
+model.layers.8.mlp.up_proj: 8
+model.layers.11.mlp.down_proj: 8
+model.layers.7.mlp.up_proj: 8
+model.layers.35.mlp.gate_proj: 8
+model.layers.28.self_attn.v_proj: 8
+model.layers.4.mlp.gate_proj: 8
+model.layers.16.mlp.up_proj: 8
+model.layers.15.self_attn.v_proj: 8
+model.layers.19.mlp.up_proj: 8
+model.layers.8.mlp.down_proj: 8
+model.layers.12.mlp.gate_proj: 8
+model.layers.15.self_attn.k_proj: 8
+model.layers.14.self_attn.o_proj: 8
+model.layers.22.self_attn.q_proj: 8
+model.layers.22.mlp.down_proj: 8
+model.layers.4.self_attn.q_proj: 8
+model.layers.14.self_attn.v_proj: 8
+model.layers.26.mlp.up_proj: 8
+model.layers.29.mlp.up_proj: 8
+model.layers.5.mlp.up_proj: 8
+model.layers.1.mlp.gate_proj: 8
+model.layers.27.mlp.down_proj: 8
+model.layers.23.self_attn.q_proj: 8
+model.layers.5.self_attn.q_proj: 8
+model.layers.33.mlp.down_proj: 8
+model.layers.17.mlp.gate_proj: 8
+model.layers.15.self_attn.o_proj: 8
+model.layers.14.self_attn.k_proj: 8
+model.layers.12.self_attn.k_proj: 8
+model.layers.13.self_attn.o_proj: 8
+model.layers.29.mlp.gate_proj: 8
+model.layers.25.self_attn.q_proj: 8
+model.layers.30.mlp.up_proj: 8
+model.layers.3.mlp.gate_proj: 8
+model.layers.19.mlp.down_proj: 8
+model.layers.3.self_attn.q_proj: 8
+model.layers.20.mlp.up_proj: 8
+model.layers.30.self_attn.q_proj: 8
+model.layers.25.mlp.down_proj: 8
+model.layers.12.self_attn.v_proj: 8
+model.layers.3.mlp.up_proj: 8
+model.layers.31.mlp.down_proj: 8
+model.layers.15.mlp.gate_proj: 8
+model.layers.24.self_attn.q_proj: 8
+model.layers.2.self_attn.q_proj: 8
+model.layers.6.mlp.gate_proj: 8
+model.layers.12.self_attn.o_proj: 8
+model.layers.13.self_attn.k_proj: 8
+model.layers.13.self_attn.v_proj: 8
+model.layers.34.mlp.down_proj: 8
+model.layers.10.mlp.gate_proj: 8
+model.layers.10.mlp.up_proj: 8
+model.layers.20.mlp.down_proj: 8
+model.layers.31.self_attn.q_proj: 8
+model.layers.22.mlp.up_proj: 8
+model.layers.32.self_attn.q_proj: 8
+model.layers.8.self_attn.v_proj: 8
+model.layers.5.mlp.down_proj: 8
+model.layers.10.self_attn.v_proj: 8
+model.layers.1.mlp.up_proj: 8
+model.layers.11.self_attn.o_proj: 8
+model.layers.10.self_attn.k_proj: 8
+model.layers.19.self_attn.q_proj: 8
+model.layers.23.mlp.gate_proj: 8
+model.layers.8.self_attn.k_proj: 8
+model.layers.32.mlp.up_proj: 8
+model.layers.1.self_attn.q_proj: 8
+model.layers.9.self_attn.o_proj: 8
+model.layers.9.mlp.gate_proj: 8
+model.layers.13.mlp.down_proj: 8
+model.layers.27.self_attn.q_proj: 8
+model.layers.0.mlp.down_proj: 8
+model.layers.11.self_attn.v_proj: 8
+model.layers.33.self_attn.q_proj: 8
+model.layers.9.self_attn.v_proj: 8
+model.layers.12.mlp.up_proj: 8
+model.layers.8.self_attn.o_proj: 8
+model.layers.0.self_attn.q_proj: 8
+model.layers.9.self_attn.k_proj: 8
+model.layers.26.self_attn.q_proj: 8
+model.layers.16.mlp.down_proj: 8
+model.layers.32.mlp.gate_proj: 8
+model.layers.18.self_attn.q_proj: 8
+model.layers.11.self_attn.k_proj: 8
+model.layers.10.self_attn.o_proj: 8
+model.layers.26.mlp.gate_proj: 8