---
# Mergekit merge recipe, adapted from sophosympatheia/New-Dawn-Llama-3.1-70B-v1.1.
# Blends Llama-3-Swallow into Llama-3.1-70B-Instruct with della_linear, weighting
# middle layers of Swallow's attention/MLP projections (the 11-element vectors are
# per-layer-group weights: 0 at the ends, active in the middle).
merge_method: della_linear
base_model: NousResearch/Meta-Llama-3.1-70B-Instruct
models:
  # Donor model: contributes only mid-network v/o/up/gate/down projections.
  - model: tokyotech-llm/Llama-3-Swallow-70B-v0.1
    parameters:
      weight:
        - filter: v_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: o_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: up_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: gate_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: down_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        # Fallback weight for all tensors not matched by a filter above.
        - value: 0
      density: 0.25
      epsilon: 0.05
      lambda: 1.0
  # Base/instruct model: full weight everywhere; density and epsilon vary by layer.
  - model: NousResearch/Meta-Llama-3.1-70B-Instruct
    parameters:
      weight: 1.0
      density:
        - filter: v_proj
          value: [1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1]
        - filter: o_proj
          value: [1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1]
        - filter: up_proj
          value: [1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1]
        - filter: gate_proj
          value: [1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1]
        - filter: down_proj
          value: [1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1]
        # Fallback density for unmatched tensors.
        - value: 0.5
      epsilon:
        - filter: v_proj
          value: [0, 0, 0.05, 0.05, 0.07, 0.1, 0.07, 0.05, 0.05, 0, 0]
        - filter: o_proj
          value: [0, 0, 0.05, 0.05, 0.07, 0.1, 0.07, 0.05, 0.05, 0, 0]
        - filter: up_proj
          value: [0, 0, 0.05, 0.05, 0.07, 0.1, 0.07, 0.05, 0.05, 0, 0]
        - filter: gate_proj
          value: [0, 0, 0.05, 0.05, 0.07, 0.1, 0.07, 0.05, 0.05, 0, 0]
        - filter: down_proj
          value: [0, 0, 0.05, 0.05, 0.07, 0.1, 0.07, 0.05, 0.05, 0, 0]
        # Fallback epsilon for unmatched tensors.
        - value: 0.1
      lambda: 1.0
dtype: float16
tokenizer_source: base