---
# Document 1: builds the intermediate model "Superswallow-7b-v0.2-flavor".
# Uses merge_method dare_ties with Swallow-7b-instruct-hf as the base model
# and tulu-2-dpo-7b as the only model carrying merge parameters.
# NOTE(review): each `value` list presumably describes a gradient that the
# merge tool interpolates across the layer stack -- confirm against the
# tool's documentation before editing individual entries.
models:
  - model: tokyotech-llm/Swallow-7b-instruct-hf
    # no parameters necessary for base model
  - model: allenai/tulu-2-dpo-7b  # follow user intent
    parameters:
      density: 1
      weight:
        # Entries are matched by tensor-name filter; the last entry has no
        # filter and serves as the fallback.
        - filter: mlp.down_proj
          value: [0.45, 0.10, 0.45, 0.10, 0.45, 0.10, 0.45, 0.10, 0.10]
        - filter: mlp.gate_proj
          value: [0.70, 0.10, 0.45, 0.10, 0.45, 0.10, 0.45, 0.10, 0.10]
        - filter: mlp.up_proj
          value: [0.70, 0.10, 0.45, 0.10, 0.45, 0.10, 0.45, 0.10, 0.10]
        - filter: self_attn
          value: [0.70, 0.45, 0.10, 0.45, 0.10, 0.45, 0.10, 0.45, 0.45]
        - value: 0  # fallback for rest of tensors.
merge_method: dare_ties
base_model: tokyotech-llm/Swallow-7b-instruct-hf
dtype: bfloat16
tokenizer_source: union
name: Superswallow-7b-v0.2-flavor
---
# Document 2: builds the final model "Superswallow-7b-v0.2".
# Uses merge_method slerp between Superswallow-7b-baseline (the base model)
# and the "Superswallow-7b-v0.2-flavor" result named in document 1, over
# layer_range [0, 32] of both sources.
slices:
  - sources:
      - model: nitky/Superswallow-7b-baseline
        layer_range: [0, 32]
      - model: Superswallow-7b-v0.2-flavor
        layer_range: [0, 32]
merge_method: slerp
base_model: nitky/Superswallow-7b-baseline
parameters:
  t:  # model stabilization
    # self_attn and mlp use mirrored lists (0 -> 1 versus 1 -> 0).
    - filter: self_attn
      value: [0, 0.5, 0.3, 0.7, 1]
    - filter: mlp
      value: [1, 0.5, 0.7, 0.3, 0]
    - value: 0.5  # fallback for rest of tensors
dtype: bfloat16
name: Superswallow-7b-v0.2