metadata
license: apache-2.0
tags:  # the last two entries are the models named in the final slerp config
  - merge
  - mergekit
  - Gille/StrangeMerges_32-7B-slerp
  - yam-peleg/Experiment26-7B
shadow-clown-7B-slerp
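shadow-clown-7B-slerp is a SLERP merge (see `merge_method: slerp` in the configuration below) of the following models using mergekit: Gille/StrangeMerges_32-7B-slerp and yam-peleg/Experiment26-7B.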
See the paper "Language Models are Super Mario: Absorbing Abilities from Homologous Models as a Free Lunch" for more on the DARE method.
🧩 Configuration
slices:  # one slice combining the same layer range of both source models
  - sources:
      - model: CorticalStack/pastiche-crown-clown-7b-dare-dpo
        layer_range: [0, 32]
      - model: MSL7/INEX12-7b
        layer_range: [0, 32]
merge_method: slerp  # spherical linear interpolation between the two sources
base_model: CorticalStack/pastiche-crown-clown-7b-dare-dpo
parameters:
  t:  # interpolation factor; per mergekit docs 0 = base_model, 1 = other model
    - filter: self_attn  # list of values = gradient for self_attn tensors
      value: [0, 0.5, 0.3, 0.7, 1]
    - filter: mlp  # mirrored gradient for mlp tensors
      value: [1, 0.5, 0.7, 0.3, 0]
    - value: 0.5  # fallback for tensors matched by no filter
dtype: bfloat16
slices:  # one slice combining the same layer range of both source models
  - sources:
      - model: liminerity/M7-7b
        layer_range: [0, 32]
      - model: CorticalStack/pastiche-crown-clown-7b-dare-dpo
        layer_range: [0, 32]
merge_method: slerp  # spherical linear interpolation between the two sources
base_model: liminerity/M7-7b
parameters:
  t:  # interpolation factor; per mergekit docs 0 = base_model, 1 = other model
    - filter: self_attn  # list of values = gradient for self_attn tensors
      value: [0, 0.5, 0.3, 0.7, 1]
    - filter: mlp  # mirrored gradient for mlp tensors
      value: [1, 0.5, 0.7, 0.3, 0]
    - value: 0.5  # fallback for tensors matched by no filter
dtype: bfloat16
slices:  # one slice combining the same layer range of both source models
  - sources:
      - model: ammarali32/multi_verse_model
        layer_range: [0, 32]
      - model: liminerity/merge
        layer_range: [0, 32]
merge_method: slerp  # spherical linear interpolation between the two sources
base_model: ammarali32/multi_verse_model
parameters:
  t:  # interpolation factor; per mergekit docs 0 = base_model, 1 = other model
    - filter: self_attn  # list of values = gradient for self_attn tensors
      value: [0, 0.5, 0.3, 0.7, 1]
    - filter: mlp  # mirrored gradient for mlp tensors
      value: [1, 0.5, 0.7, 0.3, 0]
    - value: 0.5  # fallback for tensors matched by no filter
dtype: bfloat16
slices:  # one slice combining the same layer range of both source models
  - sources:
      - model: Gille/StrangeMerges_32-7B-slerp
        layer_range: [0, 32]
      - model: yam-peleg/Experiment26-7B
        layer_range: [0, 32]
merge_method: slerp  # spherical linear interpolation between the two sources
base_model: Gille/StrangeMerges_32-7B-slerp
parameters:
  t:  # interpolation factor; per mergekit docs 0 = base_model, 1 = other model
    - filter: self_attn  # list of values = gradient for self_attn tensors
      value: [0, 0.5, 0.3, 0.7, 1]
    - filter: mlp  # mirrored gradient for mlp tensors
      value: [1, 0.5, 0.7, 0.3, 0]
    - value: 0.5  # fallback for tensors matched by no filter
dtype: bfloat16