# mergekit SLERP merge configuration: interpolates two 32-layer Mistral-family
# models. `t` controls the interpolation weight per tensor group (0 = base_model,
# 1 = the other model); per-filter value lists are interpolated across layer depth.
slices:
  - sources:
      - model: LeroyDyer/Mixtral_AI_CyberBrain_3_0
        layer_range: [0, 32]
      - model: ezelikman/quietstar-8-ahead
        layer_range: [0, 32]
# or, the equivalent models: syntax:
# models:
#   - model: mistralai/Mistral-7B-Instruct-v0.2
#     # LARGER MODEL MUST BE BASE
#   - model: yanismiraoui/Yarn-Mistral-7b-128k-sharded
merge_method: slerp
base_model: ezelikman/quietstar-8-ahead
parameters:
  t:
    # attention tensors lean toward the base at the ends, the other model mid-stack
    - filter: self_attn
      value: [0.3, 0.6, 0.3786, 0.6, 0.6]
    # mlp tensors use the complementary profile
    - filter: mlp
      value: [0.7, 0.4, 0.6, 0.4, 0.7]
    - value: 0.5 # fallback for rest of tensors
dtype: float16