# Merge configuration: a `linear` merge over a single slice with two sources.
#
# NOTE(review): both sources below name the same checkpoint, the same
# layer_range, and the same weight. Earlier comments in this file described a
# "first half" / "second half" split with a reduced weight on the first half,
# but the values never reflected that. Confirm the intended layer ranges and
# weights before relying on this config.
dtype: bfloat16
merge_method: linear
slices:
  - sources:
      # Source 1: layer_range [0, 32] of the model, weight 1.0.
      - layer_range: [0, 32]
        model: NousResearch/Meta-Llama-3-8B-Instruct
        parameters:
          weight: 1.0
      # Source 2: currently identical to source 1 (same model, same range,
      # same weight).
      # NOTE(review): presumably a second model or a different weight was
      # intended here — TODO confirm.
      - layer_range: [0, 32]
        model: NousResearch/Meta-Llama-3-8B-Instruct
        parameters:
          weight: 1.0