---
# Layer-wise SLERP merge recipe for two MISCHIEVOUS-12B checkpoints.
# The key layout looks like a mergekit configuration - TODO confirm the
# consuming tool.
#
# Layers 0-40 are split into five 8-layer slices. Each slice pairs the same
# layer range from both models and sets its own interpolation parameter `t`.
# A scalar `t` is a single value; a list such as [0.75, 0.85, 0.75] is
# presumably a gradient spread across the layers of the slice - verify
# against the tool's documentation.
slices:
  # Layers 0-8: constant t.
  - sources:
      - model: bamec66557/MISCHIEVOUS-12B-Mix_0.4v
        layer_range: [0, 8]
      - model: bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V
        layer_range: [0, 8]
    parameters:
      t:
        - value: 0.72

  # Layers 8-16: t peaks mid-slice.
  - sources:
      - model: bamec66557/MISCHIEVOUS-12B-Mix_0.4v
        layer_range: [8, 16]
      - model: bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V
        layer_range: [8, 16]
    parameters:
      t:
        - value: [0.75, 0.85, 0.75]

  # Layers 16-24: default t plus a feed_forward-specific entry.
  # NOTE(review): the unfiltered entry comes first. If the consumer picks the
  # first matching entry, the `feed_forward` entry below is never reached.
  # Also confirm that `feed_forward` appears in this architecture's tensor
  # names, and that t = 1.1 (above 1.0, i.e. extrapolation) is intended.
  - sources:
      - model: bamec66557/MISCHIEVOUS-12B-Mix_0.4v
        layer_range: [16, 24]
      - model: bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V
        layer_range: [16, 24]
    parameters:
      t:
        - value: [0.85, 1.0, 0.85]
        - filter: feed_forward
          value: [0.9, 1.0, 1.1]

  # Layers 24-32: t peaks at 1.0 mid-slice.
  - sources:
      - model: bamec66557/MISCHIEVOUS-12B-Mix_0.4v
        layer_range: [24, 32]
      - model: bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V
        layer_range: [24, 32]
    parameters:
      t:
        - value: [0.95, 1.0, 0.95]

  # Layers 32-40: default t of 1.0 plus a self_attn-specific entry.
  # NOTE(review): same ordering concern as layers 16-24 - the unfiltered
  # entry precedes the `self_attn` entry. t = 1.08 is also above 1.0; confirm
  # both are intended.
  - sources:
      - model: bamec66557/MISCHIEVOUS-12B-Mix_0.4v
        layer_range: [32, 40]
      - model: bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V
        layer_range: [32, 40]
    parameters:
      t:
        - value: 1.0
        - filter: self_attn
          value: [0.92, 1.0, 1.08]

merge_method: slerp
base_model: bamec66557/MISCHIEVOUS-12B-Mix_0.4v

# NOTE(review): confirm the consuming tool actually reads `regularization`
# and `postprocessing`; unrecognised top-level keys may be silently ignored.
regularization:
  - method: weight_clipping
    clip_range: [-0.04, 0.04]

postprocessing:
  - operation: gaussian_smoothing
    sigma: 0.9
  - operation: normalize
  # NOTE(review): int8 here sits alongside `dtype: bfloat16` below - confirm
  # which one governs the saved weights.
  - operation: quantize
    target_dtype: int8

dtype: bfloat16