---
# Model-merge configuration: combines three Qwen2.5-14B-derived checkpoints
# with the `della_linear` method, using CultriX/Qwen2.5-14B-Wernickev3 as both
# the base model and the tokenizer source.
#
# NOTE(review): this file was rebuilt from a flattened copy in which every line
# carried a trailing "| |" table residue and all indentation was lost. The
# nesting below is a reconstruction; confirm it against the original config.

merge_method: della_linear
base_model: CultriX/Qwen2.5-14B-Wernickev3
dtype: bfloat16

# Global parameters for the merge method.
parameters:
  epsilon: 0.03  # Refined for sharper parameter scaling.
  lambda: 1.1  # Balances blending while emphasizing significant contributions.
  normalize: true  # Ensures stable parameter integration across models.

# Per-benchmark emphasis weights.
# NOTE(review): confirm that the consuming tool actually reads this section
# and `gradient_clipping`; they may be informational only.
adaptive_merge_parameters:
  task_weights:
    tinyArc: 1.3  # Logical reasoning enhancement.
    tinyHellaswag: 1.2  # Contextual understanding.
    tinyMMLU: 1.1  # Domain knowledge retention.
    tinyTruthfulQA: 1.4  # Prioritize truthful reasoning tasks.
    tinyWinogrande: 1.2  # Contextual reasoning boost.
    IFEval: 1.3  # Instruction-following and factual reasoning.
    BBH: 1.3  # Complex reasoning support.
    MATH: 1.4  # Mathematical problem-solving emphasis.
    GPQA: 1.3  # Factual QA improvement.
    MUSR: 1.2  # Multi-step reasoning enhancement.
    MMLU-PRO: 1.2  # Multitask domain consistency.
  # NOTE(review): placement of `smoothing_factor` inside this mapping (rather
  # than at top level) is assumed; verify.
  smoothing_factor: 0.15  # Balances contributions for smoother integration.

# NOTE(review): assumed to be a top-level key; verify.
gradient_clipping: 1.0  # Avoids over-contribution from any single model.

# Source models; the three weights sum to 1.0.
models:
  - model: CultriX/Qwen2.5-14B-Wernickev3
    parameters:
      weight: 0.5  # Backbone for multitasking and contextual benchmarks.
      density: 0.7  # Retain critical parameters for task-specific optimization.
  - model: djuna/Q2.5-Veltha-14B-0.5
    parameters:
      weight: 0.3  # Complement multitask strengths for IFEval and BBH.
      density: 0.8  # High density for consistent parameter integration.
  - model: CultriX/SeQwence-14B-EvolMerge
    parameters:
      weight: 0.2  # Balanced contributor for MUSR and GPQA.
      density: 0.6  # Moderate density to preserve diversity without overfitting.

tokenizer_source: CultriX/Qwen2.5-14B-Wernickev3