# Qwen2.5-14B-Broca / mergekit_config.yml
# Uploaded by CultriX via huggingface_hub (commit 51204ee, verified)
merge_method: della_linear
base_model: CultriX/Qwen2.5-14B-Wernickev3
dtype: bfloat16
parameters:
  epsilon: 0.03  # Refined for sharper parameter scaling.
  lambda: 1.1  # Balances blending while emphasizing significant contributions.
  normalize: true  # Ensures stable parameter integration across models.
adaptive_merge_parameters:
  task_weights:
    tinyArc: 1.3  # Logical reasoning enhancement.
    tinyHellaswag: 1.2  # Contextual understanding.
    tinyMMLU: 1.1  # Domain knowledge retention.
    tinyTruthfulQA: 1.4  # Prioritize truthful reasoning tasks.
    tinyWinogrande: 1.2  # Contextual reasoning boost.
    IFEval: 1.3  # Instruction-following and factual reasoning.
    BBH: 1.3  # Complex reasoning support.
    MATH: 1.4  # Mathematical problem-solving emphasis.
    GPQA: 1.3  # Factual QA improvement.
    MUSR: 1.2  # Multi-step reasoning enhancement.
    MMLU-PRO: 1.2  # Multitask domain consistency.
  smoothing_factor: 0.15  # Balances contributions for smoother integration.
gradient_clipping: 1.0  # Avoids over-contribution from any single model.
models:
  - model: CultriX/Qwen2.5-14B-Wernickev3
    parameters:
      weight: 0.5  # Backbone for multitasking and contextual benchmarks.
      density: 0.7  # Retain critical parameters for task-specific optimization.
  - model: djuna/Q2.5-Veltha-14B-0.5
    parameters:
      weight: 0.3  # Complement multitask strengths for IFEval and BBH.
      density: 0.8  # High density for consistent parameter integration.
  - model: CultriX/SeQwence-14B-EvolMerge
    parameters:
      weight: 0.2  # Balanced contributor for MUSR and GPQA.
      density: 0.6  # Moderate density to preserve diversity without overfitting.
tokenizer_source: CultriX/Qwen2.5-14B-Wernickev3