---
# Multi-stage merge recipe: five YAML documents, one per stage.
# A stage's `name` is referenced by later stages as `base_model` or as an
# entry under `models`, so the stages are listed in dependency order.
#
# NOTE(review): stages 1-4 each pair a negative weight with an equal
# positive weight; presumably this applies the difference between the two
# listed models onto `base_model` -- confirm against the merge tool's
# task_arithmetic semantics.

# Stage 1: starts from Llama-3.3-70B-Instruct; the Llama-3.1-70B /
# Llama-3.1-Swallow-70B-v0.1 pair is weighted -1.0 / +1.0.
merge_method: task_arithmetic
base_model: meta-llama/Llama-3.3-70B-Instruct
models:
  - model: meta-llama/Llama-3.1-70B
    parameters:
      weight: -1.0
  - model: tokyotech-llm/Llama-3.1-Swallow-70B-v0.1
    parameters:
      weight: 1.0
dtype: bfloat16
name: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-llama33

---
# Stage 2: starts from the stage 1 output; the Llama-3.1-70B-Instruct /
# Llama-3.1-Nemotron-70B-Instruct-HF pair is weighted -1.0 / +1.0.
merge_method: task_arithmetic
base_model: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-llama33
models:
  - model: meta-llama/Llama-3.1-70B-Instruct
    parameters:
      weight: -1.0
  - model: nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
    parameters:
      weight: 1.0
dtype: bfloat16
name: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-nemollama33

---
# Stage 3: starts from the stage 2 output; the Llama-3.1-70B /
# Llama-3.1-Tulu-3-70B pair is weighted -0.8 / +0.8.
merge_method: task_arithmetic
base_model: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-nemollama33
models:
  - model: meta-llama/Llama-3.1-70B
    parameters:
      weight: -0.8
  - model: allenai/Llama-3.1-Tulu-3-70B
    parameters:
      weight: 0.8
dtype: bfloat16
name: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-tulu

---
# Stage 4: also starts from the stage 2 output (not stage 3); the
# Llama-3.1-Swallow-70B-v0.1 / Llama-3.1-Swallow-70B-Instruct-v0.1 pair is
# weighted -0.8 / +0.8.
merge_method: task_arithmetic
base_model: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-nemollama33
models:
  - model: tokyotech-llm/Llama-3.1-Swallow-70B-v0.1
    parameters:
      weight: -0.8
  - model: tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1
    parameters:
      weight: 0.8
dtype: bfloat16
name: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-swallow

---
# Stage 5 (final): model_stock merge of the stage 3 and stage 4 outputs,
# using the stage 2 output as `base_model`. No per-model weights are set.
merge_method: model_stock
base_model: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-nemollama33
models:
  - model: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-tulu
  - model: Llama-3.3-SuperSwallow-70B-Instruct-v0.1-preset-swallow
dtype: bfloat16
name: Llama-3.3-SuperSwallow-70B-Instruct-v0.1