Llama-3.1-MoE-8x8B-Instruct-raw / mergekit_moe_config.yml
PhilipMay's picture
First model commit
77a87c7
raw
history blame contribute delete
542 Bytes
# mergekit Mixture-of-Experts config: assembles an 8-expert MoE model in which
# every expert is the same checkpoint (Meta-Llama-3.1-8B-Instruct), i.e. a
# "raw" MoE with duplicated expert weights.
base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
# NOTE(review): "random" presumably initializes the router/gate weights
# randomly instead of deriving them from prompt hidden states — confirm
# against the mergekit MoE documentation for the version in use.
gate_mode: random
dtype: bfloat16
# All eight experts are the identical source checkpoint (visible below);
# the experts only differ once the merged model is trained further.
experts:
- source_model: meta-llama/Meta-Llama-3.1-8B-Instruct
- source_model: meta-llama/Meta-Llama-3.1-8B-Instruct
- source_model: meta-llama/Meta-Llama-3.1-8B-Instruct
- source_model: meta-llama/Meta-Llama-3.1-8B-Instruct
- source_model: meta-llama/Meta-Llama-3.1-8B-Instruct
- source_model: meta-llama/Meta-Llama-3.1-8B-Instruct
- source_model: meta-llama/Meta-Llama-3.1-8B-Instruct
- source_model: meta-llama/Meta-Llama-3.1-8B-Instruct