---
# Mixture-of-experts merge configuration.
#
# Every expert below (routed and shared) points at the same checkpoint as
# `base_model`.
#
# NOTE(review): this file was reconstructed from a copy whose indentation was
# lost and which had stray `|` separator lines. The per-expert `architecture`
# keys are assumed to belong to the list item that precedes them -- confirm
# against the consuming tool's schema.

base_model: Qwen/Qwen2.5-Coder-7B
gate_mode: random
dtype: bfloat16
architecture: qwen

# Eight expert entries.
experts:
  - source_model: Qwen/Qwen2.5-Coder-7B
    architecture: qwen
  - source_model: Qwen/Qwen2.5-Coder-7B
    architecture: qwen
  - source_model: Qwen/Qwen2.5-Coder-7B
    architecture: qwen
  - source_model: Qwen/Qwen2.5-Coder-7B
    architecture: qwen
  - source_model: Qwen/Qwen2.5-Coder-7B
    architecture: qwen
  - source_model: Qwen/Qwen2.5-Coder-7B
    architecture: qwen
  - source_model: Qwen/Qwen2.5-Coder-7B
    architecture: qwen
  - source_model: Qwen/Qwen2.5-Coder-7B
    architecture: qwen

# Single shared-expert entry.
shared_experts:
  - source_model: Qwen/Qwen2.5-Coder-7B
    architecture: qwen