File size: 1,244 Bytes
3a5870b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22

# Mixture-of-experts merge configuration: one base model, four entries
# under `experts` (each described by a list of positive prompts), and one
# entry under `shared_experts`.
# NOTE(review): the expert checkpoints are named 7B, 1.5B and 3B; confirm
# that the merge tool accepts experts whose sizes differ from the base.
base_model: huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2
architecture: qwen
gate_mode: hidden
dtype: bfloat16

experts:
  # General chat, writing/editing and multilingual prompts.
  - source_model: huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2
    positive_prompts:
      - "chat"
      - "assistant"
      - "chat history"
      - "chat context"
      - "writing"
      - "text writing"
      - "editing"
      - "text editing"
      - "multilingual"

  # Science, mathematics and problem-solving prompts.
  - source_model: Qwen/Qwen2.5-Math-1.5B-Instruct
    positive_prompts:
      - "bio"
      - "science"
      - "biology"
      - "natural sciences"
      - "scientist"
      - "math"
      - "mathematician"
      - "problem solving"
      - "calculating"
      - "logics"

  # Programming and code-review prompts.
  - source_model: Qwen/Qwen2.5-Coder-3B-Instruct
    positive_prompts:
      - "code"
      - "coding"
      - "coder"
      - "programming"
      - "programmer"
      - "code analysis"
      - "code review"
      - "code fix"
      - "code improvement"

  # Russian-language chat and writing prompts.
  - source_model: RefalMachine/ruadapt_qwen2.5_3B_ext_u48_instruct_v4
    positive_prompts:
      - "russian chat"
      - "russian chatting"
      - "russian"
      - "russian language"
      - "russian text writing/editing"

shared_experts:
  - source_model: huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2
    # Required by Qwen MoE for the "hidden" gate mode; not allowed with
    # other gate modes.
    positive_prompts:
      - "chat assistant"
    # Optional, but recommended: downweight the shared expert's output to
    # prevent overcooking the model.
    residual_scale: 0.1