# strangecosmo / mergekit_moe_config.yml
# Ann Brown
# experimental architecture
# 6c11ff6
# Mixture-of-experts merge configuration. The file name suggests it is meant
# for mergekit's MoE tooling — confirm against the tool that consumes it.
base_model: "HuggingFaceTB/cosmo-1b"

# Four experts, every one taken from the same checkpoint as base_model.
# Each expert's fields must be nested under its own list item: written at
# column 0, `positive_prompts` is read as a top-level key and the document
# no longer parses.
experts:
  - source_model: "HuggingFaceTB/cosmo-1b"
    # Empty for every expert. Presumably unused because gate_mode is
    # "random" — verify against the mergekit MoE documentation.
    positive_prompts: []
  - source_model: "HuggingFaceTB/cosmo-1b"
    positive_prompts: []
  - source_model: "HuggingFaceTB/cosmo-1b"
    positive_prompts: []
  - source_model: "HuggingFaceTB/cosmo-1b"
    positive_prompts: []

gate_mode: "random"
dtype: "bfloat16"
experts_per_token: 2

# NOTE(review): moe_layers and duplicate_layers do not look like standard
# mergekit MoE options; presumably they are read by a modified build — TODO
# confirm which tool consumes them and whether indices are 0-based.
# The two lists share no index, and indices 3-9 appear in neither list.
moe_layers:
  - 0
  - 1
  - 2
  - 10
  - 11
  - 12
  - 16
  - 17
  - 18

duplicate_layers:
  - 13
  - 14
  - 15
  - 19
  - 20
  - 21