# mergekit configuration: passthrough (frankenmerge) stacking layer slices
# from four models into a single model. Cleaned of markdown-table extraction
# residue ("| |" trailing tokens) that made the original YAML invalid.
slices:
  # Layers 0-15: attention-tuned Fimbulvetr base.
  - sources:
      - model: TheHierophant/Fimbulvetr-11B-Attention-V0.1-test
        layer_range: [0, 16]
        parameters:
          scale:
            - filter: o_proj
              value: 1.25
            - filter: down_proj
              value: 1.25
          # NOTE(review): the keys below are not standard mergekit slice
          # parameters — confirm the tooling actually consumes them.
          attention_heads: 32
          long_term_attention: true
  # Layers 16-31: Underground-Mind V0.9, with stronger output scaling.
  - sources:
      - model: TheHierophant/Underground-Mind-V0.9
        layer_range: [16, 32]
        parameters:
          scale:
            - filter: o_proj
              value: 1.5
            - filter: down_proj
              value: 1.5
          # NOTE(review): non-standard keys — verify against mergekit docs.
          significance: 0.8
          semantic_linking: true
  # Layers 32-39: fine-tuned Underground-Mind variant.
  - sources:
      - model: TheHierophant/Underground-Mind-V0.3-test-finetuning
        layer_range: [32, 40]
        parameters:
          scale:
            - filter: o_proj
              value: 1.75
            - filter: down_proj
              value: 1.75
          # NOTE(review): non-standard keys — verify against mergekit docs.
          task_specialization: true
          enhanced_attention: true
  # Layers 40-46: Underground-Cognitive top slice with the largest scale.
  - sources:
      - model: TheHierophant/Underground-Cognitive-V0.3-test
        layer_range: [40, 47]
        parameters:
          scale:
            - filter: o_proj
              value: 2.0
            - filter: down_proj
              value: 2.0
          # NOTE(review): non-standard keys — verify against mergekit docs.
          attention_heads: 18
          abstract_attention: true
          deep_cognitive_focus: true
# Passthrough concatenates the slices without weight blending.
merge_method: passthrough
base_model: TheHierophant/Fimbulvetr-11B-Attention-V0.1-test
dtype: bfloat16