{
  "architectures": [
    "OpenLMForCausalLM"
  ],
  "model_type": "openlm",
  "dim": 4096,
  "intermediate_dim_ffn": 14336,
  "n_layers": 32,
  "n_heads": 32,
  "n_heads_kv": 8,
  "vocab_size": 32000,
  "norm_eps": 1e-5,
  "seq_len": 2048,
  "weight_tying": false,
  "apply_qk_norm": false,
  "qk_head_dim": 128,
  "v_head_dim": 128,
  "norm_type": "rms_norm",
  "attn_name": "linear_attn",
  "positional_embedding_type": "rotary",
  "ffn_type": "swiglu",
  "use_decay": true,
  "use_retnet_slopes": false,
  "decay_start": null
}