tri_oned / config.json
Gusanidas's picture
Push model using huggingface_hub.
87f39ac verified
raw
history blame contribute delete
No virus
553 Bytes
{
"attn_eq": true,
"attn_type": "Trittention",
"autopad": true,
"causal_attn": true,
"d_head": 64,
"d_mlp": 2048,
"d_model": 512,
"d_vocab": 50304,
"debug": true,
"dropout": 0.1,
"dt_head": 64,
"has_mlp": true,
"init_range": 0.01,
"is_gated": false,
"layer_norm_eps": 1e-05,
"look_backward": 1,
"mlp_type": "all",
"n_ctx": 65,
"n_heads": 8,
"n_layers": 1,
"nt_heads": 2,
"order_attn": false,
"pad_value": 0,
"share_input_output_embed": true,
"use_rotary": false,
"window_size": 16,
"with_ln": true
}