codify_3b_multi / config.json
smallcloudteam's picture
Upload CodifyForCausalLM
ec51200
{
"E": 2560,
"L": 32,
"T": 2048,
"_mup": true,
"alt_pw_klass": {
"type": ""
},
"alt_rel_klass": {
"fused": true,
"type": "alibi"
},
"alt_sa_klass": {
"triton": true,
"type": "flash",
"use_rotary_emb": null
},
"architectures": [
"CodifyForCausalLM"
],
"attn_a_reach": 2048,
"attn_b_reach": 2048,
"attn_heads": 40,
"attn_ra_nbasis": 64,
"attn_seq": [
"d"
],
"attn_sparse_layout_seq": null,
"auto_map": {
"AutoConfig": "configuration_codify.CodifyConfig",
"AutoModel": "modeling_codify.CodifyForCausalLM"
},
"backcheck_pw": "inside",
"backcheck_sa": "none",
"bos_token_id": 1,
"dtype_acts": "torch.float16",
"dtype_weights": "torch.float16",
"enc_name": "openai_programming_v2",
"eos_token_id": 2,
"init_scale": 1,
"initializer_range": 0.02,
"layer_norm_epsilon": 1e-05,
"mlp_mult": 4,
"model_type": "codify",
"moe": null,
"mup_optimal_lr": 0.0005,
"mup_shapes_file": "lean_former/mup/alibi_32l/shapes.json",
"posemb": false,
"rescale_embeddings": false,
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.24.0",
"tune": [
3,
3,
3,
3
],
"unembedding_shared": false,
"use_cache": true,
"use_res_scale": false,
"vocab_size": 51305
}