{ "E": 2560, "L": 32, "T": 2048, "_mup": true, "alt_pw_klass": { "type": "" }, "alt_rel_klass": { "fused": true, "type": "alibi" }, "alt_sa_klass": { "triton": true, "type": "flash", "use_rotary_emb": null }, "architectures": [ "CodifyForCausalLM" ], "attn_a_reach": 2048, "attn_b_reach": 2048, "attn_heads": 40, "attn_ra_nbasis": 64, "attn_seq": [ "d" ], "attn_sparse_layout_seq": null, "auto_map": { "AutoConfig": "configuration_codify.CodifyConfig", "AutoModel": "modeling_codify.CodifyForCausalLM" }, "backcheck_pw": "inside", "backcheck_sa": "none", "bos_token_id": 1, "dtype_acts": "torch.float16", "dtype_weights": "torch.float16", "enc_name": "openai_programming_v2", "eos_token_id": 2, "init_scale": 1, "initializer_range": 0.02, "layer_norm_epsilon": 1e-05, "mlp_mult": 4, "model_type": "codify", "moe": null, "mup_optimal_lr": 0.0005, "mup_shapes_file": "lean_former/mup/alibi_32l/shapes.json", "posemb": false, "rescale_embeddings": false, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.24.0", "tune": [ 3, 3, 3, 3 ], "unembedding_shared": false, "use_cache": true, "use_res_scale": false, "vocab_size": 51305 }