{ "architectures": [ "LMDeployForCausalLM" ], "auto_map": { "AutoConfig": "configuration_lmdeploy.LMDeployConfig", "AutoModel": "modeling_lmdeploy.LMDeployForCausalLM", "AutoModelForCausalLM": "modeling_lmdeploy.LMDeployForCausalLM" }, "turbomind": { "model_name": "internlm-chat-20b", "tensor_para_size": 1, "head_num": 40, "kv_head_num": 40, "vocab_size": 103168, "num_layer": 60, "inter_size": 13824, "norm_eps": 1e-06, "attn_bias": 0, "start_id": 1, "end_id": 2, "session_len": 8200, "weight_type": "int4", "rotary_embedding": 128, "rope_theta": 10000.0, "size_per_head": 128, "group_size": 128, "max_batch_size": 32, "max_context_token_num": 4, "step_length": 1, "cache_max_entry_count": 48, "cache_chunk_size": 1, "use_context_fmha": 1, "quant_policy": 0, "max_position_embeddings": 2048, "use_dynamic_ntk": 0, "use_logn_attn": 0 }, "lmdeploy_version": "0.0.14" }