granite-3b-code-base / config.json
mayank-mishra's picture
upload model
448e236
raw
history blame
No virus
1.07 kB
{
"activation_function": "swiglu",
"add_bias": true,
"apply_residual_connection_post_layernorm": false,
"architectures": [
"GraniteForCausalLM"
],
"attention_head_type": "mha",
"attention_multiplier": null,
"attention_softmax_in_fp32": true,
"attn_pdrop": 0.1,
"auto_map": {
"AutoConfig": "configuration_granite.GraniteConfig",
"AutoModel": "modeling_granite.GraniteModel",
"AutoModelForCausalLM": "modeling_granite.GraniteForCausalLM"
},
"bos_token_id": 0,
"embd_pdrop": 0.1,
"eos_token_id": 0,
"initializer_range": 0.02,
"layer_norm_epsilon": 1e-05,
"model_type": "granite",
"n_embd": 2560,
"n_head": 32,
"n_inner": 10240,
"n_layer": 32,
"n_positions": 2048,
"normalization_function": "rmsnorm",
"num_key_value_heads": 32,
"pad_token_id": 0,
"position_embedding_type": "rope",
"resid_pdrop": 0.1,
"rope_theta": 10000,
"scale_attention_softmax_in_fp32": true,
"scale_attn_weights": true,
"torch_dtype": "float32",
"transformers_version": "4.38.1",
"use_cache": true,
"vocab_size": 49152
}