dp-big-comp-o / config.json
Korventenn's picture
Upload model
669a48b
raw
history blame contribute delete
No virus
2.23 kB
{
"_name_or_path": "./compacter-final",
"adapters": {
"adapters": {
"orca": "e6dace882b5a44db"
},
"config_map": {
"e6dace882b5a44db": {
"adapter_residual_before_ln": false,
"cross_adapter": false,
"factorized_phm_W": true,
"factorized_phm_rule": false,
"hypercomplex_nonlinearity": "glorot-uniform",
"init_weights": "bert",
"inv_adapter": null,
"inv_adapter_reduction_factor": null,
"is_parallel": false,
"learn_phm": true,
"leave_out": [],
"ln_after": false,
"ln_before": false,
"mh_adapter": true,
"non_linearity": "gelu",
"original_ln_after": true,
"original_ln_before": false,
"output_adapter": true,
"phm_bias": true,
"phm_c_init": "normal",
"phm_dim": 4,
"phm_init_range": 0.0001,
"phm_layer": true,
"phm_rank": 256,
"reduction_factor": 8,
"residual_before_ln": true,
"scaling": 1.0,
"shared_W_phm": false,
"shared_phm_rule": true,
"use_gating": false
}
},
"fusion_config_map": {},
"fusions": {}
},
"architectures": [
"LlamaAdapterModel"
],
"attention_bias": false,
"bos_token_id": 32013,
"custom_heads": {},
"eos_token_id": 32021,
"hidden_act": "silu",
"hidden_size": 2048,
"id2label": null,
"initializer_range": 0.02,
"intermediate_size": 5504,
"label2id": null,
"max_position_embeddings": 16384,
"model_type": "llama",
"num_attention_heads": 16,
"num_hidden_layers": 24,
"num_key_value_heads": 16,
"prediction_heads": {
"default": {
"activation_function": null,
"bias": false,
"dropout_prob": null,
"embedding_size": 2048,
"head_type": "causal_lm",
"label2id": null,
"layer_norm": false,
"layers": 1,
"shift_labels": true,
"vocab_size": 32256
}
},
"pretraining_tp": 1,
"rms_norm_eps": 1e-06,
"rope_scaling": {
"factor": 4.0,
"type": "linear"
},
"rope_theta": 100000,
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.35.2",
"use_cache": true,
"vocab_size": 32256
}