nluai committed
Commit
bb7fe63
1 Parent(s): 5feaabd

Update config.json

Files changed (1)
  1. config.json +86 -29
config.json CHANGED
@@ -1,31 +1,88 @@
  {
- "alpha_pattern": {},
- "auto_mapping": null,
- "base_model_name_or_path": "vinai/PhoGPT-4B-Chat",
- "bias": "none",
- "fan_in_fan_out": false,
- "inference_mode": true,
- "init_lora_weights": true,
- "layer_replication": null,
- "layers_pattern": null,
- "layers_to_transform": null,
- "loftq_config": {},
- "lora_alpha": 32,
- "lora_dropout": 0.1,
- "megatron_config": null,
- "megatron_core": "megatron.core",
- "modules_to_save": null,
- "peft_type": "LORA",
- "r": 32,
- "rank_pattern": {},
- "revision": null,
- "target_modules": [
- "ffn.down_proj",
- "attn.out_proj",
- "ffn.up_proj",
- "attn.Wqkv"
+ "_name_or_path": "vinai/PhoGPT-4B-Chat",
+ "architectures": [
+ "MPTForCausalLM"
  ],
- "task_type": "CAUSAL_LM",
- "use_dora": false,
- "use_rslora": false
- }
+ "attn_config": {
+ "alibi": true,
+ "alibi_bias_max": 8,
+ "attn_impl": "torch",
+ "attn_pdrop": 0.0,
+ "attn_type": "multihead_attention",
+ "attn_uses_sequence_id": false,
+ "clip_qkv": null,
+ "prefix_lm": false,
+ "qk_gn": false,
+ "qk_ln": false,
+ "rope": false,
+ "rope_dail_config": {
+ "pos_idx_in_fp32": true,
+ "type": "original",
+ "xpos_scale_base": 512
+ },
+ "rope_hf_config": {
+ "factor": 1.0,
+ "type": "no_scaling"
+ },
+ "rope_impl": "dail",
+ "rope_theta": 10000,
+ "sliding_window_size": -1,
+ "softmax_scale": null
+ },
+ "auto_map": {
+ "AutoConfig": "vinai/PhoGPT-4B-Chat--configuration_mpt.MPTConfig",
+ "AutoModelForCausalLM": "vinai/PhoGPT-4B-Chat--modeling_mpt.MPTForCausalLM"
+ },
+ "d_model": 3072,
+ "emb_pdrop": 0.0,
+ "embedding_fraction": 1.0,
+ "expansion_ratio": 4,
+ "fc_type": "torch",
+ "ffn_config": {
+ "fc_type": "torch",
+ "ffn_type": "mptmlp"
+ },
+ "init_config": {
+ "emb_init_std": null,
+ "emb_init_uniform_lim": null,
+ "fan_mode": "fan_in",
+ "init_div_is_residual": true,
+ "init_gain": 0.0,
+ "init_nonlinearity": "relu",
+ "init_std": null,
+ "name": "kaiming_normal_",
+ "verbose": 0
+ },
+ "init_device": "cpu",
+ "learned_pos_emb": false,
+ "logit_scale": null,
+ "max_seq_len": 8192,
+ "model_type": "mpt",
+ "n_heads": 24,
+ "n_layers": 32,
+ "no_bias": false,
+ "norm_type": "low_precision_layernorm",
+ "pretraining_tp": 1,
+ "quantization_config": {
+ "_load_in_4bit": true,
+ "_load_in_8bit": false,
+ "bnb_4bit_compute_dtype": "float16",
+ "bnb_4bit_quant_storage": "uint8",
+ "bnb_4bit_quant_type": "nf4",
+ "bnb_4bit_use_double_quant": true,
+ "llm_int8_enable_fp32_cpu_offload": false,
+ "llm_int8_has_fp16_weight": false,
+ "llm_int8_skip_modules": null,
+ "llm_int8_threshold": 6.0,
+ "load_in_4bit": true,
+ "load_in_8bit": false,
+ "quant_method": "bitsandbytes"
+ },
+ "resid_pdrop": 0.0,
+ "torch_dtype": "float32",
+ "transformers_version": "4.41.2",
+ "use_cache": false,
+ "use_pad_tok_in_ffn": true,
+ "verbose": 0,
+ "vocab_size": 20480
+ }
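For context, this diff swaps what looks like a PEFT LoRA adapter configuration for the full MPT base-model configuration of vinai/PhoGPT-4B-Chat, now carrying a bitsandbytes 4-bit quantization block. As a minimal sketch (not part of this commit, and assuming the standard peft API), the removed adapter settings correspond roughly to:

```python
from peft import LoraConfig

# Roughly mirrors the removed adapter config: r=32, alpha=32, dropout=0.1,
# targeting the MPT attention and FFN projection modules.
lora_config = LoraConfig(
    r=32,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["attn.Wqkv", "attn.out_proj", "ffn.up_proj", "ffn.down_proj"],
)
```

Likewise, the added "quantization_config" corresponds roughly to loading the model with a matching BitsAndBytesConfig; the model id below is the base checkpoint named in the config and is only a placeholder, and trust_remote_code is needed because auto_map resolves to custom MPT classes:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Mirrors the added "quantization_config": nf4 4-bit weights,
# double quantization, float16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    "vinai/PhoGPT-4B-Chat",          # placeholder: base model referenced by this config
    quantization_config=bnb_config,
    trust_remote_code=True,          # auto_map points at custom MPT code in the base repo
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("vinai/PhoGPT-4B-Chat", trust_remote_code=True)
```

Taken together, the two halves of the diff are consistent with the usual QLoRA setup: a LoRA adapter trained on top of an nf4, double-quantized base model with float16 compute.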