nluai committed · verified
Commit e1ad63a · 1 Parent(s): c6965a5

Update config.json

Files changed (1):
  1. config.json +43 -6
config.json CHANGED
@@ -1,4 +1,5 @@
 {
+  "_name_or_path": "vinai/PhoGPT-4B-Chat",
   "architectures": [
     "MPTForCausalLM"
   ],
@@ -11,17 +12,36 @@
     "attn_uses_sequence_id": false,
     "clip_qkv": null,
     "prefix_lm": false,
+    "qk_gn": false,
     "qk_ln": false,
+    "rope": false,
+    "rope_dail_config": {
+      "pos_idx_in_fp32": true,
+      "type": "original",
+      "xpos_scale_base": 512
+    },
+    "rope_hf_config": {
+      "factor": 1.0,
+      "type": "no_scaling"
+    },
+    "rope_impl": "dail",
+    "rope_theta": 10000,
+    "sliding_window_size": -1,
     "softmax_scale": null
   },
   "auto_map": {
-    "AutoConfig": "configuration_mpt.MPTConfig",
-    "AutoModelForCausalLM": "modeling_mpt.MPTForCausalLM"
+    "AutoConfig": "vinai/PhoGPT-4B-Chat--configuration_mpt.MPTConfig",
+    "AutoModelForCausalLM": "vinai/PhoGPT-4B-Chat--modeling_mpt.MPTForCausalLM"
   },
   "d_model": 3072,
   "emb_pdrop": 0.0,
   "embedding_fraction": 1.0,
   "expansion_ratio": 4,
+  "fc_type": "torch",
+  "ffn_config": {
+    "fc_type": "torch",
+    "ffn_type": "mptmlp"
+  },
   "init_config": {
     "emb_init_std": null,
     "emb_init_uniform_lim": null,
@@ -34,7 +54,7 @@
     "verbose": 0
   },
   "init_device": "cpu",
-  "learned_pos_emb": true,
+  "learned_pos_emb": false,
   "logit_scale": null,
   "max_seq_len": 8192,
   "model_type": "mpt",
@@ -42,10 +62,27 @@
   "n_layers": 32,
   "no_bias": false,
   "norm_type": "low_precision_layernorm",
+  "pretraining_tp": 1,
+  "quantization_config": {
+    "_load_in_4bit": true,
+    "_load_in_8bit": false,
+    "bnb_4bit_compute_dtype": "float16",
+    "bnb_4bit_quant_storage": "uint8",
+    "bnb_4bit_quant_type": "nf4",
+    "bnb_4bit_use_double_quant": true,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
+    "quant_method": "bitsandbytes"
+  },
   "resid_pdrop": 0.0,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.30.2",
+  "torch_dtype": "float32",
+  "transformers_version": "4.41.2",
   "use_cache": false,
+  "use_pad_tok_in_ffn": true,
   "verbose": 0,
   "vocab_size": 20480
-}
+}
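
The added quantization_config block describes a bitsandbytes 4-bit NF4 setup with double quantization and float16 compute, and auto_map now resolves the MPT classes from the remote code hosted under vinai/PhoGPT-4B-Chat. As a rough loading-side sketch (the repository id below is a placeholder, not taken from this commit), the equivalent Transformers call might look like:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Placeholder repo id -- substitute the id of the repository this commit belongs to.
model_id = "<namespace>/<quantized-phogpt-repo>"

# Mirrors the quantization_config added in this commit:
# 4-bit NF4 weights, double quantization, float16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# trust_remote_code=True is needed because auto_map points at
# vinai/PhoGPT-4B-Chat--modeling_mpt.MPTForCausalLM rather than a built-in class.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

Passing BitsAndBytesConfig explicitly is likely optional here: since this commit bakes quantization_config into config.json, from_pretrained should pick up the same settings by default; the explicit object just makes the assumed quantization parameters visible at the call site.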