Xidong commited on
Commit
c5f5c96
1 Parent(s): 0e36737

Upload ./configuration_qwen.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. configuration_qwen.py +71 -0
configuration_qwen.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba Cloud.
2
+ #
3
+ # This source code is licensed under the license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ from transformers import PretrainedConfig
7
+
8
+
9
+ class QWenConfig(PretrainedConfig):
10
+ model_type = "qwen"
11
+ keys_to_ignore_at_inference = ["past_key_values"]
12
+
13
+ def __init__(
14
+ self,
15
+ vocab_size=151936,
16
+ hidden_size=4096,
17
+ num_hidden_layers=32,
18
+ num_attention_heads=32,
19
+ emb_dropout_prob=0.0,
20
+ attn_dropout_prob=0.0,
21
+ layer_norm_epsilon=1e-6,
22
+ initializer_range=0.02,
23
+ max_position_embeddings=8192,
24
+ scale_attn_weights=True,
25
+ use_cache=True,
26
+ bf16=False,
27
+ fp16=False,
28
+ fp32=False,
29
+ kv_channels=128,
30
+ rotary_pct=1.0,
31
+ rotary_emb_base=10000,
32
+ use_dynamic_ntk=True,
33
+ use_logn_attn=True,
34
+ use_flash_attn="auto",
35
+ intermediate_size=22016,
36
+ no_bias=True,
37
+ tie_word_embeddings=False,
38
+ use_cache_quantization=False,
39
+ use_cache_kernel=False,
40
+ softmax_in_fp32=False,
41
+ **kwargs,
42
+ ):
43
+ self.vocab_size = vocab_size
44
+ self.hidden_size = hidden_size
45
+ self.intermediate_size = intermediate_size
46
+ self.num_hidden_layers = num_hidden_layers
47
+ self.num_attention_heads = num_attention_heads
48
+ self.emb_dropout_prob = emb_dropout_prob
49
+ self.attn_dropout_prob = attn_dropout_prob
50
+ self.layer_norm_epsilon = layer_norm_epsilon
51
+ self.initializer_range = initializer_range
52
+ self.scale_attn_weights = scale_attn_weights
53
+ self.use_cache = use_cache
54
+ self.max_position_embeddings = max_position_embeddings
55
+ self.bf16 = bf16
56
+ self.fp16 = fp16
57
+ self.fp32 = fp32
58
+ self.kv_channels = kv_channels
59
+ self.rotary_pct = rotary_pct
60
+ self.rotary_emb_base = rotary_emb_base
61
+ self.use_dynamic_ntk = use_dynamic_ntk
62
+ self.use_logn_attn = use_logn_attn
63
+ self.use_flash_attn = use_flash_attn
64
+ self.no_bias = no_bias
65
+ self.use_cache_quantization = use_cache_quantization
66
+ self.use_cache_kernel = use_cache_kernel
67
+ self.softmax_in_fp32 = softmax_in_fp32
68
+ super().__init__(
69
+ tie_word_embeddings=tie_word_embeddings,
70
+ **kwargs
71
+ )