JonusNattapong committed
Commit bf725a9 · verified · 1 Parent(s): 3de8a14

Upload configuration_openthaiwilai.py with huggingface_hub

Files changed (1)
  1. configuration_openthaiwilai.py +15 -4
configuration_openthaiwilai.py CHANGED
@@ -1,5 +1,5 @@
-from transformers import PretrainedConfig
 
+from transformers import PretrainedConfig
 
 class OpenThaiWilaiConfig(PretrainedConfig):
     model_type = "OpenThaiWilai"
@@ -7,7 +7,8 @@ class OpenThaiWilaiConfig(PretrainedConfig):
     attribute_map = {
         "num_experts": "num_experts",
         "top_k": "top_k",
-        "num_hidden_layers": "num_layers"
+        "num_hidden_layers": "num_layers",
+        "num_key_value_heads": "num_key_value_heads"  # Added this line
     }
 
     def __init__(
@@ -16,10 +17,13 @@ class OpenThaiWilaiConfig(PretrainedConfig):
         hidden_size=768,
         num_layers=6,
         num_heads=8,
+        num_key_value_heads=None,  # Added this line
         num_experts=4,
         top_k=2,
-        max_position_embeddings=512,
+        max_position_embeddings=2048,  # Updated max_position_embeddings to match modeling file
         intermediate_size=3072,
+        rope=True,  # Added rope and use_flashattn to match modeling file
+        use_flashattn=True,  # Added rope and use_flashattn to match modeling file
         eos_token_id=None,
         bos_token_id=None,
         pad_token_id=None,
@@ -41,10 +45,13 @@ class OpenThaiWilaiConfig(PretrainedConfig):
         self.hidden_size = hidden_size
         self.num_layers = num_layers
         self.num_heads = num_heads
+        self.num_key_value_heads = num_key_value_heads or num_heads  # Added this line
         self.num_experts = num_experts
         self.top_k = top_k
         self.max_position_embeddings = max_position_embeddings
         self.intermediate_size = intermediate_size
+        self.rope = rope  # Added rope and use_flashattn
+        self.use_flashattn = use_flashattn  # Added rope and use_flashattn
 
         self.num_hidden_layers = num_layers
 
@@ -53,4 +60,8 @@ class OpenThaiWilaiConfig(PretrainedConfig):
         output["num_experts"] = self.num_experts
         output["top_k"] = self.top_k
         output["num_hidden_layers"] = self.num_layers
-        return output
+        output["num_key_value_heads"] = self.num_key_value_heads  # Added this line
+        output["max_position_embeddings"] = self.max_position_embeddings  # Ensure this is included
+        output["rope"] = self.rope  # Ensure this is included
+        output["use_flashattn"] = self.use_flashattn  # Ensure this is included
+        return output
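
For reference, a minimal usage sketch (not part of the commit): it assumes the uploaded configuration_openthaiwilai.py sits in the working directory and that transformers is installed; the num_key_value_heads value passed below is an illustrative choice, not a value taken from the repository.

# Minimal sketch: exercise the fields touched by this commit.
from configuration_openthaiwilai import OpenThaiWilaiConfig

# num_key_value_heads is the newly added field; when omitted it falls back
# to num_heads via `num_key_value_heads or num_heads` in __init__.
config = OpenThaiWilaiConfig(num_heads=8, num_key_value_heads=4)

d = config.to_dict()
print(d["num_key_value_heads"])        # 4
print(d["max_position_embeddings"])    # 2048 (new default)
print(d["rope"], d["use_flashattn"])   # True True (new flags)

Leaving num_key_value_heads unset keeps it equal to num_heads, i.e. plain multi-head attention rather than a grouped-query layout.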