LeroyDyer committed on
Commit 88ef2e1
1 Parent(s): b7636d1

Upload 4 files

Files changed (2)
  1. configuration_mistral.py +4 -5
  2. modeling_mistral.py +0 -0
configuration_mistral.py CHANGED
@@ -14,8 +14,8 @@
  # limitations under the License.
  """Mistral model configuration"""
 
- from transformers.configuration_utils import PretrainedConfig
- from transformers.utils import logging
+ from ...configuration_utils import PretrainedConfig
+ from ...utils import logging
 
 
  logger = logging.get_logger(__name__)
@@ -116,7 +116,7 @@ class MistralConfig(PretrainedConfig):
  rope_theta=10000.0,
  sliding_window=4096,
  attention_dropout=0.0,
- max_thoughts=16,thought_length = 1024,
+ max_thoughts=16,
  merged_talk_heads=True,
  merged_lm_and_talk_heads=False,
  merged_lm_and_think_heads=True,
@@ -148,7 +148,6 @@ class MistralConfig(PretrainedConfig):
  self.rope_theta = rope_theta
  self.attention_dropout = attention_dropout
  self.max_thoughts = max_thoughts
- self.thought_length = thought_length
  self.merged_talk_heads = merged_talk_heads
  self.merged_lm_and_talk_heads = merged_lm_and_talk_heads
  self.merged_lm_and_think_heads = merged_lm_and_think_heads
@@ -262,7 +261,7 @@ class MistralStarConfig(PretrainedConfig):
  sliding_window=4096,
  attention_dropout=0.0,
  max_thoughts=16,
- thought_length = 1024,
+ thought_length = 10,
  merged_talk_heads=True,
  merged_lm_and_talk_heads=False,
  merged_lm_and_think_heads=True,
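In effect, MistralConfig drops the thought_length parameter entirely, MistralStarConfig keeps it with its default lowered from 1024 to 10, and the module switches to package-relative imports, which only resolve when the file sits inside the transformers source tree (e.g. transformers/models/mistral/configuration_mistral.py). Under that assumption, a minimal sketch of constructing both configs after this commit; the import path and the presence of MistralStarConfig in the installed package are assumptions, not upstream transformers API, and only the keyword arguments visible in this diff are shown:

# Assumes this patched configuration_mistral.py has replaced the in-tree file at
# transformers/models/mistral/, so its relative imports (from ...configuration_utils
# import PretrainedConfig) can resolve; MistralStarConfig is not in upstream transformers.
from transformers.models.mistral.configuration_mistral import (
    MistralConfig,      # thought_length removed by this commit
    MistralStarConfig,  # thought_length default lowered to 10
)

# MistralConfig: only max_thoughts remains; thought_length is no longer accepted here.
base_cfg = MistralConfig(
    max_thoughts=16,
    merged_talk_heads=True,
    merged_lm_and_talk_heads=False,
    merged_lm_and_think_heads=True,
)

# MistralStarConfig: thought_length is still a parameter, now defaulting to 10.
star_cfg = MistralStarConfig(
    max_thoughts=16,
    thought_length=10,
    merged_talk_heads=True,
    merged_lm_and_talk_heads=False,
    merged_lm_and_think_heads=True,
)

print(base_cfg.max_thoughts)    # 16
print(star_cfg.thought_length)  # 10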
modeling_mistral.py CHANGED
The diff for this file is too large to render. See raw diff