Upload 4 files
- configuration_mistral.py +4 -5
- modeling_mistral.py +0 -0
configuration_mistral.py
CHANGED
@@ -14,8 +14,8 @@
 # limitations under the License.
 """Mistral model configuration"""

-from
-from
+from ...configuration_utils import PretrainedConfig
+from ...utils import logging


 logger = logging.get_logger(__name__)
@@ -116,7 +116,7 @@ class MistralConfig(PretrainedConfig):
         rope_theta=10000.0,
         sliding_window=4096,
         attention_dropout=0.0,
-        max_thoughts=16,
+        max_thoughts=16,
         merged_talk_heads=True,
         merged_lm_and_talk_heads=False,
         merged_lm_and_think_heads=True,
@@ -148,7 +148,6 @@
         self.rope_theta = rope_theta
         self.attention_dropout = attention_dropout
         self.max_thoughts = max_thoughts
-        self.thought_length = thought_length
         self.merged_talk_heads = merged_talk_heads
         self.merged_lm_and_talk_heads = merged_lm_and_talk_heads
         self.merged_lm_and_think_heads = merged_lm_and_think_heads
@@ -262,7 +261,7 @@ class MistralStarConfig(PretrainedConfig):
         sliding_window=4096,
         attention_dropout=0.0,
         max_thoughts=16,
-        thought_length =
+        thought_length = 10,
         merged_talk_heads=True,
         merged_lm_and_talk_heads=False,
         merged_lm_and_think_heads=True,
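For orientation, the changed fields above are keyword arguments of the two config classes. Below is a minimal sketch of how the updated defaults would be exercised; it assumes this patched configuration_mistral.py replaces transformers/models/mistral/configuration_mistral.py (the new relative imports imply that location) and that both classes store their keyword arguments as attributes in the usual PretrainedConfig way. MistralStarConfig and the import path come from this repo's copy of the file, not stock transformers.

# Minimal sketch: exercising the changed config defaults.
# Assumes the patched configuration_mistral.py sits at
# transformers/models/mistral/ (its new relative imports imply that)
# and that both classes store their keyword arguments as attributes.
from transformers.models.mistral.configuration_mistral import (
    MistralConfig,
    MistralStarConfig,  # defined only in this repo's copy of the file
)

# MistralConfig keeps max_thoughts but, after this commit, no longer
# stores a thought_length attribute.
base_cfg = MistralConfig(
    max_thoughts=16,
    merged_talk_heads=True,
    merged_lm_and_talk_heads=False,
    merged_lm_and_think_heads=True,
)
print(base_cfg.max_thoughts)  # 16

# MistralStarConfig now defaults thought_length to 10; it can still be
# overridden when the config is built.
star_cfg = MistralStarConfig(
    max_thoughts=16,
    thought_length=10,
    merged_talk_heads=True,
    merged_lm_and_talk_heads=False,
    merged_lm_and_think_heads=True,
)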
modeling_mistral.py
CHANGED
The diff for this file is too large to render.
See raw diff