jonathanjordan21
/

mos-mamba-6x130m-hf

Text Generation

Model card | Files and versions | Community

jonathanjordan21 committed on Jul 9

Commit

c7ba421

•

1 Parent(s): e1dca0d

Update modeling_mos_mamba.py

Files changed (1) hide show

modeling_mos_mamba.py +17 -17

modeling_mos_mamba.py CHANGED Viewed

@@ -32,28 +32,28 @@ from .configuration_mos_mamba import MoSMambaConfig
 import torch.nn.functional as F
-if is_mamba_ssm_available():
-    from mamba_ssm.ops.selective_scan_interface import mamba_inner_fn, selective_scan_fn
-    from mamba_ssm.ops.triton.selective_state_update import selective_state_update
-else:
-    selective_state_update, selective_scan_fn, mamba_inner_fn = None, None, None
-if is_causal_conv1d_available():
-    from causal_conv1d import causal_conv1d_fn, causal_conv1d_update
-else:
-    causal_conv1d_update, causal_conv1d_fn = None, None
-# try:
 #     from mamba_ssm.ops.selective_scan_interface import mamba_inner_fn, selective_scan_fn
 #     from mamba_ssm.ops.triton.selective_state_update import selective_state_update
-# except:
 #     selective_state_update, selective_scan_fn, mamba_inner_fn = None, None, None
-# try:
 #     from causal_conv1d import causal_conv1d_fn, causal_conv1d_update
-# except:
 #     causal_conv1d_update, causal_conv1d_fn = None, None
 is_fast_path_available = all(
@@ -706,7 +706,7 @@ class MoSMambaPreTrainedModel(PreTrainedModel):
             if module.bias is not None:
                 if not getattr(module.bias, "_no_reinit", False):
                     nn.init.zeros_(module.bias)
-            nn.init.uniform_(module.weight, -0.001, 0.001)
         elif isinstance(module, nn.Embedding):
             nn.init.normal_(module.weight, std=self.config.initializer_range)

 import torch.nn.functional as F
+# if is_mamba_ssm_available():
 #     from mamba_ssm.ops.selective_scan_interface import mamba_inner_fn, selective_scan_fn
 #     from mamba_ssm.ops.triton.selective_state_update import selective_state_update
+# else:
 #     selective_state_update, selective_scan_fn, mamba_inner_fn = None, None, None
+# if is_causal_conv1d_available():
 #     from causal_conv1d import causal_conv1d_fn, causal_conv1d_update
+# else:
 #     causal_conv1d_update, causal_conv1d_fn = None, None
+try:
+    from mamba_ssm.ops.selective_scan_interface import mamba_inner_fn, selective_scan_fn
+    from mamba_ssm.ops.triton.selective_state_update import selective_state_update
+except:
+    selective_state_update, selective_scan_fn, mamba_inner_fn = None, None, None
+try:
+    from causal_conv1d import causal_conv1d_fn, causal_conv1d_update
+except:
+    causal_conv1d_update, causal_conv1d_fn = None, None
 is_fast_path_available = all(
             if module.bias is not None:
                 if not getattr(module.bias, "_no_reinit", False):
                     nn.init.zeros_(module.bias)
+            # nn.init.uniform_(module.weight, -0.001, 0.001)
         elif isinstance(module, nn.Embedding):
             nn.init.normal_(module.weight, std=self.config.initializer_range)