jonathanjordan21 committed
Commit f59f64a
1 Parent(s): a6b2a1f

Add initialization of module weights for the Linear layers in x_proj


Add initialization of module weights for the Linear layers in x_proj in order to avoid NaN values after they are processed in the mamba library.

Files changed (1)
  1. modeling_mos_mamba.py +2 -0
modeling_mos_mamba.py CHANGED
@@ -693,6 +693,8 @@ class MoSMambaPreTrainedModel(PreTrainedModel):
             if module.bias is not None:
                 if not getattr(module.bias, "_no_reinit", False):
                     nn.init.zeros_(module.bias)
+            nn.init.uniform_(module.weight, -0.001, 0.001)
+
         elif isinstance(module, nn.Embedding):
             nn.init.normal_(module.weight, std=self.config.initializer_range)
 
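
For context, a minimal sketch of how the affected branch of the weight-initialization hook could read after this change. The enclosing _init_weights method and the isinstance(module, nn.Linear) guard are assumptions based on the standard transformers Mamba implementation (they are not visible in the hunk above); only the nn.init.uniform_ call and the following blank line are added by this commit.

import torch.nn as nn
from transformers import PreTrainedModel


class MoSMambaPreTrainedModel(PreTrainedModel):
    # ... other branches and pretrained-model plumbing omitted (assumed) ...

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            if module.bias is not None:
                if not getattr(module.bias, "_no_reinit", False):
                    nn.init.zeros_(module.bias)
            # Added in this commit: initialize Linear weights (including the
            # x_proj projections) from a small uniform range so that, per the
            # commit message, the mamba library does not produce NaNs downstream.
            nn.init.uniform_(module.weight, -0.001, 0.001)
        elif isinstance(module, nn.Embedding):
            nn.init.normal_(module.weight, std=self.config.initializer_range)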