jonathanjordan21 committed
Commit f59f64a
Parent: a6b2a1f

add initialization of module weight for Linear layers in x_proj

Add initialization of the module weight for the Linear layers in x_proj in order to avoid NaN values after the input is processed by the mamba library.
- modeling_mos_mamba.py (+2, -0)

modeling_mos_mamba.py CHANGED
@@ -693,6 +693,8 @@ class MoSMambaPreTrainedModel(PreTrainedModel):
             if module.bias is not None:
                 if not getattr(module.bias, "_no_reinit", False):
                     nn.init.zeros_(module.bias)
+            nn.init.uniform_(module.weight, -0.001, 0.001)
+
         elif isinstance(module, nn.Embedding):
             nn.init.normal_(module.weight, std=self.config.initializer_range)
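For context, here is a minimal, self-contained sketch of the patched branch. Only the nn.init.uniform_ call (and the blank line after it) comes from this commit; the surrounding structure follows the usual Hugging Face _init_weights convention, and DemoConfig, init_weights, and the 0.02 initializer_range are illustrative stand-ins, not code from this repository.

import torch.nn as nn


class DemoConfig:
    # Illustrative stand-in for the model config; 0.02 is an assumed default.
    initializer_range = 0.02


def init_weights(module: nn.Module, config: DemoConfig) -> None:
    """Replicates the Linear/Embedding branches touched by this commit."""
    if isinstance(module, nn.Linear):
        if module.bias is not None:
            if not getattr(module.bias, "_no_reinit", False):
                nn.init.zeros_(module.bias)
        # New in this commit: draw Linear weights (e.g. x_proj) from a tiny
        # symmetric uniform range so downstream mamba ops stay finite.
        nn.init.uniform_(module.weight, -0.001, 0.001)
    elif isinstance(module, nn.Embedding):
        nn.init.normal_(module.weight, std=config.initializer_range)


# Usage: walk every submodule, as PreTrainedModel would via self.apply(...).
model = nn.Sequential(nn.Embedding(10, 4), nn.Linear(4, 4))
model.apply(lambda m: init_weights(m, DemoConfig()))

Note that the added call sits in the generic nn.Linear branch, so it reinitializes the weight of every Linear module the traversal visits, not only x_proj; the tiny symmetric range simply keeps early activations small enough that the mamba kernels do not overflow into NaN.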