Commit f128e65 · 1 parent: 1332dbc
Commit message: "Update modeling_mos_mamba.py"
Browse files — files changed: modeling_mos_mamba.py (+2 −0, CHANGED)
@@ -691,6 +691,8 @@ class MoSMambaPreTrainedModel(PreTrainedModel):
             elif self.config.time_step_init_scheme == "random":
                 nn.init.uniform_(module.dt_proj.weight, -dt_init_std, dt_init_std)

+            nn.init.xavier_uniform_(module.gate.weight, gain=0.1)
+
             dt = torch.exp(
                 torch.rand(self.config.intermediate_size)
                 * (math.log(self.config.time_step_max) - math.log(self.config.time_step_min))