jonathanjordan21 committed on
Commit
f128e65
1 Parent(s): 1332dbc

Update modeling_mos_mamba.py

Browse files
Files changed (1) hide show
  1. modeling_mos_mamba.py +2 -0
modeling_mos_mamba.py CHANGED
@@ -691,6 +691,8 @@ class MoSMambaPreTrainedModel(PreTrainedModel):
691
  elif self.config.time_step_init_scheme == "random":
692
  nn.init.uniform_(module.dt_proj.weight, -dt_init_std, dt_init_std)
693
 
 
 
694
  dt = torch.exp(
695
  torch.rand(self.config.intermediate_size)
696
  * (math.log(self.config.time_step_max) - math.log(self.config.time_step_min))
 
691
  elif self.config.time_step_init_scheme == "random":
692
  nn.init.uniform_(module.dt_proj.weight, -dt_init_std, dt_init_std)
693
 
694
+ nn.init.xavier_uniform_(module.gate.weight, gain=0.1)
695
+
696
  dt = torch.exp(
697
  torch.rand(self.config.intermediate_size)
698
  * (math.log(self.config.time_step_max) - math.log(self.config.time_step_min))