jonathanjordan21 committed on
Commit
a6b2a1f
1 Parent(s): 9adf831

Update modeling_mos_mamba.py

Browse files
Files changed (1) hide show
  1. modeling_mos_mamba.py +2 -2
modeling_mos_mamba.py CHANGED
@@ -875,8 +875,8 @@ class MoSMambaForCausalLM(MoSMambaPreTrainedModel):
875
  super().__init__(config)
876
  self.backbone = MoSMambaModel(config)
877
  self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
878
- self.num_selectivities = 6
879
- self.num_selectivities_per_tok = 2
880
  self.router_aux_loss_coef = 0.02
881
  # Initialize weights and apply final processing
882
  self.post_init()
 
875
  super().__init__(config)
876
  self.backbone = MoSMambaModel(config)
877
  self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
878
+ self.num_selectivities = config.num_selectivities
879
+ self.num_selectivities_per_tok = config.num_selectivities_per_tok
880
  self.router_aux_loss_coef = 0.02
881
  # Initialize weights and apply final processing
882
  self.post_init()