nilabhra committed
Commit 8814452
1 Parent(s): 077d3ae

fix: MLP layer names.

Files changed (1):
  1. modeling_falcon.py +4 -4
modeling_falcon.py CHANGED
@@ -763,18 +763,18 @@ class FalconMLP(nn.Module):
         super().__init__()
         hidden_size = config.hidden_size

-        self.upscale = FalconLinear(
+        self.dense_h_to_4h = FalconLinear(
             hidden_size, config.ff_factor * hidden_size, bias=config.bias
         )
         self.act = nn.GELU()
-        self.downscale = FalconLinear(
+        self.dense_4h_to_h = FalconLinear(
             config.ff_factor * hidden_size, hidden_size, bias=config.bias
         )
         self.hidden_dropout = config.hidden_dropout

     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        x = self.act(self.upscale(x))
-        x = self.downscale(x)
+        x = self.act(self.dense_h_to_4h(x))
+        x = self.dense_4h_to_h(x)
         return x

 FALCON_ATTENTION_CLASSES = {
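For context, below is a minimal runnable sketch of FalconMLP with the renamed layers. FalconLinear is stubbed as nn.Linear, and FalconConfig is a hypothetical stand-in exposing only the fields this module reads (hidden_size, ff_factor, bias, hidden_dropout); the real classes in modeling_falcon.py carry more. The dense_h_to_4h / dense_4h_to_h names match the layer names used in existing Falcon checkpoints, so MLP state-dict keys line up when loading pretrained weights.

# Minimal sketch of the renamed module; stand-ins are noted in comments.
from dataclasses import dataclass

import torch
from torch import nn

FalconLinear = nn.Linear  # stand-in; the real class customizes init/loading


@dataclass
class FalconConfig:  # hypothetical subset of the real config
    hidden_size: int = 64
    ff_factor: int = 4
    bias: bool = False
    hidden_dropout: float = 0.0


class FalconMLP(nn.Module):
    def __init__(self, config: FalconConfig) -> None:
        super().__init__()
        hidden_size = config.hidden_size

        # Expansion: hidden_size -> ff_factor * hidden_size (was `upscale`).
        self.dense_h_to_4h = FalconLinear(
            hidden_size, config.ff_factor * hidden_size, bias=config.bias
        )
        self.act = nn.GELU()
        # Projection back: ff_factor * hidden_size -> hidden_size (was `downscale`).
        self.dense_4h_to_h = FalconLinear(
            config.ff_factor * hidden_size, hidden_size, bias=config.bias
        )
        self.hidden_dropout = config.hidden_dropout

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.act(self.dense_h_to_4h(x))
        x = self.dense_4h_to_h(x)
        return x


mlp = FalconMLP(FalconConfig())
print(sorted(mlp.state_dict()))          # keys now use the dense_* names
print(mlp(torch.randn(2, 8, 64)).shape)  # torch.Size([2, 8, 64])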