fix rmsnorm init weight bug.
Use torch.ones to initialize the RMSNorm weight. torch.empty returns an uninitialized tensor whose values are arbitrary and may fall outside the representable float range, so the scale parameter should default to ones.
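For reference, a minimal sketch (not part of the change) contrasting the two initializers; the shape 4096 is an arbitrary example:

import torch

# torch.empty returns uninitialized memory: whatever bytes happen to be there,
# which reinterpreted as floats can be huge, inf, or NaN.
w_uninitialized = torch.empty(4096)

# torch.ones is the neutral initialization for an RMSNorm scale parameter:
# with weight == 1 the layer normalizes without rescaling until it is trained.
w_ones = torch.ones(4096)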
modeling_chatglm.py (+1 -1)
@@ -181,7 +181,7 @@ def apply_rotary_pos_emb(x: torch.Tensor, rope_cache: torch.Tensor) -> torch.Tensor:
 class RMSNorm(torch.nn.Module):
     def __init__(self, normalized_shape, eps=1e-5, device=None, dtype=None, **kwargs):
         super().__init__()
-        self.weight = torch.nn.Parameter(torch.empty(normalized_shape, device=device, dtype=dtype))
+        self.weight = torch.nn.Parameter(torch.ones(normalized_shape, device=device, dtype=dtype))
         self.eps = eps

     def forward(self, hidden_states: torch.Tensor):
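For context, a sketch of the corrected module with a standard RMSNorm forward; the forward body below is an assumption for illustration, since the hunk only shows its signature:

import torch

class RMSNorm(torch.nn.Module):
    def __init__(self, normalized_shape, eps=1e-5, device=None, dtype=None, **kwargs):
        super().__init__()
        # Start from ones so an untrained layer is a pure normalization.
        self.weight = torch.nn.Parameter(torch.ones(normalized_shape, device=device, dtype=dtype))
        self.eps = eps

    def forward(self, hidden_states: torch.Tensor):
        # Assumed standard RMSNorm: x / sqrt(mean(x^2) + eps), then per-feature rescale.
        input_dtype = hidden_states.dtype
        variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True)
        hidden_states = hidden_states * torch.rsqrt(variance + self.eps)
        return self.weight * hidden_states.to(input_dtype)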