feat: remove unnecessary LN
src/dalle_mini/model/modeling.py
CHANGED
@@ -736,9 +736,10 @@ class FlaxBartEncoderLayerCollection(nn.Module):
             all_hidden_states += (hidden_states,)
             # final layernorm on the output of the last layer
             # or every 6 layers for Swin v2
+            # not needed for other models which use layernorm before x-attention
             # ignored args for deepnet which always add a norm with scale
-            add_norm =
-            (
+            add_norm = self.config.ln_positions == "swinv2" and (
+                (i == n_layers - 1) or ((i + 1) % 6 == 0)
             )
             # we don't need to scale the norm for the last layer
             use_scale = i != n_layers - 1
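For context, the flag computed in this hunk decides whether an extra LayerNorm is appended after a given encoder layer. Below is a minimal standalone sketch of that selection logic; the LNConfig dataclass, the norm_schedule helper, and the 12-layer example are illustrative assumptions, not the actual FlaxBartEncoderLayerCollection code.

from dataclasses import dataclass

@dataclass
class LNConfig:
    # hypothetical stand-in for the model config; only the field used here
    ln_positions: str = "swinv2"

def norm_schedule(config: LNConfig, n_layers: int):
    """Yield (layer index, add_norm, use_scale) following the Swin v2 rule:
    an extra LayerNorm after every 6th layer and after the last layer,
    with the last layer's norm left unscaled."""
    for i in range(n_layers):
        add_norm = config.ln_positions == "swinv2" and (
            (i == n_layers - 1) or ((i + 1) % 6 == 0)
        )
        # we don't need to scale the norm for the last layer
        use_scale = i != n_layers - 1
        yield i, add_norm, use_scale

# Example: a 12-layer encoder gets extra norms after layers 5 and 11 (0-indexed),
# and only the layer-11 norm is unscaled.
for i, add_norm, use_scale in norm_schedule(LNConfig(), 12):
    if add_norm:
        print(f"layer {i}: extra LayerNorm (scaled={use_scale})")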