Update model.py
model.py
CHANGED
@@ -156,7 +156,7 @@ def built_bloom_alibi(attention_mask, num_attention_heads):
     slops = jnp.concatenate([slops, jnp.power(extra_base, extra_power)], axis=0)
     arange_tensor = (((jnp.cumsum(attention_mask, axis=-1)) - 1) * attention_mask)[:, jnp.newaxis, :]
     alibi = slops[..., jnp.newaxis].astype(jnp.bfloat16) * arange_tensor
-    return alibi.reshape(b
+    return alibi.reshape(b, num_attention_heads, 1, s)
 
 
 def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0,
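For context on what the repaired return fixes: built_bloom_alibi builds a BLOOM-style ALiBi bias, a per-head slope times each token's position, and the reshape gives it the (batch, heads, 1, seq) layout that broadcasts against (batch, heads, q_len, kv_len) attention scores. Below is a minimal self-contained sketch of that recipe under stated assumptions: only the tail of the function is visible in this hunk, so the function name build_bloom_alibi_sketch and the slope construction for the power-of-two head count are filled in from the standard BLOOM formulation, not taken from this file.

import math
import jax.numpy as jnp

def build_bloom_alibi_sketch(attention_mask, num_attention_heads):
    # attention_mask: (batch, seq_len) of 0/1 padding flags.
    b, s = attention_mask.shape
    # Slopes follow the BLOOM recipe: a geometric series anchored at the
    # closest power of two at or below the head count (assumed, not shown in the hunk).
    closest_pow2 = 2 ** math.floor(math.log2(num_attention_heads))
    base = 2.0 ** (-(2.0 ** -(math.log2(closest_pow2) - 3)))
    slopes = jnp.power(base, jnp.arange(1, closest_pow2 + 1))
    if closest_pow2 != num_attention_heads:
        # Interleave extra slopes when the head count is not a power of two;
        # this is the jnp.concatenate line shown in the hunk's context.
        extra_base = 2.0 ** (-(2.0 ** -(math.log2(2 * closest_pow2) - 3)))
        extra_power = jnp.arange(1, 2 * (num_attention_heads - closest_pow2), 2)
        slopes = jnp.concatenate([slopes, jnp.power(extra_base, extra_power)], axis=0)
    # Position index of each non-padding token, zeroed where the mask is 0.
    arange_tensor = ((jnp.cumsum(attention_mask, axis=-1) - 1) * attention_mask)[:, jnp.newaxis, :]
    alibi = slopes[..., jnp.newaxis].astype(jnp.bfloat16) * arange_tensor
    # The fixed line: an explicit (batch, heads, 1, seq) layout so the bias
    # broadcasts over the query axis of the attention scores.
    return alibi.reshape(b, num_attention_heads, 1, s)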
@@ -252,7 +252,7 @@ class FlaxFalconAttention(nn.Module):
         attn = with_sharding_constraint(attn, PartitionSpec(("dp", "fsdp"), "mp", None, None))
 
         if alibi is not None:
-            attn +=
+            attn += alibi
         attn = attn * self.factor_scale
 
         if attention_mask is not None:
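This hunk completes a statement that was left truncated: the ALiBi bias is added to the raw attention scores, after which the scores are scaled and masked. A minimal sketch of that score path follows; only the attn += alibi and factor_scale lines come from the diff, while the function name, the einsum, and the jnp.where-based masking are illustrative assumptions about the surrounding code.

import jax
import jax.numpy as jnp

def attention_scores_sketch(query, key, alibi=None, attention_mask=None, factor_scale=1.0):
    # query, key: (batch, heads, seq, head_dim); raw dot-product scores.
    attn = jnp.einsum("...qd,...kd->...qk", query, key)
    if alibi is not None:
        # The repaired line: a bias of shape (batch, heads, 1, kv_len)
        # broadcasts over the query axis of the scores.
        attn += alibi
    attn = attn * factor_scale
    if attention_mask is not None:
        # Push masked positions to the dtype minimum before the softmax.
        attn = jnp.where(attention_mask, attn, jnp.finfo(attn.dtype).min)
    return jax.nn.softmax(attn, axis=-1)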
@@ -365,7 +365,6 @@ class FlaxFalconCollection(nn.Module):
     ):
         for b in self.blocks:
             hidden_states = b(
-
                 attention_mask=attention_mask,
                 hidden_states=hidden_states,
                 alibi=alibi
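The last hunk only drops a stray blank line inside the call, but it shows the collection's forward pattern: attention_mask and alibi are passed unchanged to every block while hidden_states is threaded through sequentially. A sketch of that loop, written as a hypothetical free function since the surrounding __call__ signature is not shown in the diff:

def run_blocks_sketch(blocks, hidden_states, attention_mask, alibi):
    # hidden_states is rebound to each block's output, so layer i+1
    # consumes what layer i produced; the mask and bias stay fixed.
    for b in blocks:
        hidden_states = b(
            attention_mask=attention_mask,
            hidden_states=hidden_states,
            alibi=alibi,
        )
    return hidden_states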