togethercomputer
/

evo-1-131k-base

Text Generation

deep signal processing

Model card Files and versions Community

maxall4 committed 24 days ago

Commit

3b191f9

•

1 Parent(s): d726368

Update model.py

Files changed (1) hide show

model.py +3 -3

model.py CHANGED Viewed

@@ -355,7 +355,7 @@ class StripedHyena(nn.Module):
         self.gradient_checkpointing = False
         self._gradient_checkpointing_func = None
-    def forward(self, inputs_embeds, inference_params_dict=None, padding_mask=None):
         L = x.shape[1]
         x = self.embedding_layer.embed(x)
         if inference_params_dict is not None:
@@ -370,7 +370,7 @@ class StripedHyena(nn.Module):
         x = self.unembed.unembed(x)
         return x, inference_params_dict_out
-    def stateful_forward(self, inputs_embeds, inference_params_dict=None):
         for block_idx, block in enumerate(self.blocks):
             block_name = "mha" if block_idx in self.config.attn_layer_idxs else "hyena"
             inference_params = inference_params_dict[block_name]
@@ -378,7 +378,7 @@ class StripedHyena(nn.Module):
         return x, inference_params_dict
-    def stateless_forward(self, inputs_embeds, padding_mask=None):
         if type(padding_mask) == torch.Tensor:
             x = x * padding_mask[..., None]

         self.gradient_checkpointing = False
         self._gradient_checkpointing_func = None
+    def forward(self, x, inference_params_dict=None, padding_mask=None):
         L = x.shape[1]
         x = self.embedding_layer.embed(x)
         if inference_params_dict is not None:
         x = self.unembed.unembed(x)
         return x, inference_params_dict_out
+    def stateful_forward(self, x, inference_params_dict=None):
         for block_idx, block in enumerate(self.blocks):
             block_name = "mha" if block_idx in self.config.attn_layer_idxs else "hyena"
             inference_params = inference_params_dict[block_name]
         return x, inference_params_dict
+    def stateless_forward(self, x, padding_mask=None):
         if type(padding_mask) == torch.Tensor:
             x = x * padding_mask[..., None]