Update modeling_mpt.py
Browse files
- modeling_mpt.py +1 -1
modeling_mpt.py
CHANGED
@@ -231,7 +231,7 @@ class MPTForCausalLM(MPTPreTrainedModel):
|
|
231 |
def get_decoder(self):
|
232 |
return self.transformer
|
233 |
|
234 |
-
def forward(self, input_ids: torch.LongTensor, past_key_values: Optional[List[Tuple[torch.FloatTensor]]]=None, attention_mask: Optional[torch.ByteTensor]=None, prefix_mask: Optional[torch.ByteTensor]=None, sequence_id: Optional[torch.LongTensor]=None, labels: Optional[torch.LongTensor]=None, return_dict: Optional[bool]=None, output_attentions: Optional[bool]=None, output_hidden_states: Optional[bool]=None, use_cache: Optional[bool]=None):
|
235 |
return_dict = return_dict if return_dict is not None else self.config.return_dict
|
236 |
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
237 |
outputs = self.transformer(input_ids=input_ids, past_key_values=past_key_values, attention_mask=attention_mask, prefix_mask=prefix_mask, sequence_id=sequence_id, return_dict=return_dict, output_attentions=output_attentions, output_hidden_states=output_hidden_states, use_cache=use_cache)
|
|
|
231 |
def get_decoder(self):
|
232 |
return self.transformer
|
233 |
|
234 |
+
def forward(self, input_ids: torch.LongTensor, past_key_values: Optional[List[Tuple[torch.FloatTensor]]]=None, attention_mask: Optional[torch.ByteTensor]=None, prefix_mask: Optional[torch.ByteTensor]=None, sequence_id: Optional[torch.LongTensor]=None, labels: Optional[torch.LongTensor]=None, return_dict: Optional[bool]=None, output_attentions: Optional[bool]=None, output_hidden_states: Optional[bool]=None, use_cache: Optional[bool]=None, inputs_embeds: Optional[torch.FloatTensor] = None):
|
235 |
return_dict = return_dict if return_dict is not None else self.config.return_dict
|
236 |
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
237 |
outputs = self.transformer(input_ids=input_ids, past_key_values=past_key_values, attention_mask=attention_mask, prefix_mask=prefix_mask, sequence_id=sequence_id, return_dict=return_dict, output_attentions=output_attentions, output_hidden_states=output_hidden_states, use_cache=use_cache)
|