Crystalcareai committed
Commit f846650
1 Parent(s): 082cf66

Update modeling_gemmoe.py

Files changed (1)
  1. modeling_gemmoe.py +4 -4
modeling_gemmoe.py CHANGED
@@ -705,8 +705,10 @@ class GemmoeDecoderLayer(nn.Module):
         output_attentions: Optional[bool] = False,
         output_router_logits: Optional[bool] = False,
         use_cache: Optional[bool] = False,
+        cache_position: Optional[torch.LongTensor] = None,
         **kwargs,
     ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
+        # ... (rest of the code remains the same)
         if "padding_mask" in kwargs:
             warnings.warn(
                 "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
@@ -944,7 +946,6 @@ class GemmoeModel(GemmoePreTrainedModel):
         self.embed_tokens = value
 
     @add_start_docstrings_to_model_forward(GEMMOE_INPUTS_DOCSTRING)
-    # Ignore copy
     def forward(
         self,
         input_ids: torch.LongTensor = None,
@@ -1215,9 +1216,8 @@ class GemmoeForCausalLM(GemmoePreTrainedModel):
         ```python
         >>> from transformers import AutoTokenizer, GemmoeForCausalLM
 
-        >>> model = GemmoeForCausalLM.from_pretrained("google/GEMMA-7b")
+        >>> model= GemmoeForCausalLM.from_pretrained("google/GEMMA-7b")
         >>> tokenizer = AutoTokenizer.from_pretrained("google/GEMMA-7b")
-
         >>> prompt = "What is your favorite condiment?"
         >>> inputs = tokenizer(prompt, return_tensors="pt")
 
@@ -1382,7 +1382,6 @@ class GemmoeForCausalLM(GemmoePreTrainedModel):
         )
         return reordered_past
 
-
 @add_start_docstrings(
     """
     The Gemmoe Model transformer with a sequence classification head on top (linear layer).
@@ -1447,6 +1446,7 @@ class GemmoeForSequenceClassification(GemmoePreTrainedModel):
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
+            cache_position=None,
        )
        hidden_states = transformer_outputs[0]
        logits = self.score(hidden_states)
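
The added `cache_position` argument follows the convention used by other recent decoder models in transformers: a 1-D LongTensor holding the absolute positions of the tokens in the current forward pass, offset by the number of tokens already stored in the KV cache. Below is a minimal sketch of how such a tensor is typically constructed before being handed to `GemmoeDecoderLayer.forward`; `make_cache_position` is a hypothetical helper for illustration only, and the exact wiring inside `GemmoeModel` may differ.

```python
# Minimal sketch, not part of this commit: how a `cache_position` tensor is
# commonly built for transformers-style decoders. `make_cache_position` is a
# hypothetical helper; Gemmoe's internal wiring may differ.
import torch


def make_cache_position(input_ids: torch.Tensor, past_seen_tokens: int) -> torch.LongTensor:
    """Absolute positions of the tokens processed in this forward pass."""
    num_new_tokens = input_ids.shape[1]
    return torch.arange(
        past_seen_tokens,
        past_seen_tokens + num_new_tokens,
        dtype=torch.long,
        device=input_ids.device,
    )


# Prefill: nothing cached yet, so a 5-token prompt gets positions 0..4.
prompt_ids = torch.randint(0, 32_000, (1, 5))
print(make_cache_position(prompt_ids, past_seen_tokens=0))  # tensor([0, 1, 2, 3, 4])

# Single decode step: 5 tokens already cached, the new token sits at position 5.
next_token = torch.randint(0, 32_000, (1, 1))
print(make_cache_position(next_token, past_seen_tokens=5))  # tensor([5])
```

Passing `cache_position=None` from the sequence-classification head, as in the last hunk, presumably leaves position handling to the backbone's defaults.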