Raghavan committed on
Commit
8b9ed51
1 Parent(s): 182ddfb

Upload 7 files

Browse files
Files changed (1) hide show
  1. modeling_indictrans.py +14 -0
modeling_indictrans.py CHANGED
@@ -40,6 +40,7 @@ logger = logging.get_logger(__name__)
40
  _CONFIG_FOR_DOC = "IndicTransConfig"
41
 
42
  INDICTRANS_PRETRAINED_MODEL_ARCHIVE_LIST = [""]
 
43
 
44
 
45
  # Copied from transformers.models.bart.modeling_bart.shift_tokens_right
@@ -59,6 +60,16 @@ def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start
59
  return shifted_input_ids
60
 
61
 
 
 
 
 
 
 
 
 
 
 
62
  # Copied from transformers.models.bart.modeling_bart._make_causal_mask
63
  def _make_causal_mask(
64
  input_ids_shape: torch.Size, dtype: torch.dtype, device: torch.device, past_key_values_length: int = 0
@@ -1206,6 +1217,9 @@ class IndicTransForConditionalGeneration(IndicTransPreTrainedModel):
1206
  # labels, self.config.pad_token_id, self.config.decoder_start_token_id
1207
  # )
1208
 
 
 
 
1209
  outputs = self.model(
1210
  input_ids,
1211
  attention_mask=attention_mask,
 
40
  _CONFIG_FOR_DOC = "IndicTransConfig"
41
 
42
  INDICTRANS_PRETRAINED_MODEL_ARCHIVE_LIST = [""]
43
+ eos_token_id = 2
44
 
45
 
46
  # Copied from transformers.models.bart.modeling_bart.shift_tokens_right
 
60
  return shifted_input_ids
61
 
62
 
63
def prepare_decoder_input_ids_label(
    decoder_input_ids, decoder_attention_mask, eos_token_id=2, pad_token_id=1
):
    """Replace EOS tokens in the decoder inputs and derive shifted labels.

    Every position in ``decoder_input_ids`` equal to ``eos_token_id`` is
    overwritten with ``pad_token_id``, and the corresponding position in
    ``decoder_attention_mask`` is zeroed. Labels are the decoder inputs
    shifted left by one position (next-token targets for teacher forcing).

    Args:
        decoder_input_ids (torch.Tensor): token-id tensor of shape
            (batch, seq_len). **Modified in place.**
        decoder_attention_mask (torch.Tensor): attention mask of shape
            (batch, seq_len). **Modified in place.**
        eos_token_id (int): id treated as end-of-sequence. Default 2,
            matching the original module-level constant.
        pad_token_id (int): id written over EOS positions. Default 1,
            matching the original hard-coded value.

    Returns:
        tuple: ``(decoder_input_ids, decoder_attention_mask, labels)``.
        ``labels`` is the view ``decoder_input_ids[:, 1:]`` — it shares
        memory with the (already mutated) input tensor.
    """
    eos_positions = decoder_input_ids == eos_token_id
    decoder_input_ids[eos_positions] = pad_token_id
    decoder_attention_mask[eos_positions] = 0

    # NOTE(review): padded label positions are NOT set to -100, so any loss
    # computed downstream will also cover pad tokens — confirm intended.
    labels = decoder_input_ids[:, 1:]

    return decoder_input_ids, decoder_attention_mask, labels
71
+
72
+
73
  # Copied from transformers.models.bart.modeling_bart._make_causal_mask
74
  def _make_causal_mask(
75
  input_ids_shape: torch.Size, dtype: torch.dtype, device: torch.device, past_key_values_length: int = 0
 
1217
  # labels, self.config.pad_token_id, self.config.decoder_start_token_id
1218
  # )
1219
 
1220
+ decoder_input_ids, decoder_attention_mask, labels = prepare_decoder_input_ids_label(decoder_input_ids,
1221
+ decoder_attention_mask)
1222
+
1223
  outputs = self.model(
1224
  input_ids,
1225
  attention_mask=attention_mask,