izhx committed on
Commit
d0284a3
1 Parent(s): b7ea01b

Update modeling.py

Files changed (1)
  1. modeling.py +8 -5
modeling.py CHANGED
@@ -975,8 +975,6 @@ class NewForMaskedLM(NewPreTrainedModel):
         self.lm_head = NewLMPredictionHead(config)
         self.loss_fct = nn.CrossEntropyLoss()
 
-        self.pretraining = True
-
         # Initialize weights and apply final processing
         self.post_init()
 
@@ -1009,13 +1007,13 @@ class NewForMaskedLM(NewPreTrainedModel):
 
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
-        if labels is None:
+        if labels is None or not self.new.config.unpad_inputs:
             length = None
             subset_indices = None
         else:
             length = attention_mask.sum(-1).tolist()
             labels = labels[attention_mask.bool()].unsqueeze(0)
-            subset_indices = labels > -100 if self.pretraining else None
+            subset_indices = labels > -100
 
         outputs = self.new(
             input_ids,
@@ -1037,7 +1035,12 @@ class NewForMaskedLM(NewPreTrainedModel):
 
         masked_lm_loss = None
         if labels is not None:
-            labels = labels[subset_indices]
+            if subset_indices is None:
+                mask = attention_mask.bool()
+                prediction_scores = prediction_scores[mask]
+                labels = labels[mask]
+            else:
+                labels = labels[subset_indices]
             masked_lm_loss = self.loss_fct(prediction_scores, labels)
 
         if not return_dict:
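In short: the commit drops the hard-coded self.pretraining flag and branches on self.new.config.unpad_inputs instead, so the labels > -100 subset selection only runs when inputs are unpadded; with padded inputs, both prediction_scores and labels are filtered through attention_mask before the loss. Below is a minimal standalone sketch of the two loss paths (the toy tensors and shapes are illustrative, not from the commit, and the unpadded path assumes the model gathers scores for real tokens the same way the labels are flattened):

import torch
import torch.nn as nn

loss_fct = nn.CrossEntropyLoss()  # ignore_index defaults to -100

# Toy batch: 2 sequences, max length 4, vocab of 10 (illustrative values).
vocab_size = 10
attention_mask = torch.tensor([[1, 1, 1, 0],
                               [1, 1, 0, 0]])
labels = torch.tensor([[-100,    3, -100, -100],
                       [   5, -100, -100, -100]])
mask = attention_mask.bool()

# Padded path (labels is None or unpad_inputs disabled -> subset_indices is
# None): scores keep the [batch, seq, vocab] shape, so both tensors are
# filtered through the attention mask; the loss then skips -100 targets.
prediction_scores = torch.randn(2, 4, vocab_size)
loss_padded = loss_fct(prediction_scores[mask], labels[mask])

# Unpadded path: labels were flattened to the real tokens up front
# (labels[attention_mask.bool()].unsqueeze(0)), and subset_indices keeps
# only the masked-LM positions. The scores are assumed here to come out
# of the model already flattened the same way.
flat_labels = labels[mask].unsqueeze(0)   # shape [1, total_real_tokens]
subset_indices = flat_labels > -100       # MLM positions only
flat_scores = torch.randn(1, int(mask.sum()), vocab_size)
loss_unpadded = loss_fct(flat_scores[subset_indices],
                         flat_labels[subset_indices])

print(loss_padded.item(), loss_unpadded.item())

Either way, nn.CrossEntropyLoss still ignores -100 targets (its default ignore_index), which is why the padded path does not need an explicit > -100 filter.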