oweller2 committed · Commit b44e834 · Parent(s): 3574e72

fix

Files changed: modeling_flexbert.py (+8 -1)
modeling_flexbert.py  CHANGED

@@ -1643,7 +1643,14 @@ class FlexBertForCausalLM(FlexBertPreTrainedModel):
 
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         if self.unpad_embeddings and (indices is None and cu_seqlens is None and max_seqlen is None):
-
+            if input_ids.dim() == 2:
+                batch_size, seq_len = input_ids.shape[:2]
+            elif input_ids.dim() >= 3:
+                batch_size, seq_len = input_ids.shape[:2]
+            else:  # dim is 1
+                batch_size, seq_len = input_ids.shape[0], 1
+                input_ids = input_ids.unsqueeze(1)
+
             if attention_mask is None:  # Create causal mask (lower triangular)
                 attention_mask = torch.tril(torch.ones(batch_size, seq_len, device=input_ids.device), diagonal=0)
             input_ids, indices, cu_seqlens, max_seqlen, position_ids, labels = self.unpad_inputs(
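A minimal sketch of what the added branch does, taken out of the model for illustration. The helper name normalize_input_ids is hypothetical (not part of the repository); only torch is assumed. It mirrors the commit's dimension handling: 2-D and higher-dimensional input_ids keep their leading (batch, seq) dims, while a 1-D tensor is treated as a batch of single-token sequences and unsqueezed to 2-D before the lower-triangular causal mask is built.

import torch

def normalize_input_ids(input_ids: torch.Tensor):
    # Hypothetical standalone sketch of the dim-handling added in this commit.
    if input_ids.dim() == 2:
        batch_size, seq_len = input_ids.shape[:2]
    elif input_ids.dim() >= 3:
        batch_size, seq_len = input_ids.shape[:2]
    else:  # dim is 1: treat as a batch of single tokens
        batch_size, seq_len = input_ids.shape[0], 1
        input_ids = input_ids.unsqueeze(1)

    # Same causal (lower triangular) mask the surrounding code builds
    # when attention_mask is None.
    attention_mask = torch.tril(
        torch.ones(batch_size, seq_len, device=input_ids.device), diagonal=0
    )
    return input_ids, attention_mask

# Example: a 1-D batch of token ids becomes shape (3, 1), with a (3, 1) mask.
ids, mask = normalize_input_ids(torch.tensor([5, 17, 42]))
print(ids.shape, mask.shape)

This is why the commit message is just "fix": without the added branch, a 1-D input_ids would reach the mask construction and unpad_inputs call without well-defined batch_size and seq_len values.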
