x54-729 committed on
Commit
f2b8f8a
1 Parent(s): 5370161

update modeling file to newest

Browse files
Files changed (1) hide show
  1. modeling_internlm2.py +11 -3
modeling_internlm2.py CHANGED
@@ -13,7 +13,7 @@
13
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
  # See the License for the specific language governing permissions and
15
  # limitations under the License.
16
- """PyTorch InternLM2.5 model."""
17
  import math
18
  import queue
19
  import threading
@@ -59,6 +59,10 @@ try:
59
  except:
60
  pass
61
 
 
 
 
 
62
 
63
  logger = logging.get_logger(__name__)
64
 
@@ -1093,7 +1097,11 @@ class InternLM2Model(InternLM2PreTrainedModel):
1093
  else:
1094
  causal_mask = torch.full((sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=device)
1095
  if sequence_length != 1:
1096
- causal_mask = torch.triu(causal_mask, diagonal=1)
 
 
 
 
1097
  causal_mask *= torch.arange(target_length, device=device) > cache_position.reshape(-1, 1)
1098
  causal_mask = causal_mask[None, None, :, :].expand(input_tensor.shape[0], 1, -1, -1)
1099
  if attention_mask is not None:
@@ -1797,4 +1805,4 @@ class InternLM2ForTokenClassification(InternLM2PreTrainedModel):
1797
  logits=logits,
1798
  hidden_states=outputs.hidden_states,
1799
  attentions=outputs.attentions,
1800
- )
 
13
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
  # See the License for the specific language governing permissions and
15
  # limitations under the License.
16
+ """PyTorch InternLM2 model."""
17
  import math
18
  import queue
19
  import threading
 
59
  except:
60
  pass
61
 
62
+ try:
63
+ support_bf16_triu = torch.__version__ >= "2.1.0"
64
+ except Exception:
65
+ support_bf16_triu = False
66
 
67
  logger = logging.get_logger(__name__)
68
 
 
1097
  else:
1098
  causal_mask = torch.full((sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=device)
1099
  if sequence_length != 1:
1100
+ if support_bf16_triu or dtype == torch.float32:
1101
+ causal_mask = torch.triu(causal_mask, diagonal=1)
1102
+ else:
1103
+ triu_mask = torch.triu(torch.ones(causal_mask.size(), device=device), diagonal=1).bool()
1104
+ causal_mask.masked_fill_(~triu_mask, 0)
1105
  causal_mask *= torch.arange(target_length, device=device) > cache_position.reshape(-1, 1)
1106
  causal_mask = causal_mask[None, None, :, :].expand(input_tensor.shape[0], 1, -1, -1)
1107
  if attention_mask is not None:
 
1805
  logits=logits,
1806
  hidden_states=outputs.hidden_states,
1807
  attentions=outputs.attentions,
1808
+ )