internlm
/

internlm2_5-7b-chat

Text Generation

Model card Files Files and versions Community

x54-729 committed on Aug 20, 2024

Commit

9b8d955

·

1 Parent(s): e2c47cf

update modeling file to newest

Files changed (2) hide show

configuration_internlm2.py +1 -1
modeling_internlm2.py +10 -2

configuration_internlm2.py CHANGED Viewed

@@ -177,4 +177,4 @@ class InternLM2Config(PretrainedConfig):
             raise ValueError(
                 f"`rope_scaling`'s factor field must be a number >= 1, got {rope_scaling_factor} "
                 f"of type {type(rope_scaling_factor)}"
-            )

             raise ValueError(
                 f"`rope_scaling`'s factor field must be a number >= 1, got {rope_scaling_factor} "
                 f"of type {type(rope_scaling_factor)}"
+            )

modeling_internlm2.py CHANGED Viewed

@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""PyTorch InternLM2.5 model."""
 import math
 import queue
 import threading
@@ -59,6 +59,10 @@ try:
 except:
     pass
 logger = logging.get_logger(__name__)
@@ -1093,7 +1097,11 @@ class InternLM2Model(InternLM2PreTrainedModel):
         else:
             causal_mask = torch.full((sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=device)
             if sequence_length != 1:
-                causal_mask = torch.triu(causal_mask, diagonal=1)
             causal_mask *= torch.arange(target_length, device=device) > cache_position.reshape(-1, 1)
             causal_mask = causal_mask[None, None, :, :].expand(input_tensor.shape[0], 1, -1, -1)
             if attention_mask is not None:

 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""PyTorch InternLM2 model."""
 import math
 import queue
 import threading
 except:
     pass
+try:
+    support_bf16_triu = torch.__version__ >= "2.1.0"
+except Exception:
+    support_bf16_triu = False
 logger = logging.get_logger(__name__)
         else:
             causal_mask = torch.full((sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=device)
             if sequence_length != 1:
+                if support_bf16_triu or dtype == torch.float32:
+                    causal_mask = torch.triu(causal_mask, diagonal=1)
+                else:
+                    triu_mask = torch.triu(torch.ones(causal_mask.size(), device=device), diagonal=1).bool()
+                    causal_mask.masked_fill_(~triu_mask, 0)
             causal_mask *= torch.arange(target_length, device=device) > cache_position.reshape(-1, 1)
             causal_mask = causal_mask[None, None, :, :].expand(input_tensor.shape[0], 1, -1, -1)
             if attention_mask is not None: