jasonfang3900 committed

Commit 3037cf1 · Parent(s): 8a01860

modeling_flm.py: copyright modification and minor code modification

Files changed:
- configuration_flm.py (+14, -6)
- modeling_flm.py (+22, -6)
configuration_flm.py

@@ -1,6 +1,18 @@
 # coding=utf-8
-# Copyright
-
+# Copyright 2018 The OpenAI Team Authors and HuggingFace Inc. team.
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """ Cofe-AI FLM configuration"""
 
 from transformers.configuration_utils import PretrainedConfig
@@ -11,10 +23,6 @@ from transformers.utils import logging
 logger = logging.get_logger(__name__)
 
 FLM_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    # "freelm": "xxxx/config.json",
-    # "freelm-medium": "xxxx/config.json",
-    # "freelm-large": "xxxx/config.json",
-    # "freelm-xl": "xxxx/config.json",
 }
 
 
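The configuration change is housekeeping: the stub "# Copyright" line becomes a full Apache-2.0 header, and the commented-out placeholder entries are dropped from FLM_PRETRAINED_CONFIG_ARCHIVE_MAP. With the archive map empty, the config resolves like any custom config shipped alongside model code. A minimal usage sketch, assuming a placeholder repo id ("cofe-ai/flm-example" is illustrative, not a real checkpoint):

    from transformers import AutoConfig

    # Placeholder repo id; any repo that ships configuration_flm.py as custom
    # code next to its config.json resolves the same way. trust_remote_code
    # tells transformers to import the repo's own FLMConfig class.
    config = AutoConfig.from_pretrained("cofe-ai/flm-example", trust_remote_code=True)
    print(type(config).__name__)  # FLMConfig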
modeling_flm.py

@@ -1,8 +1,9 @@
 # coding=utf-8
-# Copyright
+# Copyright 2018 The OpenAI Team Authors and HuggingFace Inc. team.
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #
 # This code is based on OpenAI's GPT-2 library. It has been modified from its
-# original forms to accommodate
+# original forms to accommodate architectural differences compared to GPT-2.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,12 +16,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""PyTorch FLM model."""
+
 from typing import Optional, Tuple, Union
 
 import math
 import torch
-
-from einops import rearrange, repeat
+from einops import rearrange
 from torch import einsum, nn
 from torch.cuda.amp import autocast
 from transformers.activations import ACT2FN
@@ -31,8 +33,9 @@ from transformers.modeling_outputs import (
 )
 from transformers.modeling_utils import PreTrainedModel
 from transformers.pytorch_utils import find_pruneable_heads_and_indices, prune_conv1d_layer
-from transformers.utils import
+from transformers.utils import logging
 from transformers.utils.model_parallel_utils import assert_device_map, get_device_map
+
 from .configuration_flm import FLMConfig
@@ -102,6 +105,18 @@ class RotaryEmbedding(nn.Module):
         rotated_k = apply_rotary_emb(freqs_k, k, scale=scale_k ** -1)
         return rotated_q, rotated_k
 
+    def rotate_queries_or_keys(self, t, seq_dim=-2, offset=0):
+        """
+        use this only when xpos is NOT activated.
+        """
+        # t's shape e.g. -> (batchsize, headnum, seqlen, dimofhead)
+        assert not self.use_xpos, 'you must use `.rotate_queries_and_keys` method instead and pass in both queries and keys, for length extrapolatable rotary embeddings'
+        device, seq_len = t.device, t.shape[seq_dim]
+        pos_seq_t = torch.arange(offset, offset + seq_len, device=device, dtype=torch.float32)
+        freqs = self.forward(pos_seq_t, cache_key=f"{offset}:{offset+seq_len}")
+        # freqs seqlen x dim
+        return apply_rotary_emb(freqs, t)
+
     def get_scale(self, t, cache_key=None, offset=0, ):
         assert self.use_xpos, 'This function is only useful for xpos.'
         if exists(cache_key) and cache_key in self.cache_scale:
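The substantive addition above is RotaryEmbedding.rotate_queries_or_keys, which rotates a single tensor on the non-xpos path using positions that start at `offset`. For intuition, here is a self-contained sketch of standard rotary position embeddings with an offset; rope_freqs and apply_rope are illustrative stand-ins, and the repo's forward/apply_rotary_emb may use a different feature-pairing layout:

    import torch

    def rope_freqs(seq_len, dim, offset=0, base=10000.0, device=None):
        # One rotation angle per feature pair: inv_freq[i] = base**(-2i/dim).
        inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2, device=device).float() / dim))
        pos = torch.arange(offset, offset + seq_len, device=device, dtype=torch.float32)
        freqs = torch.einsum("s,d->sd", pos, inv_freq)  # (seq_len, dim / 2)
        return torch.cat((freqs, freqs), dim=-1)        # (seq_len, dim)

    def apply_rope(t, freqs):
        # t: (..., seq_len, dim). Rotate each (x1, x2) feature pair by its angle.
        d = t.shape[-1]
        t1, t2 = t[..., : d // 2], t[..., d // 2:]
        rotated = torch.cat((-t2, t1), dim=-1)
        return t * freqs.cos() + rotated * freqs.sin()

    q = torch.randn(2 * 8, 5, 64)                       # (batch * heads, seq, head_dim)
    q_rot = apply_rope(q, rope_freqs(5, 64, offset=3))  # tokens occupy positions 3..7

Because the rotation is a pure function of absolute position, queries and keys rotated with consistent positions preserve relative-position attention scores, which is why the offset below matters.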
@@ -372,6 +387,7 @@ class FLMAttention(nn.Module):
 
         batch_size, head_num, k_seq_len, head_features = key.shape
         _, _, q_seq_len, _ = query.shape
+        query_offset = k_seq_len - q_seq_len
         if rotary_embedding is not None:
             query = query.contiguous().view(batch_size * head_num, q_seq_len, head_features)
             key = key.contiguous().view(batch_size * head_num, k_seq_len, head_features)
@@ -381,7 +397,7 @@
                 # query: [batch_size * head_num, seqlen, hn]
                 query, key = rotary_embedding.rotate_queries_and_keys(query, key)
             else:
-                query = rotary_embedding.rotate_queries_or_keys(query)
+                query = rotary_embedding.rotate_queries_or_keys(query, offset=query_offset)
                 key = rotary_embedding.rotate_queries_or_keys(key)
             # batch_size * head_num, k_seq_len(q_seq_len), head_features
             query = query.view(batch_size, head_num, q_seq_len, head_features)
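The `query_offset = k_seq_len - q_seq_len` line exists for incremental decoding with a KV cache: the key tensor spans every cached position while the query covers only the newest tokens, so rotating the query without an offset would stamp it with position 0 and misalign it against the keys. A small sketch of the bookkeeping, reusing the illustrative helpers from the sketch above:

    # 9 cached tokens plus 1 new token: the new query sits at absolute position 9.
    k_seq_len, q_seq_len, dim = 10, 1, 64
    query = torch.randn(16, q_seq_len, dim)  # (batch * heads, q_len, head_dim)
    key = torch.randn(16, k_seq_len, dim)

    query_offset = k_seq_len - q_seq_len     # = 9, as in the committed change
    q_rot = apply_rope(query, rope_freqs(q_seq_len, dim, offset=query_offset))
    k_rot = apply_rope(key, rope_freqs(k_seq_len, dim))  # keys span positions 0..9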