Commit 35d5984 (parent: 1b20edf) by myownskyW7

Speed up chat

Files changed (2):
1. modeling_InternLM_XComposer.py +7 -0
2. modeling_utils.py +36 -25
modeling_InternLM_XComposer.py CHANGED

@@ -103,6 +103,13 @@ conversation
 
         self.eoh = '<TOKENS_UNUSED_0>' # end of human
         self.eoa = '<TOKENS_UNUSED_1>' # end of assistant
+        stop_words_ids = [
+            torch.tensor([103027]).to(config.device),
+            torch.tensor([103028]).to(config.device),
+        ]
+        stopping_criteria = StoppingCriteriaList(
+            [StoppingCriteriaSub(stops=stop_words_ids)])
+        self.gen_config['stopping_criteria'] = stopping_criteria
 
     def maybe_autocast(self, dtype=torch.float16):
        # if on cpu, don't use autocast
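The added lines build the StoppingCriteriaList once, at construction time, and cache it in self.gen_config, so every later chat turn can stop decoding as soon as the model emits the end-of-human (103027) or end-of-assistant (103028) token instead of running to the full token budget, which is presumably where the "Speed up chat" gain comes from. A minimal sketch of how such a cached config could be consumed at generation time; `model` and `gen_config` are placeholder names, not the repository's exact attributes:

import torch

def generate_reply(model, input_ids, gen_config):
    # gen_config is assumed to hold standard generate() kwargs
    # (max_new_tokens, do_sample, ...) plus the cached 'stopping_criteria'
    # built in __init__ above, so decoding halts at token 103027 or 103028.
    with torch.no_grad():
        return model.generate(input_ids=input_ids, **gen_config)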
modeling_utils.py CHANGED

@@ -2,6 +2,7 @@ import logging
 import math
 import os
 from contextlib import contextmanager
+from transformers import StoppingCriteria, StoppingCriteriaList
 
 import timm.models.hub as timm_hub
 import torch
@@ -32,6 +33,7 @@ def download_cached_file(url, check_hash=True, progress=False):
     Download a file from a URL and cache it locally. If the file already exists, it is not downloaded again.
     If distributed, only the main process downloads the file, and the other processes wait for the file to be downloaded.
     """
+
     def get_cached_file_path():
         # a hack to sync the file path across processes
         parts = torch.hub.urlparse(url)
@@ -74,49 +76,58 @@ def all_logging_disabled(highest_level=logging.CRITICAL):
 
 
 class LoRALinear(nn.Linear):
-    def __init__(self,
-                 in_features: int,
-                 out_features: int,
-                 bias: bool = True,
-                 device=None,
-                 dtype=None,
-                 lora_r=8,
-                 lora_alpha=16,
-                 lora_dropout=0.05,
-                 **kwargs) -> None:
+    def __init__(
+        self,
+        in_features: int,
+        out_features: int,
+        bias: bool = True,
+        device=None,
+        dtype=None,
+        lora_r=8,
+        lora_alpha=16,
+        lora_dropout=0.05,
+        **kwargs
+    ) -> None:
         super().__init__(in_features, out_features, bias, device, dtype)
         self.lora_r = lora_r
         self.lora_alpha = lora_alpha
-        if lora_dropout > 0.:
+        if lora_dropout > 0.0:
             self.lora_dropout = nn.Dropout(p=lora_dropout)
         else:
             self.lora_dropout = lambda x: x
         self.lora_scaling = self.lora_alpha / self.lora_r
 
-        self.lora_A = nn.Linear(in_features,
-                                self.lora_r,
-                                bias=False,
-                                device=device,
-                                dtype=dtype)
-        self.lora_B = nn.Linear(self.lora_r,
-                                out_features,
-                                bias=False,
-                                device=device,
-                                dtype=dtype)
+        self.lora_A = nn.Linear(
+            in_features, self.lora_r, bias=False, device=device, dtype=dtype
+        )
+        self.lora_B = nn.Linear(
+            self.lora_r, out_features, bias=False, device=device, dtype=dtype
+        )
 
         self.reset_parameters()
 
     def reset_parameters(self):
-        if hasattr(self, 'lora_A'):
+        if hasattr(self, "lora_A"):
            # initialize A the same way as the default for nn.Linear and B to zero
            nn.init.kaiming_uniform_(self.lora_A.weight, a=math.sqrt(5))
            nn.init.zeros_(self.lora_B.weight)
-           #print ("lora weight init {} {}".format(torch.mean(self.lora_A.weight), torch.mean(self.lora_B.weight)))
 
     def forward(self, x):
         orig_type = x.dtype
         res = super().forward(x)
         x = x.float()
-        res += self.lora_B(self.lora_A(
-            self.lora_dropout(x))) * self.lora_scaling
+        res += self.lora_B(self.lora_A(self.lora_dropout(x))) * self.lora_scaling
         return res.to(orig_type)
+
+
+class StoppingCriteriaSub(StoppingCriteria):
+    def __init__(self, stops=[], encounters=1):
+        super().__init__()
+        self.stops = stops
+
+    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor):
+        for stop in self.stops:
+            if torch.all((stop == input_ids[:, -len(stop):])).item():
+                return True
+
+        return False
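The new StoppingCriteriaSub compares each configured stop sequence against the tail of the generated ids and reports a match once every sequence in the batch ends with it; with the batch size of 1 used in chat, generation therefore stops right at the eoh/eoa token. A small self-contained check of that logic (no model required); the import path assumes modeling_utils.py is on the Python path:

import torch
from modeling_utils import StoppingCriteriaSub

stops = [torch.tensor([103027]), torch.tensor([103028])]
criterion = StoppingCriteriaSub(stops=stops)

# Tail of the sequence matches a stop id -> True, generate() would halt here.
print(criterion(torch.tensor([[1, 5, 9, 103028]]), scores=None))
# No stop id at the tail -> False, generation continues.
print(criterion(torch.tensor([[1, 5, 9, 42]]), scores=None))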