yangapku committed
Commit
53c9efa
1 Parent(s): cbf815e

update readme and fix convert_tokens_to_string

Files changed (3)
  1. README.md +5 -0
  2. modeling_qwen.py +1 -1
  3. tokenization_qwen.py +7 -9
README.md CHANGED
@@ -61,11 +61,16 @@ We show an example of multi-turn interaction with Qwen-7B-Chat in the following
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers.generation import GenerationConfig
 
+# Note: our tokenizer rejects special-token injection by default, so you cannot include special tokens such as <|endoftext|> in the input text, or an error will be raised.
+# To disable this check, pass `allowed_special`, which accepts the string "all" or a `set` of special tokens.
+# For example: tokens = tokenizer(text, allowed_special="all")
 tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True)
 # use bf16
 # model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True, bf16=True).eval()
 # use fp16
 # model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True, fp16=True).eval()
+# use cpu only
+# model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", device_map="cpu", trust_remote_code=True).eval()
 # use fp32
 model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True).eval()
 model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True)  # You can specify different generation hyperparameters such as generation length and top_p
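To make the new note concrete, here is a minimal sketch of the documented behavior: encoding fails when the input contains a special token, and `allowed_special` opts back in. The sample text and the exact exception type are assumptions, not taken from the commit.

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True)

text = "Hello<|endoftext|>"

# Default: special tokens in the input are rejected.
# Assumption: the underlying tiktoken encoder raises ValueError here.
try:
    tokenizer(text)
except ValueError as err:
    print(f"rejected: {err}")

# Opt in for all special tokens, or pass an explicit set such as {"<|endoftext|>"}.
tokens = tokenizer(text, allowed_special="all")
print(tokens["input_ids"])
```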
modeling_qwen.py CHANGED
@@ -1071,4 +1071,4 @@ class RMSNorm(torch.nn.Module):
             return rms_norm(x, self.weight, self.eps)
         else:
             output = self._norm(x.float()).type_as(x)
-            return output * self.weight
+            return output * self.weight
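For context, the hunk above is the tail of `RMSNorm.forward`. A minimal self-contained sketch of the standard RMSNorm computation this code follows, covering only the pure-PyTorch branch (the fused `rms_norm` fast path is omitted; the `dim` and `eps` defaults are illustrative assumptions, not taken from the commit):

```python
import torch

class RMSNorm(torch.nn.Module):
    # Sketch of the forward path shown in the hunk above; eps default is illustrative.
    def __init__(self, dim: int, eps: float = 1e-6):
        super().__init__()
        self.eps = eps
        self.weight = torch.nn.Parameter(torch.ones(dim))

    def _norm(self, x: torch.Tensor) -> torch.Tensor:
        # x / sqrt(mean(x^2) + eps), averaged over the hidden dimension
        return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Normalize in fp32 for numerical stability, cast back, then apply the learned scale.
        output = self._norm(x.float()).type_as(x)
        return output * self.weight
```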
tokenization_qwen.py CHANGED
@@ -22,7 +22,6 @@ logger = logging.getLogger(__name__)
 
 VOCAB_FILES_NAMES = {"vocab_file": "qwen.tiktoken"}
 
-
 class QWenTokenizer(PreTrainedTokenizer):
     """QWen tokenizer."""
 
@@ -199,17 +198,16 @@ class QWenTokenizer(PreTrainedTokenizer):
 
         return tokens
 
-    def convert_tokens_to_string(self, tokens: List[str]) -> str:
+    def convert_tokens_to_string(self, tokens: List[bytes]) -> str:
         """
         Converts a sequence of tokens in a single string. The most simple way to do it is `" ".join(tokens)` but we
         often want to remove sub-word tokenization artifacts at the same time.
         """
-        text = "".join(tokens)
-        text = bytearray([self.byte_decoder[c] for c in text]).decode(
-            "utf-8", errors=self.errors
-        )
-        return text
-
+        text = b""
+        for token in tokens:
+            text += token
+        return text.decode('utf-8')
+
     @property
     def vocab_size(self):
         return self.tokenizer.n_vocab
@@ -263,4 +261,4 @@ class QWenTokenizer(PreTrainedTokenizer):
             token_ids = [token_ids]
         if skip_special_tokens:
             token_ids = [i for i in token_ids if i not in self.all_special_ids]
-        return self.tokenizer.decode(token_ids)
+        return self.tokenizer.decode(token_ids)
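Why the `convert_tokens_to_string` rewrite is needed: the tokenizer's tokens are raw bytes, and a multi-byte UTF-8 character can be split across token boundaries, so the fragments must be concatenated before a single decode. A short illustration (the sample string is my own; `b"".join(...)` is the idiomatic equivalent of the loop in the patch):

```python
# Tokens from a byte-level BPE are bytes objects; a multi-byte UTF-8
# character may be split across two tokens.
fragments = ["你好".encode("utf-8")[:2], "你好".encode("utf-8")[2:]]

# Decoding a fragment alone would fail -- b'\xe4\xbd' is not valid UTF-8
# on its own. Concatenate all byte fragments first, then decode once.
text = b"".join(fragments).decode("utf-8")
print(text)  # 你好
```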