Update model code & README.md
- config.json +4 -63
- configuration_intern_vit.py +1 -0
- configuration_internvl_chat.py +5 -5
- conversation.py +15 -17
- generation_config.json +5 -1
- modeling_intern_vit.py +1 -0
- modeling_internvl_chat.py +14 -14
- preprocessor_config.json +19 -0
- tokenization_internlm2_fast.py +211 -0
config.json
CHANGED
@@ -1,6 +1,5 @@
 {
   "_commit_hash": null,
-  "_name_or_path": "ckpt/OpenGVLab/InternVL2-8B",
   "architectures": [
     "InternVLChatModel"
   ],
@@ -12,9 +11,8 @@
   "downsample_ratio": 0.5,
   "dynamic_image_size": true,
   "force_image_size": 448,
-  "hidden_size": 4096,
   "llm_config": {
-    "_name_or_path": "
+    "_name_or_path": "internlm/internlm2_5-7b-chat",
     "add_cross_attention": false,
     "architectures": [
       "InternLM2ForCausalLM"
@@ -94,109 +92,52 @@
     "tie_word_embeddings": false,
     "tokenizer_class": null,
     "top_k": 50,
-    "top_p":
+    "top_p": 1.0,
     "torch_dtype": "bfloat16",
     "torchscript": false,
     "transformers_version": "4.37.2",
     "typical_p": 1.0,
     "use_bfloat16": true,
-    "use_cache":
+    "use_cache": true,
     "vocab_size": 92553
   },
   "max_dynamic_patch": 12,
   "min_dynamic_patch": 1,
   "model_type": "internvl_chat",
-  "pad2square": false,
   "ps_version": "v2",
   "select_layer": -1,
   "template": "internlm2-chat",
-  "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": null,
   "use_backbone_lora": 0,
   "use_llm_lora": 0,
   "use_thumbnail": true,
   "vision_config": {
-    "_name_or_path": "",
-    "add_cross_attention": false,
     "architectures": [
       "InternVisionModel"
     ],
     "attention_dropout": 0.0,
-    "
-    "begin_suppress_tokens": null,
-    "bos_token_id": null,
-    "chunk_size_feed_forward": 0,
-    "cross_attention_hidden_size": null,
-    "decoder_start_token_id": null,
-    "diversity_penalty": 0.0,
-    "do_sample": false,
-    "drop_path_rate": 0.1,
+    "drop_path_rate": 0.0,
     "dropout": 0.0,
-    "early_stopping": false,
-    "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": null,
-    "exponential_decay_length_penalty": null,
-    "finetuning_task": null,
-    "forced_bos_token_id": null,
-    "forced_eos_token_id": null,
     "hidden_act": "gelu",
     "hidden_size": 1024,
-    "id2label": {
-      "0": "LABEL_0",
-      "1": "LABEL_1"
-    },
     "image_size": 448,
     "initializer_factor": 1.0,
     "initializer_range": 0.02,
     "intermediate_size": 4096,
-    "is_decoder": false,
-    "is_encoder_decoder": false,
-    "label2id": {
-      "LABEL_0": 0,
-      "LABEL_1": 1
-    },
     "layer_norm_eps": 1e-06,
-    "length_penalty": 1.0,
-    "max_length": 20,
-    "min_length": 0,
     "model_type": "intern_vit_6b",
-    "no_repeat_ngram_size": 0,
     "norm_type": "layer_norm",
     "num_attention_heads": 16,
-    "num_beam_groups": 1,
-    "num_beams": 1,
     "num_channels": 3,
     "num_hidden_layers": 24,
-    "num_return_sequences": 1,
     "output_attentions": false,
     "output_hidden_states": false,
-    "output_scores": false,
-    "pad_token_id": null,
     "patch_size": 14,
-    "prefix": null,
-    "problem_type": null,
-    "pruned_heads": {},
     "qk_normalization": false,
     "qkv_bias": true,
-    "remove_invalid_values": false,
-    "repetition_penalty": 1.0,
     "return_dict": true,
-    "return_dict_in_generate": false,
-    "sep_token_id": null,
-    "suppress_tokens": null,
-    "task_specific_params": null,
-    "temperature": 1.0,
-    "tf_legacy_loss": false,
-    "tie_encoder_decoder": false,
-    "tie_word_embeddings": true,
-    "tokenizer_class": null,
-    "top_k": 50,
-    "top_p": null,
     "torch_dtype": "bfloat16",
-    "torchscript": false,
     "transformers_version": "4.37.2",
-    "typical_p": 1.0,
     "use_bfloat16": true,
     "use_flash_attn": true
   }
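The cleanup above drops the serialized `generate()` defaults from `llm_config` and `vision_config`; the nested configs themselves are unchanged. A quick check, as a sketch (the checkpoint id is illustrative, and it assumes the repo's `auto_map` wiring, which is not shown in these hunks, so `trust_remote_code=True` resolves the custom `InternVLChatConfig` class):

```python
# Sketch: confirm the nested configs still resolve after the cleanup.
from transformers import AutoConfig

config = AutoConfig.from_pretrained('OpenGVLab/InternVL2-8B', trust_remote_code=True)
print(config.llm_config.architectures)     # ['InternLM2ForCausalLM']
print(config.vision_config.architectures)  # ['InternVisionModel']
print(config.llm_config.use_cache)         # True (now serialized explicitly)
```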
configuration_intern_vit.py
CHANGED
@@ -3,6 +3,7 @@
 # Copyright (c) 2024 OpenGVLab
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------
+
 import os
 from typing import Union
 
configuration_internvl_chat.py
CHANGED
@@ -39,20 +39,20 @@ class InternVLChatConfig(PretrainedConfig):
         super().__init__(**kwargs)
 
         if vision_config is None:
-            vision_config = {}
+            vision_config = {'architectures': ['InternVisionModel']}
             logger.info('vision_config is None. Initializing the InternVisionConfig with default values.')
 
         if llm_config is None:
-            llm_config = {}
+            llm_config = {'architectures': ['InternLM2ForCausalLM']}
             logger.info('llm_config is None. Initializing the LlamaConfig config with default values (`LlamaConfig`).')
 
         self.vision_config = InternVisionConfig(**vision_config)
-        if llm_config
+        if llm_config.get('architectures')[0] == 'LlamaForCausalLM':
             self.llm_config = LlamaConfig(**llm_config)
-        elif llm_config
+        elif llm_config.get('architectures')[0] == 'InternLM2ForCausalLM':
             self.llm_config = InternLM2Config(**llm_config)
         else:
-            raise ValueError('Unsupported architecture: {}'.format(llm_config
+            raise ValueError('Unsupported architecture: {}'.format(llm_config.get('architectures')[0]))
         self.use_backbone_lora = use_backbone_lora
         self.use_llm_lora = use_llm_lora
         self.select_layer = select_layer
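The practical effect of the new defaults: an `InternVLChatConfig` created without explicit sub-configs now carries an `architectures` entry, so the dispatch above resolves to `InternLM2Config` instead of failing on a missing key. A standalone sketch of that dispatch (the return values stand in for the config classes imported by `configuration_internvl_chat.py`):

```python
# Standalone sketch of the architecture-based dispatch above.
def pick_llm_config(llm_config=None):
    if llm_config is None:
        llm_config = {'architectures': ['InternLM2ForCausalLM']}  # new default
    arch = llm_config.get('architectures')[0]
    if arch == 'LlamaForCausalLM':
        return 'LlamaConfig'
    elif arch == 'InternLM2ForCausalLM':
        return 'InternLM2Config'
    raise ValueError('Unsupported architecture: {}'.format(arch))

print(pick_llm_config())                                         # InternLM2Config
print(pick_llm_config({'architectures': ['LlamaForCausalLM']}))  # LlamaConfig
```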
conversation.py
CHANGED
@@ -3,11 +3,13 @@ Conversation prompt templates.
 
 We kindly request that you import fastchat instead of copying this file if you wish to use it.
 If you have changes in mind, please contribute back so the community can benefit collectively and continue to maintain these valuable templates.
+
+Modified from https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
 """
 
 import dataclasses
 from enum import IntEnum, auto
-from typing import
+from typing import Dict, List, Tuple, Union
 
 
 class SeparatorStyle(IntEnum):
@@ -344,12 +346,6 @@ register_conv_template(
         roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
         sep_style=SeparatorStyle.MPT,
         sep='<|im_end|>',
-        stop_token_ids=[
-            2,
-            6,
-            7,
-            8,
-        ],
         stop_str='<|endoftext|>',
     )
 )
@@ -365,11 +361,6 @@ register_conv_template(
         roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
         sep_style=SeparatorStyle.MPT,
         sep='<|im_end|>',
-        stop_token_ids=[
-            2,
-            92543,
-            92542
-        ]
     )
 )
 
@@ -384,10 +375,17 @@ register_conv_template(
         roles=('<|user|>\n', '<|assistant|>\n'),
         sep_style=SeparatorStyle.MPT,
         sep='<|end|>',
-
-
-
-
-
+    )
+)
+
+
+register_conv_template(
+    Conversation(
+        name='internvl2_5',
+        system_template='<|im_start|>system\n{system_message}',
+        system_message='你是书生·万象,英文名是InternVL,是由上海人工智能实验室、清华大学及多家合作单位联合开发的多模态大语言模型。',
+        roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
+        sep_style=SeparatorStyle.MPT,
+        sep='<|im_end|>\n',
     )
 )
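The newly registered `internvl2_5` template is consumed through `get_conv_template`, the same way the existing templates are. A usage sketch (assumes `conversation.py` is importable from the working directory; `append_message`/`get_prompt` come from the FastChat-style `Conversation` class in that file, and the exact rendered prompt depends on its implementation):

```python
# Sketch: build an InternVL2.5-style prompt from the new template.
from conversation import get_conv_template

template = get_conv_template('internvl2_5')
template.append_message(template.roles[0], 'Describe this image.\n<image>')
template.append_message(template.roles[1], None)
print(template.get_prompt())
# <|im_start|>system\n...<|im_end|>\n<|im_start|>user\n...<|im_end|>\n<|im_start|>assistant\n
```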
generation_config.json
CHANGED
@@ -1,4 +1,8 @@
 {
   "_from_model_config": true,
-  "transformers_version": "4.37.2"
+  "transformers_version": "4.37.2",
+  "eos_token_id": [
+    92542,
+    92543
+  ]
 }
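With a list-valued `eos_token_id`, `generate()` stops as soon as either token is produced. A small sketch of how the file is interpreted (that 92542/92543 correspond to `<|im_end|>`/`<|im_start|>` in the InternLM2 vocabulary is an assumption, not stated in the diff):

```python
# Sketch: round-trip the new generation_config.json through transformers.
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_dict({
    '_from_model_config': True,
    'transformers_version': '4.37.2',
    'eos_token_id': [92542, 92543],  # assumed: <|im_end|> and <|im_start|>
})
print(gen_cfg.eos_token_id)  # [92542, 92543] -> generation halts on either id
```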
modeling_intern_vit.py
CHANGED
@@ -3,6 +3,7 @@
 # Copyright (c) 2024 OpenGVLab
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------
+
 from typing import Optional, Tuple, Union
 
 import torch
modeling_internvl_chat.py
CHANGED
@@ -3,8 +3,9 @@
 # Copyright (c) 2024 OpenGVLab
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------
+
 import warnings
-from typing import
+from typing import List, Optional, Tuple, Union
 
 import torch.utils.checkpoint
 import transformers
@@ -35,13 +36,14 @@ def version_cmp(v1, v2, op='eq'):
 class InternVLChatModel(PreTrainedModel):
     config_class = InternVLChatConfig
     main_input_name = 'pixel_values'
+    base_model_prefix = 'language_model'
     _supports_flash_attn_2 = True
     _no_split_modules = ['InternVisionModel', 'LlamaDecoderLayer', 'InternLM2DecoderLayer']
 
     def __init__(self, config: InternVLChatConfig, vision_model=None, language_model=None, use_flash_attn=True):
         super().__init__(config)
 
-        assert version_cmp(transformers.__version__, '4.
+        assert version_cmp(transformers.__version__, '4.37.0', 'ge')
         image_size = config.force_image_size or config.vision_config.image_size
         patch_size = config.vision_config.patch_size
         self.patch_size = patch_size
@@ -101,7 +103,7 @@ class InternVLChatModel(PreTrainedModel):
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
         image_flags = image_flags.squeeze(-1)
-        input_embeds = self.language_model.get_input_embeddings()(input_ids)
+        input_embeds = self.language_model.get_input_embeddings()(input_ids).clone()
 
         vit_embeds = self.extract_feature(pixel_values)
         vit_embeds = vit_embeds[image_flags == 1]
@@ -110,7 +112,7 @@ class InternVLChatModel(PreTrainedModel):
         B, N, C = input_embeds.shape
         input_embeds = input_embeds.reshape(B * N, C)
 
-        if torch.distributed.get_rank() == 0:
+        if torch.distributed.is_initialized() and torch.distributed.get_rank() == 0:
             print(f'dynamic ViT batch size: {vit_batch_size}, images per sample: {vit_batch_size / B}, dynamic token length: {N}')
 
         input_ids = input_ids.reshape(B * N)
@@ -234,9 +236,9 @@ class InternVLChatModel(PreTrainedModel):
 
         tokenizer.padding_side = 'left'
         model_inputs = tokenizer(queries, return_tensors='pt', padding=True)
-        input_ids = model_inputs['input_ids'].
-        attention_mask = model_inputs['attention_mask'].
-        eos_token_id = tokenizer.convert_tokens_to_ids(template.sep)
+        input_ids = model_inputs['input_ids'].to(self.device)
+        attention_mask = model_inputs['attention_mask'].to(self.device)
+        eos_token_id = tokenizer.convert_tokens_to_ids(template.sep.strip())
         generation_config['eos_token_id'] = eos_token_id
         generation_output = self.generate(
             pixel_values=pixel_values,
@@ -245,7 +247,7 @@ class InternVLChatModel(PreTrainedModel):
             **generation_config
         )
         responses = tokenizer.batch_decode(generation_output, skip_special_tokens=True)
-        responses = [response.split(template.sep)[0].strip() for response in responses]
+        responses = [response.split(template.sep.strip())[0].strip() for response in responses]
         return responses
 
     def chat(self, tokenizer, pixel_values, question, generation_config, history=None, return_history=False,
@@ -264,7 +266,7 @@ class InternVLChatModel(PreTrainedModel):
 
         template = get_conv_template(self.template)
         template.system_message = self.system_message
-        eos_token_id = tokenizer.convert_tokens_to_ids(template.sep)
+        eos_token_id = tokenizer.convert_tokens_to_ids(template.sep.strip())
 
         history = [] if history is None else history
         for (old_question, old_answer) in history:
@@ -283,8 +285,8 @@ class InternVLChatModel(PreTrainedModel):
         query = query.replace('<image>', image_tokens, 1)
 
         model_inputs = tokenizer(query, return_tensors='pt')
-        input_ids = model_inputs['input_ids'].
-        attention_mask = model_inputs['attention_mask'].
+        input_ids = model_inputs['input_ids'].to(self.device)
+        attention_mask = model_inputs['attention_mask'].to(self.device)
         generation_config['eos_token_id'] = eos_token_id
         generation_output = self.generate(
             pixel_values=pixel_values,
@@ -293,7 +295,7 @@ class InternVLChatModel(PreTrainedModel):
             **generation_config
         )
         response = tokenizer.batch_decode(generation_output, skip_special_tokens=True)[0]
-        response = response.split(template.sep)[0].strip()
+        response = response.split(template.sep.strip())[0].strip()
         history.append((question, response))
         if return_history:
             return response, history
@@ -313,7 +315,6 @@ class InternVLChatModel(PreTrainedModel):
         visual_features: Optional[torch.FloatTensor] = None,
         generation_config: Optional[GenerationConfig] = None,
         output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
         **generate_kwargs,
     ) -> torch.LongTensor:
 
@@ -341,7 +342,6 @@ class InternVLChatModel(PreTrainedModel):
             attention_mask=attention_mask,
             generation_config=generation_config,
             output_hidden_states=output_hidden_states,
-            return_dict=return_dict,
             use_cache=True,
             **generate_kwargs,
         )
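The `.to(self.device)` and `template.sep.strip()` changes mean `chat()` no longer assumes CUDA-only tensors or a separator without a trailing newline. A usage sketch in the style of the model card (the checkpoint id, dtype, and text-only call are illustrative assumptions, not part of this commit):

```python
import torch
from transformers import AutoModel, AutoTokenizer

path = 'OpenGVLab/InternVL2-8B'  # illustrative checkpoint id
model = AutoModel.from_pretrained(path, torch_dtype=torch.bfloat16,
                                  trust_remote_code=True).eval().cuda()
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

# Text-only turn; pixel_values=None exercises the pure-LLM path of chat().
generation_config = dict(max_new_tokens=64, do_sample=False)
response = model.chat(tokenizer, None, 'Hello, who are you?', generation_config)
print(response)
```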
preprocessor_config.json
ADDED
@@ -0,0 +1,19 @@
+{
+  "crop_size": 448,
+  "do_center_crop": true,
+  "do_normalize": true,
+  "do_resize": true,
+  "feature_extractor_type": "CLIPFeatureExtractor",
+  "image_mean": [
+    0.485,
+    0.456,
+    0.406
+  ],
+  "image_std": [
+    0.229,
+    0.224,
+    0.225
+  ],
+  "resample": 3,
+  "size": 448
+}
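These values encode the standard ImageNet normalization with bicubic resizing to 448×448 (`"resample": 3` is PIL's BICUBIC). An equivalent torchvision pipeline, as a sketch (the model card's own preprocessing additionally builds dynamic tiles on top of this):

```python
import torchvision.transforms as T
from torchvision.transforms.functional import InterpolationMode

transform = T.Compose([
    T.Resize((448, 448), interpolation=InterpolationMode.BICUBIC),       # "size": 448, "resample": 3
    T.ToTensor(),                                                         # HWC uint8 -> CHW float in [0, 1]
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),   # ImageNet statistics
])
```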
tokenization_internlm2_fast.py
ADDED
@@ -0,0 +1,211 @@
+# Copyright (c) The InternLM team and The HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on transformers/src/transformers/models/llama/tokenization_llama_fast.py
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tokenization Fast class for InternLM."""
+import os
+from shutil import copyfile
+from typing import Any, Dict, Optional, Tuple
+
+from tokenizers import Tokenizer, decoders, normalizers, processors
+from tokenizers.models import BPE
+from transformers.convert_slow_tokenizer import (SLOW_TO_FAST_CONVERTERS,
+                                                 SentencePieceExtractor,
+                                                 SpmConverter)
+from transformers.tokenization_utils_fast import PreTrainedTokenizerFast
+from transformers.utils import logging
+
+from .tokenization_internlm2 import InternLM2Tokenizer
+
+logger = logging.get_logger(__name__)
+
+VOCAB_FILES_NAMES = {'vocab_file': './tokenizer.model'}
+
+
+# Modified from transformers.convert_slow_tokenizer.LlamaConverter
+class InternLM2Converter(SpmConverter):
+    handle_byte_fallback = True
+
+    def vocab(self, proto):
+        vocab = [
+            ('<unk>', 0.0),
+            ('<s>', 0.0),
+            ('</s>', 0.0),
+        ]
+        vocab += [(piece.piece, piece.score) for piece in proto.pieces[3:]]
+        return vocab
+
+    def unk_id(self, proto):
+        unk_id = 0
+        return unk_id
+
+    def decoder(self, replacement, add_prefix_space):
+        return decoders.Sequence(
+            [
+                decoders.Replace('▁', ' '),
+                decoders.ByteFallback(),
+                decoders.Fuse(),
+                decoders.Strip(content=' ', left=1),
+            ]
+        )
+
+    def tokenizer(self, proto):
+        model_type = proto.trainer_spec.model_type
+        vocab_scores = self.vocab(proto)
+        # special tokens
+        added_tokens = self.original_tokenizer.added_tokens_decoder
+        for i in range(len(vocab_scores)):
+            piece, score = vocab_scores[i]
+            if i in added_tokens:
+                vocab_scores[i] = (added_tokens[i].content, score)
+        if model_type == 1:
+            raise RuntimeError('InternLM2 is supposed to be a BPE model!')
+
+        elif model_type == 2:
+            _, merges = SentencePieceExtractor(self.original_tokenizer.vocab_file).extract(vocab_scores)
+            bpe_vocab = {word: i for i, (word, _score) in enumerate(vocab_scores)}
+            tokenizer = Tokenizer(
+                BPE(bpe_vocab, merges, unk_token=proto.trainer_spec.unk_piece, fuse_unk=True, byte_fallback=True)
+            )
+            tokenizer.add_special_tokens(
+                [added_token for index, added_token in added_tokens.items()]
+            )
+        else:
+            raise Exception(
+                "You're trying to run a `Unigram` model but you're file was trained with a different algorithm"
+            )
+
+        return tokenizer
+
+    def normalizer(self, proto):
+        normalizers_list = []
+        if proto.normalizer_spec.add_dummy_prefix:
+            normalizers_list.append(normalizers.Prepend(prepend='▁'))
+        normalizers_list.append(normalizers.Replace(pattern=' ', content='▁'))
+        return normalizers.Sequence(normalizers_list)
+
+    def pre_tokenizer(self, replacement, add_prefix_space):
+        return None
+
+
+SLOW_TO_FAST_CONVERTERS['InternLM2Tokenizer'] = InternLM2Converter
+
+
+# Modified from transformers.model.llama.tokenization_llama_fast.LlamaTokenizerFast -> InternLM2TokenizerFast
+class InternLM2TokenizerFast(PreTrainedTokenizerFast):
+    vocab_files_names = VOCAB_FILES_NAMES
+    slow_tokenizer_class = InternLM2Tokenizer
+    padding_side = 'left'
+    model_input_names = ['input_ids', 'attention_mask']
+    _auto_class = 'AutoTokenizer'
+
+    def __init__(
+        self,
+        vocab_file,
+        unk_token='<unk>',
+        bos_token='<s>',
+        eos_token='</s>',
+        pad_token='</s>',
+        sp_model_kwargs: Optional[Dict[str, Any]] = None,
+        add_bos_token=True,
+        add_eos_token=False,
+        decode_with_prefix_space=False,
+        clean_up_tokenization_spaces=False,
+        **kwargs,
+    ):
+        super().__init__(
+            vocab_file=vocab_file,
+            unk_token=unk_token,
+            bos_token=bos_token,
+            eos_token=eos_token,
+            pad_token=pad_token,
+            sp_model_kwargs=sp_model_kwargs,
+            add_bos_token=add_bos_token,
+            add_eos_token=add_eos_token,
+            decode_with_prefix_space=decode_with_prefix_space,
+            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
+            **kwargs,
+        )
+        self._add_bos_token = add_bos_token
+        self._add_eos_token = add_eos_token
+        self.update_post_processor()
+        self.vocab_file = vocab_file
+
+    @property
+    def can_save_slow_tokenizer(self) -> bool:
+        return os.path.isfile(self.vocab_file) if self.vocab_file else False
+
+    def update_post_processor(self):
+        """
+        Updates the underlying post processor with the current `bos_token` and `eos_token`.
+        """
+        bos = self.bos_token
+        bos_token_id = self.bos_token_id
+        if bos is None and self.add_bos_token:
+            raise ValueError('add_bos_token = True but bos_token = None')
+
+        eos = self.eos_token
+        eos_token_id = self.eos_token_id
+        if eos is None and self.add_eos_token:
+            raise ValueError('add_eos_token = True but eos_token = None')
+
+        single = f"{(bos+':0 ') if self.add_bos_token else ''}$A:0{(' '+eos+':0') if self.add_eos_token else ''}"
+        pair = f"{single}{(' '+bos+':1') if self.add_bos_token else ''} $B:1{(' '+eos+':1') if self.add_eos_token else ''}"
+
+        special_tokens = []
+        if self.add_bos_token:
+            special_tokens.append((bos, bos_token_id))
+        if self.add_eos_token:
+            special_tokens.append((eos, eos_token_id))
+        self._tokenizer.post_processor = processors.TemplateProcessing(
+            single=single, pair=pair, special_tokens=special_tokens
+        )
+
+    @property
+    def add_eos_token(self):
+        return self._add_eos_token
+
+    @property
+    def add_bos_token(self):
+        return self._add_bos_token
+
+    @add_eos_token.setter
+    def add_eos_token(self, value):
+        self._add_eos_token = value
+        self.update_post_processor()
+
+    @add_bos_token.setter
+    def add_bos_token(self, value):
+        self._add_bos_token = value
+        self.update_post_processor()
+
+    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
+        if not self.can_save_slow_tokenizer:
+            raise ValueError(
+                'Your fast tokenizer does not have the necessary information to save the vocabulary for a slow '
+                'tokenizer.'
+            )
+
+        if not os.path.isdir(save_directory):
+            logger.error(f'Vocabulary path ({save_directory}) should be a directory')
+            return
+        out_vocab_file = os.path.join(
+            save_directory, (filename_prefix + '-' if filename_prefix else '') + VOCAB_FILES_NAMES['vocab_file']
+        )
+
+        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file):
+            copyfile(self.vocab_file, out_vocab_file)
+
+        return (out_vocab_file,)
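Adding this file lets the repo serve a fast (Rust-backed) tokenizer converted from the SentencePiece model. A loading sketch (assumes the repo's `tokenizer_config.json` maps `AutoTokenizer` to `InternLM2TokenizerFast` via `auto_map`, which is not shown in this commit; the checkpoint id is illustrative):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('OpenGVLab/InternVL2-8B',
                                          trust_remote_code=True, use_fast=True)
print(tokenizer.is_fast)  # True once the fast class is picked up
ids = tokenizer('<|im_start|>user\nHello<|im_end|>').input_ids
print(tokenizer.convert_ids_to_tokens(ids))
```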