fix compatibility issue for transformers 4.46+

Browse files

Files changed (4) hide show

configuration_intern_vit.py +1 -0
configuration_internvl_chat.py +3 -3
conversation.py +15 -17
modeling_internvl_chat.py +6 -7

configuration_intern_vit.py CHANGED Viewed

@@ -3,6 +3,7 @@
 # Copyright (c) 2024 OpenGVLab
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------
 import os
 from typing import Union

 # Copyright (c) 2024 OpenGVLab
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------
 import os
 from typing import Union

configuration_internvl_chat.py CHANGED Viewed

@@ -47,12 +47,12 @@ class InternVLChatConfig(PretrainedConfig):
             logger.info('llm_config is None. Initializing the LlamaConfig config with default values (`LlamaConfig`).')
         self.vision_config = InternVisionConfig(**vision_config)
-        if llm_config['architectures'][0] == 'LlamaForCausalLM':
             self.llm_config = LlamaConfig(**llm_config)
-        elif llm_config['architectures'][0] == 'InternLM2ForCausalLM':
             self.llm_config = InternLM2Config(**llm_config)
         else:
-            raise ValueError('Unsupported architecture: {}'.format(llm_config['architectures'][0]))
         self.use_backbone_lora = use_backbone_lora
         self.use_llm_lora = use_llm_lora
         self.select_layer = select_layer

             logger.info('llm_config is None. Initializing the LlamaConfig config with default values (`LlamaConfig`).')
         self.vision_config = InternVisionConfig(**vision_config)
+        if llm_config.get(['architectures'])[0] == 'LlamaForCausalLM':
             self.llm_config = LlamaConfig(**llm_config)
+        elif llm_config.get(['architectures'])[0] == 'InternLM2ForCausalLM':
             self.llm_config = InternLM2Config(**llm_config)
         else:
+            raise ValueError('Unsupported architecture: {}'.format(llm_config.get(['architectures'])[0]))
         self.use_backbone_lora = use_backbone_lora
         self.use_llm_lora = use_llm_lora
         self.select_layer = select_layer

conversation.py CHANGED Viewed

@@ -3,11 +3,13 @@ Conversation prompt templates.
 We kindly request that you import fastchat instead of copying this file if you wish to use it.
 If you have changes in mind, please contribute back so the community can benefit collectively and continue to maintain these valuable templates.
 """
 import dataclasses
 from enum import IntEnum, auto
-from typing import Any, Dict, List, Tuple, Union
 class SeparatorStyle(IntEnum):
@@ -344,12 +346,6 @@ register_conv_template(
         roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
         sep_style=SeparatorStyle.MPT,
         sep='<|im_end|>',
-        stop_token_ids=[
-            2,
-            6,
-            7,
-            8,
-        ],
         stop_str='<|endoftext|>',
     )
 )
@@ -365,11 +361,6 @@ register_conv_template(
         roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
         sep_style=SeparatorStyle.MPT,
         sep='<|im_end|>',
-        stop_token_ids=[
-            2,
-            92543,
-            92542
-        ]
     )
 )
@@ -384,10 +375,17 @@ register_conv_template(
         roles=('<|user|>\n', '<|assistant|>\n'),
         sep_style=SeparatorStyle.MPT,
         sep='<|end|>',
-        stop_token_ids=[
-            2,
-            32000,
-            32007
-        ]
     )
 )

 We kindly request that you import fastchat instead of copying this file if you wish to use it.
 If you have changes in mind, please contribute back so the community can benefit collectively and continue to maintain these valuable templates.
+Modified from https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
 """
 import dataclasses
 from enum import IntEnum, auto
+from typing import Dict, List, Tuple, Union
 class SeparatorStyle(IntEnum):
         roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
         sep_style=SeparatorStyle.MPT,
         sep='<|im_end|>',
         stop_str='<|endoftext|>',
     )
 )
         roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
         sep_style=SeparatorStyle.MPT,
         sep='<|im_end|>',
     )
 )
         roles=('<|user|>\n', '<|assistant|>\n'),
         sep_style=SeparatorStyle.MPT,
         sep='<|end|>',
+    )
+)
+register_conv_template(
+    Conversation(
+        name='internvl2_5',
+        system_template='<|im_start|>system\n{system_message}',
+        system_message='你是书生·万象，英文名是InternVL，是由上海人工智能实验室、清华大学及多家合作单位联合开发的多模态大语言模型。',
+        roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
+        sep_style=SeparatorStyle.MPT,
+        sep='<|im_end|>\n',
     )
 )

modeling_internvl_chat.py CHANGED Viewed

@@ -3,8 +3,9 @@
 # Copyright (c) 2024 OpenGVLab
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------
 import warnings
-from typing import Any, List, Optional, Tuple, Union
 import torch.utils.checkpoint
 import transformers
@@ -237,7 +238,7 @@ class InternVLChatModel(PreTrainedModel):
         model_inputs = tokenizer(queries, return_tensors='pt', padding=True)
         input_ids = model_inputs['input_ids'].to(self.device)
         attention_mask = model_inputs['attention_mask'].to(self.device)
-        eos_token_id = tokenizer.convert_tokens_to_ids(template.sep)
         generation_config['eos_token_id'] = eos_token_id
         generation_output = self.generate(
             pixel_values=pixel_values,
@@ -246,7 +247,7 @@ class InternVLChatModel(PreTrainedModel):
             **generation_config
         )
         responses = tokenizer.batch_decode(generation_output, skip_special_tokens=True)
-        responses = [response.split(template.sep)[0].strip() for response in responses]
         return responses
     def chat(self, tokenizer, pixel_values, question, generation_config, history=None, return_history=False,
@@ -265,7 +266,7 @@ class InternVLChatModel(PreTrainedModel):
         template = get_conv_template(self.template)
         template.system_message = self.system_message
-        eos_token_id = tokenizer.convert_tokens_to_ids(template.sep)
         history = [] if history is None else history
         for (old_question, old_answer) in history:
@@ -294,7 +295,7 @@ class InternVLChatModel(PreTrainedModel):
             **generation_config
         )
         response = tokenizer.batch_decode(generation_output, skip_special_tokens=True)[0]
-        response = response.split(template.sep)[0].strip()
         history.append((question, response))
         if return_history:
             return response, history
@@ -314,7 +315,6 @@ class InternVLChatModel(PreTrainedModel):
             visual_features: Optional[torch.FloatTensor] = None,
             generation_config: Optional[GenerationConfig] = None,
             output_hidden_states: Optional[bool] = None,
-            return_dict: Optional[bool] = None,
             **generate_kwargs,
     ) -> torch.LongTensor:
@@ -342,7 +342,6 @@ class InternVLChatModel(PreTrainedModel):
             attention_mask=attention_mask,
             generation_config=generation_config,
             output_hidden_states=output_hidden_states,
-            return_dict=return_dict,
             use_cache=True,
             **generate_kwargs,
         )

 # Copyright (c) 2024 OpenGVLab
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------
 import warnings
+from typing import List, Optional, Tuple, Union
 import torch.utils.checkpoint
 import transformers
         model_inputs = tokenizer(queries, return_tensors='pt', padding=True)
         input_ids = model_inputs['input_ids'].to(self.device)
         attention_mask = model_inputs['attention_mask'].to(self.device)
+        eos_token_id = tokenizer.convert_tokens_to_ids(template.sep.strip())
         generation_config['eos_token_id'] = eos_token_id
         generation_output = self.generate(
             pixel_values=pixel_values,
             **generation_config
         )
         responses = tokenizer.batch_decode(generation_output, skip_special_tokens=True)
+        responses = [response.split(template.sep.strip())[0].strip() for response in responses]
         return responses
     def chat(self, tokenizer, pixel_values, question, generation_config, history=None, return_history=False,
         template = get_conv_template(self.template)
         template.system_message = self.system_message
+        eos_token_id = tokenizer.convert_tokens_to_ids(template.sep.strip())
         history = [] if history is None else history
         for (old_question, old_answer) in history:
             **generation_config
         )
         response = tokenizer.batch_decode(generation_output, skip_special_tokens=True)[0]
+        response = response.split(template.sep.strip())[0].strip()
         history.append((question, response))
         if return_history:
             return response, history
             visual_features: Optional[torch.FloatTensor] = None,
             generation_config: Optional[GenerationConfig] = None,
             output_hidden_states: Optional[bool] = None,
             **generate_kwargs,
     ) -> torch.LongTensor:
             attention_mask=attention_mask,
             generation_config=generation_config,
             output_hidden_states=output_hidden_states,
             use_cache=True,
             **generate_kwargs,
         )