myownskyW7 committed
Commit b497ed8
Parent(s): db8a0eb

update internlm-xcomposer-7b

Files changed:
- config.json +2 -2
- configuration_InternLM_XComposer.py +2 -2
- modeling_InternLM.py +1 -1
- modeling_InternLM_XComposer.py +22 -23
- pytorch_model-00001-of-00004.bin +2 -2
- pytorch_model-00002-of-00004.bin +2 -2
- pytorch_model-00003-of-00004.bin +2 -2
- pytorch_model-00004-of-00004.bin +2 -2
- pytorch_model.bin.index.json +0 -0
config.json CHANGED
@@ -17,8 +17,7 @@
   "initializer_range": 0.02,
   "intermediate_size": 11008,
   "intern_converted_llm": true,
-  "
-  "llama_lora": {
+  "internlm_lora": {
     "freeze": false,
     "learn_param": [
       "q",
@@ -29,6 +28,7 @@
     "lora_dropout": 0.05,
     "lora_r": 256
   },
+  "kqvo_bias": true,
   "lora_cfg": {
     "freeze": false,
     "learn_param": [
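For context, the renamed block can be read back through the standard transformers config API. A minimal sketch, assuming the published repo id and that the custom config class is loaded with trust_remote_code:

from transformers import AutoConfig

# Assumed repo id for illustration; a local clone of this repository works the same way.
config = AutoConfig.from_pretrained(
    "internlm/internlm-xcomposer-7b",
    trust_remote_code=True,   # needed so the custom InternLMXComposerConfig class is used
)

print(config.internlm_lora)   # the dict above: freeze, learn_param, lora_dropout, lora_r
print(config.kqvo_bias)       # True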
configuration_InternLM_XComposer.py CHANGED
@@ -33,7 +33,7 @@ class InternLMXComposerConfig(PretrainedConfig):
         intern_converted_llm=True,
         kqvo_bias=True,
         device='cuda',
-
+        internlm_lora=None,
         **kwargs,
     ):
         self.vocab_size = vocab_size
@@ -49,7 +49,7 @@ class InternLMXComposerConfig(PretrainedConfig):
         self.bias = bias
         self.num_query_token = num_query_token
         self.num_quant = num_quant
-        self.
+        self.internlm_lora = internlm_lora
         self.kqvo_bias = kqvo_bias
         self.intern_converted_llm = intern_converted_llm
         self.device = device
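A minimal sketch of the new constructor argument, assuming the repository files are importable and the remaining arguments keep their defaults; the dict values mirror the config.json block above:

from configuration_InternLM_XComposer import InternLMXComposerConfig

internlm_lora = {
    "freeze": False,
    "learn_param": ["q"],   # the full list is truncated in the rendered diff
    "lora_dropout": 0.05,
    "lora_r": 256,
}

config = InternLMXComposerConfig(internlm_lora=internlm_lora, kqvo_bias=True)
assert config.internlm_lora["lora_r"] == 256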
modeling_InternLM.py CHANGED
@@ -15,8 +15,8 @@ from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
 from transformers.modeling_utils import PreTrainedModel
 from transformers.utils import logging
 
-from .modeling_utils import LoRALinear
 from .configuration_InternLM_XComposer import InternLMXComposerConfig
+from .modeling_utils import LoRALinear
 
 logger = logging.get_logger(__name__)
 
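The LoRALinear import that moves here comes from modeling_utils.py, which is not part of this diff. As a rough, generic illustration of the low-rank-adapter idea only (the class name, signature, and internals below are invented for illustration, not the repository's implementation):

import torch
import torch.nn as nn

class SimpleLoRALinear(nn.Module):
    """Frozen base linear plus a trainable low-rank update (generic LoRA sketch)."""

    def __init__(self, in_features, out_features, lora_r=256, lora_dropout=0.05, bias=True):
        super().__init__()
        self.base = nn.Linear(in_features, out_features, bias=bias)
        self.base.weight.requires_grad = False                       # "freeze": pretrained weight stays fixed
        self.lora_a = nn.Linear(in_features, lora_r, bias=False)     # low-rank down-projection
        self.lora_b = nn.Linear(lora_r, out_features, bias=False)    # low-rank up-projection
        nn.init.zeros_(self.lora_b.weight)                           # adapter starts as a no-op
        self.dropout = nn.Dropout(lora_dropout)

    def forward(self, x):
        return self.base(x) + self.lora_b(self.lora_a(self.dropout(x)))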
modeling_InternLM_XComposer.py CHANGED
@@ -14,7 +14,6 @@ from torchvision.transforms.functional import InterpolationMode
 from PIL import Image
 
 from .modeling_perceive_sampler import BertConfig, BertLMHeadModel
-from .configuration_InternLM_XComposer import InternLMXComposerConfig
 from .modeling_vit import *
 from .modeling_InternLM import *
 from .modeling_utils import *
@@ -63,25 +62,25 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
         self.flag_image_start.requires_grad = False
         self.flag_image_end.requires_grad = False
 
-
-        self.
-        setattr(
+        internlm_lora = config.internlm_lora
+        self.internlm_lora = internlm_lora
+        setattr(InternLMForCausalLM, 'lora_cfg', internlm_lora)
 
         if int(torch.__version__[0]) == 1:
-            self.
+            self.internlm_model = InternLMForCausalLM._from_config(config).to(
                 torch.float16)
         else:
             assert int(torch.__version__[0]) == 2
             # speed up init llm
             with torch.device('meta'):
-                self.
-            self.
-            for n, m in self.
+                self.internlm_model = InternLMForCausalLM._from_config(config)
+            self.internlm_model.to_empty(device=config.device).to(torch.float16)
+            for n, m in self.internlm_model.named_modules():
                 if 'lora' in n:
                     m.float()
 
-        self.
-        self.
+        self.internlm_proj = nn.Linear(self.Qformer.config.hidden_size,
+                                       self.internlm_model.config.hidden_size)
         print('Done')
 
         self.vis_processor = transforms.Compose([
@@ -159,14 +158,14 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
             encoder_attention_mask=image_atts,
             return_dict=True,
         )
-
-
-            self.flag_image_start.expand(
-
-            self.flag_image_end.expand(
+        inputs_internlm = self.internlm_proj(query_output.last_hidden_state)
+        inputs_internlm = torch.cat([
+            self.flag_image_start.expand(inputs_internlm.shape[0], -1, -1),
+            inputs_internlm,
+            self.flag_image_end.expand(inputs_internlm.shape[0], -1, -1)
         ],
-
-        return
+                                    dim=1)
+        return inputs_internlm
 
     def encode_text(self, text, add_special_tokens=False):
         text_token_ids = self.tokenizer(
@@ -174,7 +173,7 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
             return_tensors='pt',
             add_special_tokens=add_special_tokens,
         ).input_ids.to(self.device)
-        text_embeds = self.
+        text_embeds = self.internlm_model.model.embed_tokens(text_token_ids)
         return text_embeds
 
     def decode_text(self, out_embeds):
@@ -200,8 +199,8 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
         text_embeds = self.encode_text(text)
         img_embeds = self.encode_img(image)
         prompt_embeds = self.wrap_prompt(text_embeds, img_embeds)
-        out_embeds = self.
-
+        out_embeds = self.internlm_model.generate(inputs_embeds=prompt_embeds,
+                                                  **self.get_gen_args(**kwargs))
         out_text = self.decode_text(out_embeds)
         return out_text
 
@@ -211,14 +210,14 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
         prompt_embeds = self.wrap_prompt(text_embeds,
                                          img_embeds,
                                          history=history)
-        out_embeds = self.
-
+        out_embeds = self.internlm_model.generate(inputs_embeds=prompt_embeds,
+                                                  **self.get_gen_args(**kwargs))
         out_text = self.decode_text(out_embeds)
 
         # trunc at eoh and eoa
         clean_out_text_token_ids = self.tokenizer(
             out_text, return_tensors='pt').input_ids.to(self.device)
-        clean_out_text_embeds = self.
+        clean_out_text_embeds = self.internlm_model.model.embed_tokens(
             clean_out_text_token_ids)
         clean_prompt_embeds = self.wrap_prompt(text_embeds,
                                                img_embeds,
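The "speed up init llm" branch relies on PyTorch 2's meta-device construction followed by to_empty(). A self-contained sketch of that pattern on a toy module (the module and sizes below are illustrative, not from the repository):

import torch
import torch.nn as nn

class TinyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(16, 16)

# Under the 'meta' device no parameter memory is allocated and no random init runs,
# so constructing even a large model is nearly free.
with torch.device('meta'):
    model = TinyNet()

# to_empty() materialises uninitialised storage on the target device; real weights
# are expected to be loaded afterwards (the commit only casts to float16 here).
model = model.to_empty(device='cpu').to(torch.float16)
print(next(model.parameters()).dtype)   # torch.float16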
pytorch_model-00001-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:a60c0a6a091960cab42833a3df2e196649fa874f1afa528eb43e483402f20650
+size 4943795512
pytorch_model-00002-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:37f259132ac2bbd54a29a5ebf18c8ade362ae2bc0533b20994e9a3c2177eceee
+size 4977697573
pytorch_model-00003-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:05b9d20f4b00902b0210573bdd4ac62c1e45725095157ad1fe17cbc5f08f76c2
+size 4977697701
pytorch_model-00004-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:a7aff93bbc9a98d49c07453ee17efdc07018284281e25f1e33f1179177cb783f
+size 3678530786
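The four .bin entries are git-lfs pointer files: they record only the sha256 oid and byte size of each weight shard. A minimal sketch for checking a downloaded shard against its pointer (the local path is an assumption):

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# oid taken from the pointer above for pytorch_model-00004-of-00004.bin.
assert sha256_of("pytorch_model-00004-of-00004.bin") == \
    "a7aff93bbc9a98d49c07453ee17efdc07018284281e25f1e33f1179177cb783f"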
pytorch_model.bin.index.json CHANGED
The diff for this file is too large to render.
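pytorch_model.bin.index.json follows the standard Hugging Face sharded-checkpoint layout: a weight_map from tensor name to shard file plus a total_size entry. A short sketch for inspecting it locally, assuming the file has been downloaded:

import json

with open("pytorch_model.bin.index.json") as f:
    index = json.load(f)

print(index["metadata"]["total_size"])                # total bytes across the four shards
name, shard = next(iter(index["weight_map"].items()))
print(name, "->", shard)                              # which shard holds a given tensor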