myownskyW7 committed
Commit: b497ed8
Parent: db8a0eb

update internlm-xcomposer-7b
config.json CHANGED
@@ -17,8 +17,7 @@
   "initializer_range": 0.02,
   "intermediate_size": 11008,
   "intern_converted_llm": true,
-  "kqvo_bias": true,
-  "llama_lora": {
+  "internlm_lora": {
     "freeze": false,
     "learn_param": [
       "q",
@@ -29,6 +28,7 @@
     "lora_dropout": 0.05,
     "lora_r": 256
   },
+  "kqvo_bias": true,
   "lora_cfg": {
     "freeze": false,
     "learn_param": [
configuration_InternLM_XComposer.py CHANGED
@@ -33,7 +33,7 @@ class InternLMXComposerConfig(PretrainedConfig):
         intern_converted_llm=True,
         kqvo_bias=True,
         device='cuda',
-        llama_lora=None,
+        internlm_lora=None,
         **kwargs,
     ):
         self.vocab_size = vocab_size
@@ -49,7 +49,7 @@ class InternLMXComposerConfig(PretrainedConfig):
         self.bias = bias
         self.num_query_token = num_query_token
         self.num_quant = num_quant
-        self.llama_lora = llama_lora
+        self.internlm_lora = internlm_lora
         self.kqvo_bias = kqvo_bias
         self.intern_converted_llm = intern_converted_llm
         self.device = device
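Note: a rough sketch of constructing the config object with the renamed keyword. The argument name and the dict keys mirror this commit; the concrete values and the full learn_param list (truncated in the diff above) are illustrative assumptions.

# Sketch only: `internlm_lora` and the keys below come from this commit; the values
# and the complete `learn_param` list are assumptions for illustration.
from configuration_InternLM_XComposer import InternLMXComposerConfig

config = InternLMXComposerConfig(
    kqvo_bias=True,
    internlm_lora={
        "freeze": False,
        "learn_param": ["q"],   # the diff truncates the rest of this list
        "lora_dropout": 0.05,
        "lora_r": 256,
    },
)
assert config.internlm_lora["lora_r"] == 256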
modeling_InternLM.py CHANGED
@@ -15,8 +15,8 @@ from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutpu
 from transformers.modeling_utils import PreTrainedModel
 from transformers.utils import logging
 
-from .modeling_utils import LoRALinear
 from .configuration_InternLM_XComposer import InternLMXComposerConfig
+from .modeling_utils import LoRALinear
 
 logger = logging.get_logger(__name__)
 
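Note: the reordered import refers to the repo's LoRALinear layer, whose body is not shown in this diff. As a generic reference only, a standard LoRA-style linear layer (a frozen base weight plus a trainable low-rank update, wired to the lora_r / lora_dropout settings seen in config.json) looks roughly like the sketch below. This is illustrative, not the repo's actual modeling_utils.LoRALinear.

# Generic LoRA linear sketch (NOT the repo's class):
# y = x @ W^T + dropout(x) @ A^T @ B^T * (alpha / r)
import torch.nn as nn

class LoRALinearSketch(nn.Linear):
    def __init__(self, in_features, out_features, lora_r=256, lora_alpha=256,
                 lora_dropout=0.05, **kwargs):
        super().__init__(in_features, out_features, **kwargs)
        self.lora_dropout = nn.Dropout(lora_dropout)
        self.lora_A = nn.Linear(in_features, lora_r, bias=False)
        self.lora_B = nn.Linear(lora_r, out_features, bias=False)
        self.scaling = lora_alpha / lora_r
        nn.init.zeros_(self.lora_B.weight)   # low-rank update starts as a no-op
        self.weight.requires_grad = False    # base weight stays frozen

    def forward(self, x):
        base = super().forward(x)
        return base + self.lora_B(self.lora_A(self.lora_dropout(x))) * self.scaling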
modeling_InternLM_XComposer.py CHANGED
@@ -14,7 +14,6 @@ from torchvision.transforms.functional import InterpolationMode
 from PIL import Image
 
 from .modeling_perceive_sampler import BertConfig, BertLMHeadModel
-from .configuration_InternLM_XComposer import InternLMXComposerConfig
 from .modeling_vit import *
 from .modeling_InternLM import *
 from .modeling_utils import *
@@ -63,25 +62,25 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
         self.flag_image_start.requires_grad = False
         self.flag_image_end.requires_grad = False
 
-        llama_lora = config.llama_lora
-        self.llama_lora = llama_lora
-        setattr(LlamaForCausalLM, 'lora_cfg', llama_lora)
+        internlm_lora = config.internlm_lora
+        self.internlm_lora = internlm_lora
+        setattr(InternLMForCausalLM, 'lora_cfg', internlm_lora)
 
         if int(torch.__version__[0]) == 1:
-            self.llama_model = LlamaForCausalLM._from_config(config).to(
+            self.internlm_model = InternLMForCausalLM._from_config(config).to(
                 torch.float16)
         else:
             assert int(torch.__version__[0]) == 2
             # speed up init llm
             with torch.device('meta'):
-                self.llama_model = LlamaForCausalLM._from_config(config)
-            self.llama_model.to_empty(device=config.device).to(torch.float16)
-            for n, m in self.llama_model.named_modules():
+                self.internlm_model = InternLMForCausalLM._from_config(config)
+            self.internlm_model.to_empty(device=config.device).to(torch.float16)
+            for n, m in self.internlm_model.named_modules():
                 if 'lora' in n:
                     m.float()
 
-        self.llama_proj = nn.Linear(self.Qformer.config.hidden_size,
-                                    self.llama_model.config.hidden_size)
+        self.internlm_proj = nn.Linear(self.Qformer.config.hidden_size,
+                                       self.internlm_model.config.hidden_size)
         print('Done')
 
         self.vis_processor = transforms.Compose([
@@ -159,14 +158,14 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
             encoder_attention_mask=image_atts,
             return_dict=True,
         )
-        inputs_llama = self.llama_proj(query_output.last_hidden_state)
-        inputs_llama = torch.cat([
-            self.flag_image_start.expand(inputs_llama.shape[0], -1, -1),
-            inputs_llama,
-            self.flag_image_end.expand(inputs_llama.shape[0], -1, -1)
+        inputs_internlm = self.internlm_proj(query_output.last_hidden_state)
+        inputs_internlm = torch.cat([
+            self.flag_image_start.expand(inputs_internlm.shape[0], -1, -1),
+            inputs_internlm,
+            self.flag_image_end.expand(inputs_internlm.shape[0], -1, -1)
         ],
-                                 dim=1)
-        return inputs_llama
+                                     dim=1)
+        return inputs_internlm
 
     def encode_text(self, text, add_special_tokens=False):
         text_token_ids = self.tokenizer(
@@ -174,7 +173,7 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
             return_tensors='pt',
             add_special_tokens=add_special_tokens,
         ).input_ids.to(self.device)
-        text_embeds = self.llama_model.model.embed_tokens(text_token_ids)
+        text_embeds = self.internlm_model.model.embed_tokens(text_token_ids)
         return text_embeds
 
     def decode_text(self, out_embeds):
@@ -200,8 +199,8 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
         text_embeds = self.encode_text(text)
         img_embeds = self.encode_img(image)
         prompt_embeds = self.wrap_prompt(text_embeds, img_embeds)
-        out_embeds = self.llama_model.generate(inputs_embeds=prompt_embeds,
-                                               **self.get_gen_args(**kwargs))
+        out_embeds = self.internlm_model.generate(inputs_embeds=prompt_embeds,
+                                                  **self.get_gen_args(**kwargs))
         out_text = self.decode_text(out_embeds)
         return out_text
 
@@ -211,14 +210,14 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
         prompt_embeds = self.wrap_prompt(text_embeds,
                                          img_embeds,
                                          history=history)
-        out_embeds = self.llama_model.generate(inputs_embeds=prompt_embeds,
-                                               **self.get_gen_args(**kwargs))
+        out_embeds = self.internlm_model.generate(inputs_embeds=prompt_embeds,
+                                                  **self.get_gen_args(**kwargs))
         out_text = self.decode_text(out_embeds)
 
         # trunc at eoh and eoa
         clean_out_text_token_ids = self.tokenizer(
             out_text, return_tensors='pt').input_ids.to(self.device)
-        clean_out_text_embeds = self.llama_model.model.embed_tokens(
+        clean_out_text_embeds = self.internlm_model.model.embed_tokens(
             clean_out_text_token_ids)
         clean_prompt_embeds = self.wrap_prompt(text_embeds,
                                                img_embeds,
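Note: a rough end-to-end sketch of the renamed generation path. The attribute and method names (encode_text, encode_img, wrap_prompt, internlm_model, get_gen_args, decode_text, vis_processor) are the ones visible in this diff; the repo id, the loading entry point, the tokenizer wiring, and the image argument type are assumptions.

# Sketch under assumptions: AutoModel/AutoTokenizer loading, the repo id, and the
# image argument are guesses; the method/attribute names come from this diff.
import torch
from transformers import AutoModel, AutoTokenizer

model_id = "internlm/internlm-xcomposer-7b"   # assumed repo id
model = AutoModel.from_pretrained(model_id, trust_remote_code=True).cuda().eval()
model.tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

with torch.no_grad():
    text_embeds = model.encode_text("Describe the image.")
    img_embeds = model.encode_img("example.jpg")          # handled by vis_processor
    prompt_embeds = model.wrap_prompt(text_embeds, img_embeds)
    out_embeds = model.internlm_model.generate(inputs_embeds=prompt_embeds,
                                               **model.get_gen_args())
    print(model.decode_text(out_embeds))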
pytorch_model-00001-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e8d5fcfb834f7ae65529931a01e9a6d4ee00583ca60153647b486af28b6b5c0
-size 4943795320
+oid sha256:a60c0a6a091960cab42833a3df2e196649fa874f1afa528eb43e483402f20650
+size 4943795512
pytorch_model-00002-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e8a31dcdf74ddb5c5ebd4590f6c3144841ccc352f4f554f660071fd2608b01d
-size 4977696869
+oid sha256:37f259132ac2bbd54a29a5ebf18c8ade362ae2bc0533b20994e9a3c2177eceee
+size 4977697573
pytorch_model-00003-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75f2aca460c0881d83beda34d5bfaa94de0ed92bc773405022aba4744761933e
-size 4977696997
+oid sha256:05b9d20f4b00902b0210573bdd4ac62c1e45725095157ad1fe17cbc5f08f76c2
+size 4977697701
pytorch_model-00004-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d772cfe1fa6b2ab8242fc6cf67c07ac2a4c75c7d8e9b90bce17c870b3cb2ea1d
-size 3678530338
+oid sha256:a7aff93bbc9a98d49c07453ee17efdc07018284281e25f1e33f1179177cb783f
+size 3678530786
pytorch_model.bin.index.json CHANGED
The diff for this file is too large to render. See raw diff