myownskyW7 committed
Commit b497ed8
Parent(s): db8a0eb

update internlm-xcomposer-7b

Files changed:
- config.json +2 -2
- configuration_InternLM_XComposer.py +2 -2
- modeling_InternLM.py +1 -1
- modeling_InternLM_XComposer.py +22 -23
- pytorch_model-00001-of-00004.bin +2 -2
- pytorch_model-00002-of-00004.bin +2 -2
- pytorch_model-00003-of-00004.bin +2 -2
- pytorch_model-00004-of-00004.bin +2 -2
- pytorch_model.bin.index.json +0 -0
config.json CHANGED
@@ -17,8 +17,7 @@
   "initializer_range": 0.02,
   "intermediate_size": 11008,
   "intern_converted_llm": true,
-  "
-  "llama_lora": {
+  "internlm_lora": {
     "freeze": false,
     "learn_param": [
       "q",
@@ -29,6 +28,7 @@
     "lora_dropout": 0.05,
     "lora_r": 256
   },
+  "kqvo_bias": true,
   "lora_cfg": {
     "freeze": false,
     "learn_param": [
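For context, the renamed block can be read back through the standard transformers config API. A minimal sketch, assuming the published repo id and that the custom config class is loaded with trust_remote_code:

from transformers import AutoConfig

# Assumed repo id for illustration; a local clone of this repository works the same way.
config = AutoConfig.from_pretrained(
    "internlm/internlm-xcomposer-7b",
    trust_remote_code=True,   # needed so the custom InternLMXComposerConfig class is used
)

print(config.internlm_lora)   # the dict above: freeze, learn_param, lora_dropout, lora_r
print(config.kqvo_bias)       # True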
configuration_InternLM_XComposer.py CHANGED
@@ -33,7 +33,7 @@ class InternLMXComposerConfig(PretrainedConfig):
         intern_converted_llm=True,
         kqvo_bias=True,
         device='cuda',
-
+        internlm_lora=None,
         **kwargs,
     ):
         self.vocab_size = vocab_size
@@ -49,7 +49,7 @@ class InternLMXComposerConfig(PretrainedConfig):
         self.bias = bias
         self.num_query_token = num_query_token
         self.num_quant = num_quant
-        self.
+        self.internlm_lora = internlm_lora
         self.kqvo_bias = kqvo_bias
         self.intern_converted_llm = intern_converted_llm
         self.device = device
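A minimal sketch of the new constructor argument, assuming the repository files are importable and the remaining arguments keep their defaults; the dict values mirror the config.json block above:

from configuration_InternLM_XComposer import InternLMXComposerConfig

internlm_lora = {
    "freeze": False,
    "learn_param": ["q"],   # the full list is truncated in the rendered diff
    "lora_dropout": 0.05,
    "lora_r": 256,
}

config = InternLMXComposerConfig(internlm_lora=internlm_lora, kqvo_bias=True)
assert config.internlm_lora["lora_r"] == 256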
modeling_InternLM.py CHANGED
@@ -15,8 +15,8 @@ from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
 from transformers.modeling_utils import PreTrainedModel
 from transformers.utils import logging
 
-from .modeling_utils import LoRALinear
 from .configuration_InternLM_XComposer import InternLMXComposerConfig
+from .modeling_utils import LoRALinear
 
 logger = logging.get_logger(__name__)
 
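The LoRALinear import that moves here comes from modeling_utils.py, which is not part of this diff. As a rough, generic illustration of the low-rank-adapter idea only (the class name, signature, and internals below are invented for illustration, not the repository's implementation):

import torch
import torch.nn as nn

class SimpleLoRALinear(nn.Module):
    """Frozen base linear plus a trainable low-rank update (generic LoRA sketch)."""

    def __init__(self, in_features, out_features, lora_r=256, lora_dropout=0.05, bias=True):
        super().__init__()
        self.base = nn.Linear(in_features, out_features, bias=bias)
        self.base.weight.requires_grad = False                       # "freeze": pretrained weight stays fixed
        self.lora_a = nn.Linear(in_features, lora_r, bias=False)     # low-rank down-projection
        self.lora_b = nn.Linear(lora_r, out_features, bias=False)    # low-rank up-projection
        nn.init.zeros_(self.lora_b.weight)                           # adapter starts as a no-op
        self.dropout = nn.Dropout(lora_dropout)

    def forward(self, x):
        return self.base(x) + self.lora_b(self.lora_a(self.dropout(x)))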
modeling_InternLM_XComposer.py CHANGED
@@ -14,7 +14,6 @@ from torchvision.transforms.functional import InterpolationMode
 from PIL import Image
 
 from .modeling_perceive_sampler import BertConfig, BertLMHeadModel
-from .configuration_InternLM_XComposer import InternLMXComposerConfig
 from .modeling_vit import *
 from .modeling_InternLM import *
 from .modeling_utils import *
@@ -63,25 +62,25 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
         self.flag_image_start.requires_grad = False
         self.flag_image_end.requires_grad = False
 
-
-        self.
-        setattr(
+        internlm_lora = config.internlm_lora
+        self.internlm_lora = internlm_lora
+        setattr(InternLMForCausalLM, 'lora_cfg', internlm_lora)
 
         if int(torch.__version__[0]) == 1:
-            self.
+            self.internlm_model = InternLMForCausalLM._from_config(config).to(
                 torch.float16)
         else:
             assert int(torch.__version__[0]) == 2
             # speed up init llm
             with torch.device('meta'):
-                self.
-            self.
-            for n, m in self.
+                self.internlm_model = InternLMForCausalLM._from_config(config)
+            self.internlm_model.to_empty(device=config.device).to(torch.float16)
+            for n, m in self.internlm_model.named_modules():
                 if 'lora' in n:
                     m.float()
 
-        self.
-        self.
+        self.internlm_proj = nn.Linear(self.Qformer.config.hidden_size,
+                                       self.internlm_model.config.hidden_size)
         print('Done')
 
         self.vis_processor = transforms.Compose([
@@ -159,14 +158,14 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
             encoder_attention_mask=image_atts,
             return_dict=True,
         )
-
-
-            self.flag_image_start.expand(
-
-            self.flag_image_end.expand(
+        inputs_internlm = self.internlm_proj(query_output.last_hidden_state)
+        inputs_internlm = torch.cat([
+            self.flag_image_start.expand(inputs_internlm.shape[0], -1, -1),
+            inputs_internlm,
+            self.flag_image_end.expand(inputs_internlm.shape[0], -1, -1)
         ],
-
-        return
+                                    dim=1)
+        return inputs_internlm
 
     def encode_text(self, text, add_special_tokens=False):
         text_token_ids = self.tokenizer(
@@ -174,7 +173,7 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
             return_tensors='pt',
             add_special_tokens=add_special_tokens,
         ).input_ids.to(self.device)
-        text_embeds = self.
+        text_embeds = self.internlm_model.model.embed_tokens(text_token_ids)
         return text_embeds
 
     def decode_text(self, out_embeds):
@@ -200,8 +199,8 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
         text_embeds = self.encode_text(text)
         img_embeds = self.encode_img(image)
         prompt_embeds = self.wrap_prompt(text_embeds, img_embeds)
-        out_embeds = self.
-
+        out_embeds = self.internlm_model.generate(inputs_embeds=prompt_embeds,
+                                                  **self.get_gen_args(**kwargs))
         out_text = self.decode_text(out_embeds)
         return out_text
 
@@ -211,14 +210,14 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
         prompt_embeds = self.wrap_prompt(text_embeds,
                                          img_embeds,
                                          history=history)
-        out_embeds = self.
-
+        out_embeds = self.internlm_model.generate(inputs_embeds=prompt_embeds,
+                                                  **self.get_gen_args(**kwargs))
         out_text = self.decode_text(out_embeds)
 
         # trunc at eoh and eoa
         clean_out_text_token_ids = self.tokenizer(
             out_text, return_tensors='pt').input_ids.to(self.device)
-        clean_out_text_embeds = self.
+        clean_out_text_embeds = self.internlm_model.model.embed_tokens(
             clean_out_text_token_ids)
         clean_prompt_embeds = self.wrap_prompt(text_embeds,
                                                img_embeds,
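The "speed up init llm" branch relies on PyTorch 2's meta-device construction followed by to_empty(). A self-contained sketch of that pattern on a toy module (the module and sizes below are illustrative, not from the repository):

import torch
import torch.nn as nn

class TinyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(16, 16)

# Under the 'meta' device no parameter memory is allocated and no random init runs,
# so constructing even a large model is nearly free.
with torch.device('meta'):
    model = TinyNet()

# to_empty() materialises uninitialised storage on the target device; real weights
# are expected to be loaded afterwards (the commit only casts to float16 here).
model = model.to_empty(device='cpu').to(torch.float16)
print(next(model.parameters()).dtype)   # torch.float16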
pytorch_model-00001-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:a60c0a6a091960cab42833a3df2e196649fa874f1afa528eb43e483402f20650
+size 4943795512
pytorch_model-00002-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:37f259132ac2bbd54a29a5ebf18c8ade362ae2bc0533b20994e9a3c2177eceee
+size 4977697573
pytorch_model-00003-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:05b9d20f4b00902b0210573bdd4ac62c1e45725095157ad1fe17cbc5f08f76c2
+size 4977697701
pytorch_model-00004-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:a7aff93bbc9a98d49c07453ee17efdc07018284281e25f1e33f1179177cb783f
+size 3678530786
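The four .bin entries are git-lfs pointer files: they record only the sha256 oid and byte size of each weight shard. A minimal sketch for checking a downloaded shard against its pointer (the local path is an assumption):

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# oid taken from the pointer above for pytorch_model-00004-of-00004.bin.
assert sha256_of("pytorch_model-00004-of-00004.bin") == \
    "a7aff93bbc9a98d49c07453ee17efdc07018284281e25f1e33f1179177cb783f"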
pytorch_model.bin.index.json CHANGED
The diff for this file is too large to render.
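pytorch_model.bin.index.json follows the standard Hugging Face sharded-checkpoint layout: a weight_map from tensor name to shard file plus a total_size entry. A short sketch for inspecting it locally, assuming the file has been downloaded:

import json

with open("pytorch_model.bin.index.json") as f:
    index = json.load(f)

print(index["metadata"]["total_size"])                # total bytes across the four shards
name, shard = next(iter(index["weight_map"].items()))
print(name, "->", shard)                              # which shard holds a given tensor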