bingwork committed
Commit 56243ad
1 Parent(s): 82a5b0d

Upload 2 files

Files changed (2)
  1. mm_utils.py +0 -31
  2. mmalaya_arch.py +44 -1
mm_utils.py CHANGED
@@ -21,15 +21,6 @@ DEFAULT_IM_START_TOKEN = "<im_start>"
 DEFAULT_IM_END_TOKEN = "<im_end>"
 
 
-def disable_torch_init():
-    """
-    Disable the redundant torch default initialization to accelerate model creation.
-    """
-    import torch
-    setattr(torch.nn.Linear, "reset_parameters", lambda self: None)
-    setattr(torch.nn.LayerNorm, "reset_parameters", lambda self: None)
-
-
 def load_image_from_base64(image):
     return Image.open(BytesIO(base64.b64decode(image)))
 
@@ -63,28 +54,6 @@ def process_images(images, image_processor, model_cfg):
     return new_images
 
 
-def tokenizer_image_token(prompt, tokenizer, image_token_index=IMAGE_TOKEN_INDEX, return_tensors=None):
-    prompt_chunks = [tokenizer(chunk).input_ids for chunk in prompt.split('<image>')]
-
-    def insert_separator(X, sep):
-        return [ele for sublist in zip(X, [sep]*len(X)) for ele in sublist][:-1]
-
-    input_ids = []
-    offset = 0
-    if len(prompt_chunks) > 0 and len(prompt_chunks[0]) > 0 and prompt_chunks[0][0] == tokenizer.bos_token_id:
-        offset = 1
-        input_ids.append(prompt_chunks[0][0])
-
-    for x in insert_separator(prompt_chunks, [image_token_index] * (offset + 1)):
-        input_ids.extend(x[offset:])
-
-    if return_tensors is not None:
-        if return_tensors == 'pt':
-            return torch.tensor(input_ids, dtype=torch.long)
-        raise ValueError(f'Unsupported tensor type: {return_tensors}')
-    return input_ids
-
-
 def get_model_name_from_path(model_path):
     model_path = model_path.strip("/")
     model_paths = model_path.split("/")
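
The deleted tokenizer_image_token helper spliced the <image> placeholder into the tokenized prompt; the same logic now lives inside prepare_for_inference in mmalaya_arch.py (see the second diff below). A minimal, self-contained sketch of that splicing step, where the toy token ids and the IMAGE_TOKEN_INDEX value of -200 are illustrative assumptions rather than values taken from this commit:

# Standalone sketch of the image-token splicing removed from mm_utils.py.
# The chunk ids below stand in for tokenizer(chunk).input_ids; -200 is the
# placeholder index used by LLaVA-style code bases (assumed here).
IMAGE_TOKEN_INDEX = -200
BOS_TOKEN_ID = 1

def splice_image_tokens(prompt_chunks, image_token_index=IMAGE_TOKEN_INDEX, bos_token_id=BOS_TOKEN_ID):
    # Interleave the prompt chunks with the image placeholder, keeping a single leading BOS.
    def insert_separator(X, sep):
        return [ele for sublist in zip(X, [sep] * len(X)) for ele in sublist][:-1]

    input_ids, offset = [], 0
    if prompt_chunks and prompt_chunks[0] and prompt_chunks[0][0] == bos_token_id:
        offset = 1
        input_ids.append(prompt_chunks[0][0])
    for x in insert_separator(prompt_chunks, [image_token_index] * (offset + 1)):
        input_ids.extend(x[offset:])
    return input_ids

# "<image>\nDescribe this picture" tokenized chunk by chunk around the placeholder:
chunks = [[1], [1, 13, 9842, 445, 7623]]
print(splice_image_tokens(chunks))  # [1, -200, 13, 9842, 445, 7623]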
mmalaya_arch.py CHANGED
@@ -4,6 +4,7 @@ import torch
 import torch.nn as nn
 from transformers import Blip2Model, Blip2Processor, Blip2Config
 from .mm_utils import IGNORE_INDEX, IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
+from .mm_utils import conv_templates
 
 
 class BLIP2VisionTower(nn.Module):
@@ -265,6 +266,48 @@ class MMAlayaMetaForCausalLM(ABC):
 
         return None, position_ids, attention_mask, past_key_values, new_input_embeds, new_labels
 
-    def initialize_vision_tokenizer(self, tokenizer):
+    def initialize_tokenizer(self, tokenizer):
         tokenizer.add_tokens([DEFAULT_IMAGE_TOKEN], special_tokens=True)
         self.resize_token_embeddings(len(tokenizer))
+
+    def prepare_for_inference(
+            self,
+            prompt,
+            tokenizer,
+            image,
+            image_token_index=IMAGE_TOKEN_INDEX,
+            return_tensors=None
+    ):
+        # Load the conversation template
+        conv = conv_templates["mmalaya_llama"].copy()
+        inp = DEFAULT_IMAGE_TOKEN + '\n' + prompt
+        conv.append_message(conv.roles[0], inp)
+        conv.append_message(conv.roles[1], None)
+        prompt = conv.get_prompt()
+
+        prompt_chunks = [tokenizer(chunk).input_ids for chunk in prompt.split('<image>')]
+
+        def insert_separator(X, sep):
+            return [ele for sublist in zip(X, [sep]*len(X)) for ele in sublist][:-1]
+
+        input_ids = []
+        offset = 0
+        if len(prompt_chunks) > 0 and len(prompt_chunks[0]) > 0 and prompt_chunks[0][0] == tokenizer.bos_token_id:
+            offset = 1
+            input_ids.append(prompt_chunks[0][0])
+
+        for x in insert_separator(prompt_chunks, [image_token_index] * (offset + 1)):
+            input_ids.extend(x[offset:])
+
+        if return_tensors == 'pt':
+            input_ids = torch.tensor(input_ids, dtype=torch.long)
+        elif return_tensors is not None:
+            raise ValueError(f'Unsupported tensor type: {return_tensors}')
+
+        # Build the stopping criteria for generate
+        stopping_criteria = KeywordsStoppingCriteria([conv.sep2], tokenizer, input_ids)
+        # Preprocess the image
+        image_processor = self.get_vision_tower().image_processor
+        image_tensor = image_processor(image, return_tensors='pt')['pixel_values'].half().cuda()
+
+        return input_ids, image_tensor, stopping_criteria
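
Together, the new methods let callers drive inference without the removed mm_utils helpers: initialize_tokenizer registers the <image> token, and prepare_for_inference returns the spliced input ids, the preprocessed image tensor, and the stopping criteria for generate. A hedged usage sketch, where the checkpoint id, loading calls, and generation arguments (notably the images= keyword) are assumptions made for illustration and not part of this commit:

# Usage sketch only; model/tokenizer loading and generate() arguments are assumed.
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "DataCanvas/MMAlaya"  # hypothetical repo id
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.float16, trust_remote_code=True
).cuda().eval()

model.initialize_tokenizer(tokenizer)  # add <image> and resize the embedding table

image = Image.open("example.jpg").convert("RGB")
input_ids, image_tensor, stopping_criteria = model.prepare_for_inference(
    prompt="What is shown in this image?",
    tokenizer=tokenizer,
    image=image,
)
# prepare_for_inference returns a Python list unless return_tensors='pt' is passed.
input_ids = torch.tensor(input_ids, dtype=torch.long).unsqueeze(0).cuda()

with torch.inference_mode():
    output_ids = model.generate(
        input_ids,
        images=image_tensor,            # assumed kwarg for LLaVA-style multimodal generate
        max_new_tokens=512,
        stopping_criteria=[stopping_criteria],
    )
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))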