andrewqian123 committed
Commit b0d30e3 (verified) · 1 Parent(s): 4ca6160

Update modeling_minicpmv.py

Files changed (1)
  1. modeling_minicpmv.py +12 -12
modeling_minicpmv.py CHANGED
@@ -102,7 +102,7 @@ class MiniCPMV(MiniCPMVPreTrainedModel):
                                                                    padding_value=0.0)
                 B, L, _ = all_pixel_values.shape
                 all_pixel_values = all_pixel_values.permute(0, 2, 1).reshape(B, 3, -1, L)
-                print(B, "BATCH")
+                # print(B, "BATCH")
                 patch_attn_mask = torch.zeros((B, 1, max_patches), dtype=torch.bool, device=device)
                 for i in range(B):
                     patch_attn_mask[i, :tgt_sizes[i][0] * tgt_sizes[i][1]] = True
@@ -111,7 +111,7 @@ class MiniCPMV(MiniCPMVPreTrainedModel):
                 vision_embedding = self.resampler(vision_embedding, tgt_sizes)
             else:
                 # get vision_embedding foreach
-                print("HERE, NOT BATCH")
+                # print("HERE, NOT BATCH")
                 vision_embedding = []
                 for single_tgt_size, single_pixel_values in zip(tgt_sizes, all_pixel_values):
                     single_pixel_values = single_pixel_values.unsqueeze(0)
@@ -170,7 +170,7 @@ class MiniCPMV(MiniCPMVPreTrainedModel):
             elif self.training:
                 cur_vllm_emb += cur_vs_hs[0].mean() * 0
 
-        print(vllm_embedding.shape)
+        # print(vllm_embedding.shape)
         return vllm_embedding, vision_hidden_states
 
     def forward(self, data, **kwargs):
@@ -194,7 +194,7 @@ class MiniCPMV(MiniCPMVPreTrainedModel):
                 result = result[1:]
             if result[-1] == tokenizer.eos_id or result[-1] == tokenizer.eot_id:
                 result = result[:-1]
-            print(result)
+            # print(result)
             result_text.append(tokenizer.decode(result).strip())
         return result_text
 
@@ -279,7 +279,7 @@ class MiniCPMV(MiniCPMVPreTrainedModel):
                 input_embeds,
                 vision_hidden_states_dummy,
             ) = self.get_vllm_embedding(model_inputs)
-            print(input_embeds.shape, f"INPUT_EMBEDS {counter}")
+            # print(input_embeds.shape, f"INPUT_EMBEDS {counter}")
             counter += 1
             batch.append(input_embeds)
 
@@ -309,7 +309,7 @@ class MiniCPMV(MiniCPMVPreTrainedModel):
                     # Concatenate the padding tensor to the left of the original tensor
                     tensor = torch.cat((padding_tensor, tensor), dim=1)
 
-                    print(tensor.shape, "UPDATED_SHAPE")
+                    # print(tensor.shape, "UPDATED_SHAPE")
 
                     # Update the batch with the padded tensor
                     batch[place] = tensor
@@ -318,17 +318,17 @@ class MiniCPMV(MiniCPMVPreTrainedModel):
                 attention_mask.append(to_add)
 
             attention_mask = torch.tensor(attention_mask)
-            print(attention_mask.shape)
-            print(attention_mask, "ATTENTION")
+            # print(attention_mask.shape)
+            # print(attention_mask, "ATTENTION")
             # attention_mask = attention_mask.to(self.device)
             # padded_tensors = [torch.nn.functional.pad(tensor, (0, 0, 0, max_x - tensor.shape[1])) for tensor in batch]
 
             # Step 3: Stack the padded tensors into a single batch
-            for stuff in batch:
-                print(stuff.shape, "SHAPE")
+            # for stuff in batch:
+            #     print(stuff.shape, "SHAPE")
             batch = torch.cat(batch, dim=0)
-            print(batch.shape)
-            print(batch)
+            # print(batch.shape)
+            # print(batch)
             # output_ids = self._decode(input_embeds, tokenizer, **kwargs)
             if stream:
                 kwargs.pop("decode_text")