wenhu committed on
Commit
fb30ad8
·
verified ·
1 Parent(s): e597f74

Update processing_phi3_v.py

Browse files
Files changed (1) hide show
  1. processing_phi3_v.py +17 -20
processing_phi3_v.py CHANGED
@@ -160,12 +160,12 @@ class Phi3VImageProcessor(BaseImageProcessor):
160
  model_input_names = ["pixel_values"]
161
 
162
  def __init__(
163
- self,
164
- num_crops: int = 1,
165
- image_mean: Optional[Union[float, List[float]]] = None,
166
- image_std: Optional[Union[float, List[float]]] = None,
167
- do_convert_rgb: bool = True,
168
- **kwargs,
169
  ) -> None:
170
  super().__init__(**kwargs)
171
  self.num_crops = num_crops
@@ -174,8 +174,8 @@ class Phi3VImageProcessor(BaseImageProcessor):
174
  self.do_convert_rgb = do_convert_rgb
175
 
176
  def calc_num_image_tokens(
177
- self,
178
- images: ImageInput
179
  ):
180
  """ Calculate the number of image tokens for each image.
181
  Args:
@@ -210,12 +210,12 @@ class Phi3VImageProcessor(BaseImageProcessor):
210
  return num_img_tokens
211
 
212
  def preprocess(
213
- self,
214
- images: ImageInput,
215
- image_mean: Optional[Union[float, List[float]]] = None,
216
- image_std: Optional[Union[float, List[float]]] = None,
217
- do_convert_rgb: bool = None,
218
- return_tensors: Optional[Union[str, TensorType]] = None,
219
  ):
220
  """
221
  Args:
@@ -276,8 +276,7 @@ class Phi3VImageProcessor(BaseImageProcessor):
276
  # reshape to channel dimension -> (num_images, num_crops, 3, 336, 336)
277
  # (1, 3, h//336, 336, w//336, 336) -> (1, h//336, w//336, 3, 336, 336) -> (h//336*w//336, 3, 336, 336)
278
  hd_images_reshape = [
279
- im.reshape(1, 3, h // 336, 336, w // 336, 336).permute(0, 2, 4, 1, 3, 5).reshape(-1, 3, 336,
280
- 336).contiguous() for
281
  im, (h, w) in zip(hd_images, shapes)]
282
  # concat global image and local image
283
  hd_images_reshape = [torch.cat([_global_image] + [_im], dim=0) for _global_image, _im in
@@ -443,11 +442,9 @@ class Phi3VProcessor(ProcessorMixin):
443
  unique_image_ids = sorted(list(set(image_ids)))
444
  # image_ids must start from 1, and must be continuous int, e.g. [1, 2, 3], cannot be [1, 4, 5]
445
  # check the condition
446
- assert unique_image_ids == list(range(1,
447
- len(unique_image_ids) + 1)), f"image_ids must start from 1, and must be continuous int, e.g. [1, 2, 3], cannot be {unique_image_ids}"
448
  # total images must be the same as the number of image tags
449
- assert len(unique_image_ids) == len(
450
- images), f"total images must be the same as the number of image tags, got {len(unique_image_ids)} image tags and {len(images)} images"
451
 
452
  image_ids_pad = [[-iid] * num_img_tokens[iid - 1] for iid in image_ids]
453
 
 
160
  model_input_names = ["pixel_values"]
161
 
162
  def __init__(
163
+ self,
164
+ num_crops: int = 1,
165
+ image_mean: Optional[Union[float, List[float]]] = None,
166
+ image_std: Optional[Union[float, List[float]]] = None,
167
+ do_convert_rgb: bool = True,
168
+ **kwargs,
169
  ) -> None:
170
  super().__init__(**kwargs)
171
  self.num_crops = num_crops
 
174
  self.do_convert_rgb = do_convert_rgb
175
 
176
  def calc_num_image_tokens(
177
+ self,
178
+ images: ImageInput
179
  ):
180
  """ Calculate the number of image tokens for each image.
181
  Args:
 
210
  return num_img_tokens
211
 
212
  def preprocess(
213
+ self,
214
+ images: ImageInput,
215
+ image_mean: Optional[Union[float, List[float]]] = None,
216
+ image_std: Optional[Union[float, List[float]]] = None,
217
+ do_convert_rgb: bool = None,
218
+ return_tensors: Optional[Union[str, TensorType]] = None,
219
  ):
220
  """
221
  Args:
 
276
  # reshape to channel dimension -> (num_images, num_crops, 3, 336, 336)
277
  # (1, 3, h//336, 336, w//336, 336) -> (1, h//336, w//336, 3, 336, 336) -> (h//336*w//336, 3, 336, 336)
278
  hd_images_reshape = [
279
+ im.reshape(1, 3, h // 336, 336, w // 336, 336).permute(0, 2, 4, 1, 3, 5).reshape(-1, 3, 336, 336).contiguous() for
 
280
  im, (h, w) in zip(hd_images, shapes)]
281
  # concat global image and local image
282
  hd_images_reshape = [torch.cat([_global_image] + [_im], dim=0) for _global_image, _im in
 
442
  unique_image_ids = sorted(list(set(image_ids)))
443
  # image_ids must start from 1, and must be continuous int, e.g. [1, 2, 3], cannot be [1, 4, 5]
444
  # check the condition
445
+ assert unique_image_ids == list(range(1, len(unique_image_ids) + 1)), f"image_ids must start from 1, and must be continuous int, e.g. [1, 2, 3], cannot be {unique_image_ids}"
 
446
  # total images must be the same as the number of image tags
447
+ assert len(unique_image_ids) == len(images), f"total images must be the same as the number of image tags, got {len(unique_image_ids)} image tags and {len(images)} images"
 
448
 
449
  image_ids_pad = [[-iid] * num_img_tokens[iid - 1] for iid in image_ids]
450