rhendz committed on
Commit
e54fec1
1 Parent(s): bfa394a

Upload processor

Browse files
image_processing_spice_cnn.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import Dict, List, Optional, Union
2
 
3
  import numpy as np
4
 
@@ -9,6 +9,7 @@ from transformers.image_processing_utils import (
9
  )
10
  from transformers.image_transforms import (
11
  normalize,
 
12
  rescale,
13
  resize,
14
  to_channel_dimension_format,
@@ -19,11 +20,23 @@ from transformers.image_utils import (
19
  ChannelDimension,
20
  ImageInput,
21
  PILImageResampling,
 
22
  make_list_of_images,
23
  to_numpy_array,
24
  valid_images,
25
  )
26
- from transformers.utils import TensorType
 
 
 
 
 
 
 
 
 
 
 
27
 
28
 
29
  class SpiceCNNImageProcessor(BaseImageProcessor):
@@ -67,6 +80,8 @@ class SpiceCNNImageProcessor(BaseImageProcessor):
67
  do_normalize: bool = True,
68
  image_mean: Optional[Union[float, List[float]]] = None,
69
  image_std: Optional[Union[float, List[float]]] = None,
 
 
70
  **kwargs,
71
  ) -> None:
72
  super().__init__(**kwargs)
@@ -75,6 +90,7 @@ class SpiceCNNImageProcessor(BaseImageProcessor):
75
  self.do_resize = do_resize
76
  self.do_rescale = do_rescale
77
  self.do_normalize = do_normalize
 
78
  self.size = size
79
  self.resample = resample
80
  self.rescale_factor = rescale_factor
@@ -82,6 +98,110 @@ class SpiceCNNImageProcessor(BaseImageProcessor):
82
  image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
83
  )
84
  self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  def resize(
87
  self,
@@ -189,6 +309,8 @@ class SpiceCNNImageProcessor(BaseImageProcessor):
189
  do_normalize: Optional[bool] = None,
190
  image_mean: Optional[Union[float, List[float]]] = None,
191
  image_std: Optional[Union[float, List[float]]] = None,
 
 
192
  return_tensors: Optional[Union[str, TensorType]] = None,
193
  data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
194
  **kwargs,
@@ -239,6 +361,7 @@ class SpiceCNNImageProcessor(BaseImageProcessor):
239
  )
240
  image_mean = image_mean if image_mean is not None else self.image_mean
241
  image_std = image_std if image_std is not None else self.image_std
 
242
 
243
  size = size if size is not None else self.size
244
  size_dict = get_size_dict(size)
@@ -277,6 +400,9 @@ class SpiceCNNImageProcessor(BaseImageProcessor):
277
  for image in images
278
  ]
279
 
 
 
 
280
  images = [to_channel_dimension_format(image, data_format) for image in images]
281
 
282
  data = {"pixel_values": images}
 
1
+ from typing import Dict, List, Optional, Union, Tuple, Iterable
2
 
3
  import numpy as np
4
 
 
9
  )
10
  from transformers.image_transforms import (
11
  normalize,
12
+ pad,
13
  rescale,
14
  resize,
15
  to_channel_dimension_format,
 
20
  ChannelDimension,
21
  ImageInput,
22
  PILImageResampling,
23
+ infer_channel_dimension_format,
24
  make_list_of_images,
25
  to_numpy_array,
26
  valid_images,
27
  )
28
+ from transformers.utils import ExplicitEnum, TensorType
29
+
30
+
31
class PaddingMode(ExplicitEnum):
    """
    Names of the border-filling strategies accepted by `pad`.

    Each member's value is the lowercase string form of its name. `pad` translates the
    members to `np.pad` modes as follows: `CONSTANT` -> `"constant"`, `REFLECT` -> `"reflect"`,
    `REPLICATE` -> `"edge"`, `SYMMETRIC` -> `"symmetric"`.
    """

    # Fill the border with a fixed value.
    CONSTANT = "constant"
    # Mirror the image about its first/last values (edge value not repeated).
    REFLECT = "reflect"
    # Repeat the outermost value of the image.
    REPLICATE = "replicate"
    # Mirror the image about its edge (edge value repeated).
    SYMMETRIC = "symmetric"
40
 
41
 
42
  class SpiceCNNImageProcessor(BaseImageProcessor):
 
80
  do_normalize: bool = True,
81
  image_mean: Optional[Union[float, List[float]]] = None,
82
  image_std: Optional[Union[float, List[float]]] = None,
83
+ do_padding: bool = False,
84
+ padding: int = 0,
85
  **kwargs,
86
  ) -> None:
87
  super().__init__(**kwargs)
 
90
  self.do_resize = do_resize
91
  self.do_rescale = do_rescale
92
  self.do_normalize = do_normalize
93
+ self.do_padding = do_padding
94
  self.size = size
95
  self.resample = resample
96
  self.rescale_factor = rescale_factor
 
98
  image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
99
  )
100
  self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
101
+ self.padding = padding
102
+
103
@staticmethod
def pad(
    image: np.ndarray,
    padding: Union[int, Tuple[int, int], Iterable[Tuple[int, int]]],
    mode: PaddingMode = PaddingMode.CONSTANT,
    constant_values: Union[float, Iterable[float]] = 0.0,
    data_format: Optional[Union[str, ChannelDimension]] = None,
    input_data_format: Optional[Union[str, ChannelDimension]] = None,
) -> np.ndarray:
    """
    Pads the `image` with the specified (height, width) `padding` and `mode`.

    Declared as a static method: the function reads no instance state, and the processor invokes it as
    `self.pad(image=..., padding=...)`, which would otherwise bind the instance to `image` and fail.

    Args:
        image (`np.ndarray`):
            The image to pad. A 4-dimensional array is treated as a batch whose leading axis is not padded.
        padding (`int` or `Tuple[int, int]` or `Iterable[Tuple[int, int]]`):
            Padding to apply to the edges of the height, width axes. Can be one of these formats:
            - `((before_height, after_height), (before_width, after_width))` unique pad widths for each axis.
            - `((before, after),)` or `(before, after)` yields same before and after pad for height and width.
            - `(pad,)` or int is a shortcut for before = after = pad width for all axes.
        mode (`PaddingMode`):
            The padding mode to use. Can be one of:
                - `"constant"`: pads with a constant value.
                - `"reflect"`: pads with the reflection of the vector mirrored on the first and last values of the
                  vector along each axis.
                - `"replicate"`: pads with the replication of the last value on the edge of the array along each axis.
                - `"symmetric"`: pads with the reflection of the vector mirrored along the edge of the array.
        constant_values (`float` or `Iterable[float]`, *optional*):
            The value to use for the padding if `mode` is `"constant"`. Accepts the same formats as `padding`.
        data_format (`str` or `ChannelDimension`, *optional*):
            The channel dimension format for the output image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            If unset, will use same as the input image.
        input_data_format (`str` or `ChannelDimension`, *optional*):
            The channel dimension format for the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            If unset, will use the inferred format of the input image.

    Returns:
        `np.ndarray`: The padded image.

    Raises:
        ValueError: If `padding` / `constant_values` is in an unsupported format or `mode` is not a known
            padding mode.
    """
    if input_data_format is None:
        input_data_format = infer_channel_dimension_format(image)

    def _expand_for_data_format(values):
        """
        Convert values to the per-axis `((before, after), ...)` layout expected by np.pad for this image.
        """
        if isinstance(values, (int, float)):
            values = ((values, values), (values, values))
        elif isinstance(values, tuple) and len(values) == 1:
            # `(pad,)` holds a scalar, `((before, after),)` already holds a pair; either way the
            # resulting pair is shared by the height and width axes.
            inner = values[0]
            pair = inner if isinstance(inner, tuple) else (inner, inner)
            values = (pair, pair)
        elif (
            isinstance(values, tuple)
            and len(values) == 2
            and isinstance(values[0], (int, float))
        ):
            # A single (before, after) pair applied to both spatial axes.
            values = (values, values)
        elif (
            isinstance(values, tuple)
            and len(values) == 2
            and isinstance(values[0], tuple)
        ):
            # Already ((before_height, after_height), (before_width, after_width)).
            pass
        else:
            raise ValueError(f"Unsupported format: {values}")

        # add 0 for channel dimension
        values = (
            ((0, 0), *values)
            if input_data_format == ChannelDimension.FIRST
            else (*values, (0, 0))
        )

        # Leave a leading batch axis untouched. The entry must be a (before, after) pair like the
        # others: a bare scalar would make the sequence ragged, which np.pad cannot interpret.
        values = ((0, 0), *values) if image.ndim == 4 else values
        return values

    padding = _expand_for_data_format(padding)

    if mode == PaddingMode.CONSTANT:
        constant_values = _expand_for_data_format(constant_values)
        image = np.pad(
            image, padding, mode="constant", constant_values=constant_values
        )
    elif mode == PaddingMode.REFLECT:
        image = np.pad(image, padding, mode="reflect")
    elif mode == PaddingMode.REPLICATE:
        # NumPy calls border replication "edge".
        image = np.pad(image, padding, mode="edge")
    elif mode == PaddingMode.SYMMETRIC:
        image = np.pad(image, padding, mode="symmetric")
    else:
        raise ValueError(f"Invalid padding mode: {mode}")

    # Only convert the channel layout when the caller asked for a specific output format.
    image = (
        to_channel_dimension_format(image, data_format)
        if data_format is not None
        else image
    )
    return image
205
 
206
  def resize(
207
  self,
 
309
  do_normalize: Optional[bool] = None,
310
  image_mean: Optional[Union[float, List[float]]] = None,
311
  image_std: Optional[Union[float, List[float]]] = None,
312
+ do_padding: Optional[bool] = None,
313
+ padding: Optional[int] = None,
314
  return_tensors: Optional[Union[str, TensorType]] = None,
315
  data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
316
  **kwargs,
 
361
  )
362
  image_mean = image_mean if image_mean is not None else self.image_mean
363
  image_std = image_std if image_std is not None else self.image_std
364
+ padding = padding if padding is not None else self.padding
365
 
366
  size = size if size is not None else self.size
367
  size_dict = get_size_dict(size)
 
400
  for image in images
401
  ]
402
 
403
+ if do_padding:
404
+ images = [self.pad(image=image, padding=padding) for image in images]
405
+
406
  images = [to_channel_dimension_format(image, data_format) for image in images]
407
 
408
  data = {"pixel_values": images}
preprocessor_config.json CHANGED
@@ -3,11 +3,13 @@
3
  "AutoImageProcessor": "image_processing_spice_cnn.SpiceCNNImageProcessor"
4
  },
5
  "do_normalize": false,
 
6
  "do_rescale": false,
7
  "do_resize": true,
8
  "image_mean": 0.5,
9
  "image_processor_type": "SpiceCNNImageProcessor",
10
  "image_std": 0.5,
 
11
  "resample": 2,
12
  "rescale_factor": 0.00392156862745098,
13
  "size": {
 
3
  "AutoImageProcessor": "image_processing_spice_cnn.SpiceCNNImageProcessor"
4
  },
5
  "do_normalize": false,
6
+ "do_padding": false,
7
  "do_rescale": false,
8
  "do_resize": true,
9
  "image_mean": 0.5,
10
  "image_processor_type": "SpiceCNNImageProcessor",
11
  "image_std": 0.5,
12
+ "padding": 0,
13
  "resample": 2,
14
  "rescale_factor": 0.00392156862745098,
15
  "size": {