Upload processor
Browse files- image_processing_spice_cnn.py +128 -2
- preprocessor_config.json +2 -0
image_processing_spice_cnn.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from typing import Dict, List, Optional, Union
|
2 |
|
3 |
import numpy as np
|
4 |
|
@@ -9,6 +9,7 @@ from transformers.image_processing_utils import (
|
|
9 |
)
|
10 |
from transformers.image_transforms import (
|
11 |
normalize,
|
|
|
12 |
rescale,
|
13 |
resize,
|
14 |
to_channel_dimension_format,
|
@@ -19,11 +20,23 @@ from transformers.image_utils import (
|
|
19 |
ChannelDimension,
|
20 |
ImageInput,
|
21 |
PILImageResampling,
|
|
|
22 |
make_list_of_images,
|
23 |
to_numpy_array,
|
24 |
valid_images,
|
25 |
)
|
26 |
-
from transformers.utils import TensorType
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
|
29 |
class SpiceCNNImageProcessor(BaseImageProcessor):
|
@@ -67,6 +80,8 @@ class SpiceCNNImageProcessor(BaseImageProcessor):
|
|
67 |
do_normalize: bool = True,
|
68 |
image_mean: Optional[Union[float, List[float]]] = None,
|
69 |
image_std: Optional[Union[float, List[float]]] = None,
|
|
|
|
|
70 |
**kwargs,
|
71 |
) -> None:
|
72 |
super().__init__(**kwargs)
|
@@ -75,6 +90,7 @@ class SpiceCNNImageProcessor(BaseImageProcessor):
|
|
75 |
self.do_resize = do_resize
|
76 |
self.do_rescale = do_rescale
|
77 |
self.do_normalize = do_normalize
|
|
|
78 |
self.size = size
|
79 |
self.resample = resample
|
80 |
self.rescale_factor = rescale_factor
|
@@ -82,6 +98,110 @@ class SpiceCNNImageProcessor(BaseImageProcessor):
|
|
82 |
image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
83 |
)
|
84 |
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
|
86 |
def resize(
|
87 |
self,
|
@@ -189,6 +309,8 @@ class SpiceCNNImageProcessor(BaseImageProcessor):
|
|
189 |
do_normalize: Optional[bool] = None,
|
190 |
image_mean: Optional[Union[float, List[float]]] = None,
|
191 |
image_std: Optional[Union[float, List[float]]] = None,
|
|
|
|
|
192 |
return_tensors: Optional[Union[str, TensorType]] = None,
|
193 |
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
|
194 |
**kwargs,
|
@@ -239,6 +361,7 @@ class SpiceCNNImageProcessor(BaseImageProcessor):
|
|
239 |
)
|
240 |
image_mean = image_mean if image_mean is not None else self.image_mean
|
241 |
image_std = image_std if image_std is not None else self.image_std
|
|
|
242 |
|
243 |
size = size if size is not None else self.size
|
244 |
size_dict = get_size_dict(size)
|
@@ -277,6 +400,9 @@ class SpiceCNNImageProcessor(BaseImageProcessor):
|
|
277 |
for image in images
|
278 |
]
|
279 |
|
|
|
|
|
|
|
280 |
images = [to_channel_dimension_format(image, data_format) for image in images]
|
281 |
|
282 |
data = {"pixel_values": images}
|
|
|
1 |
+
from typing import Dict, List, Optional, Union, Tuple, Iterable
|
2 |
|
3 |
import numpy as np
|
4 |
|
|
|
9 |
)
|
10 |
from transformers.image_transforms import (
|
11 |
normalize,
|
12 |
+
pad,
|
13 |
rescale,
|
14 |
resize,
|
15 |
to_channel_dimension_format,
|
|
|
20 |
ChannelDimension,
|
21 |
ImageInput,
|
22 |
PILImageResampling,
|
23 |
+
infer_channel_dimension_format,
|
24 |
make_list_of_images,
|
25 |
to_numpy_array,
|
26 |
valid_images,
|
27 |
)
|
28 |
+
from transformers.utils import ExplicitEnum, TensorType
|
29 |
+
|
30 |
+
|
31 |
+
class PaddingMode(ExplicitEnum):
    """Supported strategies for padding the height/width edges of an image.

    Values mirror the mode names accepted by ``np.pad`` (with ``REPLICATE``
    mapping to numpy's ``"edge"`` mode).
    """

    # Pad with a constant value.
    CONSTANT = "constant"
    # Mirror the image about the edge, excluding the edge pixel itself.
    REFLECT = "reflect"
    # Repeat the edge pixel outward (numpy's "edge" mode).
    REPLICATE = "replicate"
    # Mirror the image about the edge, including the edge pixel.
    SYMMETRIC = "symmetric"
|
40 |
|
41 |
|
42 |
class SpiceCNNImageProcessor(BaseImageProcessor):
|
|
|
80 |
do_normalize: bool = True,
|
81 |
image_mean: Optional[Union[float, List[float]]] = None,
|
82 |
image_std: Optional[Union[float, List[float]]] = None,
|
83 |
+
do_padding: bool = False,
|
84 |
+
padding: int = 0,
|
85 |
**kwargs,
|
86 |
) -> None:
|
87 |
super().__init__(**kwargs)
|
|
|
90 |
self.do_resize = do_resize
|
91 |
self.do_rescale = do_rescale
|
92 |
self.do_normalize = do_normalize
|
93 |
+
self.do_padding = do_padding
|
94 |
self.size = size
|
95 |
self.resample = resample
|
96 |
self.rescale_factor = rescale_factor
|
|
|
98 |
image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
99 |
)
|
100 |
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
101 |
+
self.padding = padding
|
102 |
+
|
103 |
+
def pad(
    self,
    image: np.ndarray,
    padding: Union[int, Tuple[int, int], Iterable[Tuple[int, int]]],
    mode: PaddingMode = PaddingMode.CONSTANT,
    constant_values: Union[float, Iterable[float]] = 0.0,
    data_format: Optional[Union[str, ChannelDimension]] = None,
    input_data_format: Optional[Union[str, ChannelDimension]] = None,
) -> np.ndarray:
    """
    Pads the `image` with the specified (height, width) `padding` and `mode`.

    Args:
        image (`np.ndarray`):
            The image to pad.
        padding (`int` or `Tuple[int, int]` or `Iterable[Tuple[int, int]]`):
            Padding to apply to the edges of the height, width axes. Can be one of three formats:
            - `((before_height, after_height), (before_width, after_width))` unique pad widths for each axis.
            - `((before, after),)` yields same before and after pad for height and width.
            - `(pad,)` or int is a shortcut for before = after = pad width for all axes.
        mode (`PaddingMode`):
            The padding mode to use. Can be one of:
            - `"constant"`: pads with a constant value.
            - `"reflect"`: pads with the reflection of the vector mirrored on the first and last values of the
              vector along each axis.
            - `"replicate"`: pads with the replication of the last value on the edge of the array along each axis.
            - `"symmetric"`: pads with the reflection of the vector mirrored along the edge of the array.
        constant_values (`float` or `Iterable[float]`, *optional*):
            The value to use for the padding if `mode` is `"constant"`.
        data_format (`str` or `ChannelDimension`, *optional*):
            The channel dimension format for the output image. Can be one of:
            - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
            - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            If unset, will use same as the input image.
        input_data_format (`str` or `ChannelDimension`, *optional*):
            The channel dimension format for the input image. Can be one of:
            - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
            - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            If unset, will use the inferred format of the input image.

    Returns:
        `np.ndarray`: The padded image.
    """
    # NOTE(fix): this is an instance method (invoked as `self.pad(...)` from
    # `preprocess`), so it must accept `self`; without it the instance binds to
    # `image` and the keyword call raises TypeError.
    if input_data_format is None:
        input_data_format = infer_channel_dimension_format(image)

    def _expand_for_data_format(values):
        """Convert `values` to the per-axis ((before, after), ...) format expected by np.pad."""
        if isinstance(values, (int, float)):
            values = ((values, values), (values, values))
        elif isinstance(values, tuple) and len(values) == 1:
            values = ((values[0], values[0]), (values[0], values[0]))
        elif (
            isinstance(values, tuple)
            and len(values) == 2
            and isinstance(values[0], int)
        ):
            values = (values, values)
        elif (
            isinstance(values, tuple)
            and len(values) == 2
            and isinstance(values[0], tuple)
        ):
            values = values
        else:
            raise ValueError(f"Unsupported format: {values}")

        # Add a no-op (0, 0) pair for the channel dimension, positioned by layout.
        values = (
            ((0, 0), *values)
            if input_data_format == ChannelDimension.FIRST
            else (*values, (0, 0))
        )

        # Add a no-op pair for the batch dimension, if present. np.pad needs a
        # (before, after) pair per axis; a bare scalar mixed with pairs is a
        # ragged sequence np.pad rejects.
        values = ((0, 0), *values) if image.ndim == 4 else values
        return values

    padding = _expand_for_data_format(padding)

    if mode == PaddingMode.CONSTANT:
        constant_values = _expand_for_data_format(constant_values)
        image = np.pad(
            image, padding, mode="constant", constant_values=constant_values
        )
    elif mode == PaddingMode.REFLECT:
        image = np.pad(image, padding, mode="reflect")
    elif mode == PaddingMode.REPLICATE:
        # numpy calls edge replication "edge" mode.
        image = np.pad(image, padding, mode="edge")
    elif mode == PaddingMode.SYMMETRIC:
        image = np.pad(image, padding, mode="symmetric")
    else:
        raise ValueError(f"Invalid padding mode: {mode}")

    image = (
        to_channel_dimension_format(image, data_format)
        if data_format is not None
        else image
    )
    return image
|
205 |
|
206 |
def resize(
|
207 |
self,
|
|
|
309 |
do_normalize: Optional[bool] = None,
|
310 |
image_mean: Optional[Union[float, List[float]]] = None,
|
311 |
image_std: Optional[Union[float, List[float]]] = None,
|
312 |
+
do_padding: Optional[bool] = None,
|
313 |
+
padding: Optional[int] = None,
|
314 |
return_tensors: Optional[Union[str, TensorType]] = None,
|
315 |
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
|
316 |
**kwargs,
|
|
|
361 |
)
|
362 |
image_mean = image_mean if image_mean is not None else self.image_mean
|
363 |
image_std = image_std if image_std is not None else self.image_std
|
364 |
+
padding = padding if padding is not None else self.padding
|
365 |
|
366 |
size = size if size is not None else self.size
|
367 |
size_dict = get_size_dict(size)
|
|
|
400 |
for image in images
|
401 |
]
|
402 |
|
403 |
+
if do_padding:
|
404 |
+
images = [self.pad(image=image, padding=padding) for image in images]
|
405 |
+
|
406 |
images = [to_channel_dimension_format(image, data_format) for image in images]
|
407 |
|
408 |
data = {"pixel_values": images}
|
preprocessor_config.json
CHANGED
@@ -3,11 +3,13 @@
|
|
3 |
"AutoImageProcessor": "image_processing_spice_cnn.SpiceCNNImageProcessor"
|
4 |
},
|
5 |
"do_normalize": false,
|
|
|
6 |
"do_rescale": false,
|
7 |
"do_resize": true,
|
8 |
"image_mean": 0.5,
|
9 |
"image_processor_type": "SpiceCNNImageProcessor",
|
10 |
"image_std": 0.5,
|
|
|
11 |
"resample": 2,
|
12 |
"rescale_factor": 0.00392156862745098,
|
13 |
"size": {
|
|
|
3 |
"AutoImageProcessor": "image_processing_spice_cnn.SpiceCNNImageProcessor"
|
4 |
},
|
5 |
"do_normalize": false,
|
6 |
+
"do_padding": false,
|
7 |
"do_rescale": false,
|
8 |
"do_resize": true,
|
9 |
"image_mean": 0.5,
|
10 |
"image_processor_type": "SpiceCNNImageProcessor",
|
11 |
"image_std": 0.5,
|
12 |
+
"padding": 0,
|
13 |
"resample": 2,
|
14 |
"rescale_factor": 0.00392156862745098,
|
15 |
"size": {
|