Upload processor
Browse files- image_processing_spice_cnn.py +128 -2
- preprocessor_config.json +2 -0
image_processing_spice_cnn.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from typing import Dict, List, Optional, Union
|
2 |
|
3 |
import numpy as np
|
4 |
|
@@ -9,6 +9,7 @@ from transformers.image_processing_utils import (
|
|
9 |
)
|
10 |
from transformers.image_transforms import (
|
11 |
normalize,
|
|
|
12 |
rescale,
|
13 |
resize,
|
14 |
to_channel_dimension_format,
|
@@ -19,11 +20,23 @@ from transformers.image_utils import (
|
|
19 |
ChannelDimension,
|
20 |
ImageInput,
|
21 |
PILImageResampling,
|
|
|
22 |
make_list_of_images,
|
23 |
to_numpy_array,
|
24 |
valid_images,
|
25 |
)
|
26 |
-
from transformers.utils import TensorType
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
|
29 |
class SpiceCNNImageProcessor(BaseImageProcessor):
|
@@ -67,6 +80,8 @@ class SpiceCNNImageProcessor(BaseImageProcessor):
|
|
67 |
do_normalize: bool = True,
|
68 |
image_mean: Optional[Union[float, List[float]]] = None,
|
69 |
image_std: Optional[Union[float, List[float]]] = None,
|
|
|
|
|
70 |
**kwargs,
|
71 |
) -> None:
|
72 |
super().__init__(**kwargs)
|
@@ -75,6 +90,7 @@ class SpiceCNNImageProcessor(BaseImageProcessor):
|
|
75 |
self.do_resize = do_resize
|
76 |
self.do_rescale = do_rescale
|
77 |
self.do_normalize = do_normalize
|
|
|
78 |
self.size = size
|
79 |
self.resample = resample
|
80 |
self.rescale_factor = rescale_factor
|
@@ -82,6 +98,110 @@ class SpiceCNNImageProcessor(BaseImageProcessor):
|
|
82 |
image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
83 |
)
|
84 |
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
|
86 |
def resize(
|
87 |
self,
|
@@ -189,6 +309,8 @@ class SpiceCNNImageProcessor(BaseImageProcessor):
|
|
189 |
do_normalize: Optional[bool] = None,
|
190 |
image_mean: Optional[Union[float, List[float]]] = None,
|
191 |
image_std: Optional[Union[float, List[float]]] = None,
|
|
|
|
|
192 |
return_tensors: Optional[Union[str, TensorType]] = None,
|
193 |
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
|
194 |
**kwargs,
|
@@ -239,6 +361,7 @@ class SpiceCNNImageProcessor(BaseImageProcessor):
|
|
239 |
)
|
240 |
image_mean = image_mean if image_mean is not None else self.image_mean
|
241 |
image_std = image_std if image_std is not None else self.image_std
|
|
|
242 |
|
243 |
size = size if size is not None else self.size
|
244 |
size_dict = get_size_dict(size)
|
@@ -277,6 +400,9 @@ class SpiceCNNImageProcessor(BaseImageProcessor):
|
|
277 |
for image in images
|
278 |
]
|
279 |
|
|
|
|
|
|
|
280 |
images = [to_channel_dimension_format(image, data_format) for image in images]
|
281 |
|
282 |
data = {"pixel_values": images}
|
|
|
1 |
+
from typing import Dict, List, Optional, Union, Tuple, Iterable
|
2 |
|
3 |
import numpy as np
|
4 |
|
|
|
9 |
)
|
10 |
from transformers.image_transforms import (
|
11 |
normalize,
|
12 |
+
pad,
|
13 |
rescale,
|
14 |
resize,
|
15 |
to_channel_dimension_format,
|
|
|
20 |
ChannelDimension,
|
21 |
ImageInput,
|
22 |
PILImageResampling,
|
23 |
+
infer_channel_dimension_format,
|
24 |
make_list_of_images,
|
25 |
to_numpy_array,
|
26 |
valid_images,
|
27 |
)
|
28 |
+
from transformers.utils import ExplicitEnum, TensorType
|
29 |
+
|
30 |
+
|
31 |
+
class PaddingMode(ExplicitEnum):
    """Supported strategies for padding the height/width edges of an image.

    Values mirror the mode names accepted by ``np.pad`` (with ``REPLICATE``
    mapping to numpy's ``"edge"`` mode).
    """

    # Pad with a constant value.
    CONSTANT = "constant"
    # Mirror the image about the edge, excluding the edge pixel itself.
    REFLECT = "reflect"
    # Repeat the edge pixel outward (numpy's "edge" mode).
    REPLICATE = "replicate"
    # Mirror the image about the edge, including the edge pixel.
    SYMMETRIC = "symmetric"
|
40 |
|
41 |
|
42 |
class SpiceCNNImageProcessor(BaseImageProcessor):
|
|
|
80 |
do_normalize: bool = True,
|
81 |
image_mean: Optional[Union[float, List[float]]] = None,
|
82 |
image_std: Optional[Union[float, List[float]]] = None,
|
83 |
+
do_padding: bool = False,
|
84 |
+
padding: int = 0,
|
85 |
**kwargs,
|
86 |
) -> None:
|
87 |
super().__init__(**kwargs)
|
|
|
90 |
self.do_resize = do_resize
|
91 |
self.do_rescale = do_rescale
|
92 |
self.do_normalize = do_normalize
|
93 |
+
self.do_padding = do_padding
|
94 |
self.size = size
|
95 |
self.resample = resample
|
96 |
self.rescale_factor = rescale_factor
|
|
|
98 |
image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
99 |
)
|
100 |
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
101 |
+
self.padding = padding
|
102 |
+
|
103 |
+
def pad(
    self,
    image: np.ndarray,
    padding: Union[int, Tuple[int, int], Iterable[Tuple[int, int]]],
    mode: PaddingMode = PaddingMode.CONSTANT,
    constant_values: Union[float, Iterable[float]] = 0.0,
    data_format: Optional[Union[str, ChannelDimension]] = None,
    input_data_format: Optional[Union[str, ChannelDimension]] = None,
) -> np.ndarray:
    """
    Pads the `image` with the specified (height, width) `padding` and `mode`.

    Args:
        image (`np.ndarray`):
            The image to pad.
        padding (`int` or `Tuple[int, int]` or `Iterable[Tuple[int, int]]`):
            Padding to apply to the edges of the height, width axes. Can be one of three formats:
            - `((before_height, after_height), (before_width, after_width))` unique pad widths for each axis.
            - `((before, after),)` yields same before and after pad for height and width.
            - `(pad,)` or int is a shortcut for before = after = pad width for all axes.
        mode (`PaddingMode`):
            The padding mode to use. Can be one of:
            - `"constant"`: pads with a constant value.
            - `"reflect"`: pads with the reflection of the vector mirrored on the first and last values of the
              vector along each axis.
            - `"replicate"`: pads with the replication of the last value on the edge of the array along each axis.
            - `"symmetric"`: pads with the reflection of the vector mirrored along the edge of the array.
        constant_values (`float` or `Iterable[float]`, *optional*):
            The value to use for the padding if `mode` is `"constant"`.
        data_format (`str` or `ChannelDimension`, *optional*):
            The channel dimension format for the output image. Can be one of:
            - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
            - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            If unset, will use same as the input image.
        input_data_format (`str` or `ChannelDimension`, *optional*):
            The channel dimension format for the input image. Can be one of:
            - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
            - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            If unset, will use the inferred format of the input image.

    Returns:
        `np.ndarray`: The padded image.
    """
    # NOTE(fix): this is an instance method (invoked as `self.pad(...)` from
    # `preprocess`), so it must accept `self`; without it the instance binds to
    # `image` and the keyword call raises TypeError.
    if input_data_format is None:
        input_data_format = infer_channel_dimension_format(image)

    def _expand_for_data_format(values):
        """Convert `values` to the per-axis ((before, after), ...) format expected by np.pad."""
        if isinstance(values, (int, float)):
            values = ((values, values), (values, values))
        elif isinstance(values, tuple) and len(values) == 1:
            values = ((values[0], values[0]), (values[0], values[0]))
        elif (
            isinstance(values, tuple)
            and len(values) == 2
            and isinstance(values[0], int)
        ):
            values = (values, values)
        elif (
            isinstance(values, tuple)
            and len(values) == 2
            and isinstance(values[0], tuple)
        ):
            values = values
        else:
            raise ValueError(f"Unsupported format: {values}")

        # Add a no-op (0, 0) pair for the channel dimension, positioned by layout.
        values = (
            ((0, 0), *values)
            if input_data_format == ChannelDimension.FIRST
            else (*values, (0, 0))
        )

        # Add a no-op pair for the batch dimension, if present. np.pad needs a
        # (before, after) pair per axis; a bare scalar mixed with pairs is a
        # ragged sequence np.pad rejects.
        values = ((0, 0), *values) if image.ndim == 4 else values
        return values

    padding = _expand_for_data_format(padding)

    if mode == PaddingMode.CONSTANT:
        constant_values = _expand_for_data_format(constant_values)
        image = np.pad(
            image, padding, mode="constant", constant_values=constant_values
        )
    elif mode == PaddingMode.REFLECT:
        image = np.pad(image, padding, mode="reflect")
    elif mode == PaddingMode.REPLICATE:
        # numpy calls edge replication "edge" mode.
        image = np.pad(image, padding, mode="edge")
    elif mode == PaddingMode.SYMMETRIC:
        image = np.pad(image, padding, mode="symmetric")
    else:
        raise ValueError(f"Invalid padding mode: {mode}")

    image = (
        to_channel_dimension_format(image, data_format)
        if data_format is not None
        else image
    )
    return image
|
205 |
|
206 |
def resize(
|
207 |
self,
|
|
|
309 |
do_normalize: Optional[bool] = None,
|
310 |
image_mean: Optional[Union[float, List[float]]] = None,
|
311 |
image_std: Optional[Union[float, List[float]]] = None,
|
312 |
+
do_padding: Optional[bool] = None,
|
313 |
+
padding: Optional[int] = None,
|
314 |
return_tensors: Optional[Union[str, TensorType]] = None,
|
315 |
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
|
316 |
**kwargs,
|
|
|
361 |
)
|
362 |
image_mean = image_mean if image_mean is not None else self.image_mean
|
363 |
image_std = image_std if image_std is not None else self.image_std
|
364 |
+
padding = padding if padding is not None else self.padding
|
365 |
|
366 |
size = size if size is not None else self.size
|
367 |
size_dict = get_size_dict(size)
|
|
|
400 |
for image in images
|
401 |
]
|
402 |
|
403 |
+
if do_padding:
|
404 |
+
images = [self.pad(image=image, padding=padding) for image in images]
|
405 |
+
|
406 |
images = [to_channel_dimension_format(image, data_format) for image in images]
|
407 |
|
408 |
data = {"pixel_values": images}
|
preprocessor_config.json
CHANGED
@@ -3,11 +3,13 @@
|
|
3 |
"AutoImageProcessor": "image_processing_spice_cnn.SpiceCNNImageProcessor"
|
4 |
},
|
5 |
"do_normalize": false,
|
|
|
6 |
"do_rescale": false,
|
7 |
"do_resize": true,
|
8 |
"image_mean": 0.5,
|
9 |
"image_processor_type": "SpiceCNNImageProcessor",
|
10 |
"image_std": 0.5,
|
|
|
11 |
"resample": 2,
|
12 |
"rescale_factor": 0.00392156862745098,
|
13 |
"size": {
|
|
|
3 |
"AutoImageProcessor": "image_processing_spice_cnn.SpiceCNNImageProcessor"
|
4 |
},
|
5 |
"do_normalize": false,
|
6 |
+
"do_padding": false,
|
7 |
"do_rescale": false,
|
8 |
"do_resize": true,
|
9 |
"image_mean": 0.5,
|
10 |
"image_processor_type": "SpiceCNNImageProcessor",
|
11 |
"image_std": 0.5,
|
12 |
+
"padding": 0,
|
13 |
"resample": 2,
|
14 |
"rescale_factor": 0.00392156862745098,
|
15 |
"size": {
|