Source code for transformers.models.vit.feature_extraction_vit

# coding=utf-8
# Copyright Google AI and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Feature extractor class for ViT."""

from typing import List, Optional, Union

import numpy as np
from PIL import Image

from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
from ...file_utils import TensorType
from ...image_utils import ImageFeatureExtractionMixin, is_torch_tensor
from ...utils import logging


logger = logging.get_logger(__name__)


[docs]class ViTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin): r""" Constructs a ViT feature extractor. This feature extractor inherits from :class:`~transformers.FeatureExtractionMixin` which contains most of the main methods. Users should refer to this superclass for more information regarding those methods. Args: image_mean (:obj:`int`, defaults to :obj:`[0.5, 0.5, 0.5]`): The sequence of means for each channel, to be used when normalizing images. image_std (:obj:`int`, defaults to :obj:`[0.5, 0.5, 0.5]`): The sequence of standard deviations for each channel, to be used when normalizing images. do_normalize (:obj:`bool`, `optional`, defaults to :obj:`True`): Whether or not to normalize the input with mean and standard deviation. do_resize (:obj:`bool`, `optional`, defaults to :obj:`True`): Whether to resize the input to a certain :obj:`size`. size (:obj:`int`, `optional`, defaults to 224): Resize the input to the given size. Only has an effect if :obj:`do_resize` is set to :obj:`True`. """ model_input_names = ["pixel_values"] def __init__(self, image_mean=None, image_std=None, do_normalize=True, do_resize=True, size=224, **kwargs): super().__init__(**kwargs) self.image_mean = [0.5, 0.5, 0.5] self.image_std = [0.5, 0.5, 0.5] self.do_normalize = do_normalize self.do_resize = do_resize self.size = size
[docs] def __call__( self, images: Union[ Image.Image, np.ndarray, "torch.Tensor", List[Image.Image], List[np.ndarray], List["torch.Tensor"] # noqa ], return_tensors: Optional[Union[str, TensorType]] = None, **kwargs ) -> BatchFeature: """ Main method to prepare for the model one or several image(s). .. warning:: NumPy arrays and PyTorch tensors are converted to PIL images when resizing, so the most efficient is to pass PIL images. Args: images (:obj:`PIL.Image.Image`, :obj:`np.ndarray`, :obj:`torch.Tensor`, :obj:`List[PIL.Image.Image]`, :obj:`List[np.ndarray]`, :obj:`List[torch.Tensor]`): The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a number of channels, H and W are image height and width. return_tensors (:obj:`str` or :class:`~transformers.file_utils.TensorType`, `optional`): If set, will return tensors instead of list of python integers. Acceptable values are: * :obj:`'tf'`: Return TensorFlow :obj:`tf.constant` objects. * :obj:`'pt'`: Return PyTorch :obj:`torch.Tensor` objects. * :obj:`'np'`: Return Numpy :obj:`np.ndarray` objects.s * :obj:`'jax'`: Return JAX :obj:`jnp.ndarray` objects. Returns: :class:`~transformers.BatchFeature`: A :class:`~transformers.BatchFeature` with the following fields: - **pixel_values** -- Pixel values to be fed to a model. """ # Input type checking for clearer error valid_images = False # Check that images has a valid type if isinstance(images, (Image.Image, np.ndarray)) or is_torch_tensor(images): valid_images = True elif isinstance(images, (list, tuple)): if len(images) == 0 or isinstance(images[0], (Image.Image, np.ndarray)) or is_torch_tensor(images[0]): valid_images = True if not valid_images: raise ValueError( "Images must of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single example)," "`List[PIL.Image.Image]`, `List[np.ndarray]` or `List[torch.Tensor]` (batch of examples)." ) is_batched = bool( isinstance(images, (list, tuple)) and (isinstance(images[0], (Image.Image, np.ndarray)) or is_torch_tensor(images[0])) ) if not is_batched: images = [images] # transformations (resizing + normalization) if self.do_resize and self.size is not None: images = [self.resize(image=image, size=self.size) for image in images] if self.do_normalize: images = [self.normalize(image=image, mean=self.image_mean, std=self.image_std) for image in images] # return as BatchFeature data = {"pixel_values": images} encoded_inputs = BatchFeature(data=data, tensor_type=return_tensors) return encoded_inputs