File size: 3,625 Bytes
1ce5e18 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ctypes import c_float, sizeof
from enum import Enum
from typing import TYPE_CHECKING, Optional, Union
if TYPE_CHECKING:
from .. import AutoFeatureExtractor, AutoProcessor, AutoTokenizer # tests_ignore
class ParameterFormat(Enum):
Float = c_float
@property
def size(self) -> int:
"""
Number of byte required for this data type
Returns:
Integer > 0
"""
return sizeof(self.value)
def compute_effective_axis_dimension(dimension: int, fixed_dimension: int, num_token_to_add: int = 0) -> int:
"""
Args:
dimension:
fixed_dimension:
num_token_to_add:
Returns:
"""
# < 0 is possible if using a dynamic axis
if dimension <= 0:
dimension = fixed_dimension
dimension -= num_token_to_add
return dimension
def compute_serialized_parameters_size(num_parameters: int, dtype: ParameterFormat) -> int:
"""
Compute the size taken by all the parameters in the given the storage format when serializing the model
Args:
num_parameters: Number of parameters to be saved
dtype: The data format each parameter will be saved
Returns:
Size (in byte) taken to save all the parameters
"""
return num_parameters * dtype.size
def get_preprocessor(model_name: str) -> Optional[Union["AutoTokenizer", "AutoFeatureExtractor", "AutoProcessor"]]:
"""
Gets a preprocessor (tokenizer, feature extractor or processor) that is available for `model_name`.
Args:
model_name (`str`): Name of the model for which a preprocessor are loaded.
Returns:
`Optional[Union[AutoTokenizer, AutoFeatureExtractor, AutoProcessor]]`:
If a processor is found, it is returned. Otherwise, if a tokenizer or a feature extractor exists, it is
returned. If both a tokenizer and a feature extractor exist, an error is raised. The function returns
`None` if no preprocessor is found.
"""
# Avoid circular imports by only importing this here.
from .. import AutoFeatureExtractor, AutoProcessor, AutoTokenizer # tests_ignore
try:
return AutoProcessor.from_pretrained(model_name)
except (ValueError, OSError, KeyError):
tokenizer, feature_extractor = None, None
try:
tokenizer = AutoTokenizer.from_pretrained(model_name)
except (OSError, KeyError):
pass
try:
feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
except (OSError, KeyError):
pass
if tokenizer is not None and feature_extractor is not None:
raise ValueError(
f"Couldn't auto-detect preprocessor for {model_name}. Found both a tokenizer and a feature extractor."
)
elif tokenizer is None and feature_extractor is None:
return None
elif tokenizer is not None:
return tokenizer
else:
return feature_extractor
|