ModelConfig
class lighteval.models.model_config.BaseModelConfig
< source >( pretrained: str accelerator: Accelerator = None tokenizer: typing.Optional[str] = None multichoice_continuations_start_space: typing.Optional[bool] = None pairwise_tokenization: bool = False subfolder: typing.Optional[str] = None revision: str = 'main' batch_size: int = -1 max_gen_toks: typing.Optional[int] = 256 max_length: typing.Optional[int] = None add_special_tokens: bool = True model_parallel: typing.Optional[bool] = None dtype: typing.Union[str, torch.dtype, NoneType] = None device: typing.Union[int, str] = 'cuda' quantization_config: typing.Optional[transformers.utils.quantization_config.BitsAndBytesConfig] = None trust_remote_code: bool = False use_chat_template: bool = False compile: bool = False )
Parameters
- pretrained (str) — HuggingFace Hub model ID name or the path to a pre-trained model to load. This is effectively the `pretrained_model_name_or_path` argument of `from_pretrained` in the HuggingFace `transformers` API.
- accelerator (Accelerator) — accelerator to use for model training.
- tokenizer (Optional[str]) — HuggingFace Hub tokenizer ID that will be used for tokenization.
- multichoice_continuations_start_space (Optional[bool]) — Whether to add a space at the start of each continuation in multichoice generation. For example, with the context “What is the capital of France?” and the choices “Paris” and “London”, the inputs will be tokenized as “What is the capital of France? Paris” and “What is the capital of France? London”. True adds a space, False strips a space, and None does nothing.
- pairwise_tokenization (bool) — Whether to tokenize the context and continuation separately or together.
- subfolder (Optional[str]) — The subfolder within the model repository.
- revision (str) — The revision of the model.
- batch_size (int) — The batch size for model training.
- max_gen_toks (Optional[int]) — The maximum number of tokens to generate.
- max_length (Optional[int]) — The maximum length of the generated output.
- add_special_tokens (bool, optional, defaults to True) — Whether to add special tokens to the input sequences. If `None`, the default value will be set to `True` for seq2seq models (e.g. T5) and `False` for causal models.
- model_parallel (bool, optional, defaults to None) — True/False: force to use or not the `accelerate` library to load a large model across multiple devices. Default: None, which corresponds to comparing the number of processes with the number of GPUs. If it’s smaller => model-parallelism, else not.
- dtype (Union[str, torch.dtype], optional, defaults to None) — Converts the model weights to `dtype`, if specified. Strings get converted to `torch.dtype` objects (e.g. `float16` -> `torch.float16`). Use `dtype="auto"` to derive the type from the model’s weights.
- device (Union[int, str]) — device to use for model training.
- quantization_config (Optional[BitsAndBytesConfig]) — quantization configuration for the model, manually provided to load a normally floating point model at a quantized precision. Needed for 4-bit and 8-bit precision.
- trust_remote_code (bool) — Whether to trust remote code during model loading.
Base configuration class for models.
Methods:
- post_init(): Performs post-initialization checks on the configuration.
- _init_configs(model_name, env_config): Initializes the model configuration.
- init_configs(env_config): Initializes the model configuration using the environment configuration.
- get_model_sha(): Retrieves the SHA of the model.
class lighteval.models.model_config.AdapterModelConfig
< source >( pretrained: str accelerator: Accelerator = None tokenizer: typing.Optional[str] = None multichoice_continuations_start_space: typing.Optional[bool] = None pairwise_tokenization: bool = False subfolder: typing.Optional[str] = None revision: str = 'main' batch_size: int = -1 max_gen_toks: typing.Optional[int] = 256 max_length: typing.Optional[int] = None add_special_tokens: bool = True model_parallel: typing.Optional[bool] = None dtype: typing.Union[str, torch.dtype, NoneType] = None device: typing.Union[int, str] = 'cuda' quantization_config: typing.Optional[transformers.utils.quantization_config.BitsAndBytesConfig] = None trust_remote_code: bool = False use_chat_template: bool = False compile: bool = False base_model: str = None )
class lighteval.models.model_config.DeltaModelConfig
< source >( pretrained: str accelerator: Accelerator = None tokenizer: typing.Optional[str] = None multichoice_continuations_start_space: typing.Optional[bool] = None pairwise_tokenization: bool = False subfolder: typing.Optional[str] = None revision: str = 'main' batch_size: int = -1 max_gen_toks: typing.Optional[int] = 256 max_length: typing.Optional[int] = None add_special_tokens: bool = True model_parallel: typing.Optional[bool] = None dtype: typing.Union[str, torch.dtype, NoneType] = None device: typing.Union[int, str] = 'cuda' quantization_config: typing.Optional[transformers.utils.quantization_config.BitsAndBytesConfig] = None trust_remote_code: bool = False use_chat_template: bool = False compile: bool = False base_model: str = None )
class lighteval.models.model_config.InferenceEndpointModelConfig
< source >( endpoint_name: str = None model_name: str = None should_reuse_existing: bool = False accelerator: str = 'gpu' model_dtype: str = None vendor: str = 'aws' region: str = 'us-east-1' instance_size: str = None instance_type: str = None framework: str = 'pytorch' endpoint_type: str = 'protected' add_special_tokens: bool = True revision: str = 'main' namespace: str = None image_url: str = None env_vars: dict = None )
class lighteval.models.model_config.InferenceModelConfig
< source >( model: str add_special_tokens: bool = True )
class lighteval.models.model_config.TGIModelConfig
< source >( inference_server_address: str inference_server_auth: str model_id: str )
class lighteval.models.model_config.VLLMModelConfig
< source >( pretrained: str gpu_memory_utilisation: float = 0.9 revision: str = 'main' dtype: str | None = None tensor_parallel_size: int = 1 pipeline_parallel_size: int = 1 data_parallel_size: int = 1 max_model_length: int | None = None swap_space: int = 4 seed: int = 1234 trust_remote_code: bool = False use_chat_template: bool = False add_special_tokens: bool = True multichoice_continuations_start_space: bool = True pairwise_tokenization: bool = False subfolder: typing.Optional[str] = None temperature: float = 0.6 )