laurencer
/

Colourful-Llama7b-Alpaca-Tune-4epochs

Model card Files Files and versions Community

Colourful-Llama7b-Alpaca-Tune-4epochs / colorful /custom_params.py

laurencer

Step 6000

261dbc8 verified 9 months ago

raw

history blame contribute delete

4.52 kB

	from dataclasses import dataclass, field, fields
	from typing import List, Optional

	from torchtune.datasets import ALL_DATASETS
	from torchtune.models import ALL_MODELS, ALL_TOKENIZERS
	from torchtune.utils.metric_logging import ALL_METRIC_LOGGERS
	from torchtune.utils.precision import PRECISION_STR_TO_DTYPE


	@dataclass
	class ColoringFinetuneParams:
	    """Arguments for the finetune_llm recipe, including color-layer options.

	    String fields that default to ``""`` (``model_checkpoint``,
	    ``tokenizer_checkpoint`` and ``dataset``) are effectively required:
	    ``__post_init__`` rejects any field whose value is the empty string.

	    Args:
	        model_checkpoint (str): Local path to load model checkpoint from. Required.
	        color_layer_initialization (str): Initialization option for the color layer.
	            Not validated here; accepted values are defined by the code that consumes it.
	        norm_before_color_layer (bool): Presumably enables a normalization step ahead of
	            the color layer -- confirm against the model code.
	        tokenizer_checkpoint (str): Local path to load tokenizer checkpoint from. Required.
	        hf_repo_id (Optional[str]): Presumably a Hugging Face Hub repository ID used by the
	            recipe -- confirm against the recipe. Not validated here.
	        checkpoint_every_n_steps (Optional[int]): Presumably the step interval between
	            intermediate checkpoints -- confirm against the recipe.
	        dataset (str): String specifying dataset to use. See ``torchtune.datasets.get_dataset``
	            for options. Currently, only predefined datasets in library are supported. Required.
	        train_on_input (bool): Presumably whether prompt tokens contribute to the loss --
	            confirm against the dataset code.
	        shuffle (bool): Whether to shuffle dataset.
	        batch_size (int): Batch size to use for training.
	        optimizer (str): String specifying optimizer to use. See ``torchtune.optim.get_optimizer``
	            for options.
	        lr (float): Learning rate to use for optimizer.
	        loss (str): String specifying loss function to use. See ``torchtune.losses.get_loss``
	            for options.
	        gradient_accumulation_steps (int): Presumably the number of batches accumulated per
	            optimizer step -- confirm against the recipe.
	        compile (bool): Presumably whether the recipe compiles the model -- confirm against
	            the recipe.
	        epochs (int): Number of epochs to train for.
	        max_steps_per_epoch (Optional[int]): Maximum number of steps to take per epoch.
	        resume_from_checkpoint (bool): Whether to resume fine-tuning from a previous checkpoint.
	        run_generation (Optional[int]): Run eval on a prompt every ``run_generation`` steps.
	            Set to 0 to disable. Defaults to ``None``; how the recipe treats ``None`` is not
	            visible here.
	        cpu_offload (bool): Whether to offload model to CPU. Requires ``device == "cuda"``.
	        enable_fsdp (bool): Whether FSDP is enabled. Defaults to ``True``, so it must be set
	            to ``False`` explicitly when ``device`` is ``"cpu"``.
	        enable_activation_checkpointing (bool): Whether to use activation checkpointing.
	        device (str): Device to use for training. Options are "cpu" and "cuda".
	        dtype (str): Data type to use for training. Must be a key of
	            ``PRECISION_STR_TO_DTYPE``.
	        seed (Optional[int]): Random seed to use for training.
	        output_dir (str): Local path to save checkpoints and logs to.
	        metric_logger_type (str): String specifying metric logger to use. Must be a member of
	            ``ALL_METRIC_LOGGERS``. See ``torchtune.utils.get_metric_logger`` for options.
	        project (Optional[str]): Project name to use for logging. Used by ``WandBLogger``.
	        log_every_n_steps (Optional[int]): Presumably the step interval between metric
	            log entries -- confirm against the recipe.

	    Raises:
	        TypeError: If any field is set to the empty string ``""``.
	        ValueError: If ``cpu_offload`` is ``True`` but ``device`` is not ``"cuda"``.
	        ValueError: If ``enable_fsdp`` is ``True`` and ``device`` is ``"cpu"``.
	        ValueError: If ``metric_logger_type`` is not in ``ALL_METRIC_LOGGERS``.
	        ValueError: If ``dtype`` is not a key of ``PRECISION_STR_TO_DTYPE``.
	    """

	    # Model
	    model_checkpoint: str = ""  # "" means "not provided"; rejected in __post_init__

	    color_layer_initialization: str = "default"
	    norm_before_color_layer: bool = False

	    # Tokenizer
	    tokenizer_checkpoint: str = ""  # "" means "not provided"; rejected in __post_init__

	    hf_repo_id: Optional[str] = None
	    checkpoint_every_n_steps: Optional[int] = None

	    # Dataset and Sampler
	    dataset: str = ""  # "" means "not provided"; rejected in __post_init__
	    train_on_input: bool = True
	    shuffle: bool = True
	    batch_size: int = 2

	    # Optimizer and Scheduler
	    optimizer: str = "SGD"
	    lr: float = 2e-5
	    loss: str = "CrossEntropyLoss"
	    gradient_accumulation_steps: int = 1

	    # Training
	    compile: bool = False
	    epochs: int = 3
	    max_steps_per_epoch: Optional[int] = None
	    resume_from_checkpoint: bool = False
	    run_generation: Optional[int] = None

	    # Distributed
	    cpu_offload: bool = False
	    enable_fsdp: bool = True
	    enable_activation_checkpointing: bool = True

	    # Environment
	    device: str = "cuda"
	    dtype: str = "fp16"
	    seed: Optional[int] = None

	    # Logging
	    output_dir: str = "/tmp/full_finetune_output"
	    metric_logger_type: str = "disk"
	    project: Optional[str] = None
	    log_every_n_steps: Optional[int] = None

	    def __post_init__(self):
	        """Validate the populated fields; see the class ``Raises`` section."""
	        # An empty string is the "unset" sentinel for required string fields.
	        # The check covers every field, so explicitly passing "" for any of
	        # them (e.g. ``output_dir``) is rejected as well. Fields are visited
	        # in declaration order, so the first offending field is reported.
	        for param in fields(self):
	            if getattr(self, param.name) == "":
	                raise TypeError(f"{param.name} needs to be specified")

	        # Only the device string is inspected here; the GPU count mentioned
	        # in the message is not checked by this class.
	        if self.cpu_offload and self.device != "cuda":
	            raise ValueError(
	                "Cannot offload model to CPU if device is not cuda or <= 1 GPUs."
	            )
	        if self.enable_fsdp and self.device == "cpu":
	            raise ValueError("FSDP is not supported on CPU.")

	        if self.metric_logger_type not in ALL_METRIC_LOGGERS:
	            raise ValueError(
	                f"Metric logger not recognized. Expected one of {ALL_METRIC_LOGGERS}, received {self.metric_logger_type}."
	            )
	        if self.dtype not in PRECISION_STR_TO_DTYPE:
	            raise ValueError(
	                f"Dtype {self.dtype} must be one of {', '.join(PRECISION_STR_TO_DTYPE.keys())} for finetuning."
	            )