from dataclasses import dataclass, field
from typing import Optional

import transformers
@dataclass
class ModelArguments:
    # Checkpoint paths and language-model backbone selection.
    cache_dir: Optional[str] = field(default=None)
    model_name_or_path: Optional[str] = field(default="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    tokenizer_name_or_path: Optional[str] = field(default=None)
    attn_implementation: Optional[str] = field(default=None)

    # Vision tower(s) and multimodal connector settings.
    vision_tower: Optional[str] = field(default='')
    vision_tower2: Optional[str] = field(default='')
    connector_type: str = field(default='linear')
    mm_vision_select_layer: Optional[int] = field(default=-1)  # default to the last layer
    mm_patch_merge_type: Optional[str] = field(default='flat')
    mm_vision_select_feature: Optional[str] = field(default="patch")

    # Resampler-connector hyperparameters (used when the connector is a resampler).
    resampler_hidden_size: Optional[int] = field(default=768)
    num_queries: Optional[int] = field(default=128)
    num_resampler_layers: Optional[int] = field(default=3)

    # Tokenizer settings.
    model_max_length: int = field(
        default=512,
        metadata={
            "help": "Maximum sequence length. Sequences will be right padded (and possibly truncated)."
        },
    )
    tokenizer_use_fast: bool = field(default=False)
    tokenizer_padding_side: str = field(default='right')

@dataclass
class DataArguments:
    data_path: Optional[str] = field(default=None,
                                     metadata={"help": "Path to the training data."})
    lazy_preprocess: bool = False
    is_multimodal: bool = True
    image_folder: Optional[str] = field(default=None)
    image_aspect_ratio: str = 'square'
    conv_version: str = 'pretrain'

@dataclass
class TrainingArguments(transformers.TrainingArguments):
    training_recipe: str = field(default='common')

    # Per-module tuning strategy.
    tune_type_llm: str = field(default="frozen")  # support only: frozen, full, lora, qlora_int4, qlora_int8
    tune_type_vision_tower: str = field(default="frozen")  # support only: frozen, full, partially-tune
    tune_vision_tower_from_layer: Optional[int] = field(default=10)
    tune_type_connector: str = field(default="full")  # support only: frozen, full
    tune_embed_tokens: bool = field(default=False)

    optim: str = field(default="adamw_torch")
    remove_unused_columns: bool = field(default=False)

    # Quantization (QLoRA) settings.
    double_quant: bool = field(
        default=True,
        metadata={"help": "Compress the quantization statistics through double quantization."}
    )
    quant_type: str = field(
        default="nf4",
        metadata={"help": "Quantization data type to use. Should be one of `fp4` or `nf4`."}
    )
    bits: int = field(
        default=16,
        metadata={"help": "How many bits to use."}
    )

    # LoRA hyperparameters.
    lora_r: int = 64
    lora_alpha: int = 16
    lora_dropout: float = 0.05
    lora_weight_path: str = ""
    lora_bias: str = "none"

    # Optional per-module learning rates and miscellaneous options.
    mm_projector_lr: Optional[float] = None
    vision_tower_lr: Optional[float] = None
    group_by_modality_length: bool = field(default=False)
    pretrained_model_path: Optional[str] = field(default=None)
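

# --- Usage sketch (added for illustration; not part of the original module) ---
# The three dataclasses above are meant to be parsed together with
# transformers.HfArgumentParser. The snippet below is a minimal, hedged example:
# the CLI values are placeholders, and mapping the quantization fields onto a
# BitsAndBytesConfig assumes a common QLoRA-style setup rather than this repo's
# exact training recipe.
if __name__ == "__main__":
    import torch

    parser = transformers.HfArgumentParser(
        (ModelArguments, DataArguments, TrainingArguments)
    )
    model_args, data_args, training_args = parser.parse_args_into_dataclasses(
        args=[
            "--output_dir", "./checkpoints/debug",  # required by transformers.TrainingArguments
            "--model_name_or_path", "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            "--bits", "4",
            "--tune_type_llm", "qlora_int4",
        ]
    )

    # Build a BitsAndBytesConfig from the quantization fields the way QLoRA-style
    # recipes typically do (assumption: 4/8-bit loading via bitsandbytes).
    quant_config = None
    if training_args.bits in (4, 8):
        quant_config = transformers.BitsAndBytesConfig(
            load_in_4bit=training_args.bits == 4,
            load_in_8bit=training_args.bits == 8,
            bnb_4bit_use_double_quant=training_args.double_quant,
            bnb_4bit_quant_type=training_args.quant_type,
            bnb_4bit_compute_dtype=torch.bfloat16 if training_args.bf16 else torch.float16,
        )

    print(model_args.model_name_or_path, data_args.conv_version, quant_config)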