# llama3.2-vlm-torchtune / torchtune_config.yaml
# gautamgc17's picture
# Upload folder using huggingface_hub
# 8497225 verified
# Model builder and its keyword arguments. Everything below is nested under
# `model` so that this section's `_component_` and `image_size` do not collide
# with the identically named keys of the other sections.
model:
  _component_: torchtune.models.llama3_2_vision.qlora_llama3_2_vision_11b
  # Per-sub-network training mode: the decoder is frozen, while the encoder
  # and fusion parts are trained through LoRA adapters.
  decoder_trainable: frozen
  encoder_trainable: lora
  fusion_trainable: lora
  # Attention projections that LoRA is applied to.
  lora_attn_modules:
    - q_proj
    - v_proj
    - output_proj
  apply_lora_to_mlp: true
  apply_lora_to_output: false
  lora_rank: 8
  lora_alpha: 16  # 2 x lora_rank
  lora_dropout: 0.0
  # Same value as the tokenizer section's image_size (560); presumably the two
  # must agree -- confirm before changing either one.
  image_size: 560
# Tokenizer / image transform and its arguments, nested so that `_component_`
# and `image_size` belong to this section only.
tokenizer:
  _component_: torchtune.models.llama3_2_vision.llama3_2_vision_transform
  # Machine-specific location under /tmp; point this at the downloaded
  # checkpoint's tokenizer file.
  path: /tmp/Llama-3.2-11B-Vision-Instruct/original/tokenizer.model
  # Same value as the model section's image_size (560).
  image_size: 560
  max_seq_len: 8192
# Checkpoint loading/saving. The checkpointer's arguments are nested under
# `checkpointer`, and the shard-name pattern is nested one level further under
# `checkpoint_files` (left flat, `checkpoint_files` would parse as null).
checkpointer:
  _component_: torchtune.training.FullModelHFCheckpointer
  checkpoint_dir: /tmp/Llama-3.2-11B-Vision-Instruct/
  # Shard file names are described by a pattern instead of an explicit list.
  checkpoint_files:
    filename_format: model-{}-of-{}.safetensors
    # Quoted so the zero-padded value stays a string.
    max_filename: '00005'
  recipe_checkpoint: null
  # NOTE(review): identical to checkpoint_dir, not the run-level output_dir
  # defined further down -- confirm that saving next to the base weights is
  # intended.
  output_dir: /tmp/Llama-3.2-11B-Vision-Instruct/
  model_type: LLAMA3_VISION
# Recipe-level flags; these sit at the top level in the stock torchtune
# layout rather than inside `checkpointer` -- verify against the recipe in use.
resume_from_checkpoint: false
save_adapter_weights_only: false
# Dataset builder and its arguments.
dataset:
  _component_: torchtune.datasets.multimodal.the_cauldron_dataset
  subset: diagram_image_to_text
# Data-loading settings. These sit at the top level in the stock torchtune
# layout rather than inside `dataset` -- verify against the recipe in use.
seed: null
shuffle: true
collate_fn: torchtune.data.padded_collate_tiled_images_and_mask
# Training-loop length and batch sizing (top-level recipe settings).
epochs: 1
# null: no per-epoch step cap is configured here.
max_steps_per_epoch: null
batch_size: 2
# With batch_size 2 this gives 2 x 8 = 16 samples per optimizer step,
# assuming batch_size is counted per device -- TODO confirm.
gradient_accumulation_steps: 8
# Optimizer class and its constructor arguments, nested so `_component_`
# belongs to this section only.
optimizer:
  _component_: torch.optim.AdamW
  fused: true
  weight_decay: 0.01
  lr: 0.0001  # 1e-4
# Learning-rate schedule: cosine schedule with warmup, nested so
# `_component_` belongs to this section only.
lr_scheduler:
  _component_: torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
  num_warmup_steps: 100
# Loss function; it takes no extra arguments here, but `_component_` must be
# nested under `loss` so it does not collide with other sections.
loss:
  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
# Gradient clipping threshold and compile toggle (compile is off).
clip_grad_norm: 1.0
compile: false
# Execution device.
device: cuda
# Memory / precision settings.
enable_activation_checkpointing: true
dtype: bf16
# Run-level output directory; the profiler settings reference it through the
# `${output_dir}` interpolation.
# NOTE(review): the profiler also has an `output_dir` key -- it must be nested
# under `profiler`, otherwise it overrides this one.
output_dir: /tmp/qlora-llama3.2-vision-finetune
# Metric logging backend (Weights & Biases logger) and its arguments.
metric_logger:
  _component_: torchtune.training.metric_logging.WandBLogger
  project: llama-3.2-vlm-torchtune
# Logging cadence flags. These sit at the top level in the stock torchtune
# layout rather than inside `metric_logger` -- verify against the recipe in use.
log_every_n_steps: 1
log_peak_memory_stats: true
# Profiler settings (currently disabled). All keys are nested under
# `profiler`; in particular `output_dir` must live here, because at the top
# level it would replace the run-level `output_dir` with a value that refers
# to itself.
profiler:
  _component_: torchtune.training.setup_torch_profiler
  enabled: false
  # Trace output location, derived from the run-level output_dir.
  output_dir: ${output_dir}/profiling_outputs
  # Activity types to trace.
  cpu: true
  cuda: true
  # Trace detail options.
  profile_memory: false
  with_stack: false
  record_shapes: true
  with_flops: false
  # Profiling schedule, in steps.
  wait_steps: 5
  warmup_steps: 3
  active_steps: 2
  num_cycles: 1