model:
  _component_: models.lora_mmllama3_8b
  lora_attn_modules:
  - q_proj
  - v_proj
  apply_lora_to_mlp: false
  apply_lora_to_output: false
  lora_rank: 8
  lora_alpha: 16
  perception_tokens: 2

tokenizer:
  _component_: models.a2a_tokenizer
  path: checkpoints/Meta-Llama-3-8B-Instruct/tokenizer.model

checkpointer:
  _component_: torchtune.utils.FullModelMetaCheckpointer
  checkpoint_dir: checkpoints/Meta-Llama-3-8B-Instruct/
  checkpoint_files:
  - consolidated.00.pth
  adapter_checkpoint: null
  recipe_checkpoint: null
  output_dir: output
  model_type: LLAMA3
resume_from_checkpoint: false

dataset:
  _component_: ds.EvenBatcher
  dataset:
    _component_: ds.RoundRobinDataset
    datasets:
    - _component_: ds.LlavaInstructDataset
      ib_embed_path: ds/coco_llava_instruct/*.ib_embed.pt
      clip_embed_path: ds/coco_llava_instruct/*.clip_embed.pt
      caption_path: ds/coco_llava_instruct/*.caption.pt
      train_on_input: false
    - _component_: ds.LlavaInstructDataset
      ib_embed_path: ds/vision_flan/*.ib_embed.pt
      clip_embed_path: ds/vision_flan/*.clip_embed.pt
      caption_path: ds/vision_flan/*.caption.pt
      train_on_input: false
    - _component_: ds.CaptionInstructDataset
      ib_embed_path: ds/sam_llava/00.ib_embed.pt
      clip_embed_path: ds/sam_llava/00.clip_embed.pt
      caption_path: ds/sam_llava/00.caption.pt
      train_on_input: false
seed: null
shuffle: true
batch_size: 4

optimizer:
  _component_: torch.optim.AdamW
  weight_decay: 0.01
  lr: 0.0003
lr_scheduler:
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
  num_warmup_steps: 100
loss:
  _component_: torch.nn.CrossEntropyLoss
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 64
compile: false

output_dir: /tmp/lora_finetune_output
metric_logger:
  _component_: torchtune.utils.metric_logging.DiskLogger
  log_dir: ${output_dir}
log_every_n_steps: null

device: cuda
dtype: bf16
enable_activation_checkpointing: false

profiler:
  _component_: torchtune.utils.profiler
  enabled: false
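
# ---------------------------------------------------------------------------
# Usage sketch (hedged): each _component_ entry above is a dotted import path
# that a torchtune-style recipe resolves at start-up (torchtune exposes this
# via torchtune.config.instantiate). Assuming this file is saved as
# config/8B_lora.yaml and the repo's recipe script is recipes/lora_finetune.py
# (both names are illustrative, not confirmed by this file), a run would look
# like:
#
#   tune run recipes/lora_finetune.py --config config/8B_lora.yaml
#
# Leaf values can be overridden from the command line in torchtune's
# key=value style, e.g.:
#
#   tune run recipes/lora_finetune.py --config config/8B_lora.yaml \
#     batch_size=8 optimizer.lr=1e-4
# ---------------------------------------------------------------------------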