# LoRA fine-tuning configuration.
#
# NOTE(review): this file was recovered from a whitespace-collapsed copy, so
# the nesting below is reconstructed from key grouping. Confirm the placement
# of top-level vs. nested keys against the recipe that consumes this config.

# Model component and its LoRA-related arguments.
model:
  _component_: models.lora_mmllama3_8b
  lora_attn_modules:
    - q_proj
    - v_proj
  apply_lora_to_mlp: false
  apply_lora_to_output: false
  lora_rank: 32
  lora_alpha: 64
  perception_tokens: 2
  use_clip: false

# Tokenizer component and the path to its model file.
tokenizer:
  _component_: models.a2a_tokenizer
  path: models/tokenizer.model

# Checkpoint loading / saving.
checkpointer:
  _component_: torchtune.utils.FullModelMetaCheckpointer
  checkpoint_dir: smtst2/
  checkpoint_files:
    - meta_model_1.pt
  adapter_checkpoint: null
  recipe_checkpoint: null
  output_dir: output_checkpoints/experiment_1
  model_type: LLAMA3
resume_from_checkpoint: false
interim_checkpoint_steps: 10000
interim_gen_steps: null
max_new_tokens: 100
temperature: 0.6
top_k: 300

# Training data: a batcher wrapping a round-robin mix of datasets.
dataset:
  _component_: ds.EvenBatcher
  buffer_size: 1
  dataset:
    _component_: ds.RoundRobinDataset
    datasets:
      - _component_: ds.OmegaVideoCaptionDataset
        length: 500000
      - _component_: ds.LlavaInstructDataset
        dataset_path: ds/coco_llava_instruct/output.parquet
        train_on_input: false
      - _component_: ds.LlavaInstructDataset
        dataset_path: ds/vision_flan/output.parquet
        train_on_input: false
      - _component_: ds.CaptionInstructDataset
        dataset_path: ds/sam_llava/output.parquet
        train_on_input: false
      # - _component_: ds.BagelLlama3Dataset
      #   parquet_path: ds/bagel-llama-3-v1.0/bagel-input-output-v1.0.parquet
      #   train_on_input: false
seed: null
shuffle: true
batch_size: 64

# Optimizer, learning-rate schedule and loss.
# NOTE(review): weight_decay 1.5 and lr 1.0 are far above the documented
# torch.optim.AdamW defaults (weight_decay=1e-2, lr=1e-3). The values are left
# unchanged here; confirm they are intentional before launching a run.
optimizer:
  _component_: torch.optim.AdamW
  weight_decay: 1.5
  lr: 1.0
lr_scheduler:
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
  num_warmup_steps: 1000
loss:
  _component_: torch.nn.CrossEntropyLoss

# Training loop settings.
epochs: 3
max_steps_per_epoch: null
gradient_accumulation_steps: 192
compile: false

# Logging. log_dir is interpolated from the top-level output_dir key.
output_dir: /tmp/lora_finetune_output
metric_logger:
  _component_: torchtune.utils.metric_logging.DiskLogger
  log_dir: ${output_dir}
log_every_n_steps: null

# Runtime environment.
device: cuda
dtype: bf16
enable_activation_checkpointing: false

profiler:
  _component_: torchtune.utils.profiler
  enabled: false

# Generation settings used at inference time.
inference:
  prompt_template: 'Video: {video} Caption the previous video.'
  max_new_tokens: 300
  temperature: 0.6
  top_k: 300
  quantizer: null

# NOTE(review): the file originally ended with a second, kebab-case spelling
# of the accumulation setting carrying a different value (32 vs. the 192 set
# by gradient_accumulation_steps above). Every other key in this file is
# snake_case, so this spelling is presumably not the one the consumer reads.
# It is kept commented out until the intended value is confirmed.
# gradient-accumulation-steps: 32