| model: |
| _component_: models.lora_mmllama3_8b |
| lora_attn_modules: |
| - q_proj |
| - v_proj |
| apply_lora_to_mlp: false |
| apply_lora_to_output: false |
| lora_rank: 16 |
| lora_alpha: 32 |
| perception_tokens: 2 |
| use_clip: false |
| tokenizer: |
| _component_: models.a2a_tokenizer |
| path: models/tokenizer.model |
| checkpointer: |
| _component_: torchtune.utils.FullModelMetaCheckpointer |
| checkpoint_dir: |
| checkpoint_files: |
| - |
| adapter_checkpoint: null |
| recipe_checkpoint: null |
| output_dir: output_checkpoints/experiment_1 |
| model_type: LLAMA3 |
|
|
| resume_from_checkpoint: false |
| interim_checkpoint_steps: 20000 |
| interim_gen_steps: null |
| max_new_tokens: 100 |
| temperature: 0.6 |
| top_k: 225 |
| dataset: |
| _component_: ds.EvenBatcher |
| buffer_size: 1000 |
| dataset: |
| _component_: ds.RoundRobinDataset |
| datasets: |
| - _component_: ds.OmegaVideoCaptionDataset |
| length: 3000 |
| - _component_: ds.LlavaInstructDataset |
| dataset_path: ds/coco_llava_instruct/output.parquet |
| train_on_input: false |
| - _component_: ds.LlavaInstructDataset |
| dataset_path: ds/vision_flan/output.parquet |
| train_on_input: false |
| - _component_: ds.CaptionInstructDataset |
| dataset_path: ds/sam_llava/output.parquet |
| train_on_input: false |
| seed: null |
| shuffle: true |
| batch_size: 6 |
| optimizer: |
| _component_: torch.optim.AdamW |
| weight_decay: 0.0 |
| lr: 0.0 |
| |
| lr_scheduler: |
| _component_: torchtune.modules.get_cosine_schedule_with_warmup |
| num_warmup_steps: 00 |
| loss: |
| _component_: torch.nn.CrossEntropyLoss |
|
|
| epochs: 6 |
| max_steps_per_epoch: null |
| gradient_accumulation_steps: 32 |
| compile: true |
| output_dir: /tmp/lora_finetune_output |
| metric_logger: |
| _component_: torchtune.utils.metric_logging.DiskLogger |
| log_dir: ${output_dir} |
| log_every_n_steps: null |
| device: cuda |
| dtype: bf16 |
| enable_activation_checkpointing: false |
| profiler: |
| _component_: torchtune.utils.profiler |
| enabled: false |
| inference: |
| prompt_template: 'Video: |
| |
| {video} |
| |
| Caption the previous video.' |
| max_new_tokens: 300 |
| temperature: 0.6 |
| top_k: 300 |
| quantizer: null |
| gradient-accumulation-steps: 32 |
|
|