File size: 2,117 Bytes
4b4d144
 
 
 
 
 
 
db0845f
 
4b4d144
 
 
 
6a87e91
4b4d144
 
6a87e91
4b4d144
 
 
 
 
 
 
db0845f
4b4d144
db0845f
6a87e91
db0845f
4b4d144
 
6a87e91
4b4d144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db0845f
4b4d144
 
db0845f
6a87e91
4b4d144
 
db0845f
4b4d144
 
db0845f
4b4d144
db0845f
4b4d144
6a87e91
4b4d144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db0845f
 
 
4b4d144
6a87e91
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# LoRA fine-tuning configuration for a multimodal Llama-3-8B recipe
# (torchtune-style component instantiation via `_component_` keys).

# --- Model: LoRA applied to attention q/v projections only ---
model:
  _component_: models.lora_mmllama3_8b
  lora_attn_modules:
  - q_proj
  - v_proj
  apply_lora_to_mlp: false
  apply_lora_to_output: false
  lora_rank: 16
  lora_alpha: 32
  perception_tokens: 2
  use_clip: false
tokenizer:
  _component_: models.a2a_tokenizer
  path: models/tokenizer.model

# --- Checkpointing ---
checkpointer:
  _component_: torchtune.utils.FullModelMetaCheckpointer
  checkpoint_dir: checkpoints/Meta-Llama-3-8B-Instruct/original
  checkpoint_files:
  - consolidated.00.pth
  adapter_checkpoint: null
  recipe_checkpoint: null
  output_dir: output_checkpoints/experiment_1
  model_type: LLAMA3
resume_from_checkpoint: false
interim_checkpoint_steps: 15000
# null disables interim generation during training.
interim_gen_steps: null

# Generation parameters used during training-time sampling.
# NOTE(review): the `inference:` section below carries its own
# max_new_tokens/temperature/top_k — confirm which consumer reads which.
max_new_tokens: 88
temperature: 0.6
top_k: 200

# --- Data: round-robin over four sources, evenly batched ---
dataset:
  _component_: ds.EvenBatcher
  buffer_size: 36
  dataset:
    _component_: ds.RoundRobinDataset
    datasets:
    - _component_: ds.OmegaVideoCaptionDataset
      length: 500000
    - _component_: ds.LlavaInstructDataset
      dataset_path: ds/coco_llava_instruct/output.parquet
      train_on_input: false
    - _component_: ds.LlavaInstructDataset
      dataset_path: ds/vision_flan/output.parquet
      train_on_input: false
    - _component_: ds.CaptionInstructDataset
      dataset_path: ds/sam_llava/output.parquet
      train_on_input: false
seed: null
shuffle: true
batch_size: 8

# --- Optimization ---
optimizer:
  _component_: torch.optim.AdamW
  # NOTE(review): 0.99 is far above typical AdamW weight decay (~0.01) —
  # confirm this is intentional.
  weight_decay: 0.99
  lr: 0.0003
lr_scheduler:
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
  num_warmup_steps: 4
loss:
  _component_: torch.nn.CrossEntropyLoss
epochs: 6
max_steps_per_epoch: null
# Effective batch per optimizer step: batch_size (8) x 512 = 4096 samples.
# NOTE(review): a stray kebab-case duplicate `gradient-accumulation-steps: 32`
# previously sat at the end of this file; this snake_case key is canonical
# and the duplicate has been removed.
gradient_accumulation_steps: 512
compile: false

# --- Logging / runtime ---
output_dir: /tmp/lora_finetune_output
metric_logger:
  _component_: torchtune.utils.metric_logging.DiskLogger
  # OmegaConf-style interpolation of the top-level output_dir.
  log_dir: ${output_dir}
log_every_n_steps: null
device: cuda
dtype: bf16
enable_activation_checkpointing: false
profiler:
  _component_: torchtune.utils.profiler
  enabled: false

# --- Inference-time generation settings ---
inference:
  prompt_template: 'Video:

    {video}

    Caption the previous video.'
  max_new_tokens: 222
  temperature: 0.8
  top_k: 222
  quantizer: null