File size: 2,079 Bytes
d948e16
 
 
 
 
 
 
b1e57f0
 
d948e16
 
 
 
b1e57f0
d948e16
 
b1e57f0
d948e16
b1e57f0
d948e16
 
 
 
 
b1e57f0
d948e16
b1e57f0
d948e16
b1e57f0
d948e16
 
b1e57f0
d948e16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b1e57f0
d948e16
 
b1e57f0
 
d948e16
 
b1e57f0
d948e16
 
b1e57f0
d948e16
b1e57f0
d948e16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b1e57f0
 
 
d948e16
b1e57f0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# LoRA fine-tuning configuration (torchtune-style recipe).
# Model: multimodal LLaMA-3 8B with LoRA adapters applied only to the
# attention q/v projections; MLP and output projections are not adapted.
model:
  _component_: models.lora_mmllama3_8b
  lora_attn_modules:
  - q_proj
  - v_proj
  apply_lora_to_mlp: false
  apply_lora_to_output: false
  lora_rank: 32
  # LoRA scaling factor alpha/rank = 64/32 = 2.
  lora_alpha: 64
  perception_tokens: 2
  use_clip: false
tokenizer:
  _component_: models.a2a_tokenizer
  path: models/tokenizer.model
# Checkpoint I/O: load Meta-format base weights; no adapter or recipe
# state is restored (adapter_checkpoint / recipe_checkpoint are null).
checkpointer:
  _component_: torchtune.utils.FullModelMetaCheckpointer
  checkpoint_dir: crazyfrog2
  checkpoint_files:
  - meta_model_0.pt
  adapter_checkpoint: null
  recipe_checkpoint: null
  output_dir: output_checkpoints/experiment_1
  model_type: LLAMA3
resume_from_checkpoint: false
# Save an interim checkpoint every N optimizer steps; interim generation
# during training is disabled (null).
interim_checkpoint_steps: 15000
interim_gen_steps: null
# Top-level sampling parameters (a separate set exists under `inference:`
# below — presumably these apply to interim generation; TODO confirm).
max_new_tokens: 77
temperature: 0.6
top_k: 231
# Data pipeline: four datasets interleaved round-robin, then wrapped in an
# EvenBatcher with a shuffle/evening buffer of 72.
dataset:
  _component_: ds.EvenBatcher
  buffer_size: 72
  dataset:
    _component_: ds.RoundRobinDataset
    datasets:
    - _component_: ds.OmegaVideoCaptionDataset
      length: 500000
    - _component_: ds.LlavaInstructDataset
      dataset_path: ds/coco_llava_instruct/output.parquet
      train_on_input: false
    - _component_: ds.LlavaInstructDataset
      dataset_path: ds/vision_flan/output.parquet
      train_on_input: false
    - _component_: ds.CaptionInstructDataset
      dataset_path: ds/sam_llava/output.parquet
      train_on_input: false
seed: null
shuffle: true
batch_size: 6
optimizer:
  _component_: torch.optim.AdamW
  # NOTE(review): 0.99 is far above the typical AdamW weight-decay range
  # (~0.01-0.1) — confirm this is intentional and not a typo for 0.099/0.01.
  weight_decay: 0.99
  lr: 0.0002
lr_scheduler:
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
  num_warmup_steps: 4
loss:
  _component_: torch.nn.CrossEntropyLoss
epochs: 60
max_steps_per_epoch: null
# Effective batch size = batch_size * gradient_accumulation_steps
# = 6 * 260 = 1560 samples per optimizer step.
gradient_accumulation_steps: 260
compile: false
output_dir: /tmp/lora_finetune_output
metric_logger:
  _component_: torchtune.utils.metric_logging.DiskLogger
  log_dir: ${output_dir}
log_every_n_steps: null
device: cuda
dtype: bf16
enable_activation_checkpointing: false
profiler:
  _component_: torchtune.utils.profiler
  enabled: false
# Inference-time settings; prompt_template's blank lines inside the
# single-quoted scalar fold into literal newlines around {video}.
inference:
  prompt_template: 'Video:

    {video}

    Caption the previous video.'
  max_new_tokens: 231
  temperature: 0.8
  top_k: 231
  quantizer: null
# NOTE(review): kebab-case key below does NOT match
# `gradient_accumulation_steps: 260` above — to a YAML loader these are two
# distinct keys, so this value is likely ignored by the recipe. Confirm
# which value (260 or 32) is intended and remove the stale key.
gradient-accumulation-steps: 32