impossibleexchange committed on
Commit
62351af
·
verified ·
1 Parent(s): f49013a

Update training_config.yml

Browse files
Files changed (1) hide show
  1. training_config.yml +17 -11
training_config.yml CHANGED
@@ -22,35 +22,43 @@ checkpointer:
22
  output_dir: output_checkpoints/experiment_1
23
  model_type: LLAMA3
24
  resume_from_checkpoint: false
25
- interim_checkpoint_steps: 10000
26
  interim_gen_steps: null
27
  max_new_tokens: 77
28
  temperature: 0.6
29
  top_k: 231
30
  dataset:
31
  _component_: ds.EvenBatcher
32
- buffer_size: 1
33
  dataset:
34
  _component_: ds.RoundRobinDataset
35
  datasets:
36
  - _component_: ds.OmegaVideoCaptionDataset
37
  length: 500000
 
 
 
 
 
 
 
 
 
38
  seed: null
39
  shuffle: true
40
  batch_size: 6
41
  optimizer:
42
  _component_: torch.optim.AdamW
43
- weight_decay: 0.98765
44
- lr: 0.123456
45
  lr_scheduler:
46
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
47
- num_warmup_steps: 25
48
  loss:
49
  _component_: torch.nn.CrossEntropyLoss
50
-
51
- epochs: 15
52
  max_steps_per_epoch: null
53
- gradient_accumulation_steps: 4
54
  compile: false
55
  output_dir: /tmp/lora_finetune_output
56
  metric_logger:
@@ -65,12 +73,10 @@ profiler:
65
  enabled: false
66
  inference:
67
  prompt_template: 'Video:
68
-
69
  {video}
70
-
71
  Caption the previous video.'
72
  max_new_tokens: 231
73
  temperature: 0.8
74
  top_k: 231
75
  quantizer: null
76
- gradient-accumulation-steps: 32
 
22
  output_dir: output_checkpoints/experiment_1
23
  model_type: LLAMA3
24
  resume_from_checkpoint: false
25
+ interim_checkpoint_steps: 15000
26
  interim_gen_steps: null
27
  max_new_tokens: 77
28
  temperature: 0.6
29
  top_k: 231
30
  dataset:
31
  _component_: ds.EvenBatcher
32
+ buffer_size: 72
33
  dataset:
34
  _component_: ds.RoundRobinDataset
35
  datasets:
36
  - _component_: ds.OmegaVideoCaptionDataset
37
  length: 500000
38
+ - _component_: ds.LlavaInstructDataset
39
+ dataset_path: ds/coco_llava_instruct/output.parquet
40
+ train_on_input: false
41
+ - _component_: ds.LlavaInstructDataset
42
+ dataset_path: ds/vision_flan/output.parquet
43
+ train_on_input: false
44
+ - _component_: ds.CaptionInstructDataset
45
+ dataset_path: ds/sam_llava/output.parquet
46
+ train_on_input: false
47
  seed: null
48
  shuffle: true
49
  batch_size: 6
50
  optimizer:
51
  _component_: torch.optim.AdamW
52
+ weight_decay: 0.99
53
+ lr: 20.0e-05
54
  lr_scheduler:
55
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
56
+ num_warmup_steps: 4
57
  loss:
58
  _component_: torch.nn.CrossEntropyLoss
59
+ epochs: 60
 
60
  max_steps_per_epoch: null
61
+ gradient_accumulation_steps: 260
62
  compile: false
63
  output_dir: /tmp/lora_finetune_output
64
  metric_logger:
 
73
  enabled: false
74
  inference:
75
  prompt_template: 'Video:
 
76
  {video}
 
77
  Caption the previous video.'
78
  max_new_tokens: 231
79
  temperature: 0.8
80
  top_k: 231
81
  quantizer: null
82
+ gradient-accumulation-steps: 32