jan-hq committed on
Commit
2d1a375
1 Parent(s): c9f77b6

Upload 2 files

Browse files
Files changed (2) hide show
  1. 3B_pretrain.yaml +90 -0
  2. loss_log.txt +0 -0
3B_pretrain.yaml ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Config for multi-device full finetuning in full_finetune_distributed.py
2
+ # using a Llama 3.2 3B model (torchtune.models.llama3_2.llama3_2_s_3b)
3
+ #
4
+ # This config assumes that the tokenizer and initial checkpoint under ../model_zoo_llama3.2/
5
+ # already exist before launching this run. The command below is left over from the 8B config:
6
+ # tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --hf-token <HF_TOKEN>
7
+ #
8
+ # To launch on 4 devices, run the following command from root:
9
+ # tune run --nproc_per_node 4 full_finetune_distributed --config <path/to/3B_pretrain.yaml>
10
+ #
11
+ # You can add specific overrides through the command line. For example
12
+ # to override the checkpointer directory while launching training
13
+ # you can run:
14
+ # tune run --nproc_per_node 4 full_finetune_distributed --config <path/to/3B_pretrain.yaml> checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
15
+ #
16
+ # This config works best when the model is being fine-tuned on 2+ GPUs.
17
+ # Single device full finetuning requires more memory optimizations. It's
18
+ # best to use 8B_full_single_device.yaml for those cases
19
+ # Tokenizer
20
+ tokenizer:
21
+ _component_: torchtune.models.llama3.llama3_s_tokenizer
22
+ path: ../model_zoo_llama3.2/tokenizer.model
23
+ max_seq_len: 512
24
+
25
+ # Dataset
26
+ dataset:
27
+ _component_: torchtune.datasets.sound_completion_dataset
28
+ source: jan-hq/raw-speech-whispervq-v2-merged
29
+ max_seq_len: 512
30
+ split: train
31
+ column: text
32
+
33
+ seed: 42
34
+ shuffle: True
35
+ # Model Arguments
36
+ model:
37
+ _component_: torchtune.models.llama3_2.llama3_2_s_3b
38
+ # path: model_zoo/Llama3.1_s_8b_init
39
+ checkpointer:
40
+ _component_: torchtune.training.FullModelHFCheckpointerSaveSteps
41
+ checkpoint_dir: ../model_zoo_llama3.2/llama3.2-s-3b-init
42
+ checkpoint_files: [
43
+ model-00001-of-00002.safetensors,
44
+ model-00002-of-00002.safetensors,
45
+ ]
46
+ recipe_checkpoint: null
47
+ output_dir: ../model_zoo_llama3.2/llama3.2-3B-s
48
+ model_type: LLAMA3_2
49
+ resume_from_checkpoint: False
50
+ save_every_n_steps: 1000
51
+ max_checkpoints: 3
52
+ # Fine-tuning arguments
53
+ batch_size: 24
54
+ epochs: 1
55
+ max_steps_per_epoch: null
56
+ gradient_accumulation_steps: 2
57
+ compile: False
58
+ # Optimizer and Scheduler
59
+ optimizer:
60
+ _component_: torch.optim.AdamW # To use Adam-mini instead, set this to torchtune.modules.optimizer.Adam_mini
61
+ weight_decay: 0.01
62
+ lr: 2e-4
63
+ fused: True
64
+ lr_scheduler:
65
+ _component_: torchtune.modules.get_cosine_schedule_with_warmup
66
+ num_warmup_steps: 80
67
+
68
+ loss:
69
+ _component_: torch.nn.CrossEntropyLoss
70
+
71
+ fsdp:
72
+ cpu_offload: False
73
+
74
+ # Training env
75
+ device: cuda
76
+ dtype: bf16
77
+
78
+ # Memory management
79
+ enable_activation_checkpointing: True
80
+ memory_efficient_fsdp_wrap: True
81
+ ac_mode: 'selective'
82
+
83
+
84
+ # Logging
85
+ metric_logger:
86
+ _component_: torchtune.training.metric_logging.DiskLogger
87
+ log_dir: ${output_dir}
88
+ output_dir: ../model_zoo_llama3.2/llama3.2-3B-s-log/
89
+ log_every_n_steps: 1
90
+ log_peak_memory_stats: False
loss_log.txt ADDED
The diff for this file is too large to render. See raw diff