jan-hq committed on
Commit
f16186b
1 Parent(s): 0a31015

Upload 2 files

Browse files
Files changed (2) hide show
  1. 3B_full.yaml +91 -0
  2. loss_log_intruct.txt +0 -0
3B_full.yaml ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Config for multi-device full finetuning in full_finetune_distributed.py
2
+ # using a Llama3.2 3B model (torchtune.models.llama3_2.llama3_2_s_3b)
3
+ #
4
+ # This config assumes that you've run the following command before launching
5
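+ # this run (NOTE: the command below is inherited from the Llama3 8B template; this config actually loads its checkpoint from ../model_zoo_llama3.2/llama3.2-s-3b-base and its tokenizer from ../model_zoo_llama3.2/tokenizer.model):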
6
+ # tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --hf-token <HF_TOKEN>
7
+ #
8
+ # To launch on 4 devices, run the following command from root:
9
+ # tune run --nproc_per_node 4 full_finetune_distributed --config llama3/8B_full
10
+ #
11
+ # You can add specific overrides through the command line. For example
12
+ # to override the checkpointer directory while launching training
13
+ # you can run:
14
+ # tune run --nproc_per_node 4 full_finetune_distributed --config llama3/8B_full checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
15
+ #
16
+ # This config works best when the model is being fine-tuned on 2+ GPUs.
17
+ # Single device full finetuning requires more memory optimizations. It's
18
+ # best to use 8B_full_single_device.yaml for those cases
19
+ # Tokenizer
20
+ tokenizer:
21
+ _component_: torchtune.models.llama3.llama3_s_tokenizer
22
+ path: ../model_zoo_llama3.2/tokenizer.model
23
+ max_seq_len: 4096
24
+
25
+ # Dataset
26
+ dataset:
27
+ _component_: torchtune.datasets.chat_dataset
28
+ source: homebrewltd/mixed-instruction-speech-whispervq-v3-full-phase2-3
29
+ conversation_column: conversations
30
+ conversation_style: openai
31
+ split: train
32
+ train_on_input: True
33
+
34
+ seed: 42
35
+ shuffle: False
36
+ # Model Arguments
37
+ model:
38
+ _component_: torchtune.models.llama3_2.llama3_2_s_3b
39
+ # path: model_zoo/Llama3.1_s_8b_init
40
+ checkpointer:
41
+ _component_: torchtune.training.FullModelHFCheckpointerSaveSteps
42
+ checkpoint_dir: ../model_zoo_llama3.2/llama3.2-s-3b-base
43
+ checkpoint_files: [
44
+ model-00001-of-00002.safetensors,
45
+ model-00002-of-00002.safetensors,
46
+ ]
47
+ recipe_checkpoint: null
48
+ output_dir: ../model_zoo_llama3.2/llama3.2-3B-s-instruct
49
+ model_type: LLAMA3_2
50
+ resume_from_checkpoint: False
51
+ save_every_n_steps: 1000
52
+ max_checkpoints: 3
53
+ # Fine-tuning arguments
54
+ batch_size: 3
55
+ epochs: 1
56
+ max_steps_per_epoch: null
57
+ gradient_accumulation_steps: 12
58
+ compile: False
59
+ # Optimizer and Scheduler
60
+ optimizer:
61
+ _component_: torch.optim.AdamW #change this to use adam_mini: torchtune.modules.optimizer.Adam_mini
62
+ weight_decay: 0.005
63
+ lr: 7e-5
64
+ fused: True
65
+ lr_scheduler:
66
+ _component_: torchtune.modules.get_cosine_schedule_with_warmup
67
+ num_warmup_steps: 62
68
+
69
+ loss:
70
+ _component_: torch.nn.CrossEntropyLoss
71
+
72
+ fsdp:
73
+ cpu_offload: False
74
+
75
+ # Training env
76
+ device: cuda
77
+ dtype: bf16
78
+
79
+ # Memory management
80
+ enable_activation_checkpointing: True
81
+ memory_efficient_fsdp_wrap: True
82
+ ac_mode: 'selective'
83
+
84
+
85
+ # Logging
86
+ metric_logger:
87
+ _component_: torchtune.training.metric_logging.DiskLogger
88
+ log_dir: ${output_dir}
89
+ output_dir: ../model_zoo_llama3.2/llama3.2-3B-s-instruct-log/
90
+ log_every_n_steps: 1
91
+ log_peak_memory_stats: False
loss_log_intruct.txt ADDED
The diff for this file is too large to render. See raw diff