jrc committed on
Commit
cc620ee
1 Parent(s): 88c1e76

Upload 8B_lora.yaml with huggingface_hub

Files changed (1)
  1. 8B_lora.yaml +93 -0
8B_lora.yaml ADDED
@@ -0,0 +1,93 @@
+ # Config for multi-device LoRA finetuning in lora_finetune_distributed.py
+ # using a Llama3 8B model
+ #
+ # This config assumes that you've run the following command before launching
+ # this run:
+ #   tune download meta-llama/Meta-Llama-3-8B --output-dir /tmp/Meta-Llama-3-8B --hf-token <HF_TOKEN>
+ #
+ # To launch on 2 devices, run the following command from root:
+ #   tune run --nproc_per_node 2 lora_finetune_distributed --config llama3/8B_lora
+ #
+ # You can add specific overrides through the command line. For example
+ # to override the checkpointer directory while launching training
+ # you can run:
+ #   tune run --nproc_per_node 2 lora_finetune_distributed --config llama3/8B_lora checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
+ #
+ # This config works best when the model is being fine-tuned on 2+ GPUs.
+ # For single device LoRA finetuning please use 8B_lora_single_device.yaml
+ # or 8B_qlora_single_device.yaml
+
+ # Tokenizer
+ tokenizer:
+   _component_: torchtune.models.llama3.llama3_tokenizer
+   path: ./model/original/tokenizer.model
+
+ # Model Arguments
+ model:
+   _component_: torchtune.models.llama3.lora_llama3_8b
+   lora_attn_modules: ['q_proj', 'v_proj']
+   apply_lora_to_mlp: False
+   apply_lora_to_output: False
+   lora_rank: 8
+   lora_alpha: 16
+
+ checkpointer:
+   _component_: torchtune.utils.FullModelMetaCheckpointer
+   checkpoint_dir: ./model/original/
+   checkpoint_files: [
+     consolidated.00.pth
+   ]
+   recipe_checkpoint: null
+   output_dir: ./finetuned_model/
+   model_type: LLAMA3
+ resume_from_checkpoint: False
+
+ # Dataset and Sampler
+ # InstructDataset(
+ #     tokenizer=tokenizer,
+ #     source=source,
+ #     template=GrammarErrorCorrectionTemplate,
+ #     column_map={"sentence": "input"},
+ #     train_on_input=train_on_input,
+ #     split="train",
+ # )
+ dataset:
+   _component_: torchtune.datasets.instruct_dataset
+   source: grammarly/coedit
+   template: GrammarErrorCorrectionTemplate
+   column_map: {"sentence": "src", "output": "tgt"}
+   train_on_input: False
+   split: train
+ seed: 123
+ shuffle: True
+ batch_size: 4
+
+ # Optimizer and Scheduler
+ optimizer:
+   _component_: torch.optim.AdamW
+   weight_decay: 0.01
+   lr: 3e-4
+ lr_scheduler:
+   _component_: torchtune.modules.get_cosine_schedule_with_warmup
+   num_warmup_steps: 100
+
+ loss:
+   _component_: torch.nn.CrossEntropyLoss
+
+ # Training
+ epochs: 2
+ max_steps_per_epoch: null
+ gradient_accumulation_steps: 32
+
+ # Logging
+ output_dir: ./lora_finetune_output
+ metric_logger:
+   _component_: torchtune.utils.metric_logging.WandBLogger
+   project: torchtune
+   group: llama3-grammarly
+ log_every_n_steps: null
+
+ # Environment
+ device: cuda
+ dtype: bf16
+ enable_activation_checkpointing: False
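
The header comments in the file give the generic download and launch commands. As a rough end-to-end sketch (not part of the committed file), assuming the Meta-Llama-3-8B repo is downloaded so that its original/ folder lands under ./model/, matching the tokenizer path and checkpoint_dir in this config, the sequence would look like:

    # download the Meta-Llama-3-8B repo; its original/ folder holds tokenizer.model and consolidated.00.pth
    tune download meta-llama/Meta-Llama-3-8B --output-dir ./model --hf-token <HF_TOKEN>
    # launch distributed LoRA finetuning on 2 GPUs using this config file
    tune run --nproc_per_node 2 lora_finetune_distributed --config 8B_lora.yaml

The --output-dir and --config values above are assumptions chosen to line up with the ./model/original/ paths in this YAML; the file's own comment block instead shows /tmp/Meta-Llama-3-8B and the packaged llama3/8B_lora config name.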