tunji17 committed
Commit 161eed6
1 Parent(s): a62cc8b

adds code for finetuning llama2

Files changed (3)
  1. README.md +45 -0
  2. makebelieve.py +248 -0
  3. requirements.txt +148 -0
README.md CHANGED
---
license: apache-2.0
---

# Fine-tuning Llama 2 on our celebrity news dataset, located [here](https://huggingface.co/datasets/2nji/makebelieve-480)

Disclaimer: This is still a work in progress, as we still need to preprocess our celebrity news dataset to match Llama 2's prompt format, as described [here](https://huggingface.co/blog/llama2#how-to-prompt-llama-2). A rough sketch of that step is shown below.
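
As an illustration only (the column names `instruction` and `response` are assumptions, not the verified schema of makebelieve-480), that preprocessing could map each row into the single-turn template `<s>[INST] ... [/INST] ... </s>` and store it in a `text` column, which is the field makebelieve.py later hands to `SFTTrainer` via `dataset_text_field="text"`:

```python
# Hypothetical sketch of the missing preprocessing step.
# "instruction" and "response" are assumed column names, not the confirmed dataset schema.
from datasets import load_dataset

def to_llama2_prompt(example):
    # Single-turn Llama 2 chat format: <s>[INST] user prompt [/INST] model answer </s>
    example["text"] = (
        f"<s>[INST] {example['instruction'].strip()} [/INST] "
        f"{example['response'].strip()} </s>"
    )
    return example

dataset = load_dataset("2nji/makebelieve-480", split="train")
dataset = dataset.map(to_llama2_prompt)
```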

## Reserve GPU on g5k

Log into your Grid5000 account using SSH and run the following command in the terminal:

```script
oarsub -l gpu=4 -I -q production
```

Wait until the GPUs are available and assigned to you. If you need more information about g5k, refer to the [Getting Started guide](https://www.grid5000.fr/w/Getting_Started).

### Create a virtual environment

- Installing virtualenv

```script
pip install virtualenv
```

- Creating the environment

```script
virtualenv venv
```

- Activating the environment

```script
source venv/bin/activate
```

## Install the requirements file

```script
pip install -r requirements.txt
```
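
Optionally, once the install finishes you can confirm that PyTorch sees the GPUs reserved through oarsub (a quick sanity check, not part of the original walkthrough):

```python
import torch

# Should print True and the number of GPUs reserved for the job (e.g. 4)
print(torch.cuda.is_available(), torch.cuda.device_count())
```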

## Running the script to fine-tune Llama-2-7b-chat-hf and push it to the Hugging Face model repository

```script
python makebelieve.py
```
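
Note that makebelieve.py pushes the merged model and tokenizer with a placeholder token (`token="..."`), so you need to put your own Hugging Face write token there before running it. One alternative sketch (an assumption, not what the script does as written) is to drop the `token` arguments and authenticate once with `huggingface_hub`, which caches the token locally:

```python
# Optional: cache a Hugging Face token instead of hard-coding it in makebelieve.py.
from huggingface_hub import login

login()  # prompts for a token and stores it for later push_to_hub calls
```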
makebelieve.py ADDED
import os
import gc
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

# The model that you want to train from the Hugging Face hub
model_name = "NousResearch/Llama-2-7b-chat-hf"

# The instruction dataset to use
dataset_name = "2nji/makebelieve-480"

# Fine-tuned model name
new_model = "makebelieve"

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Number of training epochs
num_train_epochs = 1

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 1

# Batch size per GPU for evaluation
per_device_eval_batch_size = 1

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient norm (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule
lr_scheduler_type = "cosine"

# Number of training steps (overrides num_train_epochs)
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X update steps
save_steps = 0

# Log every X update steps
logging_steps = 25

################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Device map: "auto" spreads the model across the available GPUs
# device_map = {"": 0}  # alternative: load the entire model on GPU 0
device_map = "auto"

# Load dataset (you can process it here)
dataset = load_dataset(dataset_name, split="train")

# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"  # Fix weird overflow issue with fp16 training

# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)

# Train model
trainer.train()

# Save trained model
trainer.model.save_pretrained(new_model)

# %load_ext tensorboard
# %tensorboard --logdir results/runs

# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our new model
prompt = "What did taylor swift do?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

# Empty VRAM
del model
del pipe
del trainer
gc.collect()
gc.collect()

# Reload model in FP16 and merge it with LoRA weights
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
model = PeftModel.from_pretrained(base_model, new_model)
model = model.merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Save model and tokenizer
# Do not forget to add your token here
model.push_to_hub(new_model, use_temp_dir=False, token="...")
tokenizer.push_to_hub(new_model, use_temp_dir=False, token="...")
print("end of makebelieve.py")
requirements.txt ADDED
accelerate==0.21.0
aiohttp==3.9.0
aiosignal==1.3.1
anyio==4.0.0
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asttokens==2.4.1
async-lru==2.0.4
async-timeout==4.0.3
attrs==23.1.0
Babel==2.13.1
beautifulsoup4==4.12.2
bitsandbytes==0.40.2
bleach==6.1.0
certifi==2023.7.22
cffi==1.16.0
charset-normalizer==3.3.2
comm==0.2.0
datasets==2.15.0
debugpy==1.8.0
decorator==5.1.1
defusedxml==0.7.1
dill==0.3.7
exceptiongroup==1.1.3
executing==2.0.1
fastjsonschema==2.19.0
filelock==3.13.1
fqdn==1.5.1
frozenlist==1.4.0
fsspec==2023.10.0
huggingface-hub==0.19.4
idna==3.4
importlib-metadata==6.8.0
ipykernel==6.26.0
ipython==8.17.2
ipywidgets==8.1.1
isoduration==20.11.0
jedi==0.19.1
Jinja2==3.1.2
json5==0.9.14
jsonpointer==2.4
jsonschema==4.19.2
jsonschema-specifications==2023.11.1
jupyter==1.0.0
jupyter-client==8.6.0
jupyter-console==6.6.3
jupyter-core==5.5.0
jupyter-events==0.9.0
jupyter-lsp==2.2.0
jupyter-server==2.10.1
jupyter-server-terminals==0.4.4
jupyterlab==4.0.8
jupyterlab-pygments==0.2.2
jupyterlab-server==2.25.1
jupyterlab-widgets==3.0.9
MarkupSafe==2.1.3
matplotlib-inline==0.1.6
mistune==3.0.2
mpmath==1.3.0
multidict==6.0.4
multiprocess==0.70.15
nbclient==0.9.0
nbconvert==7.11.0
nbformat==5.9.2
nest-asyncio==1.5.8
networkx==3.2.1
notebook==7.0.6
notebook-shim==0.2.3
numpy==1.26.2
nvidia-cublas-cu11==11.10.3.66
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu11==11.7.99
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu11==11.7.99
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu11==8.5.0.96
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.18.1
nvidia-nvjitlink-cu12==12.3.101
nvidia-nvtx-cu12==12.1.105
overrides==7.4.0
packaging==23.2
pandas==2.1.3
pandocfilters==1.5.0
parso==0.8.3
peft==0.4.0
pexpect==4.8.0
platformdirs==4.0.0
prometheus-client==0.18.0
prompt-toolkit==3.0.41
psutil==5.9.6
ptyprocess==0.7.0
pure-eval==0.2.2
pyarrow==14.0.1
pyarrow-hotfix==0.5
pycparser==2.21
Pygments==2.16.1
python-dateutil==2.8.2
python-json-logger==2.0.7
pytz==2023.3.post1
PyYAML==6.0.1
pyzmq==25.1.1
qtconsole==5.5.1
QtPy==2.4.1
referencing==0.31.0
regex==2023.10.3
requests==2.31.0
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rpds-py==0.12.0
safetensors==0.4.0
scipy==1.11.4
Send2Trash==1.8.2
six==1.16.0
sniffio==1.3.0
soupsieve==2.5
stack-data==0.6.3
sympy==1.12
terminado==0.18.0
tinycss2==1.2.1
tokenizers==0.13.3
tomli==2.0.1
torch==1.13.1
tornado==6.3.3
tqdm==4.66.1
traitlets==5.13.0
transformers==4.31.0
triton==2.1.0
trl==0.4.7
types-python-dateutil==2.8.19.14
typing-extensions==4.8.0
tzdata==2023.3
uri-template==1.3.0
urllib3==2.1.0
wcwidth==0.2.10
webcolors==1.13
webencodings==0.5.1
websocket-client==1.6.4
widgetsnbextension==4.0.9
xxhash==3.4.1
yarl==1.9.2
zipp==3.17.0