tunji17 committed
Commit 161eed6
1 Parent(s): a62cc8b

adds code for finetuning llama2

Files changed (3)
  1. README.md +45 -0
  2. makebelieve.py +248 -0
  3. requirements.txt +148 -0
README.md CHANGED
---
license: apache-2.0
---

# Fine-tuning Llama 2 on our celebrity news dataset, located [here](https://huggingface.co/datasets/2nji/makebelieve-480)

Disclaimer: This is still a work in progress, as we still need to preprocess our celebrity news dataset to match Llama 2's prompt format, as described [here](https://huggingface.co/blog/llama2#how-to-prompt-llama-2). A rough sketch of that step is shown below.
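
As an illustration only (the column names `instruction` and `response` are assumptions, not the verified schema of makebelieve-480), that preprocessing could map each row into the single-turn template `<s>[INST] ... [/INST] ... </s>` and store it in a `text` column, which is the field makebelieve.py later hands to `SFTTrainer` via `dataset_text_field="text"`:

```python
# Hypothetical sketch of the missing preprocessing step.
# "instruction" and "response" are assumed column names, not the confirmed dataset schema.
from datasets import load_dataset

def to_llama2_prompt(example):
    # Single-turn Llama 2 chat format: <s>[INST] user prompt [/INST] model answer </s>
    example["text"] = (
        f"<s>[INST] {example['instruction'].strip()} [/INST] "
        f"{example['response'].strip()} </s>"
    )
    return example

dataset = load_dataset("2nji/makebelieve-480", split="train")
dataset = dataset.map(to_llama2_prompt)
```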

## Reserve GPU on g5k

Log into your Grid5000 account using SSH and run the following command in the terminal:

```script
oarsub -l gpu=4 -I -q production
```

Wait until the GPUs are available and assigned to you. If you need more information about g5k, refer to the [Getting Started guide](https://www.grid5000.fr/w/Getting_Started).

### Create a virtual environment

- Installing virtualenv

```script
pip install virtualenv
```

- Creating the environment

```script
virtualenv venv
```

- Activating the environment

```script
source venv/bin/activate
```

## Install the requirements file

```script
pip install -r requirements.txt
```
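
Optionally, once the install finishes you can confirm that PyTorch sees the GPUs reserved through oarsub (a quick sanity check, not part of the original walkthrough):

```python
import torch

# Should print True and the number of GPUs reserved for the job (e.g. 4)
print(torch.cuda.is_available(), torch.cuda.device_count())
```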

## Running the script to fine-tune Llama-2-7b-chat-hf and push it to the Hugging Face model repository

```script
python makebelieve.py
```
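
Note that makebelieve.py pushes the merged model and tokenizer with a placeholder token (`token="..."`), so you need to put your own Hugging Face write token there before running it. One alternative sketch (an assumption, not what the script does as written) is to drop the `token` arguments and authenticate once with `huggingface_hub`, which caches the token locally:

```python
# Optional: cache a Hugging Face token instead of hard-coding it in makebelieve.py.
from huggingface_hub import login

login()  # prompts for a token and stores it for later push_to_hub calls
```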
makebelieve.py ADDED
import os
import gc
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

# The model that you want to train from the Hugging Face hub
model_name = "NousResearch/Llama-2-7b-chat-hf"

# The instruction dataset to use
dataset_name = "2nji/makebelieve-480"

# Fine-tuned model name
new_model = "makebelieve"

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Number of training epochs
num_train_epochs = 1

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 1

# Batch size per GPU for evaluation
per_device_eval_batch_size = 1

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient norm (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule
lr_scheduler_type = "cosine"

# Number of training steps (overrides num_train_epochs)
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X update steps
save_steps = 0

# Log every X update steps
logging_steps = 25

################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Device map: "auto" spreads the model across the available GPUs
# device_map = {"": 0}  # alternative: load the entire model on GPU 0
device_map = "auto"

# Load dataset (you can process it here)
dataset = load_dataset(dataset_name, split="train")

# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"  # Fix weird overflow issue with fp16 training

# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)

# Train model
trainer.train()

# Save trained model
trainer.model.save_pretrained(new_model)

# %load_ext tensorboard
# %tensorboard --logdir results/runs

# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our new model
prompt = "What did taylor swift do?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

# Empty VRAM
del model
del pipe
del trainer
gc.collect()
gc.collect()

# Reload model in FP16 and merge it with LoRA weights
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
model = PeftModel.from_pretrained(base_model, new_model)
model = model.merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Save model and tokenizer
# Do not forget to add your token here
model.push_to_hub(new_model, use_temp_dir=False, token="...")
tokenizer.push_to_hub(new_model, use_temp_dir=False, token="...")
print("end of makebelieve.py")
requirements.txt ADDED
accelerate==0.21.0
aiohttp==3.9.0
aiosignal==1.3.1
anyio==4.0.0
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asttokens==2.4.1
async-lru==2.0.4
async-timeout==4.0.3
attrs==23.1.0
Babel==2.13.1
beautifulsoup4==4.12.2
bitsandbytes==0.40.2
bleach==6.1.0
certifi==2023.7.22
cffi==1.16.0
charset-normalizer==3.3.2
comm==0.2.0
datasets==2.15.0
debugpy==1.8.0
decorator==5.1.1
defusedxml==0.7.1
dill==0.3.7
exceptiongroup==1.1.3
executing==2.0.1
fastjsonschema==2.19.0
filelock==3.13.1
fqdn==1.5.1
frozenlist==1.4.0
fsspec==2023.10.0
huggingface-hub==0.19.4
idna==3.4
importlib-metadata==6.8.0
ipykernel==6.26.0
ipython==8.17.2
ipywidgets==8.1.1
isoduration==20.11.0
jedi==0.19.1
Jinja2==3.1.2
json5==0.9.14
jsonpointer==2.4
jsonschema==4.19.2
jsonschema-specifications==2023.11.1
jupyter==1.0.0
jupyter-client==8.6.0
jupyter-console==6.6.3
jupyter-core==5.5.0
jupyter-events==0.9.0
jupyter-lsp==2.2.0
jupyter-server==2.10.1
jupyter-server-terminals==0.4.4
jupyterlab==4.0.8
jupyterlab-pygments==0.2.2
jupyterlab-server==2.25.1
jupyterlab-widgets==3.0.9
MarkupSafe==2.1.3
matplotlib-inline==0.1.6
mistune==3.0.2
mpmath==1.3.0
multidict==6.0.4
multiprocess==0.70.15
nbclient==0.9.0
nbconvert==7.11.0
nbformat==5.9.2
nest-asyncio==1.5.8
networkx==3.2.1
notebook==7.0.6
notebook-shim==0.2.3
numpy==1.26.2
nvidia-cublas-cu11==11.10.3.66
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu11==11.7.99
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu11==11.7.99
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu11==8.5.0.96
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.18.1
nvidia-nvjitlink-cu12==12.3.101
nvidia-nvtx-cu12==12.1.105
overrides==7.4.0
packaging==23.2
pandas==2.1.3
pandocfilters==1.5.0
parso==0.8.3
peft==0.4.0
pexpect==4.8.0
platformdirs==4.0.0
prometheus-client==0.18.0
prompt-toolkit==3.0.41
psutil==5.9.6
ptyprocess==0.7.0
pure-eval==0.2.2
pyarrow==14.0.1
pyarrow-hotfix==0.5
pycparser==2.21
Pygments==2.16.1
python-dateutil==2.8.2
python-json-logger==2.0.7
pytz==2023.3.post1
PyYAML==6.0.1
pyzmq==25.1.1
qtconsole==5.5.1
QtPy==2.4.1
referencing==0.31.0
regex==2023.10.3
requests==2.31.0
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rpds-py==0.12.0
safetensors==0.4.0
scipy==1.11.4
Send2Trash==1.8.2
six==1.16.0
sniffio==1.3.0
soupsieve==2.5
stack-data==0.6.3
sympy==1.12
terminado==0.18.0
tinycss2==1.2.1
tokenizers==0.13.3
tomli==2.0.1
torch==1.13.1
tornado==6.3.3
tqdm==4.66.1
traitlets==5.13.0
transformers==4.31.0
triton==2.1.0
trl==0.4.7
types-python-dateutil==2.8.19.14
typing-extensions==4.8.0
tzdata==2023.3
uri-template==1.3.0
urllib3==2.1.0
wcwidth==0.2.10
webcolors==1.13
webencodings==0.5.1
websocket-client==1.6.4
widgetsnbextension==4.0.9
xxhash==3.4.1
yarl==1.9.2
zipp==3.17.0