bala1802 committed
Commit a36cb22
1 Parent(s): 2c8a9ea

Upload 6 files

Files changed (6)
  1. adapter_utils.py +14 -0
  2. config.py +53 -0
  3. data_utils.py +35 -0
  4. model_utils.py +48 -0
  5. quantization_utils.py +13 -0
  6. requirements.txt +10 -0
adapter_utils.py ADDED
@@ -0,0 +1,14 @@
+ from peft import LoraConfig
+
+ import config
+
+ def load_adapter(target_modules):
+     peft_config = LoraConfig(
+         lora_alpha=config.LORA_ALPHA,
+         lora_dropout=config.LORA_DROPOUT,
+         r=config.LORA_RANK,
+         bias="none",
+         task_type=config.TASK_TYPE,
+         target_modules=target_modules
+     )
+     return peft_config
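As a usage sketch (not part of this commit): load_adapter only builds the LoraConfig, and the caller decides which layers to adapt. The module names below are an illustrative guess for phi-2, not something this file prescribes.

import adapter_utils

# Hypothetical target modules for phi-2; substitute the model's actual projection layer names.
target_modules = ["q_proj", "k_proj", "v_proj", "dense"]
peft_config = adapter_utils.load_adapter(target_modules)
print(peft_config)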
config.py ADDED
@@ -0,0 +1,53 @@
+ '''
+ Data Configuration
+ '''
+ DATASET = "OpenAssistant/oasst1"
+ DATASET_TEXT_FIELD = "prompt_response"
+
+ '''
+ Model Configuration
+ '''
+ MODEL_NAME = "microsoft/phi-2"
+ TRUST_REMOTE_CODE = True
+ ENABLE_MODEL_CONFIG_CACHE = False
+
+ '''
+ Quantization Configuration
+ '''
+ ENABLE_4BIT = True
+ QUANTIZATION_TYPE = "nf4"
+
+ '''
+ Adapter Configuration
+ '''
+ LORA_ALPHA = 16
+ LORA_DROPOUT = 0.1
+ LORA_RANK = 64
+ TASK_TYPE = "CAUSAL_LM"
+
+ '''
+ Model Training Configuration
+ '''
+ MODEL_OUTPUT_DIR = "results/"
+ PER_DEVICE_TRAIN_BATCH_SIZE = 4
+ GRADIENT_ACCUMULATION_STEPS = 4
+ OPTIM = "paged_adamw_32bit"
+ SAVE_STEPS = 100
+ LOGGING_STEPS = 10
+ LEARNING_RATE = 2e-4
+ MAX_GRAD_NORM = 0.3
+ MAX_STEPS = 700
+ WARMUP_RATIO = 0.05
+ LR_SCHEDULER_TYPE = "constant"
+ ENABLE_FP_16 = True
+ ENABLE_GRADIENT_CHECKPOINTING = False
+
+ '''
+ Model Trainer Configuration
+ '''
+ MAX_SEQ_LENGTH = 512
+
+ '''
+ Inference Configuration
+ '''
+ TASK = "text-generation"
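Two quick consequences of these constants, shown as a minimal sanity-check script (assuming a single GPU, so the effective batch size is just the per-device size times the accumulation steps):

import config

# Effective examples per optimizer step on one GPU: 4 * 4 = 16.
effective_batch_size = config.PER_DEVICE_TRAIN_BATCH_SIZE * config.GRADIENT_ACCUMULATION_STEPS
# LoRA scales the adapter update by alpha / r: 16 / 64 = 0.25.
lora_scaling = config.LORA_ALPHA / config.LORA_RANK
print(effective_batch_size, lora_scaling)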
data_utils.py ADDED
@@ -0,0 +1,35 @@
+ import datasets
+ from datasets import load_dataset
+
+ import config
+
+ def download(mode):
+     print("Downloading Dataset - ", config.DATASET, "...")
+     dataset = load_dataset(config.DATASET, split=mode)
+     return dataset
+
+ def prepare_prompts_responses(dataset):
+     print("Preparing Prompt and Assistant....")
+     dataset_df = dataset.to_pandas()
+     user_prompters = dataset_df[(dataset_df.role == "prompter")]
+     user_prompters = user_prompters.set_index("message_id")
+     assistants = dataset_df[(dataset_df.role == "assistant") & (dataset_df["rank"] == 0.0)]
+
+     prompts_responses = []
+     for _, record in assistants.iterrows():
+         prompt_text = user_prompters.loc[record.parent_id, 'text']
+         prompt_response = "### Human: " + prompt_text + " ### Assistant: " + record['text']
+         prompts_responses.append(prompt_response)
+     assistants[config.DATASET_TEXT_FIELD] = prompts_responses
+
+     return assistants
+
+ def preparedata(mode):
+     print("Preparing data for - ", mode, "...")
+     dataset = download(mode=mode)
+     prompts_responses = prepare_prompts_responses(dataset)
+     prompts_responses_dataset = datasets.Dataset.from_pandas(prompts_responses)
+     return prompts_responses_dataset
+
+
+
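In short, prepare_prompts_responses keeps only top-ranked (rank 0.0) assistant replies, looks up each reply's parent prompt by message_id, and concatenates the pair into a single prompt_response string per example. A minimal sketch of the intended call, assuming the oasst1 "train" split:

import data_utils

train_dataset = data_utils.preparedata("train")
# Each example holds the concatenated text used for supervised fine-tuning, e.g.
# "### Human: <prompt text> ### Assistant: <top-ranked reply>"
print(train_dataset[0]["prompt_response"])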
model_utils.py ADDED
@@ -0,0 +1,48 @@
+ from trl import SFTTrainer
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
+
+ import config
+
+ def load_model(quantization_config):
+     model = AutoModelForCausalLM.from_pretrained(
+         config.MODEL_NAME,
+         quantization_config=quantization_config,
+         trust_remote_code=config.TRUST_REMOTE_CODE
+     )
+     model.config.use_cache = config.ENABLE_MODEL_CONFIG_CACHE
+     return model
+
+ def load_tokenizers():
+     tokenizer = AutoTokenizer.from_pretrained(
+         config.MODEL_NAME,
+         trust_remote_code=config.TRUST_REMOTE_CODE)
+     return tokenizer
+
+ def load_training_arguments():
+     training_arguments = TrainingArguments(
+         output_dir=config.MODEL_OUTPUT_DIR,
+         per_device_train_batch_size=config.PER_DEVICE_TRAIN_BATCH_SIZE,
+         gradient_accumulation_steps=config.GRADIENT_ACCUMULATION_STEPS,
+         optim=config.OPTIM,
+         save_steps=config.SAVE_STEPS,
+         logging_steps=config.LOGGING_STEPS,
+         learning_rate=config.LEARNING_RATE,
+         fp16=config.ENABLE_FP_16,
+         max_grad_norm=config.MAX_GRAD_NORM,
+         max_steps=config.MAX_STEPS,
+         warmup_ratio=config.WARMUP_RATIO,
+         gradient_checkpointing=config.ENABLE_GRADIENT_CHECKPOINTING
+     )
+     return training_arguments
+
+ def load_trainer(model, training_dataset, peft_config, tokenizer, training_arguments):
+     trainer = SFTTrainer(
+         model=model,
+         train_dataset=training_dataset,
+         peft_config=peft_config,
+         dataset_text_field=config.DATASET_TEXT_FIELD,
+         max_seq_length=config.MAX_SEQ_LENGTH,
+         tokenizer=tokenizer,
+         args=training_arguments
+     )
+     return trainer
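No driver script is included in this commit; the sketch below shows one plausible way to wire these utilities together. The target-module list and the pad-token line are assumptions on my part, not something the uploaded files specify.

import adapter_utils
import data_utils
import model_utils
import quantization_utils

bnb_config = quantization_utils.load_bits_and_bytes_config()
model = model_utils.load_model(bnb_config)
tokenizer = model_utils.load_tokenizers()
tokenizer.pad_token = tokenizer.eos_token  # assumption: phi-2 has no pad token, so reuse EOS

peft_config = adapter_utils.load_adapter(["q_proj", "k_proj", "v_proj", "dense"])  # illustrative modules
train_dataset = data_utils.preparedata("train")

training_arguments = model_utils.load_training_arguments()
trainer = model_utils.load_trainer(model, train_dataset, peft_config, tokenizer, training_arguments)
trainer.train()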
quantization_utils.py ADDED
@@ -0,0 +1,13 @@
+ import torch
+ from transformers import BitsAndBytesConfig
+
+ import config
+
+ def load_bits_and_bytes_config():
+     bnb_config = BitsAndBytesConfig(
+         load_in_4bit=config.ENABLE_4BIT,
+         bnb_4bit_quant_type=config.QUANTIZATION_TYPE,
+         bnb_4bit_compute_dtype=torch.float16
+     )
+
+     return bnb_config
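For reference, the returned BitsAndBytesConfig can be inspected on its own; this is a minimal check, not part of the commit:

import quantization_utils

bnb_config = quantization_utils.load_bits_and_bytes_config()
# Weights load as 4-bit NF4 while matmuls are computed in float16.
print(bnb_config.load_in_4bit, bnb_config.bnb_4bit_quant_type, bnb_config.bnb_4bit_compute_dtype)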
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ torch
+ torchvision
+ trl
+ transformers
+ accelerate
+ peft
+ einops
+ datasets
+ bitsandbytes
+ scipy
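These dependencies install in the usual way (pip install -r requirements.txt); note that bitsandbytes 4-bit loading assumes a CUDA-capable GPU.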