tykiww committed on
Commit
df4fd98
1 Parent(s): f830874

Create modeling.py

Files changed (1)
  1. utilities/modeling.py +86 -0
utilities/modeling.py ADDED
@@ -0,0 +1,86 @@
+ from unsloth import FastLanguageModel
+ import torch
+ from trl import SFTTrainer
+ from transformers import TrainingArguments
+ from unsloth import is_bfloat16_supported
+
+
+ def load_model(model_name, max_seq_length):
+     dtype = None          # None lets Unsloth auto-detect (float16 or bfloat16)
+     load_in_4bit = True   # load the base model with 4-bit quantization
+
+     model, tokenizer = FastLanguageModel.from_pretrained(
+         model_name = model_name,
+         max_seq_length = max_seq_length,
+         dtype = dtype,
+         load_in_4bit = load_in_4bit,
+         # token = ""
+     )
+     return model, tokenizer
+
+
+ def get_peft(model, peft, max_seq_length, random_seed):
+
+     model = FastLanguageModel.get_peft_model(
+         model,
+         r = peft['r'],
+         target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
+                           "gate_proj", "up_proj", "down_proj",],
+         lora_alpha = peft['alpha'],
+         lora_dropout = peft['dropout'],
+         bias = peft['bias'],
+         use_gradient_checkpointing = "unsloth",
+         random_state = random_seed,
+         use_rslora = peft['rslora'],  # rank-stabilized LoRA
+         loftq_config = peft['loftq_config'],  # LoftQ
+     )
+     return model
+
+
+ def get_trainer(model, tokenizer, dataset, sft, data_field, max_seq_length, random_seed):
+
+     trainer = SFTTrainer(
+         model = model,
+         tokenizer = tokenizer,
+         train_dataset = dataset,
+         dataset_text_field = data_field,
+         max_seq_length = max_seq_length,
+         dataset_num_proc = 2,
+         packing = False,
+         args = TrainingArguments(
+             per_device_train_batch_size = sft['per_device_train_batch_size'],
+             gradient_accumulation_steps = sft['gradient_accumulation_steps'],
+             warmup_steps = sft['warmup_steps'],
+             num_train_epochs = sft['num_train_epochs'],  # epoch budget from the sft config
+             max_steps = sft['max_steps'],                # step budget from the sft config
+             learning_rate = sft['learning_rate'],
+             fp16 = not is_bfloat16_supported(),
+             bf16 = is_bfloat16_supported(),
+             logging_steps = sft['logging_steps'],
+             optim = sft['optim'],
+             weight_decay = sft['weight_decay'],
+             lr_scheduler_type = sft['lr_scheduler_type'],
+             seed = random_seed,
+             output_dir = "outputs",
+         ),
+     )
+     return trainer
+
+
+ def prepare_trainer(model_name, max_seq_length, random_seed,
+                     peft, sft, dataset, data_field):
+
+     print("Loading Model")
+     model, tokenizer = load_model(model_name, max_seq_length)
+
+     print("Preparing for PEFT")
+     model = get_peft(model, peft, max_seq_length, random_seed)
+
+     print("Getting Trainer Model")
+     trainer = get_trainer(model, tokenizer, dataset, sft, data_field, max_seq_length, random_seed)
+
+     return trainer
+
+ if __name__ == "__main__":
+     trainer = prepare_trainer()  # callers must supply the model name, seed, peft/sft configs, and dataset
+
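Usage sketch (illustrative, not authoritative): the dict keys below mirror the lookups in utilities/modeling.py; the model id, dataset, and hyperparameter values are placeholder assumptions.

from datasets import load_dataset
from utilities.modeling import prepare_trainer

# Illustrative LoRA settings; keys match what get_peft() reads.
peft_config = {
    "r": 16,
    "alpha": 16,
    "dropout": 0.0,
    "bias": "none",
    "rslora": False,
    "loftq_config": None,
}

# Illustrative training settings; keys match what get_trainer() reads.
sft_config = {
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "warmup_steps": 5,
    "num_train_epochs": 1,
    "max_steps": 60,
    "learning_rate": 2e-4,
    "logging_steps": 1,
    "optim": "adamw_8bit",
    "weight_decay": 0.01,
    "lr_scheduler_type": "linear",
}

# Placeholder dataset chosen because it exposes a raw "text" column.
dataset = load_dataset("imdb", split="train")

trainer = prepare_trainer(
    model_name="unsloth/llama-3-8b-bnb-4bit",  # placeholder Unsloth model id
    max_seq_length=2048,
    random_seed=3407,
    peft=peft_config,
    sft=sft_config,
    dataset=dataset,
    data_field="text",
)
trainer.train()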