DR-Rakshitha committed on
Commit
92e88c4
1 Parent(s): 39db6a3

create app.py

Files changed (1)
  1. app.py +188 -0
app.py ADDED
@@ -0,0 +1,188 @@
import gradio as gr
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

# The model that you want to train from the Hugging Face hub
model_name = "DR-DRR/Model_001"

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Number of training epochs
num_train_epochs = 0.1

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 4

# Batch size per GPU for evaluation
per_device_eval_batch_size = 4

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient norm (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule
lr_scheduler_type = "cosine"

# Number of training steps (overrides num_train_epochs)
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with the same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save a checkpoint every X update steps
save_steps = 0

# Log every X update steps
logging_steps = 25

################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on GPU 0
device_map = {"": 0}

# End of parameters
# Load the model

# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map,
)
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"  # Fix weird overflow issue with fp16 training

# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

# End model

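# Note: the TrainingArguments/SFTTrainer imports and the parameters above are
# never wired together in this file. A minimal sketch of how they could be
# combined for QLoRA fine-tuning is left commented out below; it assumes a
# hypothetical training dataset (loaded via load_dataset) with a "text" field,
# which this app does not define.
#
# dataset = load_dataset("<your-dataset>", split="train")
# training_arguments = TrainingArguments(
#     output_dir=output_dir,
#     num_train_epochs=num_train_epochs,
#     per_device_train_batch_size=per_device_train_batch_size,
#     gradient_accumulation_steps=gradient_accumulation_steps,
#     optim=optim,
#     save_steps=save_steps,
#     logging_steps=logging_steps,
#     learning_rate=learning_rate,
#     weight_decay=weight_decay,
#     fp16=fp16,
#     bf16=bf16,
#     max_grad_norm=max_grad_norm,
#     max_steps=max_steps,
#     warmup_ratio=warmup_ratio,
#     group_by_length=group_by_length,
#     lr_scheduler_type=lr_scheduler_type,
# )
# trainer = SFTTrainer(
#     model=model,
#     train_dataset=dataset,
#     peft_config=peft_config,
#     dataset_text_field="text",
#     max_seq_length=max_seq_length,
#     tokenizer=tokenizer,
#     args=training_arguments,
#     packing=packing,
# )
# trainer.train()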

# Specify the local path to the downloaded model file
# model_path = "wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin"

# Initialize the model using the local path
# model = GPT4All(model_path)

def generate_text(prompt):
    # result = model.generate(prompt)
    # return result
    logging.set_verbosity(logging.CRITICAL)
    # prompt = input()
    pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=100)
    # Wrap the prompt in the Llama-2 [INST] instruction format before generation
    result = pipe(f"<s>[INST] {prompt} [/INST]")
    output = result[0]['generated_text']
    return output

text_generation_interface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Input Text"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="Medibot Text Generation",
).launch()
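
# A hypothetical client-side call against this interface once the Space is
# running (assumes the gradio_client package and the Interface's default
# /predict route; the Space name and prompt are placeholders):
#
# from gradio_client import Client
# client = Client("DR-Rakshitha/<space-name>")
# print(client.predict("What are the symptoms of diabetes?", api_name="/predict"))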