Ashishkr committed
Commit
b2cb6fd
1 Parent(s): 5e7c6b0

Update README.md

Files changed (1): README.md (+107 -1)
README.md CHANGED
library_name: peft
pipeline_tag: text-generation
---

A llama-2-7b-hf model fine-tuned for medical consultation (a PEFT adapter on top of the base model). Runs on a T4 GPU (16 GB VRAM) as well as on a CPU (32 GB RAM).
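Both the GPU and CPU examples below pass a Hugging Face access token, since the meta-llama/Llama-2-7b-chat-hf base weights are gated. As a minimal sketch (assuming the huggingface_hub client; the token value is a placeholder), the token can instead be registered once per session:

```python
from huggingface_hub import login

# Placeholder token: create one at https://huggingface.co/settings/tokens
# after requesting access to the gated Llama 2 weights.
login(token="YOUR-HUGGINGFACE-ACCESS-TOKEN")
```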

**To run on GPU:**

  ```python
27
  import transformers
 
131
  print(response)
132
 
133
 
134
+ ```
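Before choosing the GPU path, it may help to confirm that the advertised 16 GB of VRAM is actually available. A small check using PyTorch's device query (not part of the original card):

```python
import torch

# Report the detected GPU and its total memory before loading the model.
if torch.cuda.is_available():
    props = torch.cuda.get_device_properties(0)
    print(f"{props.name}: {props.total_memory / 1024**3:.1f} GB VRAM")
else:
    print("No CUDA device found; use the CPU instructions below.")
```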

**To run on CPU:**

```python
import torch
import transformers
from torch import cuda
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

base_model_id = 'meta-llama/Llama-2-7b-chat-hf'

# Use the current GPU if one is available; otherwise fall back to CPU.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# Allow modules that do not fit on the GPU to stay on the CPU in fp32.
bnb_config = transformers.BitsAndBytesConfig(
    llm_int8_enable_fp32_cpu_offload=True
)

hf_auth = "YOUR-HUGGINGFACE-ACCESS-TOKEN"
model_config = transformers.AutoConfig.from_pretrained(
    base_model_id,
    use_auth_token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    base_model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    # device_map='auto',
    use_auth_token=hf_auth
)

# Attach the medical-consultation PEFT adapter to the base model.
config = PeftConfig.from_pretrained("Ashishkr/llama-2-medical-consultation")
model = PeftModel.from_pretrained(model, "Ashishkr/llama-2-medical-consultation").to(device)

model.eval()
print(f"Model loaded on {device}")

tokenizer = transformers.AutoTokenizer.from_pretrained(
    base_model_id,
    use_auth_token=hf_auth
)

def llama_generate(
    model: AutoModelForCausalLM,
    tokenizer: AutoTokenizer,
    prompt: str,
    max_new_tokens: int = 128,
    temperature: float = 0.92,
):
    inputs = tokenizer(
        [prompt],
        return_tensors="pt",
        return_token_type_ids=False,
    ).to(device)

    # Inference only: no gradients needed. Generation runs in full float32,
    # the safe default on CPU.
    with torch.no_grad():
        response = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,  # temperature only takes effect when sampling
            temperature=temperature,
            return_dict_in_generate=True,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )

    decoded_output = tokenizer.decode(
        response["sequences"][0],
        skip_special_tokens=True,
    )

    # Return only the newly generated text, stripping the echoed prompt.
    return decoded_output[len(prompt):]

prompt = """
instruction: "If you are a doctor, please answer the medical questions based on the patient's description." \n

input: "Hi, I had a subarachnoid bleed and coiling of brain aneurysm last year.
I am having some major bilateral temple pain along with numbness that comes and
goes in my left arm/hand/fingers. I have had headaches since the aneurysm,
but this is different. Also, my moods have been horrible for the past few weeks.\n

response: """

response = llama_generate(
    model,
    tokenizer,
    prompt,
    max_new_tokens=100,
    temperature=0.92,
)
print(response)
```
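The prompt uses an instruction / input / response template. A small helper, hypothetical rather than part of the model card, keeps that template consistent across queries; it reuses the model, tokenizer, and llama_generate defined above:

```python
def build_prompt(instruction: str, patient_description: str) -> str:
    """Assemble a prompt in the instruction/input/response template."""
    return (
        f'instruction: "{instruction}" \n\n'
        f'input: "{patient_description}"\n\n'
        "response: "
    )

# Example query; the patient description here is illustrative only.
prompt = build_prompt(
    "If you are a doctor, please answer the medical questions based on "
    "the patient's description.",
    "I have been having intermittent chest tightness when climbing stairs.",
)
response = llama_generate(model, tokenizer, prompt, max_new_tokens=100)
print(response)
```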