Commit b840e0a, committed by dfurman
1 parent: 471a5ef

Delete assets/basic_inference_llama_2_70b_dolphin.ipynb

assets/basic_inference_llama_2_70b_dolphin.ipynb DELETED
@@ -1,355 +0,0 @@
- {
-  "cells": [
-   {
-    "cell_type": "code",
-    "execution_count": 1,
-    "id": "6f46e840-8a7f-4be2-a082-49b9ebf5a8c5",
-    "metadata": {},
-    "outputs": [
-     {
-      "name": "stdout",
-      "output_type": "stream",
-      "text": [
-       "\n",
-       "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n",
-       "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n"
-      ]
-     }
-    ],
-    "source": [
-     "!pip install -q -U huggingface_hub peft transformers torch accelerate\n"
-    ]
-   },
-   {
-    "cell_type": "code",
-    "execution_count": 2,
-    "id": "2d2918a1-d701-4a66-946c-6f668cb4ac1e",
-    "metadata": {},
-    "outputs": [
-     {
-      "name": "stdout",
-      "output_type": "stream",
-      "text": [
-       "Mon Jul 24 21:41:13 2023       \n",
-       "+-----------------------------------------------------------------------------+\n",
-       "| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |\n",
-       "|-------------------------------+----------------------+----------------------+\n",
-       "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
-       "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
-       "|                               |                      |               MIG M. |\n",
-       "|===============================+======================+======================|\n",
-       "|   0  NVIDIA H100 PCIe    On   | 00000000:06:00.0 Off |                    0 |\n",
-       "| N/A   39C    P0    52W / 350W |      0MiB / 81559MiB |      0%      Default |\n",
-       "|                               |                      |             Disabled |\n",
-       "+-------------------------------+----------------------+----------------------+\n",
-       "                                                                               \n",
-       "+-----------------------------------------------------------------------------+\n",
-       "| Processes:                                                                  |\n",
-       "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n",
-       "|        ID   ID                                                   Usage      |\n",
-       "|=============================================================================|\n",
-       "|  No running processes found                                                 |\n",
-       "+-----------------------------------------------------------------------------+\n"
-      ]
-     }
-    ],
-    "source": [
-     "!nvidia-smi"
-    ]
-   },
-   {
-    "cell_type": "code",
-    "execution_count": 3,
-    "id": "0afdf8a6-ea7d-44ab-a1f9-a19e550e9dbd",
-    "metadata": {},
-    "outputs": [
-     {
-      "name": "stderr",
-      "output_type": "stream",
-      "text": [
-       "/home/ubuntu/.local/lib/python3.8/site-packages/pandas/core/computation/expressions.py:20: UserWarning: Pandas requires version '2.7.3' or newer of 'numexpr' (version '2.7.1' currently installed).\n",
-       "  from pandas.core.computation.check import NUMEXPR_INSTALLED\n"
-      ]
-     }
-    ],
-    "source": [
-     "import torch\n",
-     "from peft import PeftModel, PeftConfig\n",
-     "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig"
-    ]
-   },
-   {
-    "cell_type": "code",
-    "execution_count": 4,
-    "id": "adfcd11e-8d98-4cf3-abf4-e9fa933eb0d6",
-    "metadata": {},
-    "outputs": [
-     {
-      "data": {
-       "application/vnd.jupyter.widget-view+json": {
-        "model_id": "7dc80313fdcd41a5a7ee168956df3dd9",
-        "version_major": 2,
-        "version_minor": 0
-       },
-       "text/plain": [
-        "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
-       ]
-      },
-      "metadata": {},
-      "output_type": "display_data"
-     }
-    ],
-    "source": [
-     "from huggingface_hub import notebook_login\n",
-     "\n",
-     "notebook_login()"
-    ]
-   },
-   {
-    "cell_type": "code",
-    "execution_count": 5,
-    "id": "82cfa4fb-af16-4927-82c4-1fbf0fa84bfa",
-    "metadata": {},
-    "outputs": [
-     {
-      "name": "stderr",
-      "output_type": "stream",
-      "text": [
-       "/home/ubuntu/.local/lib/python3.8/site-packages/transformers/modeling_utils.py:2193: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers.\n",
-       "  warnings.warn(\n"
-      ]
-     },
-     {
-      "data": {
-       "application/vnd.jupyter.widget-view+json": {
-        "model_id": "d0f18088e32f4d4b857d2de5430528d4",
-        "version_major": 2,
-        "version_minor": 0
-       },
-       "text/plain": [
-        "Loading checkpoint shards:   0%|          | 0/15 [00:00<?, ?it/s]"
-       ]
-      },
-      "metadata": {},
-      "output_type": "display_data"
-     }
-    ],
-    "source": [
-     "# peft_model_id = \"results/checkpoint-12500\"\n",
-     "peft_model_id = \"dfurman/llama-2-70b-dolphin-peft\"\n",
-     "config = PeftConfig.from_pretrained(peft_model_id)\n",
-     "\n",
-     "bnb_config = BitsAndBytesConfig(\n",
-     "    load_in_4bit=True,\n",
-     "    bnb_4bit_quant_type=\"nf4\",\n",
-     "    bnb_4bit_compute_dtype=torch.bfloat16,\n",
-     ")\n",
-     "\n",
-     "model = AutoModelForCausalLM.from_pretrained(\n",
-     "    config.base_model_name_or_path,\n",
-     "    quantization_config=bnb_config,\n",
-     "    use_auth_token=True,\n",
-     "    torch_dtype=torch.bfloat16,\n",
-     "    device_map=\"auto\",\n",
-     ")\n",
-     "tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)\n",
-     "tokenizer.pad_token = tokenizer.eos_token\n",
-     "\n",
-     "# Load the Lora model\n",
-     "model = PeftModel.from_pretrained(model, peft_model_id)"
-    ]
-   },
-   {
-    "cell_type": "code",
-    "execution_count": 6,
-    "id": "d86f6a79-95f2-4e05-9bc7-3cbcbbbc9552",
-    "metadata": {},
-    "outputs": [],
-    "source": [
-     "# text generation function\n",
-     "\n",
-     "\n",
-     "def llama_generate(\n",
-     "    model: AutoModelForCausalLM,\n",
-     "    tokenizer: AutoTokenizer,\n",
-     "    prompt: str,\n",
-     "    max_new_tokens: int = 128,\n",
-     "    temperature: int = 1.0,\n",
-     ") -> str:\n",
-     "    \"\"\"\n",
-     "    Initialize the pipeline\n",
-     "    Uses Hugging Face GenerationConfig defaults\n",
-     "        https://huggingface.co/docs/transformers/v4.29.1/en/main_classes/text_generation#transformers.GenerationConfig\n",
-     "    Args:\n",
-     "        model (transformers.AutoModelForCausalLM): Falcon model for text generation\n",
-     "        tokenizer (transformers.AutoTokenizer): Tokenizer for model\n",
-     "        prompt (str): Prompt for text generation\n",
-     "        max_new_tokens (int, optional): Max new tokens after the prompt to generate. Defaults to 128.\n",
-     "        temperature (float, optional): The value used to modulate the next token probabilities.\n",
-     "            Defaults to 1.0\n",
-     "    \"\"\"\n",
-     "    device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
-     "\n",
-     "    inputs = tokenizer(\n",
-     "        [prompt],\n",
-     "        return_tensors=\"pt\",\n",
-     "        return_token_type_ids=False,\n",
-     "    ).to(\n",
-     "        device\n",
-     "    )  # tokenize inputs, load on device\n",
-     "\n",
-     "    # when running Torch modules in lower precision, it is best practice to use the torch.autocast context manager.\n",
-     "    with torch.autocast(\"cuda\", dtype=torch.bfloat16):\n",
-     "        response = model.generate(\n",
-     "            **inputs,\n",
-     "            max_new_tokens=max_new_tokens,\n",
-     "            temperature=temperature,\n",
-     "            return_dict_in_generate=True,\n",
-     "            eos_token_id=tokenizer.eos_token_id,\n",
-     "            pad_token_id=tokenizer.pad_token_id,\n",
-     "        )\n",
-     "\n",
-     "    decoded_output = tokenizer.decode(\n",
-     "        response[\"sequences\"][0],\n",
-     "        skip_special_tokens=True,\n",
-     "    )  # grab output in natural language\n",
-     "\n",
-     "    return decoded_output[len(prompt) :]  # remove prompt from output"
-    ]
-   },
-   {
-    "cell_type": "code",
-    "execution_count": 7,
-    "id": "28be263a-dd15-419f-a67e-7ca05b27435f",
-    "metadata": {},
-    "outputs": [
-     {
-      "name": "stdout",
-      "output_type": "stream",
-      "text": [
-       "Sure! Here's a delicious and easy vegan banana bread recipe:\n",
-       "\n",
-       "Ingredients:\n",
-       "- 2 cups all-purpose flour\n",
-       "- 1/2 cup sugar\n",
-       "- 1/2 cup vegan butter (such as Earth Balance)\n",
-       "- 1/2 cup vegan milk (such as almond milk)\n",
-       "- 1/2 cup unsweetened applesauce\n",
-       "- 1/2 cup mashed ripe bananas (about 2 medium bananas)\n",
-       "- 1 teaspoon baking soda\n",
-       "- 1/2 teaspoon salt\n",
-       "- 1/2 teaspoon ground cinnamon\n",
-       "- 1/2 teaspoon ground nutmeg\n",
-       "- 1/2 teaspoon ground cloves\n",
-       "- 1/2 cup chopped walnuts (optional)\n",
-       "\n",
-       "Instructions:\n",
-       "1. Preheat the oven to 350°F (175°C). Grease a 9x5-inch loaf pan with vegan butter or cooking spray.\n",
-       "2. In a large bowl, mix together the flour, sugar, vegan butter, vegan milk, applesauce, bananas, baking soda, salt, cinnamon, nutmeg, and cloves. Stir until well combined.\n",
-       "3. Fold in the chopped walnuts, if using.\n",
-       "4. Pour the batter into the prepared loaf pan.\n",
-       "5. Bake for 50-60 minutes, or until a toothpick inserted into the center of the bread comes out clean.\n",
-       "6. Let the bread cool in the pan for 10 minutes before transferring it to a wire rack to cool completely.\n",
-       "7. Slice and enjoy!\n",
-       "\n",
-       "Note: You can also add chocolate chips, dried fruit, or other mix-ins to the batter for extra flavor and texture. Enjoy your vegan banana bread!\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n",
-       "\n"
-      ]
-     }
-    ],
-    "source": [
-     "prompt = \"You are a helpful assistant. Tell me a recipe for vegan banana bread.\\n\"\n",
-     "\n",
-     "response = llama_generate(\n",
-     "    model,\n",
-     "    tokenizer,\n",
-     "    prompt,\n",
-     "    max_new_tokens=500,\n",
-     "    temperature=0.92,\n",
-     ")\n",
-     "\n",
-     "print(response)"
-    ]
-   },
-   {
-    "cell_type": "code",
-    "execution_count": null,
-    "id": "3625b3ff-6467-43ea-8557-9541934539ec",
-    "metadata": {},
-    "outputs": [],
-    "source": []
-   }
-  ],
-  "metadata": {
-   "kernelspec": {
-    "display_name": "Python 3",
-    "language": "python",
-    "name": "python3"
-   },
-   "language_info": {
-    "codemirror_mode": {
-     "name": "ipython",
-     "version": 3
-    },
-    "file_extension": ".py",
-    "mimetype": "text/x-python",
-    "name": "python",
-    "nbconvert_exporter": "python",
-    "pygments_lexer": "ipython3",
-    "version": "3.8.10"
-   }
-  },
-  "nbformat": 4,
-  "nbformat_minor": 5
- }
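
For anyone who needs the deleted notebook's workflow after this commit, its loading cells condense into the short script below. This is a minimal sketch, not the repo's official usage: it assumes the adapter `dfurman/llama-2-70b-dolphin-peft` is still published, and it swaps the deprecated `use_auth_token=True` (flagged by the FutureWarning in the notebook's own output) for the newer `token=True` argument.

```python
import torch
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

peft_model_id = "dfurman/llama-2-70b-dolphin-peft"  # adapter repo used in the notebook
config = PeftConfig.from_pretrained(peft_model_id)

# 4-bit NF4 quantization with bfloat16 compute, as in the deleted loading cell
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,  # base weights recorded in the adapter config
    quantization_config=bnb_config,
    token=True,  # assumption: replaces the deprecated use_auth_token=True
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token  # Llama 2 ships without a pad token

# Apply the LoRA adapter on top of the quantized base model
model = PeftModel.from_pretrained(model, peft_model_id)
```

The 4-bit NF4 load is what lets a 70B base model fit on the single 80 GB H100 shown in the notebook's `nvidia-smi` output.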
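The notebook's `llama_generate` helper also deserves small corrections if it is revived elsewhere: `temperature` was annotated as `int` despite its `1.0` default, the docstring described a Falcon model rather than Llama 2, and `generate` received a temperature without `do_sample=True`, so the final cell's `temperature=0.92` had no effect under the default greedy decoding. A hedged rewrite, continuing from the loading sketch above (`do_sample=True` is my addition, not part of the deleted code):

```python
def llama_generate(
    model,
    tokenizer,
    prompt: str,
    max_new_tokens: int = 128,
    temperature: float = 1.0,  # float, not int as annotated in the notebook
) -> str:
    """Generate a continuation of `prompt` and return only the new text."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    inputs = tokenizer(
        [prompt], return_tensors="pt", return_token_type_ids=False
    ).to(device)

    # autocast keeps the forward passes in bfloat16 during generation
    with torch.autocast("cuda", dtype=torch.bfloat16):
        output = model.generate(
            **inputs,
            do_sample=True,  # added: temperature is ignored under greedy decoding
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            return_dict_in_generate=True,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )

    decoded = tokenizer.decode(output["sequences"][0], skip_special_tokens=True)
    return decoded[len(prompt):]  # strip the echoed prompt


# Usage, mirroring the notebook's final executed cell
print(
    llama_generate(
        model,
        tokenizer,
        "You are a helpful assistant. Tell me a recipe for vegan banana bread.\n",
        max_new_tokens=500,
        temperature=0.92,
    )
)
```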