huseinzol05 committed on
Commit
352e48e
1 Parent(s): 7235673

Upload autoawq-mistral-7b.ipynb

Browse files
Files changed (1) hide show
  1. autoawq-mistral-7b.ipynb +444 -0
autoawq-mistral-7b.ipynb ADDED
@@ -0,0 +1,444 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "19fe0df6",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "# !pip3 install https://github.com/casper-hansen/AutoAWQ/releases/download/v0.1.8/autoawq-0.1.8+cu118-cp310-cp310-linux_x86_64.whl"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 2,
16
+ "id": "47528ce3",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "import os\n",
21
+ "\n",
22
+ "os.environ['CUDA_VISIBLE_DEVICES'] = '1'"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "code",
27
+ "execution_count": 3,
28
+ "id": "20861f3e",
29
+ "metadata": {},
30
+ "outputs": [],
31
+ "source": [
32
+ "from awq import AutoAWQForCausalLM\n",
33
+ "from transformers import AutoConfig, AwqConfig, AutoTokenizer, AutoModelForCausalLM\n",
34
+ "import torch\n",
35
+ "\n",
36
+ "model_path = 'mesolitica/malaysian-mistral-7b-32k-instructions-v4'"
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": 4,
42
+ "id": "9939ad4e",
43
+ "metadata": {
44
+ "scrolled": true
45
+ },
46
+ "outputs": [
47
+ {
48
+ "data": {
49
+ "application/vnd.jupyter.widget-view+json": {
50
+ "model_id": "21aaf06156a0404fab259a4b45a92d6a",
51
+ "version_major": 2,
52
+ "version_minor": 0
53
+ },
54
+ "text/plain": [
55
+ "Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]"
56
+ ]
57
+ },
58
+ "metadata": {},
59
+ "output_type": "display_data"
60
+ }
61
+ ],
62
+ "source": [
63
+ "model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype = torch.bfloat16)"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 5,
69
+ "id": "fdb86f50",
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": [
73
+ "!rm -rf test2"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": 6,
79
+ "id": "72e76288",
80
+ "metadata": {},
81
+ "outputs": [],
82
+ "source": [
83
+ "model.save_pretrained('./test2', safe_serialization = False)"
84
+ ]
85
+ },
86
+ {
87
+ "cell_type": "code",
88
+ "execution_count": 7,
89
+ "id": "aa245150",
90
+ "metadata": {},
91
+ "outputs": [
92
+ {
93
+ "data": {
94
+ "application/vnd.jupyter.widget-view+json": {
95
+ "model_id": "97bc7ca67ff84ab5b302c7720ec2d0d7",
96
+ "version_major": 2,
97
+ "version_minor": 0
98
+ },
99
+ "text/plain": [
100
+ "Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]"
101
+ ]
102
+ },
103
+ "metadata": {},
104
+ "output_type": "display_data"
105
+ }
106
+ ],
107
+ "source": [
108
+ "model = AutoAWQForCausalLM.from_pretrained('./test2')"
109
+ ]
110
+ },
111
+ {
112
+ "cell_type": "code",
113
+ "execution_count": 8,
114
+ "id": "d3949cf4",
115
+ "metadata": {},
116
+ "outputs": [
117
+ {
118
+ "data": {
119
+ "application/vnd.jupyter.widget-view+json": {
120
+ "model_id": "15a4bf485ecc475ca011da02fe421e8b",
121
+ "version_major": 2,
122
+ "version_minor": 0
123
+ },
124
+ "text/plain": [
125
+ "Downloading data: 0%| | 0.00/470M [00:00<?, ?B/s]"
126
+ ]
127
+ },
128
+ "metadata": {},
129
+ "output_type": "display_data"
130
+ },
131
+ {
132
+ "data": {
133
+ "application/vnd.jupyter.widget-view+json": {
134
+ "model_id": "d006dc3bcd834c3d8bf89f20e079d83e",
135
+ "version_major": 2,
136
+ "version_minor": 0
137
+ },
138
+ "text/plain": [
139
+ "Generating train split: 0 examples [00:00, ? examples/s]"
140
+ ]
141
+ },
142
+ "metadata": {},
143
+ "output_type": "display_data"
144
+ },
145
+ {
146
+ "name": "stderr",
147
+ "output_type": "stream",
148
+ "text": [
149
+ "AWQ: 100%|██████████| 32/32 [09:49<00:00, 18.41s/it]\n"
150
+ ]
151
+ }
152
+ ],
153
+ "source": [
154
+ "quant_path = 'malaysian-mistral-7b-32k-instructions-v4-awq'\n",
155
+ "quant_config = { \"zero_point\": True, \"q_group_size\": 128, \"w_bit\": 4, \"version\": \"GEMM\" }\n",
156
+ "\n",
157
+ "tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)\n",
158
+ "model.quantize(tokenizer, quant_config=quant_config, calib_data = 'mesolitica/malaysian-calibration')"
159
+ ]
160
+ },
161
+ {
162
+ "cell_type": "code",
163
+ "execution_count": 9,
164
+ "id": "ee290c1e",
165
+ "metadata": {},
166
+ "outputs": [
167
+ {
168
+ "name": "stderr",
169
+ "output_type": "stream",
170
+ "text": [
171
+ "WARNING:root:`quant_config.json` is being deprecated in the future in favor of quantization_config in config.json.\n"
172
+ ]
173
+ },
174
+ {
175
+ "data": {
176
+ "text/plain": [
177
+ "('malaysian-mistral-7b-32k-instructions-v4-awq/tokenizer_config.json',\n",
178
+ " 'malaysian-mistral-7b-32k-instructions-v4-awq/special_tokens_map.json',\n",
179
+ " 'malaysian-mistral-7b-32k-instructions-v4-awq/tokenizer.json')"
180
+ ]
181
+ },
182
+ "execution_count": 9,
183
+ "metadata": {},
184
+ "output_type": "execute_result"
185
+ }
186
+ ],
187
+ "source": [
188
+ "model.save_quantized(quant_path, safetensors = False)\n",
189
+ "tokenizer.save_pretrained(quant_path)"
190
+ ]
191
+ },
192
+ {
193
+ "cell_type": "code",
194
+ "execution_count": 10,
195
+ "id": "737f2403",
196
+ "metadata": {},
197
+ "outputs": [
198
+ {
199
+ "data": {
200
+ "text/plain": [
201
+ "CommitInfo(commit_url='https://huggingface.co/mesolitica/malaysian-mistral-7b-32k-instructions-v4-AWQ/commit/cba1704e3977bd29352015ee3b4c2a26efa17666', commit_message='Upload tokenizer', commit_description='', oid='cba1704e3977bd29352015ee3b4c2a26efa17666', pr_url=None, pr_revision=None, pr_num=None)"
202
+ ]
203
+ },
204
+ "execution_count": 10,
205
+ "metadata": {},
206
+ "output_type": "execute_result"
207
+ }
208
+ ],
209
+ "source": [
210
+ "tokenizer.push_to_hub('mesolitica/malaysian-mistral-7b-32k-instructions-v4-AWQ')"
211
+ ]
212
+ },
213
+ {
214
+ "cell_type": "code",
215
+ "execution_count": 11,
216
+ "id": "ed92c8ee",
217
+ "metadata": {},
218
+ "outputs": [
219
+ {
220
+ "data": {
221
+ "text/plain": [
222
+ "CommitInfo(commit_url='https://huggingface.co/mesolitica/malaysian-mistral-7b-32k-instructions-v4-AWQ/commit/e72805b82ace8600987f5bae5e336b336d9cd7d0', commit_message='Upload config', commit_description='', oid='e72805b82ace8600987f5bae5e336b336d9cd7d0', pr_url=None, pr_revision=None, pr_num=None)"
223
+ ]
224
+ },
225
+ "execution_count": 11,
226
+ "metadata": {},
227
+ "output_type": "execute_result"
228
+ }
229
+ ],
230
+ "source": [
231
+ "quantization_config = AwqConfig(\n",
232
+ " bits=quant_config['w_bit'],\n",
233
+ " group_size=quant_config['q_group_size'],\n",
234
+ " zero_point=quant_config['zero_point'],\n",
235
+ " backend='autoawq',\n",
236
+ " version=quant_config['version'].lower(),\n",
237
+ ")\n",
238
+ "\n",
239
+ "config = AutoConfig.from_pretrained(model_path)\n",
240
+ "config.quantization_config = quantization_config\n",
241
+ "\n",
242
+ "config.push_to_hub('mesolitica/malaysian-mistral-7b-32k-instructions-v4-AWQ')"
243
+ ]
244
+ },
245
+ {
246
+ "cell_type": "code",
247
+ "execution_count": 12,
248
+ "id": "c74b2f45",
249
+ "metadata": {},
250
+ "outputs": [
251
+ {
252
+ "name": "stdout",
253
+ "output_type": "stream",
254
+ "text": [
255
+ "config.json\t\tquant_config.json\t tokenizer_config.json\r\n",
256
+ "generation_config.json\tspecial_tokens_map.json\r\n",
257
+ "pytorch_model.bin\ttokenizer.json\r\n"
258
+ ]
259
+ },
260
+ {
261
+ "name": "stderr",
262
+ "output_type": "stream",
263
+ "text": [
264
+ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
265
+ "To disable this warning, you can either:\n",
266
+ "\t- Avoid using `tokenizers` before the fork if possible\n",
267
+ "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
268
+ ]
269
+ }
270
+ ],
271
+ "source": [
272
+ "!ls malaysian-mistral-7b-32k-instructions-v4-awq"
273
+ ]
274
+ },
275
+ {
276
+ "cell_type": "code",
277
+ "execution_count": 13,
278
+ "id": "2e0fb591",
279
+ "metadata": {},
280
+ "outputs": [],
281
+ "source": [
282
+ "from huggingface_hub import HfApi\n",
283
+ "\n",
284
+ "api = HfApi()"
285
+ ]
286
+ },
287
+ {
288
+ "cell_type": "code",
289
+ "execution_count": 14,
290
+ "id": "dd06cfa2",
291
+ "metadata": {},
292
+ "outputs": [
293
+ {
294
+ "data": {
295
+ "application/vnd.jupyter.widget-view+json": {
296
+ "model_id": "b38f5cc50f58466fa996e890a43b14a3",
297
+ "version_major": 2,
298
+ "version_minor": 0
299
+ },
300
+ "text/plain": [
301
+ "pytorch_model.bin: 0%| | 0.00/4.15G [00:00<?, ?B/s]"
302
+ ]
303
+ },
304
+ "metadata": {},
305
+ "output_type": "display_data"
306
+ },
307
+ {
308
+ "data": {
309
+ "text/plain": [
310
+ "CommitInfo(commit_url='https://huggingface.co/mesolitica/malaysian-mistral-7b-32k-instructions-v4-AWQ/commit/be94bbf12947f371acbc8ebb374f1340f3a308cd', commit_message='Upload pytorch_model.bin with huggingface_hub', commit_description='', oid='be94bbf12947f371acbc8ebb374f1340f3a308cd', pr_url=None, pr_revision=None, pr_num=None)"
311
+ ]
312
+ },
313
+ "execution_count": 14,
314
+ "metadata": {},
315
+ "output_type": "execute_result"
316
+ }
317
+ ],
318
+ "source": [
319
+ "api.upload_file(\n",
320
+ " path_or_fileobj='malaysian-mistral-7b-32k-instructions-v4-awq/pytorch_model.bin',\n",
321
+ " path_in_repo=\"pytorch_model.bin\",\n",
322
+ " repo_id='mesolitica/malaysian-mistral-7b-32k-instructions-v4-AWQ',\n",
323
+ " repo_type=\"model\",\n",
324
+ ")"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "code",
329
+ "execution_count": 15,
330
+ "id": "1383ff2c",
331
+ "metadata": {},
332
+ "outputs": [
333
+ {
334
+ "data": {
335
+ "text/plain": [
336
+ "CommitInfo(commit_url='https://huggingface.co/mesolitica/malaysian-mistral-7b-32k-instructions-v4-AWQ/commit/7235673cad93ca30015d0b1d66f96f658af70f2c', commit_message='Upload quant_config.json with huggingface_hub', commit_description='', oid='7235673cad93ca30015d0b1d66f96f658af70f2c', pr_url=None, pr_revision=None, pr_num=None)"
337
+ ]
338
+ },
339
+ "execution_count": 15,
340
+ "metadata": {},
341
+ "output_type": "execute_result"
342
+ }
343
+ ],
344
+ "source": [
345
+ "api.upload_file(\n",
346
+ " path_or_fileobj='malaysian-mistral-7b-32k-instructions-v4-awq/quant_config.json',\n",
347
+ " path_in_repo=\"quant_config.json\",\n",
348
+ " repo_id='mesolitica/malaysian-mistral-7b-32k-instructions-v4-AWQ',\n",
349
+ " repo_type=\"model\",\n",
350
+ ")"
351
+ ]
352
+ },
353
+ {
354
+ "cell_type": "code",
355
+ "execution_count": null,
356
+ "id": "5852ec02",
357
+ "metadata": {},
358
+ "outputs": [
359
+ {
360
+ "name": "stderr",
361
+ "output_type": "stream",
362
+ "text": [
363
+ "You have loaded an AWQ model on CPU and have a CUDA device available, make sure to set your model on a GPU device in order to run your model.\n"
364
+ ]
365
+ },
366
+ {
367
+ "data": {
368
+ "application/vnd.jupyter.widget-view+json": {
369
+ "model_id": "2f96880b405a46f48665f6b5ff9ca39e",
370
+ "version_major": 2,
371
+ "version_minor": 0
372
+ },
373
+ "text/plain": [
374
+ "pytorch_model.bin: 0%| | 0.00/4.15G [00:00<?, ?B/s]"
375
+ ]
376
+ },
377
+ "metadata": {},
378
+ "output_type": "display_data"
379
+ }
380
+ ],
381
+ "source": [
382
+ "quantized_model = AutoModelForCausalLM.from_pretrained('mesolitica/malaysian-mistral-7b-32k-instructions-v4-AWQ')\n",
383
+ "_ = quantized_model.cuda()"
384
+ ]
385
+ },
386
+ {
387
+ "cell_type": "code",
388
+ "execution_count": null,
389
+ "id": "66895e20",
390
+ "metadata": {},
391
+ "outputs": [],
392
+ "source": [
393
+ "messages = [\n",
394
+ " {'role': 'user', 'content': 'KWSP tu apa'}\n",
395
+ "]\n",
396
+ "prompt = tokenizer.apply_chat_template(messages, tokenize = False)\n",
397
+ "inputs = tokenizer([prompt], return_tensors='pt', add_special_tokens=False).to('cuda')"
398
+ ]
399
+ },
400
+ {
401
+ "cell_type": "code",
402
+ "execution_count": null,
403
+ "id": "4b320f33",
404
+ "metadata": {},
405
+ "outputs": [],
406
+ "source": [
407
+ "%%time\n",
408
+ "\n",
409
+ "generate_kwargs = dict(\n",
410
+ " inputs,\n",
411
+ " max_new_tokens=100,\n",
412
+ " top_p=0.95,\n",
413
+ " top_k=50,\n",
414
+ " temperature=0.9,\n",
415
+ " do_sample=True,\n",
416
+ " num_beams=1,\n",
417
+ ")\n",
418
+ "r = quantized_model.generate(**generate_kwargs)\n",
419
+ "tokenizer.decode(r[0])"
420
+ ]
421
+ }
422
+ ],
423
+ "metadata": {
424
+ "kernelspec": {
425
+ "display_name": "Python 3 (ipykernel)",
426
+ "language": "python",
427
+ "name": "python3"
428
+ },
429
+ "language_info": {
430
+ "codemirror_mode": {
431
+ "name": "ipython",
432
+ "version": 3
433
+ },
434
+ "file_extension": ".py",
435
+ "mimetype": "text/x-python",
436
+ "name": "python",
437
+ "nbconvert_exporter": "python",
438
+ "pygments_lexer": "ipython3",
439
+ "version": "3.10.12"
440
+ }
441
+ },
442
+ "nbformat": 4,
443
+ "nbformat_minor": 5
444
+ }