huseinzol05 commited on
Commit
fc620df
1 Parent(s): eb269f1

Upload fix-llama3-instruct.ipynb

Browse files
Files changed (1) hide show
  1. fix-llama3-instruct.ipynb +445 -0
fix-llama3-instruct.ipynb ADDED
@@ -0,0 +1,445 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "7ee919d4",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import os\n",
11
+ "\n",
12
+ "os.environ['CUDA_VISIBLE_DEVICES'] = ''\n",
13
+ "os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": 2,
19
+ "id": "da0b7b04",
20
+ "metadata": {},
21
+ "outputs": [],
22
+ "source": [
23
+ "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
24
+ "import torch"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": 3,
30
+ "id": "97b4ac94",
31
+ "metadata": {},
32
+ "outputs": [
33
+ {
34
+ "name": "stderr",
35
+ "output_type": "stream",
36
+ "text": [
37
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
38
+ ]
39
+ }
40
+ ],
41
+ "source": [
42
+ "tokenizer = AutoTokenizer.from_pretrained('meta-llama/Meta-Llama-3-8B-Instruct')"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": 4,
48
+ "id": "b9c98dca",
49
+ "metadata": {},
50
+ "outputs": [
51
+ {
52
+ "data": {
53
+ "application/vnd.jupyter.widget-view+json": {
54
+ "model_id": "689cdd6ffd264f1c88f0ed5f0ab092b7",
55
+ "version_major": 2,
56
+ "version_minor": 0
57
+ },
58
+ "text/plain": [
59
+ "tokenizer_config.json: 0%| | 0.00/50.9k [00:00<?, ?B/s]"
60
+ ]
61
+ },
62
+ "metadata": {},
63
+ "output_type": "display_data"
64
+ },
65
+ {
66
+ "data": {
67
+ "application/vnd.jupyter.widget-view+json": {
68
+ "model_id": "31fd6462bb704cee814751578090f928",
69
+ "version_major": 2,
70
+ "version_minor": 0
71
+ },
72
+ "text/plain": [
73
+ "tokenizer.json: 0%| | 0.00/9.08M [00:00<?, ?B/s]"
74
+ ]
75
+ },
76
+ "metadata": {},
77
+ "output_type": "display_data"
78
+ },
79
+ {
80
+ "data": {
81
+ "application/vnd.jupyter.widget-view+json": {
82
+ "model_id": "9ac644cb13c2482fafa223d2e6fea9c1",
83
+ "version_major": 2,
84
+ "version_minor": 0
85
+ },
86
+ "text/plain": [
87
+ "special_tokens_map.json: 0%| | 0.00/73.0 [00:00<?, ?B/s]"
88
+ ]
89
+ },
90
+ "metadata": {},
91
+ "output_type": "display_data"
92
+ },
93
+ {
94
+ "name": "stderr",
95
+ "output_type": "stream",
96
+ "text": [
97
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
98
+ ]
99
+ }
100
+ ],
101
+ "source": [
102
+ "fix_tokenizer = AutoTokenizer.from_pretrained('philschmid/meta-llama-3-tokenizer')"
103
+ ]
104
+ },
105
+ {
106
+ "cell_type": "code",
107
+ "execution_count": 5,
108
+ "id": "c7f8cef4",
109
+ "metadata": {},
110
+ "outputs": [
111
+ {
112
+ "data": {
113
+ "text/plain": [
114
+ "128009"
115
+ ]
116
+ },
117
+ "execution_count": 5,
118
+ "metadata": {},
119
+ "output_type": "execute_result"
120
+ }
121
+ ],
122
+ "source": [
123
+ "tokenizer.eos_token = '<|eot_id|>'\n",
124
+ "tokenizer.eos_token_id"
125
+ ]
126
+ },
127
+ {
128
+ "cell_type": "code",
129
+ "execution_count": 6,
130
+ "id": "8e518761",
131
+ "metadata": {},
132
+ "outputs": [],
133
+ "source": [
134
+ "tokenizer.chat_template = fix_tokenizer.chat_template"
135
+ ]
136
+ },
137
+ {
138
+ "cell_type": "code",
139
+ "execution_count": 8,
140
+ "id": "5eda44ec",
141
+ "metadata": {},
142
+ "outputs": [
143
+ {
144
+ "data": {
145
+ "application/vnd.jupyter.widget-view+json": {
146
+ "model_id": "7276d6ee57214ca7ad20d2348da0bb34",
147
+ "version_major": 2,
148
+ "version_minor": 0
149
+ },
150
+ "text/plain": [
151
+ "config.json: 0%| | 0.00/654 [00:00<?, ?B/s]"
152
+ ]
153
+ },
154
+ "metadata": {},
155
+ "output_type": "display_data"
156
+ },
157
+ {
158
+ "data": {
159
+ "application/vnd.jupyter.widget-view+json": {
160
+ "model_id": "b7b776502fc04ee1b0f86ddfc0c1850d",
161
+ "version_major": 2,
162
+ "version_minor": 0
163
+ },
164
+ "text/plain": [
165
+ "model.safetensors.index.json: 0%| | 0.00/23.9k [00:00<?, ?B/s]"
166
+ ]
167
+ },
168
+ "metadata": {},
169
+ "output_type": "display_data"
170
+ },
171
+ {
172
+ "data": {
173
+ "application/vnd.jupyter.widget-view+json": {
174
+ "model_id": "d045e75e2416459881d0117ec51e31b3",
175
+ "version_major": 2,
176
+ "version_minor": 0
177
+ },
178
+ "text/plain": [
179
+ "Downloading shards: 0%| | 0/4 [00:00<?, ?it/s]"
180
+ ]
181
+ },
182
+ "metadata": {},
183
+ "output_type": "display_data"
184
+ },
185
+ {
186
+ "data": {
187
+ "application/vnd.jupyter.widget-view+json": {
188
+ "model_id": "6496935aaea94475a0d76551f6d7dc7b",
189
+ "version_major": 2,
190
+ "version_minor": 0
191
+ },
192
+ "text/plain": [
193
+ "model-00001-of-00004.safetensors: 0%| | 0.00/4.98G [00:00<?, ?B/s]"
194
+ ]
195
+ },
196
+ "metadata": {},
197
+ "output_type": "display_data"
198
+ },
199
+ {
200
+ "data": {
201
+ "application/vnd.jupyter.widget-view+json": {
202
+ "model_id": "9ffb8efad27c4af8bd3079b6e489f925",
203
+ "version_major": 2,
204
+ "version_minor": 0
205
+ },
206
+ "text/plain": [
207
+ "model-00002-of-00004.safetensors: 0%| | 0.00/5.00G [00:00<?, ?B/s]"
208
+ ]
209
+ },
210
+ "metadata": {},
211
+ "output_type": "display_data"
212
+ },
213
+ {
214
+ "data": {
215
+ "application/vnd.jupyter.widget-view+json": {
216
+ "model_id": "1cf61390b28041d086e8670c46f4104e",
217
+ "version_major": 2,
218
+ "version_minor": 0
219
+ },
220
+ "text/plain": [
221
+ "model-00003-of-00004.safetensors: 0%| | 0.00/4.92G [00:00<?, ?B/s]"
222
+ ]
223
+ },
224
+ "metadata": {},
225
+ "output_type": "display_data"
226
+ },
227
+ {
228
+ "data": {
229
+ "application/vnd.jupyter.widget-view+json": {
230
+ "model_id": "c7a43a747cc44617894cef08e0ab8e19",
231
+ "version_major": 2,
232
+ "version_minor": 0
233
+ },
234
+ "text/plain": [
235
+ "model-00004-of-00004.safetensors: 0%| | 0.00/1.17G [00:00<?, ?B/s]"
236
+ ]
237
+ },
238
+ "metadata": {},
239
+ "output_type": "display_data"
240
+ },
241
+ {
242
+ "data": {
243
+ "application/vnd.jupyter.widget-view+json": {
244
+ "model_id": "f4c37598b2e845a2bd5566d37dbab299",
245
+ "version_major": 2,
246
+ "version_minor": 0
247
+ },
248
+ "text/plain": [
249
+ "Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s]"
250
+ ]
251
+ },
252
+ "metadata": {},
253
+ "output_type": "display_data"
254
+ },
255
+ {
256
+ "data": {
257
+ "application/vnd.jupyter.widget-view+json": {
258
+ "model_id": "37879d07cc1d4c15b45735d27eb48dc6",
259
+ "version_major": 2,
260
+ "version_minor": 0
261
+ },
262
+ "text/plain": [
263
+ "generation_config.json: 0%| | 0.00/187 [00:00<?, ?B/s]"
264
+ ]
265
+ },
266
+ "metadata": {},
267
+ "output_type": "display_data"
268
+ }
269
+ ],
270
+ "source": [
271
+ "model = AutoModelForCausalLM.from_pretrained('meta-llama/Meta-Llama-3-8B-Instruct', torch_dtype=torch.bfloat16)"
272
+ ]
273
+ },
274
+ {
275
+ "cell_type": "code",
276
+ "execution_count": 9,
277
+ "id": "a211b312",
278
+ "metadata": {},
279
+ "outputs": [
280
+ {
281
+ "data": {
282
+ "text/plain": [
283
+ "GenerationConfig {\n",
284
+ " \"bos_token_id\": 128000,\n",
285
+ " \"do_sample\": true,\n",
286
+ " \"eos_token_id\": 128009,\n",
287
+ " \"max_length\": 4096,\n",
288
+ " \"temperature\": 0.6,\n",
289
+ " \"top_p\": 0.9\n",
290
+ "}"
291
+ ]
292
+ },
293
+ "execution_count": 9,
294
+ "metadata": {},
295
+ "output_type": "execute_result"
296
+ }
297
+ ],
298
+ "source": [
299
+ "model.generation_config.eos_token_id = tokenizer.eos_token_id\n",
300
+ "model.generation_config"
301
+ ]
302
+ },
303
+ {
304
+ "cell_type": "code",
305
+ "execution_count": 10,
306
+ "id": "171423c0",
307
+ "metadata": {},
308
+ "outputs": [
309
+ {
310
+ "data": {
311
+ "application/vnd.jupyter.widget-view+json": {
312
+ "model_id": "93ae481de7784f91a689f83cfec8aed1",
313
+ "version_major": 2,
314
+ "version_minor": 0
315
+ },
316
+ "text/plain": [
317
+ "model-00003-of-00004.safetensors: 0%| | 0.00/4.92G [00:00<?, ?B/s]"
318
+ ]
319
+ },
320
+ "metadata": {},
321
+ "output_type": "display_data"
322
+ },
323
+ {
324
+ "data": {
325
+ "application/vnd.jupyter.widget-view+json": {
326
+ "model_id": "6d1ce6dfbf4940099de2c65d407500bb",
327
+ "version_major": 2,
328
+ "version_minor": 0
329
+ },
330
+ "text/plain": [
331
+ "model-00001-of-00004.safetensors: 0%| | 0.00/4.98G [00:00<?, ?B/s]"
332
+ ]
333
+ },
334
+ "metadata": {},
335
+ "output_type": "display_data"
336
+ },
337
+ {
338
+ "data": {
339
+ "application/vnd.jupyter.widget-view+json": {
340
+ "model_id": "56f0a27466d942da9ee3b32ce59d274b",
341
+ "version_major": 2,
342
+ "version_minor": 0
343
+ },
344
+ "text/plain": [
345
+ "model-00004-of-00004.safetensors: 0%| | 0.00/1.17G [00:00<?, ?B/s]"
346
+ ]
347
+ },
348
+ "metadata": {},
349
+ "output_type": "display_data"
350
+ },
351
+ {
352
+ "data": {
353
+ "application/vnd.jupyter.widget-view+json": {
354
+ "model_id": "5542c5c6796742aa8e2ad671373f989d",
355
+ "version_major": 2,
356
+ "version_minor": 0
357
+ },
358
+ "text/plain": [
359
+ "Upload 4 LFS files: 0%| | 0/4 [00:00<?, ?it/s]"
360
+ ]
361
+ },
362
+ "metadata": {},
363
+ "output_type": "display_data"
364
+ },
365
+ {
366
+ "data": {
367
+ "application/vnd.jupyter.widget-view+json": {
368
+ "model_id": "2703f658c0a144af8458386472f3331a",
369
+ "version_major": 2,
370
+ "version_minor": 0
371
+ },
372
+ "text/plain": [
373
+ "model-00002-of-00004.safetensors: 0%| | 0.00/5.00G [00:00<?, ?B/s]"
374
+ ]
375
+ },
376
+ "metadata": {},
377
+ "output_type": "display_data"
378
+ },
379
+ {
380
+ "data": {
381
+ "text/plain": [
382
+ "CommitInfo(commit_url='https://huggingface.co/huseinzol05/Meta-Llama-3-8B-Instruct-fix-tokenizer/commit/35ccfcb4478a2ffe1cc35769243b5f7ff7533aa2', commit_message='Upload LlamaForCausalLM', commit_description='', oid='35ccfcb4478a2ffe1cc35769243b5f7ff7533aa2', pr_url=None, pr_revision=None, pr_num=None)"
383
+ ]
384
+ },
385
+ "execution_count": 10,
386
+ "metadata": {},
387
+ "output_type": "execute_result"
388
+ }
389
+ ],
390
+ "source": [
391
+ "model.push_to_hub('Meta-Llama-3-8B-Instruct-fix-tokenizer', safe_serialization=True)"
392
+ ]
393
+ },
394
+ {
395
+ "cell_type": "code",
396
+ "execution_count": 13,
397
+ "id": "d947927c",
398
+ "metadata": {},
399
+ "outputs": [
400
+ {
401
+ "data": {
402
+ "text/plain": [
403
+ "CommitInfo(commit_url='https://huggingface.co/huseinzol05/Meta-Llama-3-8B-Instruct-fix-tokenizer/commit/eb269f1c20542c3900543925549656896fd9441d', commit_message='Upload tokenizer', commit_description='', oid='eb269f1c20542c3900543925549656896fd9441d', pr_url=None, pr_revision=None, pr_num=None)"
404
+ ]
405
+ },
406
+ "execution_count": 13,
407
+ "metadata": {},
408
+ "output_type": "execute_result"
409
+ }
410
+ ],
411
+ "source": [
412
+ "tokenizer.push_to_hub('Meta-Llama-3-8B-Instruct-fix-tokenizer', safe_serialization=True)"
413
+ ]
414
+ },
415
+ {
416
+ "cell_type": "code",
417
+ "execution_count": null,
418
+ "id": "63bb4f54",
419
+ "metadata": {},
420
+ "outputs": [],
421
+ "source": []
422
+ }
423
+ ],
424
+ "metadata": {
425
+ "kernelspec": {
426
+ "display_name": "Python 3 (ipykernel)",
427
+ "language": "python",
428
+ "name": "python3"
429
+ },
430
+ "language_info": {
431
+ "codemirror_mode": {
432
+ "name": "ipython",
433
+ "version": 3
434
+ },
435
+ "file_extension": ".py",
436
+ "mimetype": "text/x-python",
437
+ "name": "python",
438
+ "nbconvert_exporter": "python",
439
+ "pygments_lexer": "ipython3",
440
+ "version": "3.10.12"
441
+ }
442
+ },
443
+ "nbformat": 4,
444
+ "nbformat_minor": 5
445
+ }