YuvrajTalukdar committed on
Commit
f869d7b
1 Parent(s): e8afaae

Code Uploaded

Browse files
Files changed (2) hide show
  1. AssamGPT_Inference.ipynb +0 -0
  2. AssameseWikiGPT.ipynb +463 -0
AssamGPT_Inference.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
AssameseWikiGPT.ipynb ADDED
@@ -0,0 +1,463 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "length of the longest sentence: 10\n",
13
+ "no_of_sentences: 127946\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "import os\n",
19
+ "import random\n",
20
+ "\n",
21
+ "#os.environ['CUDA_VISIBLE_DEVICES'] = '-1' #disable gpu\n",
22
+ "\n",
23
+ "def get_text_data():\n",
24
+ " sentences=[]\n",
25
+ " file_name=\"cleaned_assamese_text.txt\"\n",
26
+ " file=open(file_name,'r')\n",
27
+ " file_sentences=file.read().split(',')\n",
28
+ " sentences+=file_sentences\n",
29
+ " file.close()\n",
30
+ " sentences=list(filter(None,sentences))\n",
31
+ " return sentences\n",
32
+ "\n",
33
+ "sentences=get_text_data()\n",
34
+ "random.shuffle(sentences)\n",
35
+ "no_of_sentences=len(sentences)\n",
36
+ "text_train=sentences[:int(0.7*no_of_sentences)]\n",
37
+ "text_test=sentences[int(0.7*no_of_sentences):int(0.85*no_of_sentences)]\n",
38
+ "text_valid=sentences[int(0.85*no_of_sentences):]\n",
39
+ "#maxlen = len(max(sentences))\n",
40
+ "maxlen=10\n",
41
+ "print(\"length of the longest sentence: \",maxlen)\n",
42
+ "print(\"no_of_sentences: \",no_of_sentences)"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": 2,
48
+ "metadata": {},
49
+ "outputs": [
50
+ {
51
+ "name": "stderr",
52
+ "output_type": "stream",
53
+ "text": [
54
+ "2023-02-28 23:36:00.068548: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
55
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
56
+ "2023-02-28 23:36:01.115879: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12.0/lib64:/usr/local/cuda-11.7/lib64::/home/yuvrajtalukdar/miniconda3/envs/miniproject/lib/\n",
57
+ "2023-02-28 23:36:01.116220: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12.0/lib64:/usr/local/cuda-11.7/lib64::/home/yuvrajtalukdar/miniconda3/envs/miniproject/lib/\n",
58
+ "2023-02-28 23:36:01.116238: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
59
+ "2023-02-28 23:36:02.603014: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
60
+ "2023-02-28 23:36:02.736211: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
61
+ "2023-02-28 23:36:02.736438: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
62
+ "2023-02-28 23:36:02.736847: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
63
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
64
+ "2023-02-28 23:36:02.737278: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
65
+ "2023-02-28 23:36:02.737453: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
66
+ "2023-02-28 23:36:02.737574: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
67
+ "2023-02-28 23:36:03.410798: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
68
+ "2023-02-28 23:36:03.410969: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
69
+ "2023-02-28 23:36:03.411092: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
70
+ "2023-02-28 23:36:03.411205: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2107 MB memory: -> device: 0, name: NVIDIA GeForce RTX 3050 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6\n"
71
+ ]
72
+ },
73
+ {
74
+ "name": "stdout",
75
+ "output_type": "stream",
76
+ "text": [
77
+ "303475\n"
78
+ ]
79
+ },
80
+ {
81
+ "data": {
82
+ "text/plain": [
83
+ "<tf.Tensor: shape=(1, 11), dtype=int64, numpy=array([[ 17, 3078, 2246, 87, 31, 0, 0, 0, 0, 0, 0]])>"
84
+ ]
85
+ },
86
+ "execution_count": 2,
87
+ "metadata": {},
88
+ "output_type": "execute_result"
89
+ }
90
+ ],
91
+ "source": [
92
+ "from tensorflow.keras.layers import TextVectorization\n",
93
+ "import tensorflow as tf\n",
94
+ "\n",
95
+ "def custom_standardization(input_string):\n",
96
+ " sentence = tf.strings.lower(input_string)\n",
97
+ " #sentence = tf.strings.regex_replace(sentence, \"\\n\", \" \")\n",
98
+ " return sentence\n",
99
+ "\n",
100
+ "vectorize_layer = TextVectorization(\n",
101
+ " standardize = custom_standardization,\n",
102
+ " output_mode=\"int\",\n",
103
+ " output_sequence_length=maxlen + 1,\n",
104
+ ")\n",
105
+ "\n",
106
+ "vectorize_layer.adapt(sentences)\n",
107
+ "vocab = vectorize_layer.get_vocabulary()\n",
108
+ "\n",
109
+ "vocab_size = len(vocab)\n",
110
+ "print(vocab_size) # 303475 in the recorded run\n",
111
+ "vectorize_layer(['এক অনন্য মাত্ৰা প্ৰদান কৰাৰ'])"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "code",
116
+ "execution_count": 3,
117
+ "metadata": {},
118
+ "outputs": [],
119
+ "source": [
120
+ "index_lookup = dict(zip(range(len(vocab)), vocab))"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": 4,
126
+ "metadata": {},
127
+ "outputs": [],
128
+ "source": [
129
+ "batch_size = 10 #64\n",
130
+ "\n",
131
+ "train_dataset = tf.data.Dataset.from_tensor_slices(text_train)\n",
132
+ "train_dataset = train_dataset.shuffle(buffer_size=256)\n",
133
+ "train_dataset = train_dataset.batch(batch_size)\n",
134
+ "\n",
135
+ "test_dataset = tf.data.Dataset.from_tensor_slices(text_test)\n",
136
+ "test_dataset = test_dataset.shuffle(buffer_size=256)\n",
137
+ "test_dataset = test_dataset.batch(batch_size)\n",
138
+ "\n",
139
+ "valid_dataset = tf.data.Dataset.from_tensor_slices(text_valid)\n",
140
+ "valid_dataset = valid_dataset.shuffle(buffer_size=256)\n",
141
+ "valid_dataset = valid_dataset.batch(batch_size)"
142
+ ]
143
+ },
144
+ {
145
+ "cell_type": "code",
146
+ "execution_count": 5,
147
+ "metadata": {},
148
+ "outputs": [],
149
+ "source": [
150
+ "def preprocess_text(text):\n",
151
+ " text = tf.expand_dims(text, -1)\n",
152
+ " tokenized_sentences = vectorize_layer(text)\n",
153
+ " x = tokenized_sentences[:, :-1]\n",
154
+ " y = tokenized_sentences[:, 1:]\n",
155
+ " return x, y\n",
156
+ "\n",
157
+ "\n",
158
+ "train_dataset = train_dataset.map(preprocess_text)\n",
159
+ "train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)\n",
160
+ "\n",
161
+ "test_dataset = test_dataset.map(preprocess_text)\n",
162
+ "test_dataset = test_dataset.prefetch(tf.data.AUTOTUNE)\n",
163
+ "\n",
164
+ "valid_dataset = valid_dataset.map(preprocess_text)\n",
165
+ "valid_dataset = valid_dataset.prefetch(tf.data.AUTOTUNE)"
166
+ ]
167
+ },
168
+ {
169
+ "cell_type": "code",
170
+ "execution_count": 6,
171
+ "metadata": {},
172
+ "outputs": [
173
+ {
174
+ "name": "stdout",
175
+ "output_type": "stream",
176
+ "text": [
177
+ "(<tf.Tensor: shape=(10, 10), dtype=int64, numpy=\n",
178
+ "array([[ 10738, 0, 0, 0, 0, 0, 0, 0,\n",
179
+ " 0, 0],\n",
180
+ " [ 5212, 24846, 504, 51, 71, 8517, 6751, 4828,\n",
181
+ " 681, 0],\n",
182
+ " [ 61, 108, 7418, 252, 2823, 2674, 134, 487,\n",
183
+ " 0, 0],\n",
184
+ " [289690, 2, 112988, 1054, 5367, 31142, 22, 3240,\n",
185
+ " 1115, 2376],\n",
186
+ " [ 393, 2, 352, 125, 6995, 6019, 41625, 12,\n",
187
+ " 1799, 551],\n",
188
+ " [ 265, 4642, 22, 1696, 89473, 126, 3, 5,\n",
189
+ " 410, 3375],\n",
190
+ " [ 8187, 18122, 278, 34, 579, 579, 43, 1119,\n",
191
+ " 710, 395],\n",
192
+ " [ 61, 16, 5291, 150, 1166, 2, 4796, 50192,\n",
193
+ " 5668, 2324],\n",
194
+ " [ 52, 954, 239, 595, 5401, 1006, 2, 3253,\n",
195
+ " 3812, 21],\n",
196
+ " [ 17071, 2, 15782, 5901, 15075, 783, 22, 40,\n",
197
+ " 40782, 34480]])>, <tf.Tensor: shape=(10, 10), dtype=int64, numpy=\n",
198
+ "array([[ 0, 0, 0, 0, 0, 0, 0, 0,\n",
199
+ " 0, 0],\n",
200
+ " [ 24846, 504, 51, 71, 8517, 6751, 4828, 681,\n",
201
+ " 0, 0],\n",
202
+ " [ 108, 7418, 252, 2823, 2674, 134, 487, 0,\n",
203
+ " 0, 0],\n",
204
+ " [ 2, 112988, 1054, 5367, 31142, 22, 3240, 1115,\n",
205
+ " 2376, 2483],\n",
206
+ " [ 2, 352, 125, 6995, 6019, 41625, 12, 1799,\n",
207
+ " 551, 20],\n",
208
+ " [ 4642, 22, 1696, 89473, 126, 3, 5, 410,\n",
209
+ " 3375, 4436],\n",
210
+ " [ 18122, 278, 34, 579, 579, 43, 1119, 710,\n",
211
+ " 395, 710],\n",
212
+ " [ 16, 5291, 150, 1166, 2, 4796, 50192, 5668,\n",
213
+ " 2324, 239],\n",
214
+ " [ 954, 239, 595, 5401, 1006, 2, 3253, 3812,\n",
215
+ " 21, 245],\n",
216
+ " [ 2, 15782, 5901, 15075, 783, 22, 40, 40782,\n",
217
+ " 34480, 0]])>)\n"
218
+ ]
219
+ }
220
+ ],
221
+ "source": [
222
+ "for entry in train_dataset.take(1):\n",
223
+ " print(entry)"
224
+ ]
225
+ },
226
+ {
227
+ "cell_type": "code",
228
+ "execution_count": 7,
229
+ "metadata": {},
230
+ "outputs": [],
231
+ "source": [
232
+ "import keras_nlp\n",
233
+ "from tensorflow import keras\n",
234
+ "\n",
235
+ "embed_dim = 128\n",
236
+ "num_heads = 4\n",
237
+ "\n",
238
+ "def create_model2(no_of_decoder=1):\n",
239
+ " inputs = keras.layers.Input(shape=(maxlen,), dtype=tf.int32)\n",
240
+ " x = keras_nlp.layers.TokenAndPositionEmbedding(vocab_size, maxlen, embed_dim)(inputs)\n",
241
+ " for i in range(4):\n",
242
+ " x = keras_nlp.layers.TransformerDecoder(intermediate_dim=embed_dim*2, num_heads=num_heads,dropout=0.5)(x)\n",
243
+ " do = keras.layers.Dropout(0.4)(x)\n",
244
+ " outputs = keras.layers.Dense(vocab_size, activation='softmax')(do)\n",
245
+ " \n",
246
+ " model = keras.Model(inputs=inputs, outputs=outputs)\n",
247
+ " model.compile(\n",
248
+ " optimizer=\"adam\", \n",
249
+ " loss='sparse_categorical_crossentropy',\n",
250
+ " metrics=[keras_nlp.metrics.Perplexity(), 'accuracy']\n",
251
+ " )\n",
252
+ " return model"
253
+ ]
254
+ },
255
+ {
256
+ "cell_type": "code",
257
+ "execution_count": 8,
258
+ "metadata": {},
259
+ "outputs": [],
260
+ "source": [
261
+ "import numpy as np\n",
262
+ "\n",
263
+ "class TextSampler(keras.callbacks.Callback):\n",
264
+ " def __init__(self, start_prompt, max_tokens):\n",
265
+ " self.start_prompt = start_prompt\n",
266
+ " self.max_tokens = max_tokens\n",
267
+ " \n",
268
+ " # Helper method to choose a word from the top K probable words with respect to their probabilities\n",
269
+ " # in a sequence\n",
270
+ " def sample_token(self, logits):\n",
271
+ " logits, indices = tf.math.top_k(logits, k=5, sorted=True)\n",
272
+ " indices = np.asarray(indices).astype(\"int32\")\n",
273
+ " preds = keras.activations.softmax(tf.expand_dims(logits, 0))[0]\n",
274
+ " preds = np.asarray(preds).astype(\"float32\")\n",
275
+ " return np.random.choice(indices, p=preds)\n",
276
+ "\n",
277
+ " def on_epoch_end(self, epoch, logs=None):\n",
278
+ " decoded_sample = self.start_prompt\n",
279
+ " \n",
280
+ " for i in range(self.max_tokens-1):\n",
281
+ " tokenized_prompt = vectorize_layer([decoded_sample])[:, :-1]\n",
282
+ " predictions = self.model.predict([tokenized_prompt], verbose=0)\n",
283
+ " # To find the index of the next word in the prediction array.\n",
284
+ " # The model is trained so that predictions[0][i] is the distribution for the word\n",
285
+ " # that follows position i, so the index of the last prompt word,\n",
286
+ " # len(decoded_sample.split()) - 1, selects the prediction for the next word\n",
287
+ " sample_index = len(decoded_sample.strip().split())-1\n",
288
+ " \n",
289
+ " sampled_token = self.sample_token(predictions[0][sample_index])\n",
290
+ " sampled_token = index_lookup[sampled_token]\n",
291
+ " decoded_sample += \" \" + sampled_token\n",
292
+ " \n",
293
+ " print(f\"\\nSample text:\\n{decoded_sample}...\\n\")\n",
294
+ "\n",
295
+ "# First 4 words of a random sentence to be used as a seed\n",
296
+ "random_sentence = ' '.join(random.choice(text_valid).replace('\\n', ' ').split(' ')[:4])\n",
297
+ "sampler = TextSampler(random_sentence, 30)\n",
298
+ "reducelr = keras.callbacks.ReduceLROnPlateau(patience=10, monitor='val_loss')"
299
+ ]
300
+ },
301
+ {
302
+ "cell_type": "code",
303
+ "execution_count": 9,
304
+ "metadata": {},
305
+ "outputs": [
306
+ {
307
+ "name": "stdout",
308
+ "output_type": "stream",
309
+ "text": [
310
+ "Model: \"model\"\n",
311
+ "_________________________________________________________________\n",
312
+ " Layer (type) Output Shape Param # \n",
313
+ "=================================================================\n",
314
+ " input_1 (InputLayer) [(None, 10)] 0 \n",
315
+ " \n",
316
+ " token_and_position_embeddin (None, 10, 128) 38846080 \n",
317
+ " g (TokenAndPositionEmbeddin \n",
318
+ " g) \n",
319
+ " \n",
320
+ " transformer_decoder (Transf (None, 10, 128) 132480 \n",
321
+ " ormerDecoder) \n",
322
+ " \n",
323
+ " transformer_decoder_1 (Tran (None, 10, 128) 132480 \n",
324
+ " sformerDecoder) \n",
325
+ " \n",
326
+ " transformer_decoder_2 (Tran (None, 10, 128) 132480 \n",
327
+ " sformerDecoder) \n",
328
+ " \n",
329
+ " transformer_decoder_3 (Tran (None, 10, 128) 132480 \n",
330
+ " sformerDecoder) \n",
331
+ " \n",
332
+ " dropout (Dropout) (None, 10, 128) 0 \n",
333
+ " \n",
334
+ " dense (Dense) (None, 10, 303475) 39148275 \n",
335
+ " \n",
336
+ "=================================================================\n",
337
+ "Total params: 78,524,275\n",
338
+ "Trainable params: 78,524,275\n",
339
+ "Non-trainable params: 0\n",
340
+ "_________________________________________________________________\n",
341
+ "Epoch 1/150\n"
342
+ ]
343
+ },
344
+ {
345
+ "name": "stderr",
346
+ "output_type": "stream",
347
+ "text": [
348
+ "2023-02-28 23:36:23.887413: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n",
349
+ "2023-02-28 23:36:24.308423: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x7ff6d67579b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
350
+ "2023-02-28 23:36:24.308518: I tensorflow/compiler/xla/service/service.cc:181] StreamExecutor device (0): NVIDIA GeForce RTX 3050 Laptop GPU, Compute Capability 8.6\n",
351
+ "2023-02-28 23:36:24.328912: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
352
+ "2023-02-28 23:36:24.549826: I tensorflow/compiler/jit/xla_compilation_cache.cc:477] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n"
353
+ ]
354
+ },
355
+ {
356
+ "name": "stdout",
357
+ "output_type": "stream",
358
+ "text": [
359
+ "3082/8957 [=========>....................] - ETA: 55:03 - loss: 5.8952 - perplexity: 363.2977 - accuracy: 0.4296"
360
+ ]
361
+ },
362
+ {
363
+ "ename": "KeyboardInterrupt",
364
+ "evalue": "",
365
+ "output_type": "error",
366
+ "traceback": [
367
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
368
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
369
+ "\u001b[1;32m/media/yuvrajtalukdar/New Volume/computer/undergoing_projects/AssamWiki GPT/AssameseWikiGPT.ipynb Cell 9\u001b[0m in \u001b[0;36m<cell line: 3>\u001b[0;34m()\u001b[0m\n\u001b[1;32m <a href='vscode-notebook-cell:/media/yuvrajtalukdar/New%20Volume/computer/undergoing_projects/AssamWiki%20GPT/AssameseWikiGPT.ipynb#X11sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m model \u001b[39m=\u001b[39m create_model2(\u001b[39m4\u001b[39m)\n\u001b[1;32m <a href='vscode-notebook-cell:/media/yuvrajtalukdar/New%20Volume/computer/undergoing_projects/AssamWiki%20GPT/AssameseWikiGPT.ipynb#X11sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m model\u001b[39m.\u001b[39msummary()\n\u001b[0;32m----> <a href='vscode-notebook-cell:/media/yuvrajtalukdar/New%20Volume/computer/undergoing_projects/AssamWiki%20GPT/AssameseWikiGPT.ipynb#X11sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m history \u001b[39m=\u001b[39m model\u001b[39m.\u001b[39;49mfit(train_dataset,validation_data\u001b[39m=\u001b[39;49mvalid_dataset,epochs\u001b[39m=\u001b[39;49m\u001b[39m150\u001b[39;49m,callbacks\u001b[39m=\u001b[39;49m[sampler, reducelr])\n",
370
+ "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/keras/utils/traceback_utils.py:65\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 63\u001b[0m filtered_tb \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 64\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 65\u001b[0m \u001b[39mreturn\u001b[39;00m fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 66\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 67\u001b[0m filtered_tb \u001b[39m=\u001b[39m _process_traceback_frames(e\u001b[39m.\u001b[39m__traceback__)\n",
371
+ "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/keras/engine/training.py:1650\u001b[0m, in \u001b[0;36mModel.fit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m 1642\u001b[0m \u001b[39mwith\u001b[39;00m tf\u001b[39m.\u001b[39mprofiler\u001b[39m.\u001b[39mexperimental\u001b[39m.\u001b[39mTrace(\n\u001b[1;32m 1643\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mtrain\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 1644\u001b[0m epoch_num\u001b[39m=\u001b[39mepoch,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1647\u001b[0m _r\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m,\n\u001b[1;32m 1648\u001b[0m ):\n\u001b[1;32m 1649\u001b[0m callbacks\u001b[39m.\u001b[39mon_train_batch_begin(step)\n\u001b[0;32m-> 1650\u001b[0m tmp_logs \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtrain_function(iterator)\n\u001b[1;32m 1651\u001b[0m \u001b[39mif\u001b[39;00m data_handler\u001b[39m.\u001b[39mshould_sync:\n\u001b[1;32m 1652\u001b[0m context\u001b[39m.\u001b[39masync_wait()\n",
372
+ "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/util/traceback_utils.py:150\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 148\u001b[0m filtered_tb \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 149\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 150\u001b[0m \u001b[39mreturn\u001b[39;00m fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 151\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 152\u001b[0m filtered_tb \u001b[39m=\u001b[39m _process_traceback_frames(e\u001b[39m.\u001b[39m__traceback__)\n",
373
+ "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py:880\u001b[0m, in \u001b[0;36mFunction.__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 877\u001b[0m compiler \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mxla\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_jit_compile \u001b[39melse\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mnonXla\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 879\u001b[0m \u001b[39mwith\u001b[39;00m OptionalXlaContext(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_jit_compile):\n\u001b[0;32m--> 880\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwds)\n\u001b[1;32m 882\u001b[0m new_tracing_count \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mexperimental_get_tracing_count()\n\u001b[1;32m 883\u001b[0m without_tracing \u001b[39m=\u001b[39m (tracing_count \u001b[39m==\u001b[39m new_tracing_count)\n",
374
+ "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py:912\u001b[0m, in \u001b[0;36mFunction._call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 909\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_lock\u001b[39m.\u001b[39mrelease()\n\u001b[1;32m 910\u001b[0m \u001b[39m# In this case we have created variables on the first call, so we run the\u001b[39;00m\n\u001b[1;32m 911\u001b[0m \u001b[39m# defunned version which is guaranteed to never create variables.\u001b[39;00m\n\u001b[0;32m--> 912\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_no_variable_creation_fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwds) \u001b[39m# pylint: disable=not-callable\u001b[39;00m\n\u001b[1;32m 913\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_variable_creation_fn \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 914\u001b[0m \u001b[39m# Release the lock early so that multiple threads can perform the call\u001b[39;00m\n\u001b[1;32m 915\u001b[0m \u001b[39m# in parallel.\u001b[39;00m\n\u001b[1;32m 916\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_lock\u001b[39m.\u001b[39mrelease()\n",
375
+ "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/eager/polymorphic_function/tracing_compiler.py:134\u001b[0m, in \u001b[0;36mTracingCompiler.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 131\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_lock:\n\u001b[1;32m 132\u001b[0m (concrete_function,\n\u001b[1;32m 133\u001b[0m filtered_flat_args) \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_maybe_define_function(args, kwargs)\n\u001b[0;32m--> 134\u001b[0m \u001b[39mreturn\u001b[39;00m concrete_function\u001b[39m.\u001b[39;49m_call_flat(\n\u001b[1;32m 135\u001b[0m filtered_flat_args, captured_inputs\u001b[39m=\u001b[39;49mconcrete_function\u001b[39m.\u001b[39;49mcaptured_inputs)\n",
376
+ "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/eager/polymorphic_function/monomorphic_function.py:1745\u001b[0m, in \u001b[0;36mConcreteFunction._call_flat\u001b[0;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[1;32m 1741\u001b[0m possible_gradient_type \u001b[39m=\u001b[39m gradients_util\u001b[39m.\u001b[39mPossibleTapeGradientTypes(args)\n\u001b[1;32m 1742\u001b[0m \u001b[39mif\u001b[39;00m (possible_gradient_type \u001b[39m==\u001b[39m gradients_util\u001b[39m.\u001b[39mPOSSIBLE_GRADIENT_TYPES_NONE\n\u001b[1;32m 1743\u001b[0m \u001b[39mand\u001b[39;00m executing_eagerly):\n\u001b[1;32m 1744\u001b[0m \u001b[39m# No tape is watching; skip to running the function.\u001b[39;00m\n\u001b[0;32m-> 1745\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_build_call_outputs(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_inference_function\u001b[39m.\u001b[39;49mcall(\n\u001b[1;32m 1746\u001b[0m ctx, args, cancellation_manager\u001b[39m=\u001b[39;49mcancellation_manager))\n\u001b[1;32m 1747\u001b[0m forward_backward \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_select_forward_and_backward_functions(\n\u001b[1;32m 1748\u001b[0m args,\n\u001b[1;32m 1749\u001b[0m possible_gradient_type,\n\u001b[1;32m 1750\u001b[0m executing_eagerly)\n\u001b[1;32m 1751\u001b[0m forward_function, args_with_tangents \u001b[39m=\u001b[39m forward_backward\u001b[39m.\u001b[39mforward()\n",
377
+ "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/eager/polymorphic_function/monomorphic_function.py:378\u001b[0m, in \u001b[0;36m_EagerDefinedFunction.call\u001b[0;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[1;32m 376\u001b[0m \u001b[39mwith\u001b[39;00m _InterpolateFunctionError(\u001b[39mself\u001b[39m):\n\u001b[1;32m 377\u001b[0m \u001b[39mif\u001b[39;00m cancellation_manager \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 378\u001b[0m outputs \u001b[39m=\u001b[39m execute\u001b[39m.\u001b[39;49mexecute(\n\u001b[1;32m 379\u001b[0m \u001b[39mstr\u001b[39;49m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msignature\u001b[39m.\u001b[39;49mname),\n\u001b[1;32m 380\u001b[0m num_outputs\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_num_outputs,\n\u001b[1;32m 381\u001b[0m inputs\u001b[39m=\u001b[39;49margs,\n\u001b[1;32m 382\u001b[0m attrs\u001b[39m=\u001b[39;49mattrs,\n\u001b[1;32m 383\u001b[0m ctx\u001b[39m=\u001b[39;49mctx)\n\u001b[1;32m 384\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 385\u001b[0m outputs \u001b[39m=\u001b[39m execute\u001b[39m.\u001b[39mexecute_with_cancellation(\n\u001b[1;32m 386\u001b[0m \u001b[39mstr\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39msignature\u001b[39m.\u001b[39mname),\n\u001b[1;32m 387\u001b[0m num_outputs\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_num_outputs,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 390\u001b[0m ctx\u001b[39m=\u001b[39mctx,\n\u001b[1;32m 391\u001b[0m cancellation_manager\u001b[39m=\u001b[39mcancellation_manager)\n",
378
+ "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/eager/execute.py:52\u001b[0m, in \u001b[0;36mquick_execute\u001b[0;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 51\u001b[0m ctx\u001b[39m.\u001b[39mensure_initialized()\n\u001b[0;32m---> 52\u001b[0m tensors \u001b[39m=\u001b[39m pywrap_tfe\u001b[39m.\u001b[39;49mTFE_Py_Execute(ctx\u001b[39m.\u001b[39;49m_handle, device_name, op_name,\n\u001b[1;32m 53\u001b[0m inputs, attrs, num_outputs)\n\u001b[1;32m 54\u001b[0m \u001b[39mexcept\u001b[39;00m core\u001b[39m.\u001b[39m_NotOkStatusException \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 55\u001b[0m \u001b[39mif\u001b[39;00m name \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n",
379
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
380
+ ]
381
+ }
382
+ ],
383
+ "source": [
384
+ "model = create_model2(4)\n",
385
+ "model.summary()\n",
386
+ "history = model.fit(train_dataset,validation_data=valid_dataset,epochs=150,callbacks=[sampler, reducelr])"
387
+ ]
388
+ },
389
+ {
390
+ "cell_type": "code",
391
+ "execution_count": null,
392
+ "metadata": {},
393
+ "outputs": [],
394
+ "source": [
395
+ "def sample_token(logits):\n",
396
+ " logits, indices = tf.math.top_k(logits, k=5, sorted=True)\n",
397
+ " indices = np.asarray(indices).astype(\"int32\")\n",
398
+ " preds = keras.activations.softmax(tf.expand_dims(logits, 0))[0]\n",
399
+ " preds = np.asarray(preds).astype(\"float32\")\n",
400
+ " return np.random.choice(indices, p=preds)\n",
401
+ "\n",
402
+ "def generate_text(prompt, response_length=50):\n",
403
+ " decoded_sample = prompt\n",
404
+ " for i in range(response_length-1):\n",
405
+ " tokenized_prompt = vectorize_layer([decoded_sample])[:, :-1]\n",
406
+ " predictions = model.predict([tokenized_prompt], verbose=0)\n",
407
+ " sample_index = len(decoded_sample.strip().split())-1\n",
408
+ "\n",
409
+ " sampled_token = sample_token(predictions[0][sample_index])\n",
410
+ " sampled_token = index_lookup[sampled_token]\n",
411
+ " decoded_sample += \" \" + sampled_token\n",
412
+ " return decoded_sample"
413
+ ]
414
+ },
415
+ {
416
+ "cell_type": "code",
417
+ "execution_count": null,
418
+ "metadata": {},
419
+ "outputs": [],
420
+ "source": [
421
+ "import pickle\n",
422
+ "model.save(\"pd_plaintext_transformer.h5\")\n",
423
+ "pickle.dump(model, open('pd_plaintext_transformer.pkl', 'wb'))"
424
+ ]
425
+ },
426
+ {
427
+ "cell_type": "code",
428
+ "execution_count": null,
429
+ "metadata": {},
430
+ "outputs": [],
431
+ "source": [
432
+ "generate_text('য়ুৰিৰ দাদাক আৰু ',response_length=50)"
433
+ ]
434
+ }
435
+ ],
436
+ "metadata": {
437
+ "kernelspec": {
438
+ "display_name": "miniproject",
439
+ "language": "python",
440
+ "name": "python3"
441
+ },
442
+ "language_info": {
443
+ "codemirror_mode": {
444
+ "name": "ipython",
445
+ "version": 3
446
+ },
447
+ "file_extension": ".py",
448
+ "mimetype": "text/x-python",
449
+ "name": "python",
450
+ "nbconvert_exporter": "python",
451
+ "pygments_lexer": "ipython3",
452
+ "version": "3.10.4"
453
+ },
454
+ "orig_nbformat": 4,
455
+ "vscode": {
456
+ "interpreter": {
457
+ "hash": "b18115e74db522ea4edaf3f03801a60154dbaca70e4a91a6289c29c6971e06fa"
458
+ }
459
+ }
460
+ },
461
+ "nbformat": 4,
462
+ "nbformat_minor": 2
463
+ }