{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "length of the longest sentence: 10\n", "no_of_sentences: 127946\n" ] } ], "source": [ "import os\n", "import random\n", "\n", "#os.environ['CUDA_VISIBLE_DEVICES'] = '-1' #disble gpu\n", "\n", "def get_text_data():\n", " sentences=[]\n", " file_name=\"cleaned_assamese_text.txt\"\n", " file=open(file_name,'r')\n", " file_sentences=file.read().split(',')\n", " sentences+=file_sentences\n", " file.close()\n", " sentences=list(filter(None,sentences))\n", " return sentences\n", "\n", "sentences=get_text_data()\n", "random.shuffle(sentences)\n", "no_of_sentences=len(sentences)\n", "text_train=sentences[:int(0.7*no_of_sentences)]\n", "text_test=sentences[int(0.7*no_of_sentences):int(0.85*no_of_sentences)]\n", "text_valid=sentences[int(0.85*no_of_sentences):]\n", "#maxlen = len(max(sentences))\n", "maxlen=10\n", "print(\"length of the longest sentence: \",maxlen)\n", "print(\"no_of_sentences: \",no_of_sentences)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2023-02-28 23:36:00.068548: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2023-02-28 23:36:01.115879: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12.0/lib64:/usr/local/cuda-11.7/lib64::/home/yuvrajtalukdar/miniconda3/envs/miniproject/lib/\n", "2023-02-28 23:36:01.116220: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12.0/lib64:/usr/local/cuda-11.7/lib64::/home/yuvrajtalukdar/miniconda3/envs/miniproject/lib/\n", "2023-02-28 23:36:01.116238: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", "2023-02-28 23:36:02.603014: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-02-28 23:36:02.736211: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-02-28 23:36:02.736438: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-02-28 23:36:02.736847: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2023-02-28 23:36:02.737278: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-02-28 23:36:02.737453: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-02-28 23:36:02.737574: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-02-28 23:36:03.410798: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-02-28 23:36:03.410969: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-02-28 23:36:03.411092: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-02-28 23:36:03.411205: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2107 MB memory: -> device: 0, name: NVIDIA GeForce RTX 3050 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "303475\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from tensorflow.keras.layers import TextVectorization\n", "import tensorflow as tf\n", "\n", "def custom_standardization(input_string):\n", " sentence = tf.strings.lower(input_string)\n", " #sentence = tf.strings.regex_replace(sentence, \"\\n\", \" \")\n", " return sentence\n", "\n", "vectorize_layer = TextVectorization(\n", " standardize = custom_standardization,\n", " output_mode=\"int\",\n", " output_sequence_length=maxlen + 1,\n", ")\n", "\n", "vectorize_layer.adapt(sentences)\n", "vocab = vectorize_layer.get_vocabulary()\n", "\n", "vocab_size = len(vocab)\n", "print(vocab_size) # 49703\n", "vectorize_layer(['এক অনন্য মাত্ৰা প্ৰদান কৰাৰ'])" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "index_lookup = dict(zip(range(len(vocab)), vocab))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "batch_size = 10 #64\n", "\n", "train_dataset = tf.data.Dataset.from_tensor_slices(text_train)\n", "train_dataset = train_dataset.shuffle(buffer_size=256)\n", "train_dataset = train_dataset.batch(batch_size)\n", "\n", "test_dataset = tf.data.Dataset.from_tensor_slices(text_test)\n", "test_dataset = test_dataset.shuffle(buffer_size=256)\n", "test_dataset = test_dataset.batch(batch_size)\n", "\n", "valid_dataset = tf.data.Dataset.from_tensor_slices(text_valid)\n", "valid_dataset = valid_dataset.shuffle(buffer_size=256)\n", "valid_dataset = valid_dataset.batch(batch_size)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "def preprocess_text(text):\n", " text = tf.expand_dims(text, -1)\n", " tokenized_sentences = vectorize_layer(text)\n", " x = tokenized_sentences[:, :-1]\n", " y = tokenized_sentences[:, 1:]\n", " return x, y\n", "\n", "\n", "train_dataset = train_dataset.map(preprocess_text)\n", "train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)\n", "\n", "test_dataset = test_dataset.map(preprocess_text)\n", "test_dataset = test_dataset.prefetch(tf.data.AUTOTUNE)\n", "\n", "valid_dataset = valid_dataset.map(preprocess_text)\n", "valid_dataset = valid_dataset.prefetch(tf.data.AUTOTUNE)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(, )\n" ] } ], "source": [ "for entry in train_dataset.take(1):\n", " print(entry)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "import keras_nlp\n", "from tensorflow import keras\n", "\n", "embed_dim = 128\n", "num_heads = 4\n", "\n", "def create_model2(no_of_decoder=1):\n", " inputs = keras.layers.Input(shape=(maxlen,), dtype=tf.int32)\n", " x = keras_nlp.layers.TokenAndPositionEmbedding(vocab_size, maxlen, embed_dim)(inputs)\n", " for i in range(4):\n", " x = keras_nlp.layers.TransformerDecoder(intermediate_dim=embed_dim*2, num_heads=num_heads,dropout=0.5)(x)\n", " do = keras.layers.Dropout(0.4)(x)\n", " outputs = keras.layers.Dense(vocab_size, activation='softmax')(do)\n", " \n", " model = keras.Model(inputs=inputs, outputs=outputs)\n", " model.compile(\n", " optimizer=\"adam\", \n", " loss='sparse_categorical_crossentropy',\n", " metrics=[keras_nlp.metrics.Perplexity(), 'accuracy']\n", " )\n", " return model" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "\n", "class TextSampler(keras.callbacks.Callback):\n", " def __init__(self, start_prompt, max_tokens):\n", " self.start_prompt = start_prompt\n", " self.max_tokens = max_tokens\n", " \n", " # Helper method to choose a word from the top K probable words with respect to their probabilities\n", " # in a sequence\n", " def sample_token(self, logits):\n", " logits, indices = tf.math.top_k(logits, k=5, sorted=True)\n", " indices = np.asarray(indices).astype(\"int32\")\n", " preds = keras.activations.softmax(tf.expand_dims(logits, 0))[0]\n", " preds = np.asarray(preds).astype(\"float32\")\n", " return np.random.choice(indices, p=preds)\n", "\n", " def on_epoch_end(self, epoch, logs=None):\n", " decoded_sample = self.start_prompt\n", " \n", " for i in range(self.max_tokens-1):\n", " tokenized_prompt = vectorize_layer([decoded_sample])[:, :-1]\n", " predictions = self.model.predict([tokenized_prompt], verbose=0)\n", " # To find the index of the next word in the prediction array.\n", " # The tokenized prompt is already shorter than the original decoded sample\n", " # by one, len(decoded_sample.split()) is two words ahead - so we remove 1 to get\n", " # the next word in the sequence\n", " sample_index = len(decoded_sample.strip().split())-1\n", " \n", " sampled_token = self.sample_token(predictions[0][sample_index])\n", " sampled_token = index_lookup[sampled_token]\n", " decoded_sample += \" \" + sampled_token\n", " \n", " print(f\"\\nSample text:\\n{decoded_sample}...\\n\")\n", "\n", "# First 5 words of a random sentence to be used as a seed\n", "random_sentence = ' '.join(random.choice(text_valid).replace('\\n', ' ').split(' ')[:4])\n", "sampler = TextSampler(random_sentence, 30)\n", "reducelr = keras.callbacks.ReduceLROnPlateau(patience=10, monitor='val_loss')" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"model\"\n", "_________________________________________________________________\n", " Layer (type) Output Shape Param # \n", "=================================================================\n", " input_1 (InputLayer) [(None, 10)] 0 \n", " \n", " token_and_position_embeddin (None, 10, 128) 38846080 \n", " g (TokenAndPositionEmbeddin \n", " g) \n", " \n", " transformer_decoder (Transf (None, 10, 128) 132480 \n", " ormerDecoder) \n", " \n", " transformer_decoder_1 (Tran (None, 10, 128) 132480 \n", " sformerDecoder) \n", " \n", " transformer_decoder_2 (Tran (None, 10, 128) 132480 \n", " sformerDecoder) \n", " \n", " transformer_decoder_3 (Tran (None, 10, 128) 132480 \n", " sformerDecoder) \n", " \n", " dropout (Dropout) (None, 10, 128) 0 \n", " \n", " dense (Dense) (None, 10, 303475) 39148275 \n", " \n", "=================================================================\n", "Total params: 78,524,275\n", "Trainable params: 78,524,275\n", "Non-trainable params: 0\n", "_________________________________________________________________\n", "Epoch 1/150\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2023-02-28 23:36:23.887413: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n", "2023-02-28 23:36:24.308423: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x7ff6d67579b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n", "2023-02-28 23:36:24.308518: I tensorflow/compiler/xla/service/service.cc:181] StreamExecutor device (0): NVIDIA GeForce RTX 3050 Laptop GPU, Compute Capability 8.6\n", "2023-02-28 23:36:24.328912: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n", "2023-02-28 23:36:24.549826: I tensorflow/compiler/jit/xla_compilation_cache.cc:477] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "3082/8957 [=========>....................] - ETA: 55:03 - loss: 5.8952 - perplexity: 363.2977 - accuracy: 0.4296" ] }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[1;32m/media/yuvrajtalukdar/New Volume/computer/undergoing_projects/AssamWiki GPT/AssameseWikiGPT.ipynb Cell 9\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m model \u001b[39m=\u001b[39m create_model2(\u001b[39m4\u001b[39m)\n\u001b[1;32m 2\u001b[0m model\u001b[39m.\u001b[39msummary()\n\u001b[0;32m----> 3\u001b[0m history \u001b[39m=\u001b[39m model\u001b[39m.\u001b[39;49mfit(train_dataset,validation_data\u001b[39m=\u001b[39;49mvalid_dataset,epochs\u001b[39m=\u001b[39;49m\u001b[39m150\u001b[39;49m,callbacks\u001b[39m=\u001b[39;49m[sampler, reducelr])\n", "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/keras/utils/traceback_utils.py:65\u001b[0m, in \u001b[0;36mfilter_traceback..error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 63\u001b[0m filtered_tb \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 64\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 65\u001b[0m \u001b[39mreturn\u001b[39;00m fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 66\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 67\u001b[0m filtered_tb \u001b[39m=\u001b[39m _process_traceback_frames(e\u001b[39m.\u001b[39m__traceback__)\n", "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/keras/engine/training.py:1650\u001b[0m, in \u001b[0;36mModel.fit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m 1642\u001b[0m \u001b[39mwith\u001b[39;00m tf\u001b[39m.\u001b[39mprofiler\u001b[39m.\u001b[39mexperimental\u001b[39m.\u001b[39mTrace(\n\u001b[1;32m 1643\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mtrain\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 1644\u001b[0m epoch_num\u001b[39m=\u001b[39mepoch,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1647\u001b[0m _r\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m,\n\u001b[1;32m 1648\u001b[0m ):\n\u001b[1;32m 1649\u001b[0m callbacks\u001b[39m.\u001b[39mon_train_batch_begin(step)\n\u001b[0;32m-> 1650\u001b[0m tmp_logs \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtrain_function(iterator)\n\u001b[1;32m 1651\u001b[0m \u001b[39mif\u001b[39;00m data_handler\u001b[39m.\u001b[39mshould_sync:\n\u001b[1;32m 1652\u001b[0m context\u001b[39m.\u001b[39masync_wait()\n", "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/util/traceback_utils.py:150\u001b[0m, in \u001b[0;36mfilter_traceback..error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 148\u001b[0m filtered_tb \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 149\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 150\u001b[0m \u001b[39mreturn\u001b[39;00m fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 151\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 152\u001b[0m filtered_tb \u001b[39m=\u001b[39m _process_traceback_frames(e\u001b[39m.\u001b[39m__traceback__)\n", "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py:880\u001b[0m, in \u001b[0;36mFunction.__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 877\u001b[0m compiler \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mxla\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_jit_compile \u001b[39melse\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mnonXla\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 879\u001b[0m \u001b[39mwith\u001b[39;00m OptionalXlaContext(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_jit_compile):\n\u001b[0;32m--> 880\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwds)\n\u001b[1;32m 882\u001b[0m new_tracing_count \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mexperimental_get_tracing_count()\n\u001b[1;32m 883\u001b[0m without_tracing \u001b[39m=\u001b[39m (tracing_count \u001b[39m==\u001b[39m new_tracing_count)\n", "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py:912\u001b[0m, in \u001b[0;36mFunction._call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 909\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_lock\u001b[39m.\u001b[39mrelease()\n\u001b[1;32m 910\u001b[0m \u001b[39m# In this case we have created variables on the first call, so we run the\u001b[39;00m\n\u001b[1;32m 911\u001b[0m \u001b[39m# defunned version which is guaranteed to never create variables.\u001b[39;00m\n\u001b[0;32m--> 912\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_no_variable_creation_fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwds) \u001b[39m# pylint: disable=not-callable\u001b[39;00m\n\u001b[1;32m 913\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_variable_creation_fn \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 914\u001b[0m \u001b[39m# Release the lock early so that multiple threads can perform the call\u001b[39;00m\n\u001b[1;32m 915\u001b[0m \u001b[39m# in parallel.\u001b[39;00m\n\u001b[1;32m 916\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_lock\u001b[39m.\u001b[39mrelease()\n", "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/eager/polymorphic_function/tracing_compiler.py:134\u001b[0m, in \u001b[0;36mTracingCompiler.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 131\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_lock:\n\u001b[1;32m 132\u001b[0m (concrete_function,\n\u001b[1;32m 133\u001b[0m filtered_flat_args) \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_maybe_define_function(args, kwargs)\n\u001b[0;32m--> 134\u001b[0m \u001b[39mreturn\u001b[39;00m concrete_function\u001b[39m.\u001b[39;49m_call_flat(\n\u001b[1;32m 135\u001b[0m filtered_flat_args, captured_inputs\u001b[39m=\u001b[39;49mconcrete_function\u001b[39m.\u001b[39;49mcaptured_inputs)\n", "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/eager/polymorphic_function/monomorphic_function.py:1745\u001b[0m, in \u001b[0;36mConcreteFunction._call_flat\u001b[0;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[1;32m 1741\u001b[0m possible_gradient_type \u001b[39m=\u001b[39m gradients_util\u001b[39m.\u001b[39mPossibleTapeGradientTypes(args)\n\u001b[1;32m 1742\u001b[0m \u001b[39mif\u001b[39;00m (possible_gradient_type \u001b[39m==\u001b[39m gradients_util\u001b[39m.\u001b[39mPOSSIBLE_GRADIENT_TYPES_NONE\n\u001b[1;32m 1743\u001b[0m \u001b[39mand\u001b[39;00m executing_eagerly):\n\u001b[1;32m 1744\u001b[0m \u001b[39m# No tape is watching; skip to running the function.\u001b[39;00m\n\u001b[0;32m-> 1745\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_build_call_outputs(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_inference_function\u001b[39m.\u001b[39;49mcall(\n\u001b[1;32m 1746\u001b[0m ctx, args, cancellation_manager\u001b[39m=\u001b[39;49mcancellation_manager))\n\u001b[1;32m 1747\u001b[0m forward_backward \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_select_forward_and_backward_functions(\n\u001b[1;32m 1748\u001b[0m args,\n\u001b[1;32m 1749\u001b[0m possible_gradient_type,\n\u001b[1;32m 1750\u001b[0m executing_eagerly)\n\u001b[1;32m 1751\u001b[0m forward_function, args_with_tangents \u001b[39m=\u001b[39m forward_backward\u001b[39m.\u001b[39mforward()\n", "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/eager/polymorphic_function/monomorphic_function.py:378\u001b[0m, in \u001b[0;36m_EagerDefinedFunction.call\u001b[0;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[1;32m 376\u001b[0m \u001b[39mwith\u001b[39;00m _InterpolateFunctionError(\u001b[39mself\u001b[39m):\n\u001b[1;32m 377\u001b[0m \u001b[39mif\u001b[39;00m cancellation_manager \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 378\u001b[0m outputs \u001b[39m=\u001b[39m execute\u001b[39m.\u001b[39;49mexecute(\n\u001b[1;32m 379\u001b[0m \u001b[39mstr\u001b[39;49m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msignature\u001b[39m.\u001b[39;49mname),\n\u001b[1;32m 380\u001b[0m num_outputs\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_num_outputs,\n\u001b[1;32m 381\u001b[0m inputs\u001b[39m=\u001b[39;49margs,\n\u001b[1;32m 382\u001b[0m attrs\u001b[39m=\u001b[39;49mattrs,\n\u001b[1;32m 383\u001b[0m ctx\u001b[39m=\u001b[39;49mctx)\n\u001b[1;32m 384\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 385\u001b[0m outputs \u001b[39m=\u001b[39m execute\u001b[39m.\u001b[39mexecute_with_cancellation(\n\u001b[1;32m 386\u001b[0m \u001b[39mstr\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39msignature\u001b[39m.\u001b[39mname),\n\u001b[1;32m 387\u001b[0m num_outputs\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_num_outputs,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 390\u001b[0m ctx\u001b[39m=\u001b[39mctx,\n\u001b[1;32m 391\u001b[0m cancellation_manager\u001b[39m=\u001b[39mcancellation_manager)\n", "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/eager/execute.py:52\u001b[0m, in \u001b[0;36mquick_execute\u001b[0;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 51\u001b[0m ctx\u001b[39m.\u001b[39mensure_initialized()\n\u001b[0;32m---> 52\u001b[0m tensors \u001b[39m=\u001b[39m pywrap_tfe\u001b[39m.\u001b[39;49mTFE_Py_Execute(ctx\u001b[39m.\u001b[39;49m_handle, device_name, op_name,\n\u001b[1;32m 53\u001b[0m inputs, attrs, num_outputs)\n\u001b[1;32m 54\u001b[0m \u001b[39mexcept\u001b[39;00m core\u001b[39m.\u001b[39m_NotOkStatusException \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 55\u001b[0m \u001b[39mif\u001b[39;00m name \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "model = create_model2(4)\n", "model.summary()\n", "history = model.fit(train_dataset,validation_data=valid_dataset,epochs=150,callbacks=[sampler, reducelr])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def sample_token(logits):\n", " logits, indices = tf.math.top_k(logits, k=5, sorted=True)\n", " indices = np.asarray(indices).astype(\"int32\")\n", " preds = keras.activations.softmax(tf.expand_dims(logits, 0))[0]\n", " preds = np.asarray(preds).astype(\"float32\")\n", " return np.random.choice(indices, p=preds)\n", "\n", "def generate_text(prompt, response_length=50):\n", " decoded_sample = prompt\n", " for i in range(response_length-1):\n", " tokenized_prompt = vectorize_layer([decoded_sample])[:, :-1]\n", " predictions = model.predict([tokenized_prompt], verbose=0)\n", " sample_index = len(decoded_sample.strip().split())-1\n", "\n", " sampled_token = sample_token(predictions[0][sample_index])\n", " sampled_token = index_lookup[sampled_token]\n", " decoded_sample += \" \" + sampled_token\n", " return decoded_sample" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pickle\n", "model.save(\"pd_plaintext_transformer.h5\")\n", "pickle.dump(model, open('pd_plaintext_transformer.pkl', 'wb'))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "generate_text('য়ুৰিৰ দাদাক আৰু ',response_length=50)" ] } ], "metadata": { "kernelspec": { "display_name": "miniproject", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "b18115e74db522ea4edaf3f03801a60154dbaca70e4a91a6289c29c6971e06fa" } } }, "nbformat": 4, "nbformat_minor": 2 }