{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# installing tensorflow_text \n!pip install tensorflow_text ","metadata":{"id":"DGFTkuRvzWqc","outputId":"5cd555ad-f28d-40fc-8931-effd87043ffd","execution":{"iopub.status.busy":"2023-03-16T11:42:55.387809Z","iopub.execute_input":"2023-03-16T11:42:55.388211Z","iopub.status.idle":"2023-03-16T11:43:17.345494Z","shell.execute_reply.started":"2023-03-16T11:42:55.388174Z","shell.execute_reply":"2023-03-16T11:43:17.344235Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"Requirement already satisfied: tensorflow_text in /opt/conda/lib/python3.7/site-packages (2.11.0)\nRequirement already satisfied: tensorflow-hub>=0.8.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow_text) (0.12.0)\nRequirement already satisfied: tensorflow<2.12,>=2.11.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow_text) (2.11.0)\nRequirement already satisfied: grpcio<2.0,>=1.24.3 in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (1.51.1)\nRequirement already satisfied: six>=1.12.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (1.16.0)\nRequirement already satisfied: libclang>=13.0.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (15.0.6.1)\nRequirement already satisfied: flatbuffers>=2.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (23.1.21)\nRequirement already satisfied: setuptools in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (59.8.0)\nRequirement already satisfied: 
keras<2.12,>=2.11.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (2.11.0)\nRequirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (0.29.0)\nRequirement already satisfied: opt-einsum>=2.3.2 in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (3.3.0)\nRequirement already satisfied: absl-py>=1.0.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (1.4.0)\nRequirement already satisfied: gast<=0.4.0,>=0.2.1 in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (0.4.0)\nRequirement already satisfied: tensorflow-estimator<2.12,>=2.11.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (2.11.0)\nRequirement already satisfied: packaging in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (23.0)\nRequirement already satisfied: wrapt>=1.11.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (1.14.1)\nRequirement already satisfied: astunparse>=1.6.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (1.6.3)\nRequirement already satisfied: h5py>=2.9.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (3.8.0)\nRequirement already satisfied: tensorboard<2.12,>=2.11 in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (2.11.2)\nCollecting protobuf<3.20,>=3.9.2\n Downloading protobuf-3.19.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m28.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n\u001b[?25hRequirement already satisfied: 
typing-extensions>=3.6.6 in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (4.4.0)\nRequirement already satisfied: termcolor>=1.1.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (2.2.0)\nRequirement already satisfied: numpy>=1.20 in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (1.21.6)\nRequirement already satisfied: google-pasta>=0.1.1 in /opt/conda/lib/python3.7/site-packages (from tensorflow<2.12,>=2.11.0->tensorflow_text) (0.2.0)\nRequirement already satisfied: wheel<1.0,>=0.23.0 in /opt/conda/lib/python3.7/site-packages (from astunparse>=1.6.0->tensorflow<2.12,>=2.11.0->tensorflow_text) (0.38.4)\nRequirement already satisfied: google-auth<3,>=1.6.3 in /opt/conda/lib/python3.7/site-packages (from tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) (1.35.0)\nRequirement already satisfied: tensorboard-data-server<0.7.0,>=0.6.0 in /opt/conda/lib/python3.7/site-packages (from tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) (0.6.1)\nRequirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /opt/conda/lib/python3.7/site-packages (from tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) (1.8.1)\nRequirement already satisfied: markdown>=2.6.8 in /opt/conda/lib/python3.7/site-packages (from tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) (3.4.1)\nRequirement already satisfied: werkzeug>=1.0.1 in /opt/conda/lib/python3.7/site-packages (from tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) (2.2.3)\nRequirement already satisfied: requests<3,>=2.21.0 in /opt/conda/lib/python3.7/site-packages (from tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) (2.28.2)\nRequirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /opt/conda/lib/python3.7/site-packages (from tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) 
(0.4.6)\nRequirement already satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.7/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) (0.2.8)\nRequirement already satisfied: rsa<5,>=3.1.4 in /opt/conda/lib/python3.7/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) (4.9)\nRequirement already satisfied: cachetools<5.0,>=2.0.0 in /opt/conda/lib/python3.7/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) (4.2.4)\nRequirement already satisfied: requests-oauthlib>=0.7.0 in /opt/conda/lib/python3.7/site-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) (1.3.1)\nRequirement already satisfied: importlib-metadata>=4.4 in /opt/conda/lib/python3.7/site-packages (from markdown>=2.6.8->tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) (4.11.4)\nRequirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) (1.26.14)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) (2022.12.7)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) (3.4)\nRequirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) (2.1.1)\nRequirement already satisfied: MarkupSafe>=2.1.1 in /opt/conda/lib/python3.7/site-packages (from werkzeug>=1.0.1->tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) 
(2.1.1)\nRequirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata>=4.4->markdown>=2.6.8->tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) (3.11.0)\nRequirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /opt/conda/lib/python3.7/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) (0.4.8)\nRequirement already satisfied: oauthlib>=3.0.0 in /opt/conda/lib/python3.7/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.12,>=2.11->tensorflow<2.12,>=2.11.0->tensorflow_text) (3.2.2)\nInstalling collected packages: protobuf\n Attempting uninstall: protobuf\n Found existing installation: protobuf 3.20.3\n Uninstalling protobuf-3.20.3:\n Successfully uninstalled protobuf-3.20.3\n\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\ncudf 21.12.2 requires cupy-cuda115, which is not installed.\ntfx-bsl 1.12.0 requires google-api-python-client<2,>=1.7.11, but you have google-api-python-client 2.79.0 which is incompatible.\ntfx-bsl 1.12.0 requires pyarrow<7,>=6, but you have pyarrow 5.0.0 which is incompatible.\ntensorflow-transform 1.12.0 requires pyarrow<7,>=6, but you have pyarrow 5.0.0 which is incompatible.\nonnx 1.13.1 requires protobuf<4,>=3.20.2, but you have protobuf 3.19.6 which is incompatible.\napache-beam 2.44.0 requires dill<0.3.2,>=0.3.1.1, but you have dill 0.3.6 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0mSuccessfully installed protobuf-3.19.6\n\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
#@title Shape checker
class ShapeChecker():
    """Callable that checks tensors against named axis lengths.

    The first time an axis name is seen, the length found on that axis is
    stored in ``self.shapes``. Every later call that uses the same name must
    present the same length, otherwise ``ValueError`` is raised. An ``int``
    in ``names`` is treated as a literal expected length and is never cached.

    All checks are skipped (the call returns ``None`` immediately) when
    ``tf.executing_eagerly()`` is false.
    """

    def __init__(self):
        # Cache mapping axis name -> length recorded when the name was first seen.
        self.shapes = {}

    def __call__(self, tensor, names, broadcast=False):
        """Validate the shape of ``tensor`` against ``names``.

        Args:
            tensor: Value accepted by ``tf.shape`` / ``tf.rank``.
            names: One axis name (``str``) or a sequence with one entry per
                axis. Each entry is either an axis name or an ``int`` giving
                the exact expected length of that axis.
            broadcast: When true, an axis whose length is 1 is accepted
                without being compared or cached.

        Raises:
            ValueError: If the rank differs from ``len(names)``, or an axis
                length differs from the cached / literal expected length.
        """
        if not tf.executing_eagerly():
            return

        if isinstance(names, str):
            names = (names,)

        shape = tf.shape(tensor)
        # Convert to a plain int so the comparison and the error message do
        # not involve a scalar tensor.
        rank = int(tf.rank(tensor))

        if rank != len(names):
            raise ValueError(f'Rank mismatch:\n'
                             f' found {rank}: {shape.numpy()}\n'
                             f' expected {len(names)}: {names}\n')

        for i, name in enumerate(names):
            if isinstance(name, int):
                # Literal dimension: compare against the number itself.
                old_dim = name
            else:
                old_dim = self.shapes.get(name, None)

            # Plain int: keeps tensors out of the cache and makes the
            # mismatch message print "4" instead of a tensor repr.
            new_dim = int(shape[i])

            if broadcast and new_dim == 1:
                continue

            if old_dim is None:
                # If the axis name is new, add its length to the cache.
                self.shapes[name] = new_dim
                continue

            if new_dim != old_dim:
                raise ValueError(f"Shape mismatch for dimension: '{name}'\n"
                                 f" found: {new_dim}\n"
                                 f" expected: {old_dim}\n")
# %%
# Load the tab-separated query corpus. The displayed frame has four columns:
# en_query, cs_query, en_parses and cs_parses.
data1_raw = pd.read_csv("/kaggle/input/google-reasearch/train.tsv", sep='\t')
data1_raw

# %%
# Build the training frame in a single non-mutating chain:
#   * keep only the two query columns (cs_query first, as before),
#   * strip surrounding whitespace and lower-case the text,
#   * rename to the `Sentence` / `trr` column names used by the other
#     frames that are concatenated later in the notebook.
# Slicing and then calling rename(inplace=True) emitted a
# SettingWithCopyWarning; chaining returns a fresh frame instead. Leaving
# `data1_raw` untouched also makes this cell safe to re-run.
data1 = (
    data1_raw[['cs_query', 'en_query']]
    .apply(lambda column: column.str.strip().str.lower())
    .rename(columns={'cs_query': 'Sentence', 'en_query': 'trr'})
)

# %%
data1
\n2 subha 5 baje ke liye ek alarm set karen \n3 dopaher 2 baje sydney me mausam kaisa hoga \n4 canada me mausam kaisa hai ? \n... ... \n170078 humble se brenham tak kitne miles hai \n170079 ronald reagan par traffic kyu kharaab hai \n170080 is weekend the strokes ke hone wale concerts \n170081 kal kitni mm baarish ki ummeed hai ? \n170082 sarey alarms cance kare \n\n trr \n0 who is the featured performer at the grand ole... \n1 remind me of my meeting at 12 am \n2 set an alarm for 5 am \n3 what ’ s will weather be like at sydney at 2 pm \n4 what is the weather in canada ? \n... ... \n170078 how many miles from humble to brenham \n170079 what is traffic so bad on ronald reagan \n170080 concerts by the strokes for this weekend \n170081 how many mm of rain do we expect for tomorrow ? \n170082 cancel all alarms \n\n[170083 rows x 2 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Sentencetrr
0is saturday night the grand ole opry me featur...who is the featured performer at the grand ole...
1mujhe 12 am ko meri meeting ke baare me yaad d...remind me of my meeting at 12 am
2subha 5 baje ke liye ek alarm set karenset an alarm for 5 am
3dopaher 2 baje sydney me mausam kaisa hogawhat ’ s will weather be like at sydney at 2 pm
4canada me mausam kaisa hai ?what is the weather in canada ?
.........
170078humble se brenham tak kitne miles haihow many miles from humble to brenham
170079ronald reagan par traffic kyu kharaab haiwhat is traffic so bad on ronald reagan
170080is weekend the strokes ke hone wale concertsconcerts by the strokes for this weekend
170081kal kitni mm baarish ki ummeed hai ?how many mm of rain do we expect for tomorrow ?
170082sarey alarms cance karecancel all alarms
\n

170083 rows × 2 columns

# %%
# Read the second corpus; the displayed frame has the columns
# Sentence, English_Translation, trr and tr.
data = pd.read_csv(filepath_or_buffer="/kaggle/input/file-2/file2.csv")
data

# %%
# Narrow the frame to the input text and its `trr` counterpart, in that order.
wanted_columns = ['Sentence', 'trr']
data = data[wanted_columns]
data
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Sentencetrr
0congratulations on you celebrating british kid...congratulations on you celebrating british kid...
1uske liye toh bahot kuch karna padega ye pappi...you will have to do a lot for that, it will no...
2yehi to hum semjhane ki koshish kar rahe hain....this is what we are trying to understand. peop...
3cake kaha hai ??where is the cake??
4jeet ka jashn aur shubah ki shuruat eating bre...victory celebration and early morning eating b...
.........
12143dr kumar vishwas: \"koi deewana kehta hai.. koi...dr. kumar vishwas: \"some say crazy..some think...
12144me: aaj kuch toofani karte hai.mom: pani ki bo...me: let's do something stormy today. mother: f...
12145pyar mangi to jaan dengi,milk mango to kher de...if you ask for love, you will give your life, ...
12146kaale kaale baal gaal gore gorekale kale bal gal gore gore
12147ye sab aunty'on ke saath?all this with the aunties?
\n

12148 rows × 2 columns

# %%
def read_tab_separated_pairs(path):
    """Read a text file holding one tab-separated pair per line.

    Only the first two tab-separated fields of each line are used; any
    further fields are ignored. Blank lines are skipped.

    Args:
        path: Location of the text file.

    Returns:
        A ``(first_fields, second_fields)`` tuple of two equally long lists
        of strings, in file order.

    Raises:
        ValueError: If a non-blank line contains no tab separator.
    """
    first_fields = []
    second_fields = []
    # The context manager closes the handle even if parsing fails part-way.
    with open(path, 'r') as handle:
        for line_number, raw_line in enumerate(handle, start=1):
            text = raw_line.rstrip('\n')
            if not text.strip():
                # e.g. a trailing empty line at the end of the file
                continue
            fields = text.split('\t')
            if len(fields) < 2:
                raise ValueError(
                    f"{path}: line {line_number} has no tab separator: {text!r}")
            first_fields.append(fields[0])
            second_fields.append(fields[1])
    return first_fields, second_fields


# %%
# The first field of each line goes to `targ` and the second to `inp`.
targ, inp = read_tab_separated_pairs('/kaggle/input/anuvaad/anuvaad.txt')

# %%
# Same two-column layout (`Sentence`, `trr`) as the other corpora.
data_anu = pd.DataFrame(data={'Sentence': inp, 'trr': targ})
data_anu
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Sentencetrr
0Come on boys, waqt hai shine karne kaCome on boys, it is time to shine.
1Yeh heart Maange More!This heart asks for more!
2She was bhunno-ing the masala-s jub phone ki g...She was roasting the spices when the phone rang
3You know me na...You know me right
4Woh to mai sambhal lungaI will handle that
.........
1299muze vo bohot pasand haiI like it a lot
1300mujhe psnd haiI like it a lot
1301mujhe market se vegetables lekar aane hainI have to bring vegetables from the market
1302sabse best friendMost best friend
1303best friend sabsemost best friend
\n

1304 rows × 2 columns

\n
"},"metadata":{}}]},{"cell_type":"code","source":"# Trim surrounding whitespace and lowercase both text columns of data_anu.\nfor col in ('Sentence', 'trr'):\n    data_anu[col] = data_anu[col].map(lambda text: str(text.strip()).lower())","metadata":{"execution":{"iopub.status.busy":"2023-03-16T11:43:37.236242Z","iopub.execute_input":"2023-03-16T11:43:37.236568Z","iopub.status.idle":"2023-03-16T11:43:37.245140Z","shell.execute_reply.started":"2023-03-16T11:43:37.236540Z","shell.execute_reply":"2023-03-16T11:43:37.244018Z"},"trusted":true},"execution_count":14,"outputs":[]},{"cell_type":"code","source":"# Stack the three corpora row-wise; each frame keeps its own index labels.\ndf = pd.concat([data, data1, data_anu])\ndf","metadata":{"execution":{"iopub.status.busy":"2023-03-16T11:43:37.246257Z","iopub.execute_input":"2023-03-16T11:43:37.246745Z","iopub.status.idle":"2023-03-16T11:43:37.272633Z","shell.execute_reply.started":"2023-03-16T11:43:37.246706Z","shell.execute_reply":"2023-03-16T11:43:37.271664Z"},"trusted":true},"execution_count":15,"outputs":[{"execution_count":15,"output_type":"execute_result","data":{"text/plain":" Sentence \\\n0 congratulations on you celebrating british kid... \n1 uske liye toh bahot kuch karna padega ye pappi... \n2 yehi to hum semjhane ki koshish kar rahe hain.... \n3 cake kaha hai ?? \n4 jeet ka jashn aur shubah ki shuruat eating bre... \n... ... \n1299 muze vo bohot pasand hai \n1300 mujhe psnd hai \n1301 mujhe market se vegetables lekar aane hain \n1302 sabse best friend \n1303 best friend sabse \n\n trr \n0 congratulations on you celebrating british kid... \n1 you will have to do a lot for that, it will no... \n2 this is what we are trying to understand. peop... \n3 where is the cake?? \n4 victory celebration and early morning eating b... \n... ... \n1299 i like it a lot \n1300 i like it a lot \n1301 i have to bring vegetables from the market \n1302 most best friend \n1303 most best friend \n\n[183535 rows x 2 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Sentencetrr
0congratulations on you celebrating british kid...congratulations on you celebrating british kid...
1uske liye toh bahot kuch karna padega ye pappi...you will have to do a lot for that, it will no...
2yehi to hum semjhane ki koshish kar rahe hain....this is what we are trying to understand. peop...
3cake kaha hai ??where is the cake??
4jeet ka jashn aur shubah ki shuruat eating bre...victory celebration and early morning eating b...
.........
1299muze vo bohot pasand haii like it a lot
1300mujhe psnd haii like it a lot
1301mujhe market se vegetables lekar aane haini have to bring vegetables from the market
1302sabse best friendmost best friend
1303best friend sabsemost best friend
\n

183535 rows × 2 columns

\n
"},"metadata":{}}]},{"cell_type":"code","source":"# Bucket every token of df['Sentence'] by a phonetic-style key so that tokens sharing\n# a key (for example spelling variants) end up in the same set.\n# key = doublemetaphone(token)[0] * doublemetaphone(reversed token)[0] * first two chars * last char\nhin_vocab=dict()\nfor i in df['Sentence'].apply(lambda x: re.sub(\"[.!?\\\\-\\'\\\"]\", \"\", x)):\n    for j in i.split(' '):\n        key = doublemetaphone(j)[0]+'*'+doublemetaphone(j[::-1])[0]+'*'+j[:2]+'*'+j[len(j)-1:]\n        # One pass is enough: create the bucket on first sight, then add the token.\n        hin_vocab.setdefault(key, set()).add(j)\nprint(len(hin_vocab))\nhin_vocab","metadata":{"execution":{"iopub.status.busy":"2023-03-16T11:43:37.274284Z","iopub.execute_input":"2023-03-16T11:43:37.274892Z","iopub.status.idle":"2023-03-16T11:45:07.573197Z","shell.execute_reply.started":"2023-03-16T11:43:37.274851Z","shell.execute_reply":"2023-03-16T11:45:07.572045Z"},"trusted":true},"execution_count":16,"outputs":[{"name":"stdout","text":"26683\n","output_type":"stream"},{"execution_count":16,"output_type":"execute_result","data":{"text/plain":"{'KNKRTLXNS*SNTLTRNK*co*s': {'congratulations'},\n 'AN*N*on*n': {'on'},\n 'A*A*yo*u': {'you', 'youuuuu', 'youuuuuu'},\n 'SLPRTNK*NTRPLK*ce*g': {'celebrating'},\n 'PRTX*STRP*br*h': {'british'},\n 'KT*TK*ki*d': {'kid', 'kidd'},\n 'SNKRS*SRNS*si*s': {'singers'},\n 'SF*APS*so*a': {'sophia'},\n 'KRSS*SKRK*gr*s': {'graces', 'grocess'},\n 'ANT*TN*an*d': {'and'},\n 'RSS*SSR*ro*s': {'roses', 'rosies'},\n 'ST*TS*st*t': {'st'},\n 'ANFRSR*ARSRFN*an*y': {'anniversary'},\n 'AF*F*of*f': {'of', 'off'},\n 'A*A*a*a': {'a'},\n 'FST*TSF*vi*t': {'visit', 'vist'},\n 'AR*R*yo*r': {'yor', 'your'},\n 'X*AS*sh*w': {'shaw', 'show'},\n '***': {''},\n 'H*A*ho*w': {'how'},\n 'ASK*AKS*us*e': {'uske'},\n 'L*AL*li*e': {'lie', 'liye', 'liyee', 'liyeee', 'liyye'},\n 'T*HT*to*h': {'toh'},\n 'PHT*THP*ba*t': {'bahat', 'bahaut', 'bahot', 'bahuat', 'bahut'},\n 'KX*KK*ku*h': {'kucch', 'kucchh', 'kuch'},\n 'KRN*ANRK*ka*a': {'karana', 'kareena', 'karna', 'karwana'},\n 
'PTK*AJTP*pa*a': {'padega', 'padhega', 'padhiyega'},\n 'A*A*ye*e': {'ye', 'yee', 'yeeeeee'},\n 'PPN*NPP*pa*n': {'paapon', 'pappiyan', 'pappiyon'},\n 'S*AS*se*e': {'se', 'see', 'sewee'},\n 'KM*MK*ka*m': {'kaam', 'kaayam', 'kam', 'kamm', 'kaum', 'kayam'},\n 'NH*AHN*na*i': {'naahi', 'nahi'},\n 'XLK*AJLK*ch*a': {'chalayega', 'chalega', 'chilayega'},\n 'AH*AH*ye*i': {'yehi'},\n 'T*AT*to*o': {'to', 'too', 'tooo'},\n 'HM*M*hu*m': {'hum'},\n 'SMJN*ANMS*se*e': {'semjhane'},\n 'K*AK*ki*i': {'ki', 'kii', 'kiwi'},\n 'KXX*SSK*ko*h': {'koshish', 'koshishh'},\n 'KR*RK*ka*r': {'kaar', 'kar', 'karr', 'kaur'},\n 'RH*AHR*ra*e': {'rahe', 'rahee', 'rahiye'},\n 'HN*N*ha*n': {'haan',\n 'haann',\n 'hain',\n 'han',\n 'hawaiian',\n 'hawaiyan',\n 'hawaon',\n 'hawayein',\n 'hawon'},\n 'LK*KL*lo*g': {'log', 'loog'},\n 'SP*PS*sa*b': {'saab', 'sab'},\n 'K*AK*ko*o': {'ko', 'koo'},\n 'AS*AS*is*i': {'isaai', 'isi', 'issi'},\n 'MN*NM*me*n': {'mean', 'mein', 'men', 'meun'},\n 'TL*LT*to*l': {'tol', 'toll', 'tool', 'towel'},\n 'TT*ATT*de*e': {'deate', 'dede', 'dete'},\n 'KNTKSTT*TKSTNK*co*:': {'context:'},\n 'KK*AKK*ca*e': {'cake'},\n 'KH*AHK*ka*a': {'kaha', 'kahaa'},\n 'H*A*ha*i': {'hai',\n 'haii',\n 'haiii',\n 'haiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii',\n 'hawai',\n 'hawaii'},\n 'JT*TJ*je*t': {'jeet', 'jet', 'jett'},\n 'K*AK*ka*a': {'ka', 'kaa', 'kauwa', 'kauwaa', 'kaya'},\n 'JXN*NSJ*ja*n': {'jashn'},\n 'AR*R*au*r': {'aur'},\n 'XP*HPS*sh*h': {'shubah'},\n 'XRT*TRS*sh*t': {'shart',\n 'shirt',\n 'shohrat',\n 'short',\n 'shroowat',\n 'shuruaat',\n 'shuruat'},\n 'ATNK*NT*ea*g': {'eating'},\n 'PRT*TRP*br*d': {'brad', 'bread', 'breed', 'brid', 'broad'},\n 'PKT*ATKP*pa*a': {'pakadta', 'pakata', 'pakda', 'pakoda'},\n 'AT*T*at*t': {'at', 'att'},\n 'TNTNS*SNTNT*ta*s': {'tandons'},\n 'KTJ*AKTK*co*e': {'cottage'},\n 'FXL*ALSF*va*i': {'vaishali'},\n 'TM*HMT*da*h': {'damoh'},\n 'SK*AKS*so*a': {'soca', 'sookha'},\n 'PR*ARP*pu*i': {'puri', 'puuri'},\n 'TN*ANT*de*a': {'deana', 'deanna', 'deewana', 'deewaniya', 'dena', 
'denia'},\n 'MTLP*PLTM*ma*b': {'matlab', 'matlb'},\n 'TM*MT*tu*m': {'tum'},\n 'KRXM*AMSRK*ka*a': {'karishma'},\n 'NHN*NHN*na*n': {'nahin'},\n 'RFN*ANFR*ra*a': {'ravana', 'raveena', 'ravenna', 'ravinia'},\n 'H*A*ho*o': {'ho', 'hoo', 'hooo', 'hooooo'},\n 'A*H*wo*h': {'woh'},\n 'J*AJ*jo*o': {'jo', 'joao', 'joo'},\n '0*T*th*,': {'tha,', 'thaa,', 'the,', 'thi,'},\n 'AP*P*ab*b': {'ab', 'abb'},\n 'KHN*NHK*ka*n': {'kahaaaan',\n 'kahaan',\n 'kahaen',\n 'kahan',\n 'kaheen',\n 'kahein',\n 'kahen',\n 'kahin',\n 'kahun'},\n 'SK*AKS*sa*i': {'saakhi', 'sachai', 'sachhai', 'sachhi', 'sakai', 'saki'},\n 'HKNK*AKNK*ho*o': {'hogayi*aankho'},\n 'ANS*SN*aa**': {'aansoo*', 'aansu*'},\n 'SH*AHS*sa*i': {'sahi'},\n 'M*AM*me*e': {'me', 'mee'},\n 'AR*R*ya*r': {'yaaar', 'yaar', 'yar', 'yarr'},\n 'KN*NK*ka*n': {'kaan', 'kahn', 'kan', 'kaun', 'kawn', 'kayon'},\n 'SLL*LLS*za*l': {'zaleel'},\n 'ANSN*NSN*in*n': {'insaan', 'insan'},\n 'HNN*N*ha*,': {'haan,', 'hain,'},\n 'KTK*AKTK*kh*i': {'khidki'},\n 'KL*ALK*kh*i': {'khaali', 'khali', 'kheli', 'kholi', 'khuli'},\n '0*AT*th*i': {'thai', 'the/hai', 'thei', 'thi', 'thii'},\n 'JSMN*NMSJ*ji*n': {'jismein'},\n 'PPR*ARPP*ba*o': {'baburao'},\n 'JNK*NJ*jh*k': {'jhaank'},\n 'RH*AHR*ra*a': {'raha', 'rahaa'},\n '0*AT*th*a': {'tha', 'thaa', 'thha'},\n 'KNFS*SFNK*co*s': {'confess'},\n 'KRT*ATRK*ka*e': {'karate',\n 'karatee',\n 'karde',\n 'karode',\n 'karte',\n 'karwaate',\n 'karwate'},\n 'A*A*ya*a': {'ya', 'yaa'},\n 'TRTR*ARTRT*to*e': {'torture'},\n 'ATN*ANT*it*a': {'itana', 'itna', 'itnaa'},\n 'PT*ATP*ba*a': {'baadhaa',\n 'bada',\n 'badaaa',\n 'badaya',\n 'baddua',\n 'badha',\n 'badhaiya',\n 'badhaya',\n 'badhiya',\n 'badhya',\n 'badiya',\n 'badta',\n 'bata',\n 'bataa',\n 'bataya'},\n 'KNFSN*NSFNK*co*n': {'confession', 'conffesn', 'confusion'},\n 'TJ*AJT*tu*e': {'tuje', 'tujhe'},\n 'SN*NJS*si*n': {'sign'},\n 'AP*P*up*p': {'up'},\n 'K*AK*ke*e': {'ke', 'kee'},\n 'RS*SR*rs*s': {'rs', 'rss'},\n 'MR*ARM*me*e': {'mere', 'meree'},\n 'AKNT*TNK*ac*t': {'account'},\n 
'TLN*ANLT*da*e': {'daalne', 'dalane', 'dalne', 'dalwane'},\n 'PTNJJ*NTP*pa*,': {'padenge,'},\n 'TP*PT*ta*b': {'tab'},\n 'H*A*hi*i': {'hi', 'hii', 'hiii'},\n 'RXRJ*AKRSR*re*e': {'recharge'},\n 'PK*AJP*pa*a': {'paaega', 'paayega', 'paega', 'payega'},\n 'MS*SM*mi*s': {'mis', 'miss'},\n 'A*A*u*u': {'u'},\n 'MST*TSM*ms*d': {'msd'},\n 'ATT*TT*aa*t': {'aadat'},\n 'K*AK*ga*i': {'gai', 'gaii', 'gayi', 'gayii'},\n 'APK*AKP*aa*o': {'aaapko', 'aapako', 'aapko'},\n 'KPTN*NTPK*ca*n': {'capetown', 'captain'},\n 'TKT*A0KT*de*e': {'dekhte'},\n 'H*A*hu*e': {'hue', 'huee', 'huwe', 'huye'},\n 'NT*ATN*no*e': {'note'},\n 'APLKXN*NTSLP*ap*n': {'application'},\n 'KKL*ALKK*go*e': {'google'},\n 'PLSTR*ARTSLP*pl*e': {'playstore'},\n 'TNLT*TLNT*do*d': {'donald', 'download'},\n 'HJ*AK*ho*i': {'hogi', 'hogyi'},\n '**,*,': {','},\n 'FK*AKF*fa*e': {'fake'},\n 'SKT*ATKS*sa*i': {'sakti'},\n 'PS*SP*ba*s': {'baas', 'bas', 'bass', 'baus'},\n 'KP*APK*ka*i': {'kabhi', 'kabi'},\n 'KMNT*TNMK*gh*d': {'ghamand'},\n 'K*AK*ki*a': {'kia', 'kiya', 'kiyaa', 'kiyaaa'},\n 'PX*ASP*bh*a': {'bhaasha', 'bhasha'},\n 'SK*AKS*si*a': {'sikha', 'sikhaya', 'sikka'},\n 'XNL*LNK*ch*l': {'chainal', 'chanel', 'channel'},\n 'PR*RP*pa*r': {'paar', 'pair', 'par', 'parr', 'pawar', 'payar'},\n 'KP*PK*ka*b': {'kab', 'kabb'},\n 'AT*AT*aa*a': {'aadha', 'aata', 'aatta'},\n 'PT*TP*ba*,': {'badho,', 'bata,', 'batao,'},\n 'SR*ARS*za*a': {'zaara', 'zara', 'zaraa', 'zariya'},\n 'P*AP*bh*i': {'bhaaaaiiiiii',\n 'bhaaaiiiii',\n 'bhaai',\n 'bhai',\n 'bhaii',\n 'bhaiiii',\n 'bhaui',\n 'bhi',\n 'bhii',\n 'bhoi'},\n 'TK*AKT*de*e': {'deke', 'dekhe', 'dekhiye', 'dekhye'},\n 'AKR*RK*ag*r': {'agar', 'ager', 'agr'},\n 'AS*AS*ay*i': {'ayesi'},\n 'H*A*he*e': {'he', 'hee', 'heeee'},\n 'PT*TP*ba*t': {'baaaat', 'baaat', 'baat', 'bat', 'baut'},\n 'PKL*ALSP*pe*y': {'peechley'},\n 'SL*ALS*sa*o': {'saalo', 'salo'},\n 'PJP*PP*bj*p': {'bjp'},\n 'AL*AL*wa*y': {'waaley', 'walay', 'waley', 'waly'},\n 'KNKRS*SRNK*co*s': {'congres', 'congress'},\n 'KLF*FLK*kh*f': 
{'khelaaf', 'khilaaf', 'khilaf'},\n 'APN*ANP*ap*a': {'apana', 'apanaa', 'apna', 'apnaya'},\n 'PRTX*ASTRP*pr*i': {'prattyashi'},\n 'K*AK*ky*u': {'kyu'},\n 'KR*ARK*kh*a': {'khara', 'kheera'},\n 'KRT*ATRK*ka*y': {'karrtey', 'kartay', 'kartey'},\n '0N*NT*th*n': {'than', 'thein', 'then', 'thin', 'thon'},\n 'TL*ALT*de*i': {'dehli', 'delhi', 'deli'},\n 'FTNSP*APSNTF*vi*a': {'vidhansabha'},\n 'ALKXN*NTSL*el*n': {'election'},\n 'K*AK*ky*a': {'kya', 'kyaa', 'kyaaa'},\n 'HK*AK*ho*a': {'hoga', 'hogaya'},\n 'AS*AS*ai*a': {'aisa'},\n 'ANN*ANN*un*e': {'unhone'},\n 'PL*ALP*pe*e': {'peele', 'pehle', 'pelee'},\n 'PK*SP*pi*c': {'pic'},\n 'N*AN*ni*i': {'ni', 'nii'},\n 'TK*AKT*dk*i': {'dkhi'},\n 'T*AT*ti*i': {'ti'},\n 'TMR*ARMT*tm*i': {'tmhari'},\n 'MLN*ANLM*mi*e': {'milane', 'milne'},\n 'K*K*k*k': {'k'},\n 'PT*TP*ba*d': {'baaad', 'baad', 'bad'},\n 'AKNR*ARNJ*ig*e': {'ignore'},\n 'TM*AMT*tm*e': {'tmhe'},\n 'M*AM*ma*i': {'mai', 'maui'},\n 'ATN*ANT*it*e': {'itane', 'itanee', 'itne', 'itnee'},\n 'KMS*SMK*ga*s': {'games'},\n 'P*P*b*b': {'b'},\n 'NNJ*ANN*ni*a': {'ninja'},\n 'TRTLS*SLTRT*tu*s': {'turtles'},\n 'STRT*TRTS*st*t': {'start', 'stewart', 'street', 'stuart'},\n 'FTR*R0JF*fi*r': {'fighter'},\n 'KR*RK*ca*r': {'caar', 'car', 'carr'},\n 'RS*AKR*ra*e': {'race'},\n 'MR*ARM*ma*o': {'maaro', 'mario', 'maro'},\n 'KL*LK*kh*l': {'khaal',\n 'khail',\n 'khal',\n 'khayaal',\n 'khayal',\n 'khel',\n 'khol',\n 'khool',\n 'khul',\n 'khyaal'},\n 'KX*SK*kh*h': {'khich', 'khush'},\n 'PK*AKP*ba*i': {'baaaki',\n 'baachhi',\n 'baakhi',\n 'baaki',\n 'bachai',\n 'bachhi',\n 'bakhi',\n 'baki',\n 'bakiii',\n 'baqi'},\n 'TN*NT*dh*n': {'dhan',\n 'dhawan',\n 'dhayan',\n 'dhayn',\n 'dhiyaan',\n 'dhuaan',\n 'dhuaon',\n 'dhyaan',\n 'dhyan'},\n 'TN*ANT*de*e': {'deewane', 'dene', 'dennie'},\n 'TM*MT*ty*m': {'tym'},\n 'KSK*AKSK*ki*e': {'kisake', 'kisike', 'kiske'},\n 'PS*SP*pa*s': {'paas', 'pas', 'pass', 'paws'},\n 'AS*S*is*s': {'is', 'iss'},\n 'RXTT*T0SR*ri*d': {'rishtewaad'},\n 'AST*ATS*az*i': {'azaadi', 'azadi'},\n 
'APS*ASP*aa*e': {'aapse'},\n 'XT*A0K*ch*e': {'chahte'},\n 'HPLSSS*SSSLP*ha*z': {'haiplzzzzzz'},\n 'TNT*TNT*dn*t': {'dnt'},\n 'PRK*KRP*br*k': {'break', 'brk', 'brook'},\n '0R*RT*th*r': {'their', 'thor', 'thr', 'thur'},\n 'HRTS*STR*he*s': {'hearts'},\n 'SR*RS*si*r': {'sir'},\n 'PLSS*SSLP*pl*z': {'plzzz', 'plzzzz'},\n 'F*HF*vo*h': {'voh'},\n 'PL*ALP*bo*a': {'bola', 'boleya'},\n 'JSN*ANSJ*ji*e': {'jisne'},\n 'KS*ASK*ki*i': {'kisi', 'kisii', 'kissi'},\n 'PRT*ATRP*pa*y': {'parody', 'party'},\n 'SPRT*TRPS*su*t': {'suparhit', 'superhit', 'support'},\n 'KRN*ANRK*ka*e': {'karaane',\n 'karane',\n 'karene',\n 'karine',\n 'karne',\n 'karonye',\n 'karwaane',\n 'karwane'},\n 'MN*ANM*ma*a': {'maana', 'mana', 'mania', 'manna', 'manwa'},\n 'KRT*ATRK*ka*a': {'karadiya', 'karata', 'kardiya', 'karta', 'karwata'},\n 'FLP*PLF*fl*p': {'flap', 'flip', 'flop'},\n 'ANJ*AJN*an*i': {'annaji'},\n 'KN*ANK*ko*e': {'kone'},\n 'AL*AL*wa*e': {'waale', 'wale', 'walee', 'walle'},\n 'PLT*TLP*pl*t': {'plot'},\n 'JK*HKJ*ja*h': {'jagah'},\n 'ST*ATS*zy*a': {'zyaada', 'zyaadaa', 'zyada'},\n 'HT*AT*ho*i': {'hoti'},\n 'SL*ALS*sa*a': {'saaala', 'saala', 'sala', 'salaaa', 'salla'},\n 'KM*AMK*ga*e': {'game'},\n 'PR*ARP*bh*e': {'bhare'},\n 'RT*A0R*re*e': {'rehte'},\n '0*AT*th*e': {'the', 'thee', 'thhe'},\n 'ASM*AMS*is*e': {'isme'},\n 'TKPS*SPKT*dh*z': {'dhokebaaz'},\n '**>_*<': {'>_<'},\n 'X*AK*ch*a': {'cha', 'chaa', 'chaiyya', 'chiya', 'chya'},\n 'ST*ATS*se*a': {'seedha', 'seeta'},\n 'NSR*RSN*na*r': {'nasr', 'nazar'},\n 'AN*AN*aa*e': {'aaine', 'aane'},\n 'LK*AKL*la*a': {'laga', 'lagaa', 'lagaya', 'lagwa', 'laoga'},\n 'KSP*PSK*ka*b': {'kasab'},\n 'TRMP*PMRT*tr*p': {'trump'},\n 'TK*KT*ta*k': {'taak', 'tak'},\n 'TM*AMT*ti*e': {'time'},\n 'SN*ANS*su*a': {'suna', 'sunha', 'sunna'},\n 'AK*AK*ac*a': {'achha'},\n 'SMJ*AMS*sa*a': {'samja', 'samjha', 'samjhaa', 'samjhaya'},\n 'KP*PK*ka*,': {'kabhi,'},\n 'SR*ARS*sa*e': {'saaare', 'saare', 'sare', 'saree', 'sayre'},\n 'XSN*NSK*ch*n': {'cheezein',\n 'cheezen',\n 'cheezon',\n 
'chizein',\n 'chizen',\n 'chosen'},\n 'TL*LT*de*l': {'deal', 'del', 'dell', 'deol'},\n 'XT*TS*sh*d': {'shayad',\n 'shayd',\n 'shed',\n 'shoud',\n 'showed',\n 'shud',\n 'shudd',\n 'shyad'},\n 'LF*AFL*li*e': {'life', 'live'},\n 'SKSSFL*LFSKS*su*l': {'succesful', 'succesfull', 'successfull'},\n 'HT*AT*ho*a': {'hota'},\n '0RK*KRT*th*k': {'tharak'},\n 'SL*ALS*sa*e': {'saale', 'sale'},\n 'HR*R*ha*r': {'haar', 'haaur', 'haiaur', 'hair', 'har', 'harr'},\n 'LTK*AKTL*la*i': {'ladki', 'latki'},\n 'MNXN*NTNM*me*n': {'mention'},\n 'AJ*J*aa*j': {'aaaj', 'aaj', 'aawaj'},\n 'MX*KTM*ma*h': {'match'},\n 'N*AN*na*a': {'na', 'naa', 'naaa', 'naiya', 'naiyya', 'naya', 'nayyiaa'},\n 'KL*LK*ka*l': {'kaal', 'kael', 'kal'},\n 'LTK*AKTL*la*e': {'ladakee', 'ladke', 'latke'},\n 'AN*AN*in*e': {'inhe', 'inne'},\n 'SNT*ATNS*se*i': {'sendai', 'senti'},\n 'KRN*ANRK*kr*e': {'krane', 'krenhe', 'krne'},\n 'KRT*ATRK*kr*e': {'krde', 'krtae', 'krte', 'krwate'},\n '**:(*(': {':('},\n 'TKST*TKST*te*t': {'text'},\n 'KR*ARK*ka*o': {'kaaro',\n 'karao',\n 'kariyo',\n 'karo',\n 'karoo',\n 'karooooo',\n 'karwao'},\n 'FR*RP*ph*r': {'pher', 'phir', 'phor', 'phr'},\n 'TKT*A0KT*de*y': {'dekhtay'},\n 'TT*ATT*di*i': {'didi'},\n 'AP*AP*ab*i': {'abhi', 'abhiii', 'abi'},\n 'APN*ANP*ap*i': {'apani', 'apni'},\n 'AP*AP*ab*e': {'abbe', 'abbie', 'abe'},\n 'KNKRSNN*NSRNK*co*,': {'congressiyon,'},\n 'NJRNS*SNRJN*ni*s': {'nigerians'},\n 'HT*AT*ho*e': {'hoodie', 'hootie', 'hote'},\n 'PNTR*RTNP*ba*r': {'bandar'},\n 'KN*NK*ky*n': {'kyon', 'kyoon', 'kyoun', 'kyun'},\n 'PR*ARP*bu*a': {'bura', 'buraiya'},\n 'MN*NM*ma*n': {'maan', 'maiin', 'main', 'man', 'mann', 'maun', 'mayan'},\n 'K*AK*ga*e': {'gaaye', 'gae', 'gaye', 'gayee'},\n 'KTN*ANTK*ki*e': {'kitane', 'kitne'},\n 'TK*KT*du*h': {'dukh'},\n 'TFRS*AKRFT*di*e': {'divorce'},\n 'ASM*AMS*us*e': {'usme', 'usshme'},\n 'K*AK*ko*i': {'koi', 'koii', 'koyi'},\n 'MTFT*ATFTM*ma*e': {'matvate'},\n 'RNTK*STNJRR*/r*c': {'/rajnetic'},\n 'HN*AN*ho*a': {'hona', 'honna'},\n 'XH*AHK*ch*e': 
{'chaahiye', 'chahe', 'chahie', 'chahiye', 'chahye', 'chuhe'},\n 'MHL*ALHM*ma*o': {'mahilao', 'mahilayo'},\n 'XRF*PRS*sh*h': {'sharaph'},\n 'SMN*NMS*sa*n': {'saamaan', 'samaan', 'saman', 'samman'},\n 'TRPL*ALPRT*tr*e': {'triple', 'trouble'},\n 'TLK*KLT*ta*q': {'talaq', 'taluq'},\n 'RK*AKR*ro*e': {'roke'},\n 'KRJ*AKRK*ka*e': {'karaoge', 'karge', 'karoge', 'karwaoge'},\n 'S0*TS*sa*h': {'saath', 'sath'},\n 'MN*ANM*ma*e': {'maane',\n 'maine',\n 'mainne',\n 'manaye',\n 'mane',\n 'manne',\n 'mayne'},\n 'TM*AMT*tu*e': {'tumhe'},\n 'ALT*ATL*ul*a': {'ulta'},\n 'HT*T*ha*h': {'hadh'},\n 'AS*AS*wi*e': {'wise'},\n 'T*AT*tu*u': {'tu'},\n 'PL*ALP*bo*i': {'boli'},\n 'N*AN*na*i': {'naayi', 'nai', 'nayi'},\n 'LNXR*RKNL*lu*r': {'lunchar', 'luncher'},\n 'PJ*AJP*pa*i': {'paaji', 'paegi', 'paji', 'payegi'},\n 'X*AK*ch*i': {'chaai', 'chaayi', 'chai', 'chaiwai', 'chi', 'chiii'},\n 'PSKT*TKSP*bi*t': {'biscuit', 'biskoot', 'bizkit'},\n 'K*AK*kh*a': {'kha', 'khaa', 'khaya', 'khoya'},\n '0K*KT*th*k': {'thaak', 'thak', 'thakk', 'theek', 'thik', 'thk', 'thok'},\n 'H**ha*,': {'haaye,', 'hai,', 'hai,,'},\n 'AS*AS*ai*e': {'aise'},\n 'TXRT*TRST*ts*t': {'tshirt'},\n 'KS*ASK*ka*e': {'kaise', 'kaisee', 'kassie'},\n 'TL*LT*da*l': {'daal', 'dal', 'dayal'},\n 'SKT*ATKS*sa*a': {'sakta'},\n 'TTR*RTT*tw*r': {'twiter', 'twitter', 'twtr'},\n 'ALN*NL*wa*n': {'waalon', 'walloon', 'walon', 'waylon'},\n 'KXNSP*APSNSK*kh*i': {'khushnaseebi'},\n 'HP*AP*ha*y': {'happy', 'happyyy'},\n 'PR0T*ATTRP*bi*y': {'birthday', 'birthdayyyy'},\n 'TN*ANT*dh*i': {'dhoni', 'dhunayi'},\n '**&*&': {'&'},\n 'HL*AL*he*o': {'hello', 'helo'},\n 'SKX*ASKS*sa*i': {'saakshi', 'sakshi'},\n 'PP*APP*bh*i': {'bhabhi', 'bhabi', 'bhaibhai'},\n 'KRT*TRK*kh*t': {'khairaat', 'khairiyat'},\n 'LLL*LL*lo*,': {'lol,'},\n 'FNK*NP*ph*k': {'phenk', 'phoonk'},\n 'TT*ATT*de*a': {'deta'},\n 'PXR*ARSP*be*e': {'bechare'},\n 'KT*TK*ka*t': {'kaat', 'kat', 'katt'},\n 'JT*ATJ*ja*a': {'jaada',\n 'jaaeta',\n 'jaata',\n 'jaataa',\n 'jada',\n 'jaida',\n 'jata',\n 
'jataa',\n 'jataya',\n 'jayada',\n 'jayda'},\n 'PLL*LP*bi*,': {'bill,'},\n 'ASL*ALS*is*e': {'iseeliye', 'isiliye'},\n 'LT*ATL*la*a': {'ladaa', 'ladta', 'lata', 'lataa', 'lauta'},\n 'T*AT*di*a': {'dia', 'diya', 'diyaa'},\n 'XP*PK*ch*p': {'chaap', 'chap', 'cheap', 'chep', 'chip', 'chup', 'chupp'},\n 'TT*TT*di*d': {'did', 'died'},\n 'ARN*NR*ar*n': {'aran', 'aren', 'arun'},\n 'LL*LL*la*l': {'laal', 'lal'},\n 'S*S*sa*,': {'sa,', 'say,'},\n 'PNKLTX*ASTLNP*ba*i': {'bangladeshi'},\n 'PLR*RLP*pl*r': {'player'},\n 'KX*ASK*kh*i': {'khushi'},\n 'APN*ANP*ap*e': {'apne'},\n 'ANTKX*SKTN*an*h': {'andkosh'},\n 'TKR*ARKT*ta*a': {'takra'},\n 'XL*LK*ch*l': {'chaal', 'chal', 'chaul', 'chawal', 'chill'},\n 'AXNK*KNK*ac*k': {'achaanak', 'achanak'},\n 'SN*ANS*so*y': {'sonny', 'sony'},\n 'TF*FT*tv*v': {'tv'},\n 'TLL*ALLT*di*e': {'dilwale'},\n 'MF*AFM*mo*e': {'moive', 'moovee', 'move', 'movie', 'moviee'},\n 'TLKST*TSSLT*te*t': {'telecast'},\n 'FL*ALF*va*i': {'vaalii', 'vali'},\n 'RT*TR*ra*t': {'raat', 'raitt', 'rat', 'rawat'},\n 'A*A*wh*y': {'why'},\n 'HMX*ASM*ha*a': {'hamesha', 'hameshaa'},\n 'SN*ANS*su*y': {'sunny'},\n 'LN*ANL*le*e': {'leanne', 'lene', 'leone'},\n 'TK*AKT*di*u': {'dikhau'},\n 'PT*ATP*pa*a': {'pada',\n 'padha',\n 'padhaa',\n 'padhaya',\n 'padta',\n 'padwa',\n 'paida',\n 'pata',\n 'pataa',\n 'patta'},\n 'STTNTS*STNTTS*st*s': {'students'},\n 'MHNJ*AKNHM*me*e': {'mehenge'},\n 'TN*NT*di*n': {'dian', 'din', 'dion'},\n 'AK*AK*ag*e': {'agaye', 'agye'},\n 'SX*AKS*so*a': {'socha'},\n 'ANK*KN*aa*h': {'aankh'},\n 'PNT*TNP*ba*d': {'band', 'banned'},\n 'XRT*ATRS*sh*i': {'shruti'},\n 'HSN*NS*ha*n': {'hasan', 'haseen', 'hassan'},\n 'AT*AT*aa*i': {'aadhi', 'aati'},\n 'KT*TK*gu*d': {'gud'},\n 'MRNK*NRM*mr*g': {'mrng'},\n 'MHXFRTR*ARTRFSHM*ma*i': {'mahashivaratri', 'mahashivratri'},\n 'HRTK*KTR*ha*k': {'hardik'},\n 'MNKLKMN*ANMKLKNM*ma*e': {'mangalkamnaye'},\n 'SR*RS*so*,': {'sorry,'},\n 'MJ*AJM*mu*e': {'muje', 'mujhe'},\n 'PJ*AJP*ba*e': {'bajaaye', 'bajawe', 'bajaye', 'baje', 'bajee', 
'bajhe'},\n 'FN*ANP*ph*e': {'phone'},\n 'ALPPR*RPPL*wa*r': {'wallpaper'},\n 'SNTJ*AKTNS*zi*i': {'zindagi'},\n 'RH*AHR*ra*i': {'raahi', 'rahi', 'rahii'},\n 'PTL*ALTP*ba*i': {'baatli', 'badhali'},\n 'TMPRR*ARRPMT*te*y': {'temporary'},\n 'PN*NP*ba*n': {'baan', 'baayen', 'ban', 'bann', 'bayaan', 'bayan'},\n 'LK*KL*la*g': {'laag', 'lag', 'lagg'},\n 'XK*AKK*ch*a': {'chacha', 'chakka', 'chauka', 'chokha', 'chuka', 'chukaya'},\n 'AK*AK*ac*i': {'achhai', 'achhhi', 'achhi'},\n 'NHH*HN*na*,': {'nahi,'},\n 'M0N*NTM*mi*n': {'mithun', 'mitthaiyan'},\n 'KL*ALK*kh*o': {'khayaalo', 'khelo', 'khilao', 'kholo'},\n 'AMK*KM*om*g': {'omg'},\n 'N*AN*ne*e': {'ne', 'nee'},\n 'RT*TR*rt*t': {'rt'},\n 'SPR*ARPS*sp*e': {'spare', 'spree'},\n '0NKS*SKNT*th*s': {'thanks', 'thinks', 'thnks'},\n 'KX*ASKK**k*i': {'*khushi'},\n 'AMT*TM*am*t': {'amit'},\n 'MHMT*TMHM*mu*d': {'muhammad'},\n 'AL*AL*al*i': {'ali'},\n 'PKL*ALKP*pa*i': {'pagli'},\n 'PLS*ASLP*pl*e': {'please'},\n 'FL*ALF*fo*w': {'follow', 'folow'},\n 'FR*ARF*fr*e': {'free'},\n 'TR*ART*da*u': {'daaru', 'daru', 'daruu'},\n 'PLT*ATLP*pi*e': {'pilaate'},\n 'HS*AS*hi*a': {'hisa', 'hissa'},\n 'AL*AL*wa*a': {'waala', 'wala', 'walaaaaa', 'walla'},\n 'TKLF*FLKT*ta*f': {'takleef', 'taklif', 'taqleef'},\n 'TK*AJT*de*a': {'dega', 'degaaa'},\n 'A*A*wo*o': {'wo'},\n 'KS*ASK*ki*a': {'kisa', 'kissa'},\n 'HMR*ARM*ha*i': {'hamaari', 'hamari'},\n 'KXT*A0SK*ka*i': {'kashti'},\n 'AHN*NH*wa*n': {'wahaan', 'wahan', 'wahin'},\n 'TP*APT*do*i': {'doobi'},\n 'JHN*NHJ*ja*n': {'jahaan', 'jahan'},\n 'PN*ANP*pa*i': {'paaani', 'paani', 'pani'},\n 'H*A*hu*u': {'hu', 'huuuu'},\n 'MR*ARM*me*a': {'mehra', 'meira', 'mera'},\n 'MTF*AFTM*mo*e': {'motive'},\n 'AR*R*er*r': {'er', 'err'},\n 'L*AL*le*e': {'le', 'lee', 'leye'},\n 'AK*K*ek*k': {'ek'},\n 'KR*ARK*gu*u': {'guru'},\n 'J*AJ*ji*i': {'ji', 'jii'},\n 'PXN*ANXP*pe*e': {'pehchaane', 'pehchane', 'pehchanne'},\n 'ANKR*RKN*in*r': {'inkaar', 'inkar'},\n 'LKN*NKL*lo*n': {'logan', 'logon', 'loogoun'},\n 'PKSTN*ANTSKP*pa*i': 
{'pakistani'},\n 'FS*ASF*vi*a': {'visa'},\n 'PSPRT*TRPSP*pa*t': {'passport'},\n 'MFT*TFM*mu*t': {'muft'},\n 'J*AJ*ja*e': {'jaae',\n 'jaaiye',\n 'jaaye',\n 'jaayiye',\n 'jae',\n 'jaie',\n 'jaiye',\n 'jaye'},\n 'PRNM*MNRP*pr*m': {'pranaam', 'pranam'},\n 'ARR*R*ya*,': {'yaar,'},\n 'T*AT*do*o': {'do', 'doo'},\n 'A*H*ye*h': {'yeah', 'yeh'},\n 'J*AJ*ja*a': {'ja', 'jaa', 'jaya'},\n '**_/*_': {'_/\\\\_'},\n 'RLJ*AKLR*ru*e': {'rulaoge'},\n 'ANT*ATN*in*a': {'india'},\n 'SR*ARS*sr*i': {'sri'},\n 'LNK*AKNL*la*a': {'laaunga', 'lanka', 'launga'},\n 'PL*ALP*bu*o': {'bulao'},\n 'FT*ATF*vo*e': {'vote'},\n 'RFS*SFR*re*s': {'reviews'},\n 'SRRT*TRRS*za*t': {'zaroorat', 'zarorat', 'zarurat'},\n 'KF*AFK*ka*i': {'kaafi', 'kafi', 'kaufii', 'kavi'},\n 'PRTSHN*NHSTRP*pr*n': {'protsahan'},\n 'MLT*ATLM*mi*a': {'milata', 'milta'},\n 'AN*N*in*n': {'in', 'inn'},\n 'PTN*NTP*ba*n': {'baatein',\n 'baaten',\n 'baaton',\n 'badhaiyaan',\n 'badhaiyan',\n 'badhen',\n 'badhun',\n 'badon',\n 'bataayein',\n 'bataen',\n 'batan',\n 'batayen',\n 'batein',\n 'baten',\n 'baton',\n 'batten',\n 'battiyan'},\n 'ATM*AMT*aa*i': {'aadami', 'aadamii', 'aadmi'},\n 'MLN*ANLM*mi*a': {'milania', 'milena', 'milna'},\n 'PHT*THP*bo*t': {'bohat', 'bohoot', 'bohot', 'bohut'},\n 'SLMNNN*NNNMLS*sa*n': {'salmannnnn', 'salmannnnnn'},\n 'A*A*aa*a': {'aa', 'aaa', 'aaaa', 'aaaaa', 'aaya', 'aayaa', 'aaye,aa'},\n 'ANTSR*RSTN*in*r': {'intazaar',\n 'intazar',\n 'intezaar',\n 'intezar',\n 'intzaar',\n 'intzar'},\n 'ATR*RT*id*r': {'idhar'},\n 'TR*ART*te*e': {'tere', 'terre'},\n 'AR*AR*ar*y': {'aray', 'arey', 'array', 'arrey', 'arry', 'ary'},\n 'AP*P*aa*p': {'aap'},\n 'TNK*NT*dh*g': {'dhang'},\n 'HHH*AHH*ha*a': {'hahaha',\n 'hahahaa',\n 'hahahaaa',\n 'hahahaaaa',\n 'hahahahha',\n 'hai_hahaha'},\n 'XF*AFS*sh*e': {'shave', 'shivaye'},\n 'LN*ANL*le*a': {'leeanna', 'leena', 'lena'},\n 'TR*ART*te*a': {'tera', 'teraa', 'terra'},\n 'SHR*ARHS*sa*a': {'sahaara', 'sahara'},\n 'HH*AH*ha*a': {'haaha',\n 'haahaa',\n 'haha',\n 'hahaa',\n 'hahahha',\n 
'hahahhaa',\n 'hahahhahha'},\n 'TT*ATT*da*e': {'daate', 'date', 'daudte'},\n 'AT*T*ya*d': {'yaad', 'yad'},\n 'TLJ*AKLT*di*e': {'dialouge', 'dilaoge'},\n 'KS*ASK*kh*i': {'khaasi', 'khusi'},\n 'H*A*hu*i': {'huawei', 'hui', 'huiii', 'huwi', 'huyi'},\n 'TMR*ARMT*tu*a': {'tumara', 'tumhaaara', 'tumhaara', 'tumhara', 'tumhra'},\n 'JNMTN*NTMNJ*ja*n': {'janamdin', 'janmdin'},\n 'AS*AS*us*e': {'use', 'usse', 'ussee'},\n 'KT*ATK*ga*e': {'gaddhe', 'gadhe', 'gate'},\n 'PHR*RHP*ba*r': {'baahar', 'bahaar', 'bahar', 'baher'},\n 'ATRN*ANRT*ut*a': {'utarna'},\n 'KMPLSR*ARSLPMK*co*y': {'compulsory'},\n 'KR0*ATRK*ke*i': {'keerthi'},\n 'LTS*STL*le*s': {'leads', 'leeds', 'lets'},\n 'APN*NP*op*n': {'opan', 'open', 'oppn'},\n 'NK*AKN*ng*o': {'ngo'},\n 'TK0R*RTKT*to*r': {'together'},\n 'TT*ATT*to*e': {'toadie', 'tode', 'todte', 'toote'},\n 'TL*LT*di*l': {'dial', 'dil', 'dill'},\n 'MRSN*NSRM*ma*n': {'marezon'},\n 'L*AL*li*a': {'lia', 'liya', 'liyaa', 'liyea'},\n 'KP*APK*kb*i': {'kbhi', 'kbi'},\n 'XN*ANK*ch*a': {'chana', 'chanaa', 'china', 'chuna'},\n 'PX*ASP*pe*e': {'peche', 'peeche', 'peeeche', 'peshwe'},\n 'N*AN*nh*i': {'nhai', 'nhi', 'nhii'},\n 'KR*RK*kr*r': {'kr', 'krr'},\n '0T*TT*th*t': {'thaat', 'that'},\n 'SPN*NPS*sp*n': {'spain', 'spann', 'spin', 'spoon'},\n 'RF*AFR*ra*i': {'raffi', 'rafi', 'ravi'},\n 'AXN*NS*as*n': {'ashwin'},\n 'AFRK*ASRF*af*a': {'africa'},\n 'ARLT*TLR*wo*d': {'world'},\n 'KPS*SPK*cu*s': {'cubs', 'cups'},\n 'M*AM*me*i': {'mei'},\n 'T*T*t*t': {'t'},\n 'F*F*v*v': {'v'},\n 'KT*TK*gu*:': {'guddu:'},\n 'MM*AMM*mu*y': {'mummy'},\n 'KN*ANK*kh*e': {'khaane', 'khane', 'khanye'},\n 'XLK*KLK*ch*k': {'chalaak', 'chalk'},\n 'XRN*ANRK*ch*e': {'churane'},\n 'MS*ASM*ma*a': {'maaza', 'mausa', 'maza', 'mazaa', 'mazya', 'mazza'},\n 'PLKLL*LKLP*bi*,': {'bilkul,'},\n 'SKT*ATKS*sa*e': {'sakate', 'sakte'},\n 'SPK*AKPS*sa*o': {'sabhiko', 'sabko'},\n 'PK*AKP*ba*a': {'bacha', 'bachaa', 'bachha', 'baga', 'bakaya'},\n 'KHHRF*FRSSHK*ka*f': {'kaho,sirf'},\n 'AL*AL*il*a': {'ilawa', 'ilha'},\n 
'AK*K*ok*k': {'ok', 'okk'},\n 'ML*ALM*mi*i': {'mili'},\n '**\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Sentencetrr
0congratulations on you celebrating british kid...congratulations on you celebrating british kid...
1uske liye toh bahat kuch karwana padega yee pa...you will have to do a lot for that, it will no...
2yehi tooo hum semjhane ki koshish karr rahee h...this is what we are trying to understand. peop...
3cake kaha haiwhere is the cake??
4jett kauwa jashn aur shubah ki shroowat eating...victory celebration and early morning eating b...
.........
1299muse vo bohut pasand haii like it a lot
1300muje psnd haii like it a lot
1301muje maarkit see vegtables lekar aaine hawoni have to bring vegetables from the market
1302sabse best frndmost best friend
1303best frnd sabsemost best friend
\n

183535 rows × 2 columns

\n"},"metadata":{}}]},{"cell_type":"code","source":"# Shuffle all rows: frac=1.0 draws every row exactly once, in random order.\ndf = df.sample(frac=1.0)","metadata":{"execution":{"iopub.status.busy":"2023-03-16T11:45:54.284580Z","iopub.execute_input":"2023-03-16T11:45:54.285329Z","iopub.status.idle":"2023-03-16T11:45:54.321449Z","shell.execute_reply.started":"2023-03-16T11:45:54.285289Z","shell.execute_reply":"2023-03-16T11:45:54.320417Z"},"trusted":true},"execution_count":23,"outputs":[]},{"cell_type":"code","source":"# Count space-separated tokens per sentence and keep rows with at most 20 of them.\ndf['len_sen'] = [len(text.split(' ')) for text in df['Sentence']]\ndf = df.loc[df['len_sen'] <= 20]","metadata":{"execution":{"iopub.status.busy":"2023-03-16T11:45:54.323403Z","iopub.execute_input":"2023-03-16T11:45:54.323788Z","iopub.status.idle":"2023-03-16T11:45:54.763578Z","shell.execute_reply.started":"2023-03-16T11:45:54.323750Z","shell.execute_reply":"2023-03-16T11:45:54.762523Z"},"trusted":true},"execution_count":24,"outputs":[]},{"cell_type":"code","source":"# Plain Python lists of the model inputs and targets, in the shuffled row order.\ninp = df['Sentence'].tolist()\ntarg = df['trr'].tolist()","metadata":{"execution":{"iopub.status.busy":"2023-03-16T11:45:54.765394Z","iopub.execute_input":"2023-03-16T11:45:54.765774Z","iopub.status.idle":"2023-03-16T11:45:54.833711Z","shell.execute_reply.started":"2023-03-16T11:45:54.765737Z","shell.execute_reply":"2023-03-16T11:45:54.832417Z"},"trusted":true},"execution_count":25,"outputs":[]},{"cell_type":"code","source":"# Show the final target sentence after shuffling and filtering.\nprint(targ[-1])","metadata":{"id":"lH_dPY8TRp3c","outputId":"a4486ac6-bd0f-4b6f-c685-5bdf1983e769","execution":{"iopub.status.busy":"2023-03-16T11:45:54.835218Z","iopub.execute_input":"2023-03-16T11:45:54.835825Z","iopub.status.idle":"2023-03-16T11:45:54.845721Z","shell.execute_reply.started":"2023-03-16T11:45:54.835785Z","shell.execute_reply":"2023-03-16T11:45:54.844518Z"},"trusted":true},"execution_count":26,"outputs":[{"name":"stdout","text":"remove all alarms and 
reminders\n","output_type":"stream"}]},{"cell_type":"code","source":"!nvidia-smi","metadata":{"execution":{"iopub.status.busy":"2023-03-16T11:45:54.847682Z","iopub.execute_input":"2023-03-16T11:45:54.848100Z","iopub.status.idle":"2023-03-16T11:45:55.842937Z","shell.execute_reply.started":"2023-03-16T11:45:54.848038Z","shell.execute_reply":"2023-03-16T11:45:55.841702Z"},"trusted":true},"execution_count":27,"outputs":[{"name":"stdout","text":"Thu Mar 16 11:45:55 2023 \n+-----------------------------------------------------------------------------+\n| NVIDIA-SMI 470.82.01 Driver Version: 470.82.01 CUDA Version: 11.4 |\n|-------------------------------+----------------------+----------------------+\n| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n| | | MIG M. |\n|===============================+======================+======================|\n| 0 Tesla P100-PCIE... Off | 00000000:00:04.0 Off | 0 |\n| N/A 35C P0 27W / 250W | 0MiB / 16280MiB | 0% Default |\n| | | N/A |\n+-------------------------------+----------------------+----------------------+\n \n+-----------------------------------------------------------------------------+\n| Processes: |\n| GPU GI CI PID Type Process name GPU Memory |\n| ID ID Usage |\n|=============================================================================|\n| No running processes found |\n+-----------------------------------------------------------------------------+\n","output_type":"stream"}]},{"cell_type":"code","source":"# List the GPUs visible to TensorFlow via the stable (non-experimental) config API.\ngpus = tf.config.list_physical_devices('GPU')\nfor gpu in gpus:\n print(\"Name:\", gpu.name, \" Type:\", 
gpu.device_type)","metadata":{"execution":{"iopub.status.busy":"2023-03-16T11:45:55.866108Z","iopub.execute_input":"2023-03-16T11:45:55.866509Z","iopub.status.idle":"2023-03-16T11:45:56.134202Z","shell.execute_reply.started":"2023-03-16T11:45:55.866449Z","shell.execute_reply":"2023-03-16T11:45:56.133058Z"},"trusted":true},"execution_count":28,"outputs":[{"name":"stdout","text":"Name: /physical_device:GPU:0 Type: GPU\n","output_type":"stream"}]},{"cell_type":"code","source":"# NOTE(review): tensorflow.python.* is a non-public module path - confirm it is still needed.\nfrom tensorflow.python.client import device_lib\n\ndevice_lib.list_local_devices()","metadata":{"execution":{"iopub.status.busy":"2023-03-16T11:45:56.135612Z","iopub.execute_input":"2023-03-16T11:45:56.136601Z","iopub.status.idle":"2023-03-16T11:45:58.548906Z","shell.execute_reply.started":"2023-03-16T11:45:56.136559Z","shell.execute_reply":"2023-03-16T11:45:58.547810Z"},"trusted":true},"execution_count":29,"outputs":[{"execution_count":29,"output_type":"execute_result","data":{"text/plain":"[name: \"/device:CPU:0\"\n device_type: \"CPU\"\n memory_limit: 268435456\n locality {\n }\n incarnation: 7469856977397221377\n xla_global_id: -1,\n name: \"/device:GPU:0\"\n device_type: \"GPU\"\n memory_limit: 16105865216\n locality {\n bus_id: 1\n links {\n }\n }\n incarnation: 16859620279429889827\n physical_device_desc: \"device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0\"\n xla_global_id: 416903419]"},"metadata":{}}]},{"cell_type":"code","source":"# tf.test.is_gpu_available() is deprecated; check the visible physical GPUs instead.\nlen(tf.config.list_physical_devices('GPU')) > 0","metadata":{"execution":{"iopub.status.busy":"2023-03-16T11:45:58.552088Z","iopub.execute_input":"2023-03-16T11:45:58.552542Z","iopub.status.idle":"2023-03-16T11:45:58.567023Z","shell.execute_reply.started":"2023-03-16T11:45:58.552496Z","shell.execute_reply":"2023-03-16T11:45:58.566034Z"},"trusted":true},"execution_count":30,"outputs":[{"execution_count":30,"output_type":"execute_result","data":{"text/plain":"True"},"metadata":{}}]},{"cell_type":"code","source":"# printing first 100 english 
sentence\nprint(targ[:100])","metadata":{"id":"WtYLR0ZWXDld","outputId":"afcfed6b-ee0f-4007-9b3d-9cafc3eac537","execution":{"iopub.status.busy":"2023-03-16T11:45:58.570720Z","iopub.execute_input":"2023-03-16T11:45:58.571137Z","iopub.status.idle":"2023-03-16T11:45:58.579466Z","shell.execute_reply.started":"2023-03-16T11:45:58.571108Z","shell.execute_reply":"2023-03-16T11:45:58.578494Z"},"trusted":true},"execution_count":31,"outputs":[{"name":"stdout","text":"['set a new alarm', 'set an alarm for 9 am', 'resume my timer', 'set a recurring reminder every morning at 9 : 00 to take my vitamins', 'any parties in philly tonight', 'when is my alarm set for on monday ?', 'set up an alarm to go to sleep tonight .', 'how long will it take me to get to syracuse downtown if i leave here in 10 minutes ?', 'modiji has hand in this.', 'should i buy emergency supplies at the store tonight', 'how long is my drive home', 'are there any accidents on the 33', 'set up a 30 minute meditation timer', 'why is the turnpike backed up', 'vancouver night market next weekend', 'is it going to be hot today', 'set up a nightly sleep reminder for 10 pm', 'events in new york', \"remind me that i didn ' t finish my tax return .\", 'open a message to peter asking him to call me after the meeting .', 'set an alert for me to go and pick up the catering order at 11 : 45 pm', 'can you please remind me to pay the utility bill on friday at 2 pm ?', 'didi.. agreed.. 
both of them were seen as nirma super.', 'cancel reminder 4 pm tomorrow .', 'set a timer to go off every hour', 'i want to listen to classical music for 1 hour', 'set up a reminder to get my passport application started by the end of the week', 'play me some jazz', 'scrapbooking parties in atlanta next weekend', 'can you set a reminder for my eye appointment on august 15th at 2 pm ?', 'concerts by new edition', 'please set the alarm at 6 am for tomorrow .', 'play some rock music from the 60s .', 'run this timer twice', 'how far from cocoa beach to port canaveral', 'set an egg timer for 5 minutes ?', \"what ' s the average temp in virginia this time of year\", \"set up a recurring reminder about little matt ' s football practice monday thru friday at 5 : 30 pm\", 'should i bring a jacket with me ?', 'can i tour the princeton art museum today and park in front of the university ?', 'delete my daily 3 pm and 4 pm alarms', 'cancel tomorrows alarm .', 'how much of a delay for my trip home today', 'skip the next track', 'set timer for 20 minutes from now', 'how is the weather on saturday', 'is 75 faster than 45 right now', 'can i get from here to dover by 9 pm tonight ?', 'message tim asking if he has found my wallet at his house', 'set timer for the laundry', 'please create a weekly alarm for thursday at 5 pm', 'what temp will is be in washington dc this week', 'remind me to be at the meeting at 6 pm today', 'prateek johri dialogue is not in your control. 
,', 'suspend my alarm for the weekend', 'restart all timers', 'text mom and tell her ill be at the show next thursday', 'any free xmas events i can go to this weekend', 'set the snooze setting on the alarm clock to 9 minutes .', 'set an alarm daily at 6 am for the following 2 weeks', 'is there a scenic backroad drive from jamesville to highlands , nc ?', 'turn off my saturday alarms .', 'start timer for 5 hours', 'replay this track', 'delete all reminders about the wedding .', 'i wanna text robert', 'how long will it take me to get to utsa by bicycle from the rim ?', 'cancel my reminder about the anniversary gift', 'what is the seven day forecast ?', 'when will the sun rise today ?', 'do i need a jacket today ?', 'change order of songs', 'remind me to make a salad dish for my social dinner tomrorow at 12 pm .', 'text steve , how are you doing ?', 'could you wake us up', 'set alarm for every hour', 'how cold is it for paris this weekend in degree c', 'what is the temperature today ? i am going to walk down to the casino .', 'set a reminder for a party tonight', 'remind me i have a dentist appointment on thursday', 'how is traffic from irvine to tustin', 'sound an alarm once timer is complete', 'restart timer', 'add another ten minutes to the timer .', 'cancel my alarm ( s ) for saturday', 'how hot will it be tomorrow afternoon ?', \"how long will it take me to get to john ' s house if i leave in 15 minutes ?\", 'please tell me what hours traffic is the heaviest in kansas city', 'set up an wake up alarm at midnight today .', 'looks like someone has made him an uncle in shares', 'start a new alarm for very tuesday at noon .', 'thumbs down the current song playing on pandora', 'what time is my doctor appointment reminder ?', 'look at my messages on whatsapp .', \"when will i arrive at bob ' s dinner party if i leave right now using the highway ?\", 'expressway is costlier or cheaper', 'set an alarm to wake up at 4 am .', 'how many miles are we from salt lake city', 
# %%
# printing the last hinglish sentence (a one-element slice, not the first 100)
print(inp[-1:])

# %%
# arrays of strings are shuffled and batches are made
# The shuffle buffer is as large as the dataset, so every sentence pair can end
# up anywhere in the shuffled order.
BUFFER_SIZE = len(inp)
BATCH_SIZE = 64

dataset = tf.data.Dataset.from_tensor_slices((inp, targ)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE)

# %%
# Peek at the first five (input, target) pairs of one batch. `take(1)` already
# limits the loop to a single iteration, so no `break` is needed. The loop
# variables are kept on purpose: later cells reuse `example_input_batch` and
# `example_target_batch`.
for example_input_batch, example_target_batch in dataset.take(1):
    print(example_input_batch[:5])
    print()
    print(example_target_batch[:5])
def tf_lower_and_split_punct(text):
    """Standardize raw sentences before they are tokenized.

    Steps, in order: NFKD unicode normalization, lower-casing, removal of every
    character outside the whitelist, padding of punctuation with spaces so each
    mark becomes its own whitespace-separated token, stripping of outer
    whitespace, and wrapping in `[START]` / `[END]` markers.

    Digits are kept: the sentences in this corpus are dominated by times and
    durations ("9 am", "1 hour", "10 minutes"), and dropping them would make
    different source sentences indistinguishable.

    Args:
      text: a string tensor (the notebook passes both a scalar `tf.constant`
        and whole batches via `TextVectorization(standardize=...)`).

    Returns:
      A string tensor with the standardized text.
    """
    # Split accented characters.
    text = tf_text.normalize_utf8(text, 'NFKD')
    text = tf.strings.lower(text)
    # Keep space, a to z, 0 to 9, and select punctuation.
    text = tf.strings.regex_replace(text, '[^ a-z0-9.?!,¿]', '')
    # Add spaces around punctuation (`\0` is the whole match).
    text = tf.strings.regex_replace(text, '[.?!,¿]', r' \0 ')
    # Strip whitespace.
    text = tf.strings.strip(text)

    text = tf.strings.join(['[START]', text, '[END]'], separator=' ')
    return text
# %%
# English TextVectorization layer: same standardizer and vocabulary cap as the
# Hinglish layer, but fitted on the target sentences.
output_text_processor = tf.keras.layers.TextVectorization(
    standardize=tf_lower_and_split_punct,
    max_tokens=max_vocab_size)

output_text_processor.adapt(targ)
# First 10 entries of the English vocabulary.
output_text_processor.get_vocabulary()[:10]

# %%
# the above layers convert a batch of strings into a batch of token IDs
example_tokens = input_text_processor(example_input_batch)
example_tokens[:3, :10]

# %%
# get_vocabulary - indexing the vocabulary with token IDs converts them back to text
input_vocab = np.array(input_text_processor.get_vocabulary())
tokens = input_vocab[example_tokens[0].numpy()]
' '.join(tokens)

# %%
# token IDs converted into a mask: ID 0 is the empty padding entry of the
# vocabulary, so `example_tokens != 0` marks the real tokens.
# Explicit axes are used so each panel is configured by name, and `plt.show()`
# keeps the last `Text(...)` repr out of the cell output.
fig, (ids_ax, mask_ax) = plt.subplots(1, 2)

ids_ax.pcolormesh(example_tokens)
ids_ax.set_title('Token IDs')
ids_ax.set_xlabel('Token position')
ids_ax.set_ylabel('Sentence in batch')

mask_ax.pcolormesh(example_tokens != 0)
mask_ax.set_title('Mask')
mask_ax.set_xlabel('Token position')

plt.show()
class Encoder(tf.keras.layers.Layer):
    """Embeds source token IDs and runs the embeddings through a GRU.

    Args:
      input_vocab_size: number of entries in the embedding table.
      embedding_dim: width of each token embedding.
      enc_units: number of units in the GRU.
    """

    def __init__(self, input_vocab_size, embedding_dim, enc_units):
        super().__init__()
        self.input_vocab_size = input_vocab_size
        self.enc_units = enc_units

        # Token ID -> dense vector lookup.
        self.embedding = tf.keras.layers.Embedding(
            input_dim=self.input_vocab_size,
            output_dim=embedding_dim,
        )

        # Recurrent layer over the embedded sequence; it hands back both the
        # per-step outputs and the final state.
        self.gru = tf.keras.layers.GRU(
            units=self.enc_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform',
            recurrent_dropout=0.1,
        )

    def call(self, tokens, state=None):
        """Encode a batch of token IDs.

        Args:
          tokens: token IDs, checked against the axis names ('batch', 's').
          state: optional initial GRU state.

        Returns:
          `(output, state)`: the per-step GRU output, checked as
          ('batch', 's', 'enc_units'), and the final GRU state, checked as
          ('batch', 'enc_units').
        """
        # NOTE(review): ShapeChecker is defined elsewhere in the notebook;
        # presumably it asserts that axes sharing a name agree in size.
        check_shape = ShapeChecker()
        check_shape(tokens, ('batch', 's'))

        # Look up one embedding vector per token.
        embedded = self.embedding(tokens)
        check_shape(embedded, ('batch', 's', 'embed_dim'))

        # Run the GRU over the embedded sequence.
        sequence_output, final_state = self.gru(embedded, initial_state=state)
        check_shape(sequence_output, ('batch', 's', 'enc_units'))
        check_shape(final_state, ('batch', 'enc_units'))

        return sequence_output, final_state


class BahdanauAttention(tf.keras.layers.Layer):
    """Additive attention with learned projections of query and key.

    Args:
      units: output width of the two bias-free projection layers.
    """

    def __init__(self, units):
        super().__init__()
        # Bias-free projections applied to the query (W1) and to the value
        # sequence, whose projection is then used as the key (W2).
        self.W1 = tf.keras.layers.Dense(units=units, use_bias=False)
        self.W2 = tf.keras.layers.Dense(units=units, use_bias=False)

        self.attention = tf.keras.layers.AdditiveAttention()

    def call(self, query, value, mask):
        """Attend from `query` over `value`.

        Args:
          query: checked as ('batch', 't', 'query_units').
          value: checked as ('batch', 's', 'value_units').
          mask: checked as ('batch', 's'); passed to the attention layer as the
            value mask.

        Returns:
          `(context_vector, attention_weights)`, checked as
          ('batch', 't', 'value_units') and ('batch', 't', 's').
        """
        check_shape = ShapeChecker()
        check_shape(query, ('batch', 't', 'query_units'))
        check_shape(value, ('batch', 's', 'value_units'))
        check_shape(mask, ('batch', 's'))

        projected_query = self.W1(query)
        check_shape(projected_query, ('batch', 't', 'attn_units'))

        projected_key = self.W2(value)
        check_shape(projected_key, ('batch', 's', 'attn_units'))

        # Every query position is valid, so the query mask is all True; only
        # the value positions are masked by the caller-supplied mask.
        all_queries_valid = tf.ones(tf.shape(query)[:-1], dtype=bool)

        context_vector, attention_weights = self.attention(
            inputs=[projected_query, value, projected_key],
            mask=[all_queries_valid, mask],
            return_attention_scores=True,
        )
        check_shape(context_vector, ('batch', 't', 'value_units'))
        check_shape(attention_weights, ('batch', 't', 's'))

        return context_vector, attention_weights
# %%
# Later, the decoder will generate this attention query
example_attention_query = tf.random.normal(shape=[len(example_tokens), 2, 10])

# Attend to the encoded tokens
context_vector, attention_weights = attention_layer(
    query=example_attention_query,
    value=example_enc_output,
    mask=(example_tokens != 0))

print(f'Attention result shape: (batch_size, query_seq_length, units): {context_vector.shape}')
print(f'Attention weights shape: (batch_size, query_seq_length, value_seq_length): {attention_weights.shape}')

# %%
# Attention weights of the first query step next to the padding mask.
# `plt.show()` keeps the `Text(...)` repr out of the cell output.
fig, (weights_ax, mask_ax) = plt.subplots(1, 2)

weights_ax.pcolormesh(attention_weights[:, 0, :])
weights_ax.set_title('Attention weights')

mask_ax.pcolormesh(example_tokens != 0)
mask_ax.set_title('Mask')

plt.show()

# %%
attention_weights.shape

# %%
# Weights of the first sentence / first query step, with exact zeros dropped.
attention_slice = attention_weights[0, 0].numpy()
attention_slice = attention_slice[attention_slice != 0]

# %%
#@title
# Create the figure first and attach the title to it; calling `plt.suptitle`
# before `plt.figure` puts the title on a separate, empty figure.
fig, (a1, a2) = plt.subplots(1, 2, figsize=(12, 6))
fig.suptitle('Attention weights for one sequence')

a1.bar(range(len(attention_slice)), attention_slice)
# freeze the xlim so the marker line drawn below does not rescale the axis
a1.set_xlim(a1.get_xlim())
a1.set_xlabel('Attention weights')

a2.bar(range(len(attention_slice)), attention_slice)
a2.set_xlabel('Attention weights, zoomed')

# zoom in: the right panel starts exactly at the level marked by the black
# line on the left panel (one shared `zoom` value instead of 0.85 vs 0.90).
top = max(a1.get_ylim())
zoom = 0.85*top
a2.set_ylim([zoom, top])
a1.plot(a1.get_xlim(), [zoom, zoom], color='k')

plt.show()
"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"
","image/png":"\n"},"metadata":{}}]},{"cell_type":"markdown","source":"Decoder","metadata":{"id":"g68xf3c_-gBS"}},{"cell_type":"code","source":"class Decoder(tf.keras.layers.Layer):\n def __init__(self, output_vocab_size, embedding_dim, dec_units):\n super(Decoder, self).__init__()\n self.dec_units = dec_units\n self.output_vocab_size = output_vocab_size\n self.embedding_dim = embedding_dim\n\n # For Step 1. The embedding layer converts token IDs to vectors\n self.embedding = tf.keras.layers.Embedding(self.output_vocab_size,\n embedding_dim)\n\n # For Step 2. The RNN keeps track of what's been generated so far.\n self.gru = tf.keras.layers.GRU(self.dec_units,\n return_sequences=True,\n return_state=True,\n recurrent_initializer='glorot_uniform',\n recurrent_dropout=0.1)\n\n # For step 3. The RNN output will be the query for the attention layer.\n self.attention = BahdanauAttention(self.dec_units)\n\n # For step 4. Eqn. (3): converting `ct` to `at`\n self.Wc = tf.keras.layers.Dense(dec_units, activation=tf.math.tanh,\n use_bias=False)\n\n # For step 5. 
This fully connected layer produces the logits for each output token.\n self.fc = tf.keras.layers.Dense(self.output_vocab_size)","metadata":{"id":"erYvHIgAl8kh","execution":{"iopub.status.busy":"2023-03-16T11:46:39.183861Z","iopub.execute_input":"2023-03-16T11:46:39.184272Z","iopub.status.idle":"2023-03-16T11:46:39.192243Z","shell.execute_reply.started":"2023-03-16T11:46:39.184233Z","shell.execute_reply":"2023-03-16T11:46:39.191028Z"},"trusted":true},"execution_count":55,"outputs":[]},{"cell_type":"code","source":"class DecoderInput(typing.NamedTuple):\n new_tokens: Any\n enc_output: Any\n mask: Any\n\nclass DecoderOutput(typing.NamedTuple):\n logits: Any\n attention_weights: Any","metadata":{"id":"7WfSIb2sArRT","execution":{"iopub.status.busy":"2023-03-16T11:46:39.193966Z","iopub.execute_input":"2023-03-16T11:46:39.194651Z","iopub.status.idle":"2023-03-16T11:46:39.206347Z","shell.execute_reply.started":"2023-03-16T11:46:39.194612Z","shell.execute_reply":"2023-03-16T11:46:39.205356Z"},"trusted":true},"execution_count":56,"outputs":[]},{"cell_type":"code","source":"def call(self,\n inputs: DecoderInput,\n state=None) -> Tuple[DecoderOutput, tf.Tensor]:\n shape_checker = ShapeChecker()\n shape_checker(inputs.new_tokens, ('batch', 't'))\n shape_checker(inputs.enc_output, ('batch', 's', 'enc_units'))\n shape_checker(inputs.mask, ('batch', 's'))\n\n if state is not None:\n shape_checker(state, ('batch', 'dec_units'))\n\n # Step 1. Lookup the embeddings\n vectors = self.embedding(inputs.new_tokens)\n shape_checker(vectors, ('batch', 't', 'embedding_dim'))\n\n # Step 2. Process one step with the RNN\n rnn_output, state = self.gru(vectors, initial_state=state)\n\n shape_checker(rnn_output, ('batch', 't', 'dec_units'))\n shape_checker(state, ('batch', 'dec_units'))\n\n # Step 3. 
Use the RNN output as the query for the attention over the\n # encoder output.\n context_vector, attention_weights = self.attention(\n query=rnn_output, value=inputs.enc_output, mask=inputs.mask)\n shape_checker(context_vector, ('batch', 't', 'dec_units'))\n shape_checker(attention_weights, ('batch', 't', 's'))\n\n # Step 4. Eqn. (3): Join the context_vector and rnn_output\n # [ct; ht] shape: (batch t, value_units + query_units)\n context_and_rnn_output = tf.concat([context_vector, rnn_output], axis=-1)\n\n # Step 4. Eqn. (3): `at = tanh(Wc@[ct; ht])`\n attention_vector = self.Wc(context_and_rnn_output)\n shape_checker(attention_vector, ('batch', 't', 'dec_units'))\n\n # Step 5. Generate logit predictions:\n logits = self.fc(attention_vector)\n shape_checker(logits, ('batch', 't', 'output_vocab_size'))\n\n return DecoderOutput(logits, attention_weights), state","metadata":{"id":"PJOi5btHAPNK","execution":{"iopub.status.busy":"2023-03-16T11:46:39.208199Z","iopub.execute_input":"2023-03-16T11:46:39.208844Z","iopub.status.idle":"2023-03-16T11:46:39.220786Z","shell.execute_reply.started":"2023-03-16T11:46:39.208582Z","shell.execute_reply":"2023-03-16T11:46:39.219581Z"},"trusted":true},"execution_count":57,"outputs":[]},{"cell_type":"code","source":"Decoder.call = call","metadata":{"id":"Ay_mTMPfnb2a","execution":{"iopub.status.busy":"2023-03-16T11:46:39.222435Z","iopub.execute_input":"2023-03-16T11:46:39.223014Z","iopub.status.idle":"2023-03-16T11:46:39.232325Z","shell.execute_reply.started":"2023-03-16T11:46:39.222949Z","shell.execute_reply":"2023-03-16T11:46:39.231281Z"},"trusted":true},"execution_count":58,"outputs":[]},{"cell_type":"code","source":"decoder = Decoder(output_text_processor.vocabulary_size(),\n embedding_dim, 
units)","metadata":{"id":"4ZUMbYXIEVeA","execution":{"iopub.status.busy":"2023-03-16T11:46:39.233853Z","iopub.execute_input":"2023-03-16T11:46:39.234238Z","iopub.status.idle":"2023-03-16T11:46:39.251412Z","shell.execute_reply.started":"2023-03-16T11:46:39.234201Z","shell.execute_reply":"2023-03-16T11:46:39.250373Z"},"trusted":true},"execution_count":59,"outputs":[]},{"cell_type":"code","source":"# Convert the target sequence, and collect the \"[START]\" tokens\nexample_output_tokens = output_text_processor(example_target_batch)\n\nstart_index = output_text_processor.get_vocabulary().index('[START]')\nfirst_token = tf.constant([[start_index]] * example_output_tokens.shape[0])","metadata":{"id":"4u6eJBU4GL40","execution":{"iopub.status.busy":"2023-03-16T11:46:39.252616Z","iopub.execute_input":"2023-03-16T11:46:39.253187Z","iopub.status.idle":"2023-03-16T11:46:39.336408Z","shell.execute_reply.started":"2023-03-16T11:46:39.253147Z","shell.execute_reply":"2023-03-16T11:46:39.335259Z"},"trusted":true},"execution_count":60,"outputs":[]},{"cell_type":"code","source":"# Run the decoder\ndec_result, dec_state = decoder(\n inputs = DecoderInput(new_tokens=first_token,\n enc_output=example_enc_output,\n mask=(example_tokens != 0)),\n state = example_enc_state\n)\n\nprint(f'logits shape: (batch_size, t, output_vocab_size) {dec_result.logits.shape}')\nprint(f'state shape: (batch_size, dec_units) {dec_state.shape}')","metadata":{"id":"E5hqvbR5FUCD","outputId":"ec292583-153d-4d60-d79e-082f2ed7a208","execution":{"iopub.status.busy":"2023-03-16T11:46:39.337807Z","iopub.execute_input":"2023-03-16T11:46:39.338547Z","iopub.status.idle":"2023-03-16T11:46:39.459382Z","shell.execute_reply.started":"2023-03-16T11:46:39.338507Z","shell.execute_reply":"2023-03-16T11:46:39.458221Z"},"trusted":true},"execution_count":61,"outputs":[{"name":"stdout","text":"logits shape: (batch_size, t, output_vocab_size) (64, 1, 23282)\nstate shape: (batch_size, dec_units) (64, 
# maximizing performance while training
@tf.function(
    input_signature=[
        [
            tf.TensorSpec(shape=[None], dtype=tf.string),
            tf.TensorSpec(shape=[None], dtype=tf.string),
        ]
    ]
)
def _tf_train_step(self, inputs):
    """`tf.function`-wrapped version of `self._train_step`.

    The input signature pins `inputs` to a two-element list of 1-D string
    tensors of any length. A later cell attaches this function with
    `TrainTranslator._tf_train_step = _tf_train_step`.

    Args:
      inputs: two-element list of 1-D string tensors, forwarded unchanged.

    Returns:
      Whatever `self._train_step(inputs)` returns.
    """
    step_result = self._train_step(inputs)
    return step_result
example_target_batch]))\nprint()","metadata":{"id":"UzXXMwjXCqqh","outputId":"c536483a-a0b5-4c1b-b99b-3cc35a2caf48","execution":{"iopub.status.busy":"2023-03-16T11:47:15.712754Z","iopub.execute_input":"2023-03-16T11:47:15.713156Z","iopub.status.idle":"2023-03-16T11:47:19.076763Z","shell.execute_reply.started":"2023-03-16T11:47:15.713109Z","shell.execute_reply":"2023-03-16T11:47:19.075669Z"},"trusted":true},"execution_count":81,"outputs":[{"name":"stdout","text":"{'batch_loss': }\n{'batch_loss': }\n{'batch_loss': }\n{'batch_loss': }\n{'batch_loss': }\n{'batch_loss': }\n{'batch_loss': }\n{'batch_loss': }\n{'batch_loss': }\n{'batch_loss': }\n\nCPU times: user 4.32 s, sys: 389 ms, total: 4.71 s\nWall time: 3.36 s\n","output_type":"stream"}]},{"cell_type":"code","source":"losses = []\nfor n in range(100):\n print('.', end='')\n logs = translator.train_step([example_input_batch, example_target_batch])\n losses.append(logs['batch_loss'].numpy())\n\nprint()\nplt.plot(losses)","metadata":{"id":"U-dIWMIBqK7b","outputId":"6df08356-994b-48fa-b64c-ebcdae2c6291","execution":{"iopub.status.busy":"2023-03-16T11:47:19.087055Z","iopub.execute_input":"2023-03-16T11:47:19.087394Z","iopub.status.idle":"2023-03-16T11:47:51.493218Z","shell.execute_reply.started":"2023-03-16T11:47:19.087365Z","shell.execute_reply":"2023-03-16T11:47:51.491996Z"},"trusted":true},"execution_count":82,"outputs":[{"name":"stdout","text":"....................................................................................................\n","output_type":"stream"},{"execution_count":82,"output_type":"execute_result","data":{"text/plain":"[]"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"
","image/png":"\n"},"metadata":{}}]},{"cell_type":"code","source":"# building a fresh copy of the model to train from scratch since the training step is working\ntrain_translator = TrainTranslator(\n embedding_dim, units,\n input_text_processor=input_text_processor,\n output_text_processor=output_text_processor)\n\n# Configure the loss and optimizer\ntrain_translator.compile(\n optimizer=tf.optimizers.Adam(),\n loss=MaskedLoss(),\n)","metadata":{"id":"Emgfgh4tAmJt","execution":{"iopub.status.busy":"2023-03-16T11:47:51.494792Z","iopub.execute_input":"2023-03-16T11:47:51.495166Z","iopub.status.idle":"2023-03-16T11:47:51.524712Z","shell.execute_reply.started":"2023-03-16T11:47:51.495127Z","shell.execute_reply":"2023-03-16T11:47:51.523701Z"},"trusted":true},"execution_count":83,"outputs":[]},{"cell_type":"markdown","source":"Training the model","metadata":{"id":"1h7Dmiy1De8S"}},{"cell_type":"code","source":"class BatchLogs(tf.keras.callbacks.Callback):\n def __init__(self, key):\n self.key = key\n self.logs = []\n\n def on_train_batch_end(self, n, logs):\n self.logs.append(logs[self.key])\n\nbatch_loss = BatchLogs('batch_loss')","metadata":{"id":"J7m4mtnj80sq","execution":{"iopub.status.busy":"2023-03-16T11:47:51.526321Z","iopub.execute_input":"2023-03-16T11:47:51.526708Z","iopub.status.idle":"2023-03-16T11:47:51.532735Z","shell.execute_reply.started":"2023-03-16T11:47:51.526670Z","shell.execute_reply":"2023-03-16T11:47:51.531558Z"},"trusted":true},"execution_count":84,"outputs":[]},{"cell_type":"code","source":"train_translator.fit(dataset, epochs=14,\n 
callbacks=[batch_loss])","metadata":{"id":"BQd_esVVoSf3","outputId":"06c29d9e-195b-46a6-a401-92eb33ccdc94","execution":{"iopub.status.busy":"2023-03-16T11:47:51.534342Z","iopub.execute_input":"2023-03-16T11:47:51.535032Z","iopub.status.idle":"2023-03-16T15:34:16.598152Z","shell.execute_reply.started":"2023-03-16T11:47:51.534976Z","shell.execute_reply":"2023-03-16T15:34:16.597041Z"},"trusted":true},"execution_count":85,"outputs":[{"name":"stdout","text":"Epoch 1/14\n2820/2820 [==============================] - 989s 348ms/step - batch_loss: 3.3863\nEpoch 2/14\n2820/2820 [==============================] - 961s 341ms/step - batch_loss: 1.3731\nEpoch 3/14\n2820/2820 [==============================] - 958s 340ms/step - batch_loss: 0.9362\nEpoch 4/14\n2820/2820 [==============================] - 969s 343ms/step - batch_loss: 0.7388\nEpoch 5/14\n2820/2820 [==============================] - 959s 340ms/step - batch_loss: 0.6408\nEpoch 6/14\n2820/2820 [==============================] - 957s 339ms/step - batch_loss: 0.5801\nEpoch 7/14\n2820/2820 [==============================] - 956s 339ms/step - batch_loss: 0.5346\nEpoch 8/14\n2820/2820 [==============================] - 961s 341ms/step - batch_loss: 0.5017\nEpoch 9/14\n2820/2820 [==============================] - 956s 339ms/step - batch_loss: 0.4785\nEpoch 10/14\n2820/2820 [==============================] - 957s 339ms/step - batch_loss: 0.4618\nEpoch 11/14\n2820/2820 [==============================] - 953s 338ms/step - batch_loss: 0.4497\nEpoch 12/14\n2820/2820 [==============================] - 952s 337ms/step - batch_loss: 0.4415\nEpoch 13/14\n2820/2820 [==============================] - 962s 341ms/step - batch_loss: 0.4363\nEpoch 14/14\n2820/2820 [==============================] - 955s 339ms/step - batch_loss: 0.4364\n","output_type":"stream"},{"execution_count":85,"output_type":"execute_result","data":{"text/plain":""},"metadata":{}}]},{"cell_type":"code","source":"df = df.sample(frac = 
1)\ninp=list(df['Sentence'])\ntarg=list(df['trr'])\nBUFFER_SIZE = len(inp)\nBATCH_SIZE = 128\ndataset = tf.data.Dataset.from_tensor_slices((inp, targ)).shuffle(BUFFER_SIZE)\ndataset = dataset.batch(BATCH_SIZE)\ntrain_translator.fit(dataset, epochs=3,callbacks=[batch_loss])","metadata":{"execution":{"iopub.status.busy":"2023-03-16T15:40:53.225716Z","iopub.execute_input":"2023-03-16T15:40:53.226111Z","iopub.status.idle":"2023-03-16T16:10:40.299151Z","shell.execute_reply.started":"2023-03-16T15:40:53.226076Z","shell.execute_reply":"2023-03-16T16:10:40.298062Z"},"trusted":true},"execution_count":101,"outputs":[{"name":"stdout","text":"Epoch 1/3\n1410/1410 [==============================] - 592s 420ms/step - batch_loss: 0.3202\nEpoch 2/3\n1410/1410 [==============================] - 575s 408ms/step - batch_loss: 0.2631\nEpoch 3/3\n1410/1410 [==============================] - 571s 405ms/step - batch_loss: 0.2652\n","output_type":"stream"},{"execution_count":101,"output_type":"execute_result","data":{"text/plain":""},"metadata":{}}]},{"cell_type":"code","source":"df = df.sample(frac = 1)\ninp=list(df['Sentence'])\ntarg=list(df['trr'])\nBUFFER_SIZE = len(inp)\nBATCH_SIZE = 256\ndataset = tf.data.Dataset.from_tensor_slices((inp, targ)).shuffle(BUFFER_SIZE)\ndataset = dataset.batch(BATCH_SIZE)\ntrain_translator.fit(dataset, epochs=3,callbacks=[batch_loss])","metadata":{"execution":{"iopub.status.busy":"2023-03-16T16:42:20.557205Z","iopub.execute_input":"2023-03-16T16:42:20.557606Z","iopub.status.idle":"2023-03-16T17:03:02.329620Z","shell.execute_reply.started":"2023-03-16T16:42:20.557572Z","shell.execute_reply":"2023-03-16T17:03:02.328512Z"},"trusted":true},"execution_count":118,"outputs":[{"name":"stdout","text":"Epoch 1/3\n705/705 [==============================] - 424s 600ms/step - batch_loss: 0.1966\nEpoch 2/3\n705/705 [==============================] - 409s 580ms/step - batch_loss: 0.1409\nEpoch 3/3\n705/705 [==============================] - 408s 579ms/step - 
batch_loss: 0.1234\n","output_type":"stream"},{"execution_count":118,"output_type":"execute_result","data":{"text/plain":""},"metadata":{}}]},{"cell_type":"code","source":"df = df.sample(frac = 1)\ninp=list(df['Sentence'])\ntarg=list(df['trr'])\nBUFFER_SIZE = len(inp)\nBATCH_SIZE = 512\ndataset = tf.data.Dataset.from_tensor_slices((inp, targ)).shuffle(BUFFER_SIZE)\ndataset = dataset.batch(BATCH_SIZE)\ntrain_translator.fit(dataset, epochs=3,callbacks=[batch_loss])","metadata":{"execution":{"iopub.status.busy":"2023-03-16T17:04:42.238316Z","iopub.execute_input":"2023-03-16T17:04:42.239043Z","iopub.status.idle":"2023-03-16T17:22:34.668307Z","shell.execute_reply.started":"2023-03-16T17:04:42.238994Z","shell.execute_reply":"2023-03-16T17:22:34.667065Z"},"trusted":true},"execution_count":131,"outputs":[{"name":"stdout","text":"Epoch 1/3\n353/353 [==============================] - 351s 993ms/step - batch_loss: 0.0902\nEpoch 2/3\n353/353 [==============================] - 339s 959ms/step - batch_loss: 0.0665\nEpoch 3/3\n353/353 [==============================] - 337s 953ms/step - batch_loss: 0.0593\n","output_type":"stream"},{"execution_count":131,"output_type":"execute_result","data":{"text/plain":""},"metadata":{}}]},{"cell_type":"code","source":"df = df.sample(frac = 1)\ninp=list(df['Sentence'])\ntarg=list(df['trr'])\nBUFFER_SIZE = len(inp)\nBATCH_SIZE = 1024\ndataset = tf.data.Dataset.from_tensor_slices((inp, targ)).shuffle(BUFFER_SIZE)\ndataset = dataset.batch(BATCH_SIZE)\ntrain_translator.fit(dataset, epochs=3,callbacks=[batch_loss])","metadata":{"execution":{"iopub.status.busy":"2023-03-16T17:23:44.856932Z","iopub.execute_input":"2023-03-16T17:23:44.857689Z","iopub.status.idle":"2023-03-16T17:39:40.716749Z","shell.execute_reply.started":"2023-03-16T17:23:44.857648Z","shell.execute_reply":"2023-03-16T17:39:40.715750Z"},"trusted":true},"execution_count":147,"outputs":[{"name":"stdout","text":"Epoch 1/3\n177/177 [==============================] - 326s 2s/step - 
batch_loss: 0.0480\nEpoch 2/3\n177/177 [==============================] - 314s 2s/step - batch_loss: 0.0421\nEpoch 3/3\n177/177 [==============================] - 314s 2s/step - batch_loss: 0.0405\n","output_type":"stream"},{"execution_count":147,"output_type":"execute_result","data":{"text/plain":""},"metadata":{}}]},{"cell_type":"code","source":"df = df.sample(frac = 1)\ninp=list(df['Sentence'])\ntarg=list(df['trr'])\nBUFFER_SIZE = len(inp)\nBATCH_SIZE = 1536\ndataset = tf.data.Dataset.from_tensor_slices((inp, targ)).shuffle(BUFFER_SIZE)\ndataset = dataset.batch(BATCH_SIZE)\ntrain_translator.fit(dataset, epochs=3,callbacks=[batch_loss])","metadata":{"execution":{"iopub.status.busy":"2023-03-16T17:43:00.227000Z","iopub.execute_input":"2023-03-16T17:43:00.227861Z","iopub.status.idle":"2023-03-16T17:43:28.536846Z","shell.execute_reply.started":"2023-03-16T17:43:00.227811Z","shell.execute_reply":"2023-03-16T17:43:28.534001Z"},"trusted":true},"execution_count":163,"outputs":[{"name":"stdout","text":"Epoch 1/3\n 6/118 [>.............................] 
- ETA: 4:50 - batch_loss: 0.0351","output_type":"stream"},{"traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mResourceExhaustedError\u001b[0m Traceback (most recent call last)","\u001b[0;32m/tmp/ipykernel_23/188685183.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mdataset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_tensor_slices\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshuffle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mBUFFER_SIZE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mdataset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mBATCH_SIZE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mtrain_translator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcallbacks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mbatch_loss\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;32m/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\u001b[0m in \u001b[0;36merror_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0;31m# To get the full stack trace, call:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 69\u001b[0m 
\u001b[0;31m# `tf.debugging.disable_traceback_filtering()`\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 70\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfiltered_tb\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 71\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32mdel\u001b[0m \u001b[0mfiltered_tb\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[0;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mensure_initialized\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 52\u001b[0m tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,\n\u001b[0;32m---> 53\u001b[0;31m inputs, attrs, num_outputs)\n\u001b[0m\u001b[1;32m 54\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_NotOkStatusException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mResourceExhaustedError\u001b[0m: Graph execution error:\n\nDetected at node 'while/decoder_2/bahdanau_attention_3/dense_10/Tensordot/MatMul' defined at (most recent call last):\n File \"/opt/conda/lib/python3.7/runpy.py\", 
line 193, in _run_module_as_main\n \"__main__\", mod_spec)\n File \"/opt/conda/lib/python3.7/runpy.py\", line 85, in _run_code\n exec(code, run_globals)\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py\", line 17, in \n app.launch_new_instance()\n File \"/opt/conda/lib/python3.7/site-packages/traitlets/config/application.py\", line 1041, in launch_instance\n app.start()\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelapp.py\", line 712, in start\n self.io_loop.start()\n File \"/opt/conda/lib/python3.7/site-packages/tornado/platform/asyncio.py\", line 199, in start\n self.asyncio_loop.run_forever()\n File \"/opt/conda/lib/python3.7/asyncio/base_events.py\", line 541, in run_forever\n self._run_once()\n File \"/opt/conda/lib/python3.7/asyncio/base_events.py\", line 1786, in _run_once\n handle._run()\n File \"/opt/conda/lib/python3.7/asyncio/events.py\", line 88, in _run\n self._context.run(self._callback, *self._args)\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py\", line 510, in dispatch_queue\n await self.process_one()\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py\", line 499, in process_one\n await dispatch(*args)\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py\", line 406, in dispatch_shell\n await result\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py\", line 730, in execute_request\n reply_content = await reply_content\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/ipkernel.py\", line 387, in do_execute\n cell_id=cell_id,\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/zmqshell.py\", line 528, in run_cell\n return super().run_cell(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 2976, in run_cell\n raw_cell, store_history, silent, shell_futures, cell_id\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 3030, in 
_run_cell\n return runner(coro)\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/async_helpers.py\", line 78, in _pseudo_sync_runner\n coro.send(None)\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 3258, in run_cell_async\n interactivity=interactivity, compiler=compiler, result=result)\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 3473, in run_ast_nodes\n if (await self.run_code(code, result, async_=asy)):\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 3553, in run_code\n exec(code_obj, self.user_global_ns, self.user_ns)\n File \"/tmp/ipykernel_23/2555411180.py\", line 2, in \n callbacks=[batch_loss])\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 65, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/training.py\", line 1650, in fit\n tmp_logs = self.train_function(iterator)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/training.py\", line 1249, in train_function\n return step_function(self, iterator)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/training.py\", line 1233, in step_function\n outputs = model.distribute_strategy.run(run_step, args=(data,))\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/training.py\", line 1222, in run_step\n outputs = model.train_step(data)\n File \"/tmp/ipykernel_23/3733470266.py\", line 24, in train_step\n return self._tf_train_step(inputs)\n File \"/tmp/ipykernel_23/626488378.py\", line 5, in _tf_train_step\n return self._train_step(inputs)\n File \"/tmp/ipykernel_23/4151822410.py\", line 22, in _train_step\n for t in tf.range(max_target_length-1):\n File \"/tmp/ipykernel_23/4151822410.py\", line 27, in _train_step\n step_loss, dec_state = self._loop_step(new_tokens, input_mask,\n File \"/tmp/ipykernel_23/431701214.py\", line 10, in _loop_step\n dec_result, 
dec_state = self.decoder(decoder_input, state=dec_state)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 65, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/base_layer.py\", line 1132, in __call__\n outputs = call_fn(inputs, *args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 96, in error_handler\n return fn(*args, **kwargs)\n File \"/tmp/ipykernel_23/2024488487.py\", line 24, in call\n context_vector, attention_weights = self.attention(\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 65, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/base_layer.py\", line 1132, in __call__\n outputs = call_fn(inputs, *args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 96, in error_handler\n return fn(*args, **kwargs)\n File \"/tmp/ipykernel_23/696840811.py\", line 17, in call\n w1_query = self.W1(query)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 65, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/base_layer.py\", line 1132, in __call__\n outputs = call_fn(inputs, *args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 96, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/layers/core/dense.py\", line 244, in call\n outputs = tf.tensordot(inputs, self.kernel, [[rank - 1], [0]])\nNode: 'while/decoder_2/bahdanau_attention_3/dense_10/Tensordot/MatMul'\nDetected at node 'while/decoder_2/bahdanau_attention_3/dense_10/Tensordot/MatMul' defined at (most recent call last):\n File \"/opt/conda/lib/python3.7/runpy.py\", line 193, in _run_module_as_main\n \"__main__\", mod_spec)\n File 
\"/opt/conda/lib/python3.7/runpy.py\", line 85, in _run_code\n exec(code, run_globals)\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py\", line 17, in \n app.launch_new_instance()\n File \"/opt/conda/lib/python3.7/site-packages/traitlets/config/application.py\", line 1041, in launch_instance\n app.start()\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelapp.py\", line 712, in start\n self.io_loop.start()\n File \"/opt/conda/lib/python3.7/site-packages/tornado/platform/asyncio.py\", line 199, in start\n self.asyncio_loop.run_forever()\n File \"/opt/conda/lib/python3.7/asyncio/base_events.py\", line 541, in run_forever\n self._run_once()\n File \"/opt/conda/lib/python3.7/asyncio/base_events.py\", line 1786, in _run_once\n handle._run()\n File \"/opt/conda/lib/python3.7/asyncio/events.py\", line 88, in _run\n self._context.run(self._callback, *self._args)\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py\", line 510, in dispatch_queue\n await self.process_one()\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py\", line 499, in process_one\n await dispatch(*args)\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py\", line 406, in dispatch_shell\n await result\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py\", line 730, in execute_request\n reply_content = await reply_content\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/ipkernel.py\", line 387, in do_execute\n cell_id=cell_id,\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/zmqshell.py\", line 528, in run_cell\n return super().run_cell(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 2976, in run_cell\n raw_cell, store_history, silent, shell_futures, cell_id\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 3030, in _run_cell\n return runner(coro)\n File 
\"/opt/conda/lib/python3.7/site-packages/IPython/core/async_helpers.py\", line 78, in _pseudo_sync_runner\n coro.send(None)\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 3258, in run_cell_async\n interactivity=interactivity, compiler=compiler, result=result)\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 3473, in run_ast_nodes\n if (await self.run_code(code, result, async_=asy)):\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 3553, in run_code\n exec(code_obj, self.user_global_ns, self.user_ns)\n File \"/tmp/ipykernel_23/2555411180.py\", line 2, in \n callbacks=[batch_loss])\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 65, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/training.py\", line 1650, in fit\n tmp_logs = self.train_function(iterator)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/training.py\", line 1249, in train_function\n return step_function(self, iterator)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/training.py\", line 1233, in step_function\n outputs = model.distribute_strategy.run(run_step, args=(data,))\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/training.py\", line 1222, in run_step\n outputs = model.train_step(data)\n File \"/tmp/ipykernel_23/3733470266.py\", line 24, in train_step\n return self._tf_train_step(inputs)\n File \"/tmp/ipykernel_23/626488378.py\", line 5, in _tf_train_step\n return self._train_step(inputs)\n File \"/tmp/ipykernel_23/4151822410.py\", line 22, in _train_step\n for t in tf.range(max_target_length-1):\n File \"/tmp/ipykernel_23/4151822410.py\", line 27, in _train_step\n step_loss, dec_state = self._loop_step(new_tokens, input_mask,\n File \"/tmp/ipykernel_23/431701214.py\", line 10, in _loop_step\n dec_result, dec_state = 
self.decoder(decoder_input, state=dec_state)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 65, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/base_layer.py\", line 1132, in __call__\n outputs = call_fn(inputs, *args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 96, in error_handler\n return fn(*args, **kwargs)\n File \"/tmp/ipykernel_23/2024488487.py\", line 24, in call\n context_vector, attention_weights = self.attention(\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 65, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/base_layer.py\", line 1132, in __call__\n outputs = call_fn(inputs, *args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 96, in error_handler\n return fn(*args, **kwargs)\n File \"/tmp/ipykernel_23/696840811.py\", line 17, in call\n w1_query = self.W1(query)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 65, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/base_layer.py\", line 1132, in __call__\n outputs = call_fn(inputs, *args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 96, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/layers/core/dense.py\", line 244, in call\n outputs = tf.tensordot(inputs, self.kernel, [[rank - 1], [0]])\nNode: 'while/decoder_2/bahdanau_attention_3/dense_10/Tensordot/MatMul'\n2 root error(s) found.\n (0) RESOURCE_EXHAUSTED: OOM when allocating tensor with shape[1536,1024] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc\n\t [[{{node 
while/decoder_2/bahdanau_attention_3/dense_10/Tensordot/MatMul}}]]\nHint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.\n\n\t [[StatefulPartitionedCall/while/loop_body_control/_609/_53]]\nHint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.\n\n (1) RESOURCE_EXHAUSTED: OOM when allocating tensor with shape[1536,1024] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc\n\t [[{{node while/decoder_2/bahdanau_attention_3/dense_10/Tensordot/MatMul}}]]\nHint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.\n\n0 successful operations.\n0 derived errors ignored. 
[Op:__inference_train_function_274694]"],"ename":"ResourceExhaustedError","evalue":"Graph execution error:\n\nDetected at node 'while/decoder_2/bahdanau_attention_3/dense_10/Tensordot/MatMul' defined at (most recent call last):\n File \"/opt/conda/lib/python3.7/runpy.py\", line 193, in _run_module_as_main\n \"__main__\", mod_spec)\n File \"/opt/conda/lib/python3.7/runpy.py\", line 85, in _run_code\n exec(code, run_globals)\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py\", line 17, in \n app.launch_new_instance()\n File \"/opt/conda/lib/python3.7/site-packages/traitlets/config/application.py\", line 1041, in launch_instance\n app.start()\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelapp.py\", line 712, in start\n self.io_loop.start()\n File \"/opt/conda/lib/python3.7/site-packages/tornado/platform/asyncio.py\", line 199, in start\n self.asyncio_loop.run_forever()\n File \"/opt/conda/lib/python3.7/asyncio/base_events.py\", line 541, in run_forever\n self._run_once()\n File \"/opt/conda/lib/python3.7/asyncio/base_events.py\", line 1786, in _run_once\n handle._run()\n File \"/opt/conda/lib/python3.7/asyncio/events.py\", line 88, in _run\n self._context.run(self._callback, *self._args)\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py\", line 510, in dispatch_queue\n await self.process_one()\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py\", line 499, in process_one\n await dispatch(*args)\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py\", line 406, in dispatch_shell\n await result\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py\", line 730, in execute_request\n reply_content = await reply_content\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/ipkernel.py\", line 387, in do_execute\n cell_id=cell_id,\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/zmqshell.py\", line 528, in run_cell\n return super().run_cell(*args, 
**kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 2976, in run_cell\n raw_cell, store_history, silent, shell_futures, cell_id\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 3030, in _run_cell\n return runner(coro)\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/async_helpers.py\", line 78, in _pseudo_sync_runner\n coro.send(None)\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 3258, in run_cell_async\n interactivity=interactivity, compiler=compiler, result=result)\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 3473, in run_ast_nodes\n if (await self.run_code(code, result, async_=asy)):\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 3553, in run_code\n exec(code_obj, self.user_global_ns, self.user_ns)\n File \"/tmp/ipykernel_23/2555411180.py\", line 2, in \n callbacks=[batch_loss])\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 65, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/training.py\", line 1650, in fit\n tmp_logs = self.train_function(iterator)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/training.py\", line 1249, in train_function\n return step_function(self, iterator)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/training.py\", line 1233, in step_function\n outputs = model.distribute_strategy.run(run_step, args=(data,))\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/training.py\", line 1222, in run_step\n outputs = model.train_step(data)\n File \"/tmp/ipykernel_23/3733470266.py\", line 24, in train_step\n return self._tf_train_step(inputs)\n File \"/tmp/ipykernel_23/626488378.py\", line 5, in _tf_train_step\n return self._train_step(inputs)\n File \"/tmp/ipykernel_23/4151822410.py\", line 
22, in _train_step\n for t in tf.range(max_target_length-1):\n File \"/tmp/ipykernel_23/4151822410.py\", line 27, in _train_step\n step_loss, dec_state = self._loop_step(new_tokens, input_mask,\n File \"/tmp/ipykernel_23/431701214.py\", line 10, in _loop_step\n dec_result, dec_state = self.decoder(decoder_input, state=dec_state)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 65, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/base_layer.py\", line 1132, in __call__\n outputs = call_fn(inputs, *args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 96, in error_handler\n return fn(*args, **kwargs)\n File \"/tmp/ipykernel_23/2024488487.py\", line 24, in call\n context_vector, attention_weights = self.attention(\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 65, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/base_layer.py\", line 1132, in __call__\n outputs = call_fn(inputs, *args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 96, in error_handler\n return fn(*args, **kwargs)\n File \"/tmp/ipykernel_23/696840811.py\", line 17, in call\n w1_query = self.W1(query)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 65, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/base_layer.py\", line 1132, in __call__\n outputs = call_fn(inputs, *args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 96, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/layers/core/dense.py\", line 244, in call\n outputs = tf.tensordot(inputs, self.kernel, [[rank - 1], [0]])\nNode: 
'while/decoder_2/bahdanau_attention_3/dense_10/Tensordot/MatMul'\nDetected at node 'while/decoder_2/bahdanau_attention_3/dense_10/Tensordot/MatMul' defined at (most recent call last):\n File \"/opt/conda/lib/python3.7/runpy.py\", line 193, in _run_module_as_main\n \"__main__\", mod_spec)\n File \"/opt/conda/lib/python3.7/runpy.py\", line 85, in _run_code\n exec(code, run_globals)\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py\", line 17, in \n app.launch_new_instance()\n File \"/opt/conda/lib/python3.7/site-packages/traitlets/config/application.py\", line 1041, in launch_instance\n app.start()\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelapp.py\", line 712, in start\n self.io_loop.start()\n File \"/opt/conda/lib/python3.7/site-packages/tornado/platform/asyncio.py\", line 199, in start\n self.asyncio_loop.run_forever()\n File \"/opt/conda/lib/python3.7/asyncio/base_events.py\", line 541, in run_forever\n self._run_once()\n File \"/opt/conda/lib/python3.7/asyncio/base_events.py\", line 1786, in _run_once\n handle._run()\n File \"/opt/conda/lib/python3.7/asyncio/events.py\", line 88, in _run\n self._context.run(self._callback, *self._args)\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py\", line 510, in dispatch_queue\n await self.process_one()\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py\", line 499, in process_one\n await dispatch(*args)\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py\", line 406, in dispatch_shell\n await result\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py\", line 730, in execute_request\n reply_content = await reply_content\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/ipkernel.py\", line 387, in do_execute\n cell_id=cell_id,\n File \"/opt/conda/lib/python3.7/site-packages/ipykernel/zmqshell.py\", line 528, in run_cell\n return super().run_cell(*args, **kwargs)\n File 
\"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 2976, in run_cell\n raw_cell, store_history, silent, shell_futures, cell_id\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 3030, in _run_cell\n return runner(coro)\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/async_helpers.py\", line 78, in _pseudo_sync_runner\n coro.send(None)\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 3258, in run_cell_async\n interactivity=interactivity, compiler=compiler, result=result)\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 3473, in run_ast_nodes\n if (await self.run_code(code, result, async_=asy)):\n File \"/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py\", line 3553, in run_code\n exec(code_obj, self.user_global_ns, self.user_ns)\n File \"/tmp/ipykernel_23/2555411180.py\", line 2, in \n callbacks=[batch_loss])\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 65, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/training.py\", line 1650, in fit\n tmp_logs = self.train_function(iterator)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/training.py\", line 1249, in train_function\n return step_function(self, iterator)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/training.py\", line 1233, in step_function\n outputs = model.distribute_strategy.run(run_step, args=(data,))\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/training.py\", line 1222, in run_step\n outputs = model.train_step(data)\n File \"/tmp/ipykernel_23/3733470266.py\", line 24, in train_step\n return self._tf_train_step(inputs)\n File \"/tmp/ipykernel_23/626488378.py\", line 5, in _tf_train_step\n return self._train_step(inputs)\n File \"/tmp/ipykernel_23/4151822410.py\", line 22, in 
_train_step\n for t in tf.range(max_target_length-1):\n File \"/tmp/ipykernel_23/4151822410.py\", line 27, in _train_step\n step_loss, dec_state = self._loop_step(new_tokens, input_mask,\n File \"/tmp/ipykernel_23/431701214.py\", line 10, in _loop_step\n dec_result, dec_state = self.decoder(decoder_input, state=dec_state)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 65, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/base_layer.py\", line 1132, in __call__\n outputs = call_fn(inputs, *args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 96, in error_handler\n return fn(*args, **kwargs)\n File \"/tmp/ipykernel_23/2024488487.py\", line 24, in call\n context_vector, attention_weights = self.attention(\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 65, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/base_layer.py\", line 1132, in __call__\n outputs = call_fn(inputs, *args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 96, in error_handler\n return fn(*args, **kwargs)\n File \"/tmp/ipykernel_23/696840811.py\", line 17, in call\n w1_query = self.W1(query)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 65, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/engine/base_layer.py\", line 1132, in __call__\n outputs = call_fn(inputs, *args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py\", line 96, in error_handler\n return fn(*args, **kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/keras/layers/core/dense.py\", line 244, in call\n outputs = tf.tensordot(inputs, self.kernel, [[rank - 1], [0]])\nNode: 
'while/decoder_2/bahdanau_attention_3/dense_10/Tensordot/MatMul'\n2 root error(s) found.\n (0) RESOURCE_EXHAUSTED: OOM when allocating tensor with shape[1536,1024] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc\n\t [[{{node while/decoder_2/bahdanau_attention_3/dense_10/Tensordot/MatMul}}]]\nHint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.\n\n\t [[StatefulPartitionedCall/while/loop_body_control/_609/_53]]\nHint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.\n\n (1) RESOURCE_EXHAUSTED: OOM when allocating tensor with shape[1536,1024] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc\n\t [[{{node while/decoder_2/bahdanau_attention_3/dense_10/Tensordot/MatMul}}]]\nHint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.\n\n0 successful operations.\n0 derived errors ignored. 
# Plot the per-batch training loss collected by the `batch_loss` callback.
fig, ax = plt.subplots()
ax.plot(batch_loss.logs)
ax.set_ylim([0, 5])
ax.set_xlabel('Batch #')
ax.set_ylabel('CE/token')
class Translator(tf.Module):
    """Inference-time wrapper around a trained encoder/decoder pair.

    Holds the two text processors plus the lookup layers and token constants
    needed to turn sampled token IDs back into text.

    Args:
      encoder: Called as `encoder(input_tokens)` by the translate methods.
      decoder: Called as `decoder(decoder_input, state=...)` by the translate
        methods.
      input_text_processor: Maps raw input strings to input token IDs.
      output_text_processor: Text processor for the target language; only its
        vocabulary is read here.
    """

    def __init__(self, encoder, decoder, input_text_processor,
                 output_text_processor):
        self.encoder = encoder
        self.decoder = decoder
        self.input_text_processor = input_text_processor
        self.output_text_processor = output_text_processor

        # Fetch the vocabulary once; both lookup layers are built from it.
        output_vocabulary = output_text_processor.get_vocabulary()

        # ID -> token string (inverse lookup), used when rendering output text.
        self.output_token_string_from_index = tf.keras.layers.StringLookup(
            vocabulary=output_vocabulary,
            mask_token='',
            invert=True)

        # The output should never generate padding, unknown, or start.
        index_from_string = tf.keras.layers.StringLookup(
            vocabulary=output_vocabulary, mask_token='')
        token_mask_ids = index_from_string(['', '[UNK]', '[START]']).numpy()

        # Use the builtin `bool`: the `np.bool` alias is deprecated since
        # NumPy 1.20 and removed in NumPy 1.24.
        token_mask = np.zeros([index_from_string.vocabulary_size()], dtype=bool)
        token_mask[np.array(token_mask_ids)] = True
        self.token_mask = token_mask

        self.start_token = index_from_string(tf.constant('[START]'))
        self.end_token = index_from_string(tf.constant('[END]'))
def tokens_to_text(self, result_tokens):
    """Convert a batch of token-ID sequences into human-readable strings.

    Args:
      result_tokens: Token IDs, shape-checked as ('batch', 't').

    Returns:
      A string tensor, shape-checked as ('batch',): the tokens of each row
      joined with single spaces, with leading/trailing whitespace stripped.
    """
    shape_checker = ShapeChecker()
    shape_checker(result_tokens, ('batch', 't'))

    # Map every ID back to its vocabulary string.
    result_text_tokens = self.output_token_string_from_index(result_tokens)
    shape_checker(result_text_tokens, ('batch', 't'))

    result_text = tf.strings.reduce_join(result_text_tokens,
                                         axis=1, separator=' ')
    # Pass a real 1-tuple: `('batch')` is just the string 'batch'.
    shape_checker(result_text, ('batch',))

    # Strip the whitespace left at the ends of the joined string
    # (presumably from empty-string padding tokens -- TODO confirm).
    result_text = tf.strings.strip(result_text)
    shape_checker(result_text, ('batch',))
    return result_text
def sample(self, logits, temperature):
    """Pick the next token IDs from the decoder's logits.

    Args:
      logits: Decoder logits, shape-checked as ('batch', 't', 'vocab').
        The sampling branch squeezes axis 1, so 't' must be 1 there.
      temperature: 0.0 selects the arg-max token; any other value samples
        from the distribution given by `logits / temperature`.

    Returns:
      Token IDs, shape-checked as ('batch', 't').
    """
    shape_checker = ShapeChecker()
    # 't' is usually 1 here.
    shape_checker(logits, ('batch', 't', 'vocab'))
    shape_checker(self.token_mask, ('vocab',))

    # Add leading axes so the mask lines up with (batch, t, vocab).
    token_mask = self.token_mask[tf.newaxis, tf.newaxis, :]
    shape_checker(token_mask, ('batch', 't', 'vocab'), broadcast=True)

    # Set the logits for all masked tokens to -inf, so they are never chosen.
    # Use the shape-checked, explicitly broadcast mask built above.
    logits = tf.where(token_mask, -np.inf, logits)

    if temperature == 0.0:
        new_tokens = tf.argmax(logits, axis=-1)
    else:
        logits = tf.squeeze(logits, axis=1)
        new_tokens = tf.random.categorical(logits / temperature,
                                           num_samples=1)

    shape_checker(new_tokens, ('batch', 't'))

    return new_tokens


Translator.sample = sample

# testing the function on random i/p
example_logits = tf.random.normal([5, 1, output_text_processor.vocabulary_size()])
example_output_tokens = translator.sample(example_logits, temperature=1.0)
example_output_tokens


def translate_unrolled(self,
                       input_text, *,
                       max_length=50,
                       return_attention=True,
                       temperature=1.0):
    """Translate a batch of strings using a plain Python decoding loop.

    Args:
      input_text: Batch of input strings; its first dimension is the batch size.
      max_length: Maximum number of decoding steps.
      return_attention: Whether to include the attention weights in the result.
      temperature: Forwarded to `self.sample`.

    Returns:
      A dict with key 'text' (the translated strings) and, when
      `return_attention` is true, key 'attention' (the per-step attention
      weights concatenated along axis 1).
    """
    batch_size = tf.shape(input_text)[0]
    input_tokens = self.input_text_processor(input_text)
    enc_output, enc_state = self.encoder(input_tokens)

    # The decoder starts from the encoder's final state and a [START] token.
    dec_state = enc_state
    new_tokens = tf.fill([batch_size, 1], self.start_token)

    result_tokens = []
    attention = []
    done = tf.zeros([batch_size, 1], dtype=tf.bool)

    for _ in range(max_length):
        dec_input = DecoderInput(new_tokens=new_tokens,
                                 enc_output=enc_output,
                                 mask=(input_tokens != 0))

        dec_result, dec_state = self.decoder(dec_input, state=dec_state)

        attention.append(dec_result.attention_weights)

        new_tokens = self.sample(dec_result.logits, temperature)

        # If a sequence produces an `end_token`, set it `done`
        done = done | (new_tokens == self.end_token)
        # Once a sequence is done it only produces 0-padding.
        new_tokens = tf.where(done, tf.constant(0, dtype=tf.int64), new_tokens)

        # Collect the generated tokens
        result_tokens.append(new_tokens)

        # Early exit is only possible eagerly, where `done` has a concrete value.
        if tf.executing_eagerly() and tf.reduce_all(done):
            break

    # Convert the list of generated token ids to a list of strings.
    result_tokens = tf.concat(result_tokens, axis=-1)
    result_text = self.tokens_to_text(result_tokens)

    if return_attention:
        attention_stack = tf.concat(attention, axis=1)
        return {'text': result_text, 'attention': attention_stack}
    else:
        return {'text': result_text}
translate_unrolled","metadata":{"id":"JOmd8Y269MG3","execution":{"iopub.status.busy":"2023-03-16T17:39:58.570219Z","iopub.execute_input":"2023-03-16T17:39:58.571429Z","iopub.status.idle":"2023-03-16T17:39:58.577001Z","shell.execute_reply.started":"2023-03-16T17:39:58.571373Z","shell.execute_reply":"2023-03-16T17:39:58.575644Z"},"trusted":true},"execution_count":157,"outputs":[]},{"cell_type":"code","source":"change_vocab('tum kaise ho')","metadata":{"execution":{"iopub.status.busy":"2023-03-16T17:39:58.882755Z","iopub.execute_input":"2023-03-16T17:39:58.883159Z","iopub.status.idle":"2023-03-16T17:39:58.892384Z","shell.execute_reply.started":"2023-03-16T17:39:58.883123Z","shell.execute_reply":"2023-03-16T17:39:58.891112Z"},"trusted":true},"execution_count":158,"outputs":[{"execution_count":158,"output_type":"execute_result","data":{"text/plain":"'tum kassie hoo '"},"metadata":{}}]},{"cell_type":"code","source":"# TESTING\ninput_text = tf.constant([\n change_vocab('mera naam rohan hai'),\n change_vocab('kaha jana hai'),\n change_vocab('tumhare papa kya kaam karte hai '),\n change_vocab('tumhare papa ka naam kya hai'),\n change_vocab('maine khana kha liya hai')\n])\n\nresult = translator.translate(\n input_text = input_text)\n\nprint(result['text'][0].numpy().decode())\nprint(result['text'][1].numpy().decode())\nprint(result['text'][2].numpy().decode())\nprint(result['text'][3].numpy().decode())\nprint(result['text'][4].numpy().decode())\nprint()","metadata":{"id":"hd2rgyHwVVrv","outputId":"72a81edc-d510-4038-fc8b-527a5b824127","execution":{"iopub.status.busy":"2023-03-16T17:40:15.051171Z","iopub.execute_input":"2023-03-16T17:40:15.052400Z","iopub.status.idle":"2023-03-16T17:40:16.039222Z","shell.execute_reply.started":"2023-03-16T17:40:15.052348Z","shell.execute_reply":"2023-03-16T17:40:16.038061Z"},"trusted":true},"execution_count":161,"outputs":[{"name":"stdout","text":"my name is loud\nwhere is it ?\ndo you do your papa . 
@tf.function(
    input_signature=[tf.TensorSpec(shape=[None], dtype=tf.string)])
def tf_translate(self, input_text):
    """Graph-compiled entry point that delegates to `self.translate`.

    The input signature restricts `input_text` to a 1-D string tensor of
    any length.
    """
    translation = self.translate(input_text)
    return translation


Translator.tf_translate = tf_translate
#@title [Optional] Use a symbolic loop
def translate_symbolic(self,
                       input_text,
                       *,
                       max_length=50,
                       return_attention=True,
                       temperature=1.0):
    """Translate a batch of strings with a `tf.range` decoding loop.

    Per-step outputs are accumulated in `tf.TensorArray`s rather than Python
    lists, and the loop stops as soon as every sequence has emitted the end
    token.

    Args:
      input_text: Batch of input strings, shape-checked as ('batch',).
      max_length: Upper bound on the number of decoding steps.
      return_attention: Whether to include attention weights in the result.
      temperature: Forwarded to `self.sample`.

    Returns:
      A dict with key 'text' (shape-checked as ('batch',)) and, when
      `return_attention` is true, key 'attention' (shape-checked as
      ('batch', 't', 's')).
    """
    check = ShapeChecker()
    check(input_text, ('batch',))

    n_sentences = tf.shape(input_text)[0]

    # --- Encoder pass -------------------------------------------------------
    source_tokens = self.input_text_processor(input_text)
    check(source_tokens, ('batch', 's'))

    enc_output, enc_state = self.encoder(source_tokens)
    check(enc_output, ('batch', 's', 'enc_units'))
    check(enc_state, ('batch', 'enc_units'))

    # --- Decoder start-up: encoder state plus one [START] token per row -----
    decoder_state = enc_state
    next_tokens = tf.fill([n_sentences, 1], self.start_token)
    check(next_tokens, ('batch', 't1'))

    # --- Loop-carried accumulators (must exist before the loop starts) ------
    token_steps = tf.TensorArray(tf.int64, size=1, dynamic_size=True)
    attention_steps = tf.TensorArray(tf.float32, size=1, dynamic_size=True)
    finished = tf.zeros([n_sentences, 1], dtype=tf.bool)
    check(finished, ('batch', 't1'))

    for step in tf.range(max_length):
        step_input = DecoderInput(
            new_tokens=next_tokens,
            enc_output=enc_output,
            mask=(source_tokens != 0))

        step_result, decoder_state = self.decoder(step_input,
                                                  state=decoder_state)

        check(step_result.attention_weights, ('batch', 't1', 's'))
        attention_steps = attention_steps.write(
            step, step_result.attention_weights)

        next_tokens = self.sample(step_result.logits, temperature)
        check(step_result.logits, ('batch', 't1', 'vocab'))
        check(next_tokens, ('batch', 't1'))

        # A row becomes finished once it has produced the end token ...
        finished = finished | (next_tokens == self.end_token)
        # ... and from then on it only emits 0-padding.
        next_tokens = tf.where(
            finished, tf.constant(0, dtype=tf.int64), next_tokens)

        token_steps = token_steps.write(step, next_tokens)

        if tf.reduce_all(finished):
            break

    # Stack the per-step tokens and convert from time-major to batch-major.
    stacked_tokens = token_steps.stack()
    check(stacked_tokens, ('t', 'batch', 't0'))
    stacked_tokens = tf.squeeze(stacked_tokens, -1)
    batch_major_tokens = tf.transpose(stacked_tokens, [1, 0])
    check(batch_major_tokens, ('batch', 't'))

    result_text = self.tokens_to_text(batch_major_tokens)
    check(result_text, ('batch',))

    if not return_attention:
        return {'text': result_text}

    # Same time-major -> batch-major conversion for the attention weights.
    stacked_attention = attention_steps.stack()
    check(stacked_attention, ('t', 'batch', 't1', 's'))

    stacked_attention = tf.squeeze(stacked_attention, 2)
    check(stacked_attention, ('t', 'batch', 's'))

    batch_major_attention = tf.transpose(stacked_attention, [1, 0, 2])
    check(batch_major_attention, ('batch', 't', 's'))

    return {'text': result_text, 'attention': batch_major_attention}
# Build a fresh `tf.function` so that tracing goes through the current
# `Translator.translate` binding.
@tf.function(
    input_signature=[tf.TensorSpec(shape=[None], dtype=tf.string)])
def tf_translate(self, input_text):
    """Graph-compiled entry point that delegates to `self.translate`.

    The input signature restricts `input_text` to a 1-D string tensor of
    any length.
    """
    translation = self.translate(input_text)
    return translation


Translator.tf_translate = tf_translate
input_text)\n\nprint(result['text'][0].numpy().decode())\nprint(result['text'][1].numpy().decode())\nprint()","metadata":{"id":"d5VdCLxPYrpz","outputId":"b0e36859-576d-4d40-c6df-efb0b3f7a4c6","execution":{"iopub.status.busy":"2023-03-16T17:46:57.329281Z","iopub.execute_input":"2023-03-16T17:46:57.329764Z","iopub.status.idle":"2023-03-16T17:46:57.390001Z","shell.execute_reply.started":"2023-03-16T17:46:57.329722Z","shell.execute_reply":"2023-03-16T17:46:57.389026Z"},"trusted":true},"execution_count":176,"outputs":[{"name":"stdout","text":"my name is loud\nwhere is it ?\n\nCPU times: user 57 ms, sys: 16.7 ms, total: 73.8 ms\nWall time: 53.4 ms\n","output_type":"stream"}]},{"cell_type":"markdown","source":"### Visualize the process","metadata":{"id":"eo5sf4jZaO2l"}},{"cell_type":"markdown","source":"The attention weights returned by the `translate` method show where the model was \"looking\" when it generated each output token.\n\nSo the sum of the attention over the input should return all ones:","metadata":{"id":"FzZzC2cJacTv"}},{"cell_type":"code","source":"a = result['attention'][0]\n\nprint(np.sum(a, axis=-1))","metadata":{"id":"UEd2GljgqQ-0","outputId":"79eecf74-f06e-4d9a-ec44-5338c4df2c8c","trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"Here is the attention distribution for the first output step of the first example. 
#@title Labeled attention plots
def plot_attention(attention, sentence, predicted_sentence):
    """Draw an attention heat-map labelled with input and output tokens.

    Args:
      attention: 2-D attention weights for one example, indexed as
        [output step, input token].
      sentence: Scalar string tensor holding the input sentence; it is
        normalised with `tf_lower_and_split_punct` and split on whitespace
        to produce the x-axis labels.
      predicted_sentence: Scalar string tensor holding the translation; it
        is split on whitespace and '[END]' is appended for the y-axis labels.
    """
    sentence = tf_lower_and_split_punct(sentence).numpy().decode().split()
    predicted_sentence = predicted_sentence.numpy().decode().split() + ['[END]']

    # Crop to the labelled region; the slice may come out smaller than the
    # label lists if the matrix has fewer rows/columns than there are labels.
    attention = np.asarray(attention)[:len(predicted_sentence), :len(sentence)]
    n_output, n_input = attention.shape

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(1, 1, 1)

    ax.matshow(attention, cmap='viridis', vmin=0.0)

    fontdict = {'fontsize': 14}

    # Fix the tick positions first, then attach exactly one label per tick.
    # Setting labels without positions depends on whatever ticks the locator
    # happens to generate and triggers Matplotlib's FixedFormatter warning.
    ax.set_xticks(range(n_input))
    ax.set_xticklabels(sentence[:n_input], fontdict=fontdict, rotation=90)
    ax.set_yticks(range(n_output))
    ax.set_yticklabels(predicted_sentence[:n_output], fontdict=fontdict)

    ax.set_xlabel('Input text')
    ax.set_ylabel('Output text')
    plt.suptitle('Attention weights')
result['text'][i])","metadata":{"id":"rrGawQv2eiA4","outputId":"a8ce33b3-cfaf-49f9-c04f-2f6e89d90eeb","trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"%%time\nthree_input_text = tf.constant([#'sabse best friend', \n #'That toh I know',\n #change_vocab('She was bhunnoing the masalas jub phone ki ghuntee bugee'),\n change_vocab('tum batao'),\n change_vocab('kya karna hai'),\n])\n\nresult = translator.tf_translate(three_input_text)\nprint(result['text'])\nfor tr in result['text']:\n print(tr.numpy().decode())\n\nprint()","metadata":{"id":"WrAM0FDomq3E","outputId":"8641124a-e81a-4aea-ff61-51425a2e0b42","execution":{"iopub.status.busy":"2023-03-16T17:46:57.391628Z","iopub.execute_input":"2023-03-16T17:46:57.392024Z","iopub.status.idle":"2023-03-16T17:46:57.421652Z","shell.execute_reply.started":"2023-03-16T17:46:57.391965Z","shell.execute_reply":"2023-03-16T17:46:57.420668Z"},"trusted":true},"execution_count":177,"outputs":[{"name":"stdout","text":"tf.Tensor([b'you know' b'is there any ?'], shape=(2,), dtype=string)\nyou know\nis there any ?\n\nCPU times: user 26.1 ms, sys: 4.04 ms, total: 30.1 ms\nWall time: 23.2 ms\n","output_type":"stream"}]},{"cell_type":"code","source":"result['text']","metadata":{"id":"-LjFp0AljOaZ","outputId":"8f3de323-4809-4d52-8092-bac9d1e1f8fc","trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"i = 0\nplot_attention(result['attention'][i], three_input_text[i], result['text'][i])","metadata":{"id":"v7QwIMrG-id2","outputId":"02b9f2a4-4ea0-4b45-a294-283b21e1cec9","trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"i = 1\nplot_attention(result['attention'][i], three_input_text[i], result['text'][i])","metadata":{"id":"zYVoVf8P-lr-","outputId":"59fde2f6-58e3-4cfa-e9b1-f2bba1a872a4","trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"i = 2\n# three_input_text currently holds only two sentences (the others are commented\n# out above), so plot a third example only when the batch really contains one\n# instead of indexing past the end of the tensor.\nif i < three_input_text.shape[0]:\n    plot_attention(result['attention'][i], three_input_text[i], 
result['text'][i])","metadata":{"id":"9sFvlZBk-me4","outputId":"7e25b713-c576-4c03-8614-c55bb3c7d83e","trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"long_input_text = tf.constant([inp[-1]])\n\nimport textwrap\nprint('Expected output:\\n', '\\n'.join(textwrap.wrap(targ[-1])))","metadata":{"id":"-FUHFLEvSMbG","outputId":"4ea7bcc6-e870-4959-dc40-dd0e6b1ff60a","trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"result = translator.tf_translate(long_input_text)\n\ni = 0\nplot_attention(result['attention'][i], long_input_text[i], result['text'][i])\n_ = plt.suptitle('Graph')","metadata":{"id":"lDa_8NaN_RUy","outputId":"e8e9580b-e815-465d-dc0b-e42875d3399c","trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"tf.saved_model.save(translator, 'translator1',\n signatures={'serving_default': translator.tf_translate})","metadata":{"id":"OyvxT5V0_X5B","outputId":"ced85551-7dd8-48b7-cf12-b37055831235","execution":{"iopub.status.busy":"2023-03-16T17:47:01.584098Z","iopub.execute_input":"2023-03-16T17:47:01.584497Z","iopub.status.idle":"2023-03-16T17:47:06.183442Z","shell.execute_reply.started":"2023-03-16T17:47:01.584463Z","shell.execute_reply":"2023-03-16T17:47:06.182299Z"},"trusted":true},"execution_count":178,"outputs":[]},{"cell_type":"code","source":"# Use the %cd magic rather than !cd: a ! command runs in its own subshell, so a\n# directory change made there is lost as soon as the command returns.\n%cd 
/kaggle/working","metadata":{"execution":{"iopub.status.busy":"2023-03-16T17:47:06.190128Z","iopub.execute_input":"2023-03-16T17:47:06.190452Z","iopub.status.idle":"2023-03-16T17:47:07.219232Z","shell.execute_reply.started":"2023-03-16T17:47:06.190421Z","shell.execute_reply":"2023-03-16T17:47:07.217784Z"},"trusted":true},"execution_count":179,"outputs":[]},{"cell_type":"code","source":"!ls","metadata":{"execution":{"iopub.status.busy":"2023-03-16T17:47:07.222283Z","iopub.execute_input":"2023-03-16T17:47:07.223150Z","iopub.status.idle":"2023-03-16T17:47:08.261595Z","shell.execute_reply.started":"2023-03-16T17:47:07.223102Z","shell.execute_reply":"2023-03-16T17:47:08.260165Z"},"trusted":true},"execution_count":180,"outputs":[{"name":"stdout","text":"__notebook_source__.ipynb translator.zip translator1\n","output_type":"stream"}]},{"cell_type":"code","source":"!zip -r translator.zip translator1","metadata":{"execution":{"iopub.status.busy":"2023-03-16T17:47:08.265384Z","iopub.execute_input":"2023-03-16T17:47:08.265863Z","iopub.status.idle":"2023-03-16T17:47:19.369936Z","shell.execute_reply.started":"2023-03-16T17:47:08.265818Z","shell.execute_reply":"2023-03-16T17:47:19.368688Z"},"trusted":true},"execution_count":181,"outputs":[{"name":"stdout","text":"updating: translator1/ (stored 0%)\nupdating: translator1/variables/ (stored 0%)\nupdating: translator1/variables/variables.index (deflated 56%)\nupdating: translator1/variables/variables.data-00000-of-00001 (deflated 7%)\nupdating: translator1/saved_model.pb (deflated 74%)\nupdating: translator1/assets/ (stored 0%)\nupdating: translator1/fingerprint.pb (stored 0%)\n","output_type":"stream"}]},{"cell_type":"code","source":"from IPython.display import 
FileLink\nFileLink(r'translator.zip')","metadata":{"execution":{"iopub.status.busy":"2023-03-16T17:47:19.372132Z","iopub.execute_input":"2023-03-16T17:47:19.372458Z","iopub.status.idle":"2023-03-16T17:47:19.381320Z","shell.execute_reply.started":"2023-03-16T17:47:19.372424Z","shell.execute_reply":"2023-03-16T17:47:19.380160Z"},"trusted":true},"execution_count":182,"outputs":[{"execution_count":182,"output_type":"execute_result","data":{"text/plain":"/kaggle/working/translator.zip","text/html":"translator.zip
"},"metadata":{}}]},{"cell_type":"code","source":"reloaded = tf.saved_model.load('translator1')\nresult = reloaded.tf_translate(input_text)","metadata":{"id":"-I0j3i3ekOba","execution":{"iopub.status.busy":"2023-03-16T11:27:57.379083Z","iopub.execute_input":"2023-03-16T11:27:57.379373Z","iopub.status.idle":"2023-03-16T11:28:12.032478Z","shell.execute_reply.started":"2023-03-16T11:27:57.379346Z","shell.execute_reply":"2023-03-16T11:28:12.031392Z"},"trusted":true},"execution_count":133,"outputs":[]},{"cell_type":"code","source":"%%time\nresult = reloaded.tf_translate(input_text)\n\nfor tr in result['text']:\n print(tr.numpy().decode())\n\nprint()","metadata":{"id":"GXZF__FZXJCm","outputId":"2473fa25-b3e1-460d-a647-76c1350f2c81","execution":{"iopub.status.busy":"2023-03-16T11:28:12.034087Z","iopub.execute_input":"2023-03-16T11:28:12.034457Z","iopub.status.idle":"2023-03-16T11:28:12.143606Z","shell.execute_reply.started":"2023-03-16T11:28:12.034419Z","shell.execute_reply":"2023-03-16T11:28:12.142646Z"},"trusted":true},"execution_count":134,"outputs":[{"name":"stdout","text":"where to take ?\nwhat are you doing this , whats love\ndoes your name have his sister\nhave food lost\n\nCPU times: user 127 ms, sys: 11 ms, total: 138 ms\nWall time: 104 ms\n","output_type":"stream"}]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}