{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d5e3e67f",
   "metadata": {},
   "outputs": [],
   "source": [
    "from tkinter import *\n",
    "import pickle\n",
    "import numpy as np\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from tensorflow.keras.models import Model\n",
    "from tensorflow.keras import models\n",
    "from tensorflow.keras.layers import Input,LSTM,Dense\n",
    "\n",
    "# Character-level binary vectorizer; decode_sequence_translation refits it on\n",
    "# the target characters to one-hot encode the start token.\n",
    "# NOTE(review): scikit-learn documents `tokenizer` and `stop_words` as applying\n",
    "# only when analyzer == 'word', so they look inert here - confirm before removing.\n",
    "cv=CountVectorizer(binary=True,tokenizer=lambda txt: txt.split(),stop_words=None,analyzer='char')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7e54fc77",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the character sets and size metadata from the pickle file.\n",
    "# NOTE: unpickling can execute arbitrary code - only load files you trust.\n",
    "with open(\"training_data_translation.pkl\", \"rb\") as pkl_file:\n",
    "    datafile = pickle.load(pkl_file)\n",
    "\n",
    "input_characters = datafile['input_characters']\n",
    "target_characters = datafile['target_characters']\n",
    "max_input_length = datafile['max_input_length']\n",
    "max_target_length = datafile['max_target_length']\n",
    "num_en_chars = datafile['num_en_chars']\n",
    "num_dec_chars = datafile['num_dec_chars']\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ec54e3fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Inference model\n",
    "#load the model\n",
    "model = models.load_model(\"model_translation\")\n",
    "#construct encoder model from the output of the layer at index 2\n",
    "#discard the encoder output and store only states.\n",
    "enc_outputs, state_h_enc, state_c_enc = model.layers[2].output\n",
    "#add input object and state from the layer.\n",
    "en_model = Model(model.input[0], [state_h_enc, state_c_enc])\n",
    "\n",
    "#size of the hidden and cell state vectors fed to the decoder.\n",
    "#presumably the latent dimension the LSTM layers were trained with - confirm.\n",
    "LATENT_DIM = 256\n",
    "#create Input object for hidden and cell state for decoder\n",
    "dec_state_input_h = Input(shape=(LATENT_DIM,))\n",
    "dec_state_input_c = Input(shape=(LATENT_DIM,))\n",
    "dec_states_inputs = [dec_state_input_h, dec_state_input_c]\n",
    "#reuse the layer at index 3 on the decoder input, initialized with the state inputs.\n",
    "dec_lstm = model.layers[3]\n",
    "dec_outputs, state_h_dec, state_c_dec = dec_lstm(\n",
    "    model.input[1], initial_state=dec_states_inputs\n",
    ")\n",
    "dec_states = [state_h_dec, state_c_dec]\n",
    "dec_dense = model.layers[4]\n",
    "dec_outputs = dec_dense(dec_outputs)\n",
    "#create Model taking the decoder input plus the state inputs\n",
    "#and returning the decoder output together with the new decoder states.\n",
    "dec_model = Model(\n",
    "    [model.input[1]] + dec_states_inputs, [dec_outputs] + dec_states\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "835bebec",
   "metadata": {},
   "outputs": [],
   "source": [
    "def decode_sequence_translation(input_seq):\n",
    "    \"\"\"Decode an encoded input sequence into text, one character at a time.\n",
    "\n",
    "    Args:\n",
    "        input_seq: array passed as-is to ``en_model.predict``\n",
    "            (e.g. the output of ``bagofcharacter_translation``).\n",
    "\n",
    "    Returns:\n",
    "        The decoded string. Decoding stops right after a newline character\n",
    "        is produced (the newline is kept in the result) or once the result\n",
    "        is longer than ``max_target_length``.\n",
    "    \"\"\"\n",
    "    #create a dictionary with a key as index and value as characters.\n",
    "    reverse_target_char_index = dict(enumerate(target_characters))\n",
    "    #get the states from the user input sequence\n",
    "    #(verbose=0 keeps Keras from printing a progress bar for every call)\n",
    "    states_value = en_model.predict(input_seq, verbose=0)\n",
    "\n",
    "    #fit the vectorizer on the target characters and one-hot encode the\n",
    "    #start character '\\t' as a target sequence of length 1.\n",
    "    co = cv.fit(target_characters)\n",
    "    target_seq = np.array([co.transform(list(\"\\t\")).toarray().tolist()], dtype=\"float32\")\n",
    "\n",
    "    #append every predicted character in decoded sentence\n",
    "    decoded_sentence = \"\"\n",
    "\n",
    "    while True:\n",
    "        #get predicted output together with the new hidden and cell state.\n",
    "        output_chars, h, c = dec_model.predict([target_seq] + states_value, verbose=0)\n",
    "\n",
    "        #get the index and from the dictionary get the character.\n",
    "        char_index = np.argmax(output_chars[0, -1, :])\n",
    "        text_char = reverse_target_char_index[char_index]\n",
    "        decoded_sentence += text_char\n",
    "\n",
    "        # Exit condition: either hit max length\n",
    "        # or find a stop character.\n",
    "        if text_char == \"\\n\" or len(decoded_sentence) > max_target_length:\n",
    "            break\n",
    "\n",
    "        #feed the predicted character and the new states into the next step.\n",
    "        target_seq = np.zeros((1, 1, num_dec_chars))\n",
    "        target_seq[0, 0, char_index] = 1.0\n",
    "        states_value = [h, c]\n",
    "\n",
    "    #return the decoded sentence\n",
    "    return decoded_sentence\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "911511bb",
   "metadata": {},
   "outputs": [],
   "source": [
    "def bagofcharacter_translation(input_t):\n",
    "    \"\"\"One-hot encode ``input_t`` character by character.\n",
    "\n",
    "    Args:\n",
    "        input_t: text to encode.\n",
    "\n",
    "    Returns:\n",
    "        float32 array holding a single sample with one row per character of\n",
    "        ``input_t``. Text shorter than ``max_input_length`` is padded up to\n",
    "        that length with rows whose first entry is 1 and the rest 0; longer\n",
    "        text is neither padded nor truncated.\n",
    "    \"\"\"\n",
    "    #local vectorizer fitted on the input alphabet (kept separate from the\n",
    "    #module-level `cv`, which is fitted on the target alphabet).\n",
    "    vectorizer = CountVectorizer(binary=True, tokenizer=lambda txt: txt.split(),\n",
    "                                 stop_words=None, analyzer='char')\n",
    "    vectorizer.fit(input_characters)\n",
    "\n",
    "    #padding row: 1 in the first position, 0 everywhere else.\n",
    "    pad_en = [1] + [0] * (len(input_characters) - 1)\n",
    "\n",
    "    encoded_rows = vectorizer.transform(list(input_t)).toarray().tolist()\n",
    "    #pad short inputs up to max_input_length (no-op when already long enough).\n",
    "    encoded_rows.extend([pad_en] * max(0, max_input_length - len(input_t)))\n",
    "\n",
    "    return np.array([encoded_rows], dtype=\"float32\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2732c86d",
   "metadata": {},
   "outputs": [],
   "source": [
    "sent = input()\n",
    "\n",
    "#encode the sentence the user typed (lower-cased, with a closing \".\")\n",
    "#and run it through the encoder/decoder pair.\n",
    "en_in_data = bagofcharacter_translation(sent.lower() + \".\")\n",
    "translation = decode_sequence_translation(en_in_data)\n",
    "\n",
    "output_texts = \" \" + translation\n",
    "print(output_texts)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}