# NLP_FULL_APP / pages/7_NEXT_WORD_PREDICTOR.py
import streamlit as st
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, LSTM

st.set_page_config(page_title="NLP WEB APP")
st.title("NEXT WORD PREDICTOR")
st.sidebar.success("Select a page above")
string1 = st.text_area("Enter the training text (note: training may take a while depending on the data size)")
test = st.text_input("ENTER THE WORD")
number = st.number_input("Enter the number of next words", min_value=1, step=1)
number = int(number)
if st.button("PREDICT"):
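    # Fit a word-level tokenizer on the training text so each word gets an integer index.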
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts([string1])
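
    # Build n-gram training sequences: every prefix (length >= 2) of each sentence
    # becomes one example, and its last token is the word the model learns to predict.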
    input_sequences = []
    for sentence in string1.split("\n"):
        tokenized_sentences = tokenizer.texts_to_sequences([sentence])[0]
        for i in range(1, len(tokenized_sentences)):
            input_sequences.append(tokenized_sentences[:i + 1])
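
    # Pad all sequences to a common length, then split each row into input tokens
    # (everything but the last) and the target word (the last token), one-hot encoded.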
    max_len = max(len(x) for x in input_sequences)
    use = max_len - 1
    padded_input_sentences = pad_sequences(input_sequences, maxlen=max_len, padding="pre")
    X = padded_input_sentences[:, :-1]
    Y = padded_input_sentences[:, -1]
    num_class = len(tokenizer.word_index)
    Y = to_categorical(Y, num_classes=num_class + 1)
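
    # Next-word model: embedding layer -> single LSTM -> softmax over the vocabulary.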
    model = Sequential()
    model.add(Embedding(num_class + 1, 100, input_length=use))
    model.add(LSTM(250))
    model.add(Dense(num_class + 1, activation="softmax"))
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
    model.fit(X, Y, epochs=100)
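
    # Greedy generation: repeatedly predict the most likely next word and append it,
    # feeding the extended text back in on each step.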
    for i in range(number):
        output_token = tokenizer.texts_to_sequences([test])[0]
        padded_token = pad_sequences([output_token], maxlen=use, padding="pre")
        output = np.argmax(model.predict(padded_token))
        for word, index in tokenizer.word_index.items():
            if index == output:
                test = test + " " + word
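
    # Show the seed text followed by the generated words.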
    st.header(test)