|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
import re |
|
import pickle |
|
from tensorflow.keras.preprocessing.sequence import pad_sequences |
|
from tensorflow.keras.models import load_model |
|
|
|
|
|
# --- Load trained artifacts at startup ---------------------------------------
# Keras model trained to predict the next word over the Sherlock Holmes corpus.
model = load_model('sherlock_model.h5')

# Use a context manager so the pickle file handle is closed deterministically
# (the original `pickle.load(open(...))` leaked the handle).
# NOTE(review): pickle.load on an untrusted file is unsafe — acceptable here
# only because the artifact ships with the app.
with open("tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

# word_index.csv: column 0 = vocabulary word (used as the index),
# column 1 = integer token id (pandas names it 1 because header=None).
word_index_df = pd.read_csv("word_index.csv", header=None, index_col=0)

# Rebuild word -> id, coercing ids to plain int (CSV values may arrive as
# numpy integers or strings), then install it on the tokenizer so
# texts_to_sequences uses the CSV vocabulary.
word_index = {word: int(index) for word, index in word_index_df.to_dict()[1].items()}
tokenizer.word_index = word_index
|
|
|
# --- Page header and usage instructions --------------------------------------
st.title('Next Word Generator :writing_hand:')

# Explain the model's training corpus and how to use the app.
st.write("This app predicts the next word using a model trained on words from the book 'THE ADVENTURES OF SHERLOCK HOLMES'.")

st.write("Write a few words and indicate how many words you want them to guess.")
|
|
|
def next_words(seed_text, n, maxlen=17):
    """Append *n* model-predicted words to *seed_text* and return the result.

    Args:
        seed_text: Starting text the model continues from.
        n: Number of words to generate.
        maxlen: Padded sequence length the model expects (default 17,
            matching the training configuration — confirm against training).

    Returns:
        ``seed_text`` extended with ``n`` generated words, space-separated.
        An unknown predicted id appends an empty word, as the original did.
    """
    # Build the reverse id -> word map once, instead of scanning the whole
    # vocabulary for every generated word (the original was O(vocab) per word).
    index_word = {index: word for word, index in tokenizer.word_index.items()}
    for _ in range(n):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=maxlen, padding='pre')
        # argmax over the final axis yields a length-1 array for a single
        # input; int(...) collapses it so the dict lookup is exact (the
        # original compared an int to a numpy array and relied on
        # broadcasting truthiness).
        predicted = int(np.argmax(model.predict(token_list), axis=-1)[0])
        seed_text += " " + index_word.get(predicted, "")
    return seed_text
|
|
|
|
|
# --- User input widgets and prediction trigger -------------------------------
# Seed text the model will continue from.
text = st.text_area("Enter text", height=80)
# How many words to generate (bounded 1..100).
n = st.number_input("Word number", 1, 100)

if st.button("Predict"):
    # Generate the continuation and show it in an info banner.
    prediction = next_words(text, n)
    st.info(f'Prediction : {prediction}')