abdullatifkaban
commited on
Commit
•
0178c73
1
Parent(s):
2584832
Upload 5 files
Browse files- app.py +42 -0
- requirements.txt +1 -0
- sherlock_model.h5 +3 -0
- tokenizer.pkl +3 -0
- word_index.csv +0 -0
app.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import re
|
5 |
+
import pickle
|
6 |
+
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
7 |
+
from tensorflow.keras.models import load_model
|
8 |
+
|
9 |
+
# Load the trained next-word model (Keras HDF5 checkpoint) at import time.
model = load_model('sherlock_model.h5')

# NOTE(review): unpickling executes arbitrary code if the file is untrusted;
# acceptable here only because tokenizer.pkl ships with the app itself.
# Use a context manager so the file handle is always closed.
with open("tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

# Rebuild the word -> integer-index vocabulary from CSV (col 0 = word,
# col 1 = index) and attach it to the tokenizer so texts_to_sequences
# uses exactly this mapping.
word_index_df = pd.read_csv("word_index.csv", header=None, index_col=0)
word_index = {word: int(index) for word, index in word_index_df.to_dict()[1].items()}
tokenizer.word_index = word_index
|
18 |
+
|
19 |
+
# Page header and usage instructions for the Streamlit UI.
APP_TITLE = 'Next Word Generator :writing_hand:'
st.title(APP_TITLE)
for intro_line in (
    "This app predicts the next word using a model trained on words from the book 'THE ADVENTURES OF SHERLOCK HOLMES'.",
    "Write a few words and indicate how many words you want them to guess.",
):
    st.write(intro_line)
|
22 |
+
|
23 |
+
def next_words(seed_text, n, maxlen=17):
    """Append *n* model-predicted words to *seed_text* and return the result.

    Args:
        seed_text: Prompt typed by the user; each predicted word is appended.
        n: Number of words to generate.
        maxlen: Sequence length the model expects (pad/truncate target).
            Defaults to 17, the value the model was trained with.

    Returns:
        The seed text followed by the n generated words, space-separated.
    """
    # Invert the vocabulary once (index -> word) instead of linearly
    # scanning word_index for every generated word (was O(V) per word).
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}
    for _ in range(n):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        padded = pad_sequences([token_list], maxlen=maxlen, padding='pre')
        # predict() returns shape (1, vocab_size); take the single best
        # class id as a plain int (the original compared an int against an
        # ndarray, relying on single-element-array truthiness).
        predicted_index = int(np.argmax(model.predict(padded), axis=-1)[0])
        # Fall back to "" like the original when the id is out of vocabulary.
        seed_text += " " + index_to_word.get(predicted_index, "")
    return seed_text
|
35 |
+
|
36 |
+
# Collect the seed text and how many words to generate.
text = st.text_area("Enter text", height=80)
n = st.number_input("Word number", 1, 100)

if st.button("Predict"):
    # Guard against an empty prompt: texts_to_sequences would yield an
    # all-padding sequence and the prediction would be meaningless.
    if not text.strip():
        st.warning("Please enter some text first.")
    else:
        result = next_words(text, n)
        st.info(f'Prediction : {result}')
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
streamlit
|
sherlock_model.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db7fbd6be47b84e0b0abab9042ca002581461f9b96786f9cda32aac2c0fce40e
|
3 |
+
size 26528400
|
tokenizer.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:805a9e499dbc4a3997080ac38b0c57036ce65192089cdce5190009be316a8f85
|
3 |
+
size 324679
|
word_index.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|