abdullatifkaban committed
Commit
0178c73
1 Parent(s): 2584832

Upload 5 files

Files changed (5)
  1. app.py +42 -0
  2. requirements.txt +4 -0
  3. sherlock_model.h5 +3 -0
  4. tokenizer.pkl +3 -0
  5. word_index.csv +0 -0
app.py ADDED
@@ -0,0 +1,42 @@
+ import streamlit as st
+ import pandas as pd
+ import numpy as np
+ import pickle
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ from tensorflow.keras.models import load_model
+
+ # Load the trained model
+ model = load_model('sherlock_model.h5')
+
+ # Load the tokenizer and rebuild its word index from the CSV
+ tokenizer = pickle.load(open("tokenizer.pkl", "rb"))
+ word_index_df = pd.read_csv("word_index.csv", header=None, index_col=0)
+ word_index = {}
+ for word, index in word_index_df.to_dict()[1].items():
+     word_index[word] = int(index)
+ tokenizer.word_index = word_index
+
+ st.title('Next Word Generator :writing_hand:')
+ st.write("This app predicts the next word using a model trained on the book 'THE ADVENTURES OF SHERLOCK HOLMES'.")
+ st.write("Enter a few words and choose how many words the model should predict.")
+
+ def next_words(seed_text, n):
+     for _ in range(n):
+         token_list = tokenizer.texts_to_sequences([seed_text])[0]
+         token_list = pad_sequences([token_list], maxlen=17, padding='pre')
+         predicted = int(np.argmax(model.predict(token_list), axis=-1)[0])
+         output_word = ""
+         for word, index in tokenizer.word_index.items():
+             if index == predicted:
+                 output_word = word
+                 break
+         seed_text += " " + output_word
+     return seed_text
+
+ # Get the input text
+ text = st.text_area("Enter text", height=80)
+ n = st.number_input("Word number", 1, 100)
+
+ if st.button("Predict"):
+     sonuc = next_words(text, n)
+     st.info(f'Prediction : {sonuc}')
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ streamlit
+ tensorflow
+ pandas
+ numpy
sherlock_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:db7fbd6be47b84e0b0abab9042ca002581461f9b96786f9cda32aac2c0fce40e
+ size 26528400
tokenizer.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:805a9e499dbc4a3997080ac38b0c57036ce65192089cdce5190009be316a8f85
+ size 324679
word_index.csv ADDED
The diff for this file is too large to render. See raw diff
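For context, the word_index.csv consumed by app.py can be reproduced from the pickled tokenizer. A minimal sketch, assuming only the tokenizer.pkl above and the headerless two-column layout (word, integer index) that app.py reads back:

import pickle
import pandas as pd

# Load the pickled Keras tokenizer shipped in this commit
with open("tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

# Write word -> index as two headerless columns, matching
# pd.read_csv("word_index.csv", header=None, index_col=0) in app.py
pd.Series(tokenizer.word_index).to_csv("word_index.csv", header=False)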