ElieMark committed on
Commit
227ccf3
•
1 Parent(s): 62bbe5d

Update chat.py

Files changed (1)
  1. chat.py +93 -93
chat.py CHANGED
@@ -1,94 +1,94 @@
  import streamlit as st
  import pickle
  import pandas as pd
  import re  # regular expressions: re.sub is used to strip unwanted characters from the data
  import tensorflow as tf
  import numpy as np
  import pickle
  from tensorflow.keras.preprocessing.text import Tokenizer
  from tensorflow.keras.preprocessing.sequence import pad_sequences


  def clean_text(text):
      # remove punctuation and special characters that add no value to the model
      text = re.sub(r',', '', text)
      text = re.sub(r'\'', '', text)
      text = re.sub(r'\"', '', text)
      text = re.sub(r'\(', '', text)
      text = re.sub(r'\)', '', text)
      text = re.sub(r'\n', '', text)
      text = re.sub(r'“', '', text)
      text = re.sub(r'”', '', text)
      text = re.sub(r'’', '', text)
      text = re.sub(r'\.', '', text)
      text = re.sub(r';', '', text)
      text = re.sub(r':', '', text)
      text = re.sub(r'\-', '', text)
      return text


  @st.cache_data
  def loadata():
      # load the knowledge base, lower-case it, clean each line and return the list of lines
      data = pd.read_excel("IT_Knowledge_Base_Final_FR.xlsx")
      data.drop("questions", axis=1, inplace=True)
      data = data.to_string()
      lower_text = data.lower()
      split_dataset = lower_text.splitlines()
      final = ''
      for line in split_dataset:
          line = clean_text(line)
          final += '\n' + line
      final_dataset = final.split('\n')
      return final_dataset


  max_vocab = 100000  # cap the tokenizer vocabulary at 100,000 words
  tokenizer = Tokenizer(num_words=max_vocab)  # tokenizer restricted to the max_vocab most frequent words
  tokenizer.fit_on_texts(loadata())
  wor2idx = tokenizer.word_index  # maps each word to its integer index
  input_seq = []  # n-gram sequences of word indices, padded below into a numeric matrix
  for line in loadata():
      token_list = tokenizer.texts_to_sequences([line])[0]
      for i in range(1, len(token_list)):
          n_gram_seq = token_list[:i + 1]
          input_seq.append(n_gram_seq)
  max_seq_length = max(len(x) for x in input_seq)
  input_seq = np.array(pad_sequences(input_seq, maxlen=max_seq_length, padding='pre'))

- model = pickle.load(open("model2.pkl", "rb"))
+ model = pickle.load(open("modelfinalfinal1.pkl", "rb"))


  def predict_words(seed, no_words=50, tokenizer=tokenizer, max_seq_length=max_seq_length):
      # repeatedly predict the next word and append it to the seed text
      for i in range(no_words):
          token_list = tokenizer.texts_to_sequences([seed])[0]
          token_list = pad_sequences([token_list], maxlen=max_seq_length - 1, padding='pre')
          predicted = np.argmax(model.predict(token_list), axis=1)

          new_word = ''

          for word, index in tokenizer.word_index.items():
              if predicted == index:
                  new_word = word
                  break
          seed += " " + new_word
      return seed


  def chatbot(message):
      response = predict_words(message)
      return f"{response}"


  def main():
      st.title("Chatbot")

      user_input = st.text_input("vous:", "")

      if st.button("envoyer"):
          response = chatbot(user_input)
          st.text_area("Chatbot:", value=response, height=100)


  main()
  # pickled_model.predict(X_test)