Spaces:

mertbozkurt
/

basic-chatbot

Runtime error

App Files Files Community

mertbozkurt committed on Sep 29, 2022

Commit

2e7d21f

•

1 Parent(s): 3858ca8

add

Browse files

Files changed (10) hide show

chatbot.py +169 -0
chatbot_streamlit.py +28 -0
intents.json +43 -0
model/checkpoint +2 -0
model/model.tflearn.data-00000-of-00001 +0 -0
model/model.tflearn.index +0 -0
model/model.tflearn.meta +0 -0
model/training_data +0 -0
nltk.txt +1 -0
requirements.txt +6 -0

chatbot.py ADDED Viewed

	@@ -0,0 +1,169 @@

+import nltk
+import numpy as np
+import tflearn
+import tensorflow as tf
+import random
+import json
+import nltk
+from nltk.stem.lancaster import LancasterStemmer
+nltk.download('punkt')
+stemmer = LancasterStemmer()
+# import our chat-bot intents file
+with open('intents.json') as json_data:
+    intents = json.load(json_data)
+bot_name = 'Kevin'
+words = []
+classes = []
+documents = []
+ignore_words = ['?']
+# loop through each sentence in our intents patterns
+for intent in intents['intents']:
+    for pattern in intent['patterns']:
+        # tokenize each word in the sentence
+        w = nltk.word_tokenize(pattern)
+        # add to our words list
+        words.extend(w)
+        # add to documents in our corpus
+        documents.append((w, intent['tag']))
+        # add to our classes list
+        if intent['tag'] not in classes:
+            classes.append(intent['tag'])
+# stem and lower each word and remove duplicates
+words = [stemmer.stem(w.lower()) for w in words if w not in ignore_words]
+words = sorted(list(set(words)))
+# remove duplicates
+classes = sorted(list(set(classes)))
+print (len(documents), "documents")
+print (len(classes), "classes", classes)
+print (len(words), "unique stemmed words", words)
+# create our training data
+training = []
+output = []
+# create an empty array for our output
+output_empty = [0] * len(classes)
+# training set, bag of words for each sentence
+for doc in documents:
+    # initialize our bag of words
+    bag = []
+    # list of tokenized words for the pattern
+    pattern_words = doc[0]
+    # stem each word
+    pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]
+    # create our bag of words array
+    for w in words:
+        bag.append(1) if w in pattern_words else bag.append(0)
+    # output is a '0' for each tag and '1' for current tag
+    output_row = list(output_empty)
+    output_row[classes.index(doc[1])] = 1
+    training.append([bag, output_row])
+# shuffle our features and turn into np.array
+random.shuffle(training)
+training = np.array(training)
+# create train and test lists
+train_x = list(training[:,0])
+train_y = list(training[:,1])
+# Build neural network
+net = tflearn.input_data(shape=[None, len(train_x[0])])
+net = tflearn.fully_connected(net, 8)
+net = tflearn.fully_connected(net, 8)
+net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax')
+net = tflearn.regression(net)
+# Define model and setup tensorboard
+model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')
+#if u need u can fit the model
+# model.fit(train_x, train_y, n_epoch=1000, batch_size=8, show_metric=True)
+# restore all of our data structures
+import pickle
+data = pickle.load( open( "model/training_data", "rb" ) )
+words = data['words']
+classes = data['classes']
+train_x = data['train_x']
+train_y = data['train_y']
+#we have saved model on local
+# load our saved model
+model.load('model/model.tflearn')
+def clean_up_sentence(sentence):
+    # tokenize the pattern
+    sentence_words = nltk.word_tokenize(sentence)
+    # stem each word
+    sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]
+    return sentence_words
+# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence
+def bow(sentence, words, show_details=False):
+    # tokenize the pattern
+    sentence_words = clean_up_sentence(sentence)
+    # bag of words
+    bag = [0]*len(words)
+    for s in sentence_words:
+        for i,w in enumerate(words):
+            if w == s:
+                bag[i] = 1
+                if show_details:
+                    print ("found in bag: %s" % w)
+    return(np.array(bag))
+# create a data structure to hold user context
+context = {}
+ERROR_THRESHOLD = 0.25
+def classify(sentence):
+    # generate probabilities from the model
+    results = model.predict([bow(sentence, words)])[0]
+    # filter out predictions below a threshold
+    results = [[i,r] for i,r in enumerate(results) if r>ERROR_THRESHOLD]
+    # sort by strength of probability
+    results.sort(key=lambda x: x[1], reverse=True)
+    return_list = []
+    for r in results:
+        return_list.append((classes[r[0]], r[1]))
+    # return tuple of intent and probability
+    return return_list
+def response(sentence, userID='123', show_details=False):
+    results = classify(sentence)
+    # if we have a classification then find the matching intent tag
+    if results:
+        # loop as long as there are matches to process
+        while results:
+            for i in intents['intents']:
+                # find a tag matching the first result
+                if i['tag'] == results[0][0]:
+                    # set context for this intent if necessary
+                    if 'context_set' in i:
+                        if show_details: print ('context:', i['context_set'])
+                        context[userID] = i['context_set']
+                    # check if this intent is contextual and applies to this user's conversation
+                    if not 'context_filter' in i or \
+                        (userID in context and 'context_filter' in i and i['context_filter'] == context[userID]):
+                        if show_details: print ('tag:', i['tag'])
+                        # a random response from the intent
+                        return random.choice(i['responses'])
+                    #print(random.choice(i['responses']))
+            results.pop(0)

chatbot_streamlit.py ADDED Viewed

	@@ -0,0 +1,28 @@

+import streamlit as st
+from streamlit_chat import message as st_message
+from chatbot import response, bot_name
+st.set_page_config(
+    page_title="AI- Kevin",
+    page_icon=":robot:"
+)
+if "history" not in st.session_state:
+    st.session_state.history = []
+st.title(bot_name)
+def ol():
+    user_message = st.session_state.input_text
+    res= response(user_message)
+    st.session_state.history.append({"message": user_message, "is_user": True})
+    st.session_state.history.append({"message": res, "is_user": False})
+#user_message = st.session_state.input_text
+#result = model.generate(**inputs)
+st.text_input("Ask me about AI", key="input_text", on_change=ol)
+for chat1 in st.session_state.history:
+    st_message(**chat1)  # unpacking

intents.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{"intents": [
+        {"tag": "greeting",
+         "patterns": ["Hi", "How are you", "Is anyone there?", "Hello", "Good day"],
+         "responses": ["Hello, thanks for visiting", "Good to see you again", "Hi there, how can I help?"],
+         "context_set": ""
+        },
+        {"tag": "goodbye",
+         "patterns": ["Bye", "See you later", "Goodbye"],
+         "responses": ["See you later, thanks for visiting", "Have a nice day", "Bye! Come back again soon."]
+        },
+        {"tag": "thanks",
+         "patterns": ["Thanks", "Thank you", "That's helpful"],
+         "responses": ["Happy to help!", "Any time!", "My pleasure"]
+        },
+        {"tag": "ai",
+         "patterns": ["What is ai?", "Do you know ai?", "Can you explain ai?" ],
+         "responses": ["Artificial intelligence (AI) refers to the simulation of human intelligence", "AI refers to machines that are programmed to think like humans and mimic their actions."]
+        },
+        {"tag": "overfitting",
+         "patterns": ["What is the overfitting?", "Do you know overfitting?" ],
+         "responses": ["Overfitting occurs when a statistical model fits exactly against its training data.", "Overfitting occurs when the model has a high variance"]
+        },
+        {"tag": "machine",
+         "patterns": ["Can machines think?" ],
+         "responses": ["Yes i think so " ]
+        },
+        {"tag": "underfitting",
+         "patterns": ["What is the underfitting?", "Do you know underfitting?"],
+         "responses": ["Underfitting occurs when a data model is unable to capture the relationship between the input and output variables accurately", "When the model performs poorly on the training data you have underfitting "]
+        },
+        {"tag": "nlp",
+         "patterns": ["What is nlp ?", "Can you explain nlp ?", "Do you know nlp?" ],
+         "responses": ["Do you mean Natural language processing "],
+         "context_set": "nlp"
+        },
+        {"tag": "yes",
+         "patterns": ["yes", "sure", "absolutely"],
+         "responses": ["NLP is the ability of a computer program to understand human language as it is spoken and written", "NLP is a collective term referring to automatic computational processing of human languages"],
+         "context_filter": "nlp"
+        }
+   ]
+}

model/checkpoint ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ model_checkpoint_path: "/content/model.tflearn"
2	+ all_model_checkpoint_paths: "/content/model.tflearn"

model/model.tflearn.data-00000-of-00001 ADDED Viewed

Binary file (5.13 kB). View file

model/model.tflearn.index ADDED Viewed

Binary file (887 Bytes). View file

model/model.tflearn.meta ADDED Viewed

Binary file (102 kB). View file

model/training_data ADDED Viewed

Binary file (2.91 kB). View file

nltk.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ punkt

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+streamlit ==  1.11.0
+streamlit_chat ==  0.0.2.1
+nltk ==  3.5
+numpy ==  1.23.1
+tflearn ==  0.5.0
+tensorflow-cpu==2.9.1