mertbozkurt committed
Commit
2e7d21f
1 Parent(s): 3858ca8
chatbot.py ADDED
@@ -0,0 +1,169 @@
+ import json
+ import pickle
+ import random
+
+ import nltk
+ import numpy as np
+ import tensorflow as tf
+ import tflearn
+ from nltk.stem.lancaster import LancasterStemmer
+
+ nltk.download('punkt')
+ stemmer = LancasterStemmer()
+
+ # import our chat-bot intents file
+ with open('intents.json') as json_data:
+     intents = json.load(json_data)
+
+ bot_name = 'Kevin'
+ words = []
+ classes = []
+ documents = []
+ ignore_words = ['?']
+
+ # loop through each sentence in our intents patterns
+ for intent in intents['intents']:
+     for pattern in intent['patterns']:
+         # tokenize each word in the sentence
+         w = nltk.word_tokenize(pattern)
+         # add to our words list
+         words.extend(w)
+         # add to documents in our corpus
+         documents.append((w, intent['tag']))
+         # add to our classes list
+         if intent['tag'] not in classes:
+             classes.append(intent['tag'])
+
+ # stem and lower each word and remove duplicates
+ words = [stemmer.stem(w.lower()) for w in words if w not in ignore_words]
+ words = sorted(set(words))
+
+ # remove duplicate classes
+ classes = sorted(set(classes))
+
+ print(len(documents), "documents")
+ print(len(classes), "classes", classes)
+ print(len(words), "unique stemmed words", words)
+
+ # create our training data
+ training = []
+ # create an empty array for our output
+ output_empty = [0] * len(classes)
+
+ # training set: a bag of words for each sentence
+ for doc in documents:
+     # list of tokenized words for the pattern
+     pattern_words = doc[0]
+     # stem each word
+     pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]
+     # create our bag-of-words array
+     bag = [1 if w in pattern_words else 0 for w in words]
+
+     # output is a '0' for each tag and '1' for the current tag
+     output_row = list(output_empty)
+     output_row[classes.index(doc[1])] = 1
+
+     training.append([bag, output_row])
+
+ # shuffle our features and turn into an np.array
+ random.shuffle(training)
+ training = np.array(training, dtype=object)
+
+ # create train and test lists
+ train_x = list(training[:, 0])
+ train_y = list(training[:, 1])
+
+ # restore the data structures saved at training time before building the
+ # network, so its dimensions are guaranteed to match the saved checkpoint
+ data = pickle.load(open("model/training_data", "rb"))
+ words = data['words']
+ classes = data['classes']
+ train_x = data['train_x']
+ train_y = data['train_y']
+
+ # Build the neural network
+ net = tflearn.input_data(shape=[None, len(train_x[0])])
+ net = tflearn.fully_connected(net, 8)
+ net = tflearn.fully_connected(net, 8)
+ net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax')
+ net = tflearn.regression(net)
+
+ # Define the model and set up TensorBoard logging
+ model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')
+
+ # if you need to retrain, you can fit the model:
+ # model.fit(train_x, train_y, n_epoch=1000, batch_size=8, show_metric=True)
+
+ # we have a saved model locally; load it
+ model.load('model/model.tflearn')
+
+
+ def clean_up_sentence(sentence):
+     # tokenize the pattern
+     sentence_words = nltk.word_tokenize(sentence)
+     # stem each word
+     sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]
+     return sentence_words
+
+
+ # return a bag-of-words array: 0 or 1 for each vocabulary word present in the sentence
+ def bow(sentence, words, show_details=False):
+     # tokenize the pattern
+     sentence_words = clean_up_sentence(sentence)
+     # bag of words
+     bag = [0] * len(words)
+     for s in sentence_words:
+         for i, w in enumerate(words):
+             if w == s:
+                 bag[i] = 1
+                 if show_details:
+                     print("found in bag: %s" % w)
+     return np.array(bag)
+
+
+ # a data structure to hold user context
+ context = {}
+
+ ERROR_THRESHOLD = 0.25
+
+
+ def classify(sentence):
+     # generate probabilities from the model
+     results = model.predict([bow(sentence, words)])[0]
+     # filter out predictions below the threshold
+     results = [[i, r] for i, r in enumerate(results) if r > ERROR_THRESHOLD]
+     # sort by strength of probability
+     results.sort(key=lambda x: x[1], reverse=True)
+     return_list = []
+     for r in results:
+         return_list.append((classes[r[0]], r[1]))
+     # return a list of (intent, probability) tuples
+     return return_list
+
+
+ def response(sentence, userID='123', show_details=False):
+     results = classify(sentence)
+     # if we have a classification, find the matching intent tag
+     if results:
+         # loop as long as there are matches to process
+         while results:
+             for i in intents['intents']:
+                 # find a tag matching the first result
+                 if i['tag'] == results[0][0]:
+                     # set context for this intent if necessary
+                     if 'context_set' in i:
+                         if show_details:
+                             print('context:', i['context_set'])
+                         context[userID] = i['context_set']
+
+                     # check that this intent is non-contextual or applies to this user's conversation
+                     if 'context_filter' not in i or \
+                             (userID in context and i['context_filter'] == context[userID]):
+                         if show_details:
+                             print('tag:', i['tag'])
+                         # return a random response from the intent
+                         return random.choice(i['responses'])
+
+             results.pop(0)
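Note: chatbot.py restores `model/training_data` and `model/model.tflearn`, but the commit only adds the resulting binaries, not the script that wrote them. A minimal sketch of that training-side step, assuming the same `model`, `words`, `classes`, `train_x`, and `train_y` built above (illustrative, not part of this commit):

    import pickle

    # train the tflearn network defined in chatbot.py and save the checkpoint
    # (paths are assumptions, chosen to match what chatbot.py later loads)
    model.fit(train_x, train_y, n_epoch=1000, batch_size=8, show_metric=True)
    model.save('model/model.tflearn')

    # persist the vocabulary and training arrays that chatbot.py restores
    pickle.dump({'words': words, 'classes': classes,
                 'train_x': train_x, 'train_y': train_y},
                open('model/training_data', 'wb'))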
chatbot_streamlit.py ADDED
@@ -0,0 +1,28 @@
+ import streamlit as st
+ from streamlit_chat import message as st_message
+
+ from chatbot import response, bot_name
+
+ st.set_page_config(
+     page_title="AI - Kevin",
+     page_icon=":robot:"
+ )
+
+ # keep the conversation history across Streamlit reruns
+ if "history" not in st.session_state:
+     st.session_state.history = []
+
+ st.title(bot_name)
+
+
+ def on_input_change():
+     user_message = st.session_state.input_text
+     res = response(user_message)
+     if res is None:
+         # response() returns None when no intent clears the threshold
+         res = "Sorry, I don't understand."
+     st.session_state.history.append({"message": user_message, "is_user": True})
+     st.session_state.history.append({"message": res, "is_user": False})
+
+
+ st.text_input("Ask me about AI", key="input_text", on_change=on_input_change)
+
+ # render the conversation so far
+ for chat in st.session_state.history:
+     st_message(**chat)  # unpack the message/is_user keyword arguments
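As with any Streamlit app, this one is launched with `streamlit run chatbot_streamlit.py`. Streamlit executes the `on_change` callback before rerunning the script, so `st.session_state.history` already contains the new exchange by the time the render loop at the bottom of the file draws it.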
intents.json ADDED
@@ -0,0 +1,43 @@
+ {"intents": [
+     {"tag": "greeting",
+      "patterns": ["Hi", "How are you", "Is anyone there?", "Hello", "Good day"],
+      "responses": ["Hello, thanks for visiting", "Good to see you again", "Hi there, how can I help?"],
+      "context_set": ""
+     },
+     {"tag": "goodbye",
+      "patterns": ["Bye", "See you later", "Goodbye"],
+      "responses": ["See you later, thanks for visiting", "Have a nice day", "Bye! Come back again soon."]
+     },
+     {"tag": "thanks",
+      "patterns": ["Thanks", "Thank you", "That's helpful"],
+      "responses": ["Happy to help!", "Any time!", "My pleasure"]
+     },
+     {"tag": "ai",
+      "patterns": ["What is ai?", "Do you know ai?", "Can you explain ai?"],
+      "responses": ["Artificial intelligence (AI) refers to the simulation of human intelligence in machines.", "AI refers to machines that are programmed to think like humans and mimic their actions."]
+     },
+     {"tag": "overfitting",
+      "patterns": ["What is the overfitting?", "Do you know overfitting?"],
+      "responses": ["Overfitting occurs when a statistical model fits exactly against its training data.", "Overfitting occurs when the model has high variance."]
+     },
+     {"tag": "machine",
+      "patterns": ["Can machines think?"],
+      "responses": ["Yes, I think so."]
+     },
+     {"tag": "underfitting",
+      "patterns": ["What is the underfitting?", "Do you know underfitting?"],
+      "responses": ["Underfitting occurs when a model is unable to capture the relationship between the input and output variables accurately.", "When the model performs poorly even on the training data, you have underfitting."]
+     },
+     {"tag": "nlp",
+      "patterns": ["What is nlp ?", "Can you explain nlp ?", "Do you know nlp?"],
+      "responses": ["Do you mean Natural Language Processing?"],
+      "context_set": "nlp"
+     },
+     {"tag": "yes",
+      "patterns": ["yes", "sure", "absolutely"],
+      "responses": ["NLP is the ability of a computer program to understand human language as it is spoken and written.", "NLP is a collective term referring to automatic computational processing of human languages."],
+      "context_filter": "nlp"
+     }
+ ]
+ }
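The `context_set` / `context_filter` pair above is what makes the "yes" intent conditional: the "nlp" intent stores "nlp" in the user's context, and only then can "yes" match. A quick illustration via the `response` function from chatbot.py (the exact replies depend on the trained model, so the outputs shown are assumptions):

    from chatbot import response

    # should hit the "nlp" intent, which sets context['123'] = 'nlp'
    print(response("What is nlp ?"))  # e.g. "Do you mean Natural Language Processing?"

    # with the context set, "yes" now passes the context_filter check
    print(response("yes"))            # e.g. one of the two NLP definitions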
model/checkpoint ADDED
@@ -0,0 +1,2 @@
+ model_checkpoint_path: "/content/model.tflearn"
+ all_model_checkpoint_paths: "/content/model.tflearn"
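Note: these checkpoint paths point at `/content/`, the Google Colab working directory, suggesting the model was trained in Colab. This appears harmless here, since `model.load('model/model.tflearn')` in chatbot.py passes the path straight to the TensorFlow saver rather than consulting this file.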
model/model.tflearn.data-00000-of-00001 ADDED
Binary file (5.13 kB)
 
model/model.tflearn.index ADDED
Binary file (887 Bytes)
 
model/model.tflearn.meta ADDED
Binary file (102 kB)
 
model/training_data ADDED
Binary file (2.91 kB)
 
nltk.txt ADDED
@@ -0,0 +1 @@
+ punkt
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ streamlit==1.11.0
+ streamlit_chat==0.0.2.1
+ nltk==3.5
+ numpy==1.23.1
+ tflearn==0.5.0
+ tensorflow-cpu==2.9.1
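These pins install with `pip install -r requirements.txt`; `tensorflow-cpu` is the CPU-only TensorFlow build, which keeps the install small on hosts without a GPU. The separate `nltk.txt` (listing `punkt`) appears to follow a hosting-platform convention for pre-downloading NLTK data, complementing the `nltk.download('punkt')` call in chatbot.py.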