Create app.py
machine_learning.py ADDED +154 -0

@@ -0,0 +1,154 @@
from statistics import mode
import spacy
import re
import numpy
import tflearn
import tensorflow


def convert_txt_to_DataFrame(textFile):
    with open(textFile) as f:
        lines = f.readlines()
    # remove all '\n' characters in all lines
    lines = list(map(lambda x: x.strip('\n'), lines))

    temp_dict = {}
    for x in lines:
        if '- -' in x:
            # add new key to dict
            temp_dict[x.strip('- -')] = []

    for i, x in enumerate(lines):
        if '- -' in x:
            # '- -' = the question
            # ' -' = the answers, there could be multiple answers for 1 question
            # add the values (answers) to the question (key) of the dictionary
            temp_dict[x.strip('- -')].append(lines[i+1].strip(' -'))

    return temp_dict
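# Illustrative example (an assumption about the data layout, not shown in this commit):
# a question file such as hello.txt is expected to hold YAML-style pairs like
#   - - hi there
#     - Hello! How can I help you?
# so convert_txt_to_DataFrame('hello.txt') would return
#   {'hi there': ['Hello! How can I help you?']}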

npl = spacy.load('en_core_web_md')

def convert_to_list(file):
    data = convert_txt_to_DataFrame(file)

    question = []
    temp_bag = []
    ans = []

    for x in data:
        ans.append(data[x])
        x = re.sub(r'[^\w\s]', '', x)
        temp_bag.append(x)

    for x in temp_bag:
        x1 = npl(x)
        temp = []
        for z in x1:
            temp.append(z.lemma_)
        question.append(temp)

    return question, ans
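# For the hello.txt example above, convert_to_list would return roughly
# ([['hi', 'there']], [['Hello! How can I help you?']]): one lemmatized token list
# per question, plus the matching answers (illustrative values only).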

def addQuestion(file, name):
    temp_ques = {}
    temp_ans = {}

    ques, ans = convert_to_list(file)

    temp_ques[name] = ques
    temp_ans[name] = ans

    # dict.update mutates the module-level ml_data/ans_data in place,
    # so the returned tuple is just (None, None)
    return (ml_data.update(temp_ques), ans_data.update(temp_ans))

ml_data = {}
ans_data = {}

addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/hello.txt', 'hello')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/how are you.txt', 'how are you')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/interest.txt', 'interest')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/who are you.txt', 'who are you')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/you a robot.txt', 'you a robot')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/tell me about yourself.txt', 'tell me about yourself')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/what language python.txt', 'what language python')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/What is AI.txt', 'what is ai')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/Tell me a joke.txt', 'tell me a joke')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/you are stupid.txt', 'you are stupid')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/Pollak Library.txt', 'pollak library')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/Where is the building.txt', 'where is the building')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/hungry.txt', 'hungry')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/What is your major.txt', 'what is your major')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/free time.txt', 'free time')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/I need help.txt', 'i need help')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/your food.txt', 'your food')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/what time.txt', 'what time')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/weather.txt', 'weather')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/your job.txt', 'your job')
addQuestion('/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/old.txt', 'age')

labels = []
for x in ml_data:
    labels.append(x)

labels = sorted(labels)
# labels

words = []
for x in ml_data:
    for z in ml_data[x]:
        words.extend(z)

words = sorted(list(set(words)))
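# The training data below is a plain bag-of-words encoding (see the tutorial linked
# underneath): each question category becomes a 0/1 vector over the sorted vocabulary
# `words`, and each output row is a one-hot vector over `labels`. For instance, with an
# illustrative vocabulary ['be', 'hi', 'how', 'there', 'you'], the tokens ['hi', 'there']
# would encode to [0, 1, 0, 1, 0].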

# source: https://www.techwithtim.net/tutorials/ai-chatbot/part-2/
out_empty = [0 for _ in range(len(labels))]
training = []
output = []

for x, ques in enumerate(ml_data):
    print(f"question: {ques}\n\n")
    bag = []
    wrds = []

    for w in ml_data[ques]:
        wrds.extend(w)

    for w in words:
        if w in wrds:
            bag.append(1)
            print(f"{w} = 1")
        else:
            bag.append(0)
            # print(f"words: {w} = 0")

    output_row = out_empty[:]
    output_row[labels.index(ques)] = 1
    print('\n', output_row)

    training.append(bag)
    output.append(output_row)
    print(labels)
    print("\n\n****\n\n")

training = numpy.array(training)
output = numpy.array(output)
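# At this point `training` has shape (len(labels), len(words)) and `output` has shape
# (len(labels), len(labels)): one bag-of-words row and one one-hot row per question category.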


tensorflow.compat.v1.reset_default_graph()

net = tflearn.input_data(shape=[None, len(training[0])])
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, len(output[0]), activation="softmax")
net = tflearn.regression(net)

model = tflearn.DNN(net)

# reuse a previously saved model if one exists; otherwise train from scratch and save it
try:
    model.load('minh103')
except:
    model = tflearn.DNN(net)
    model.fit(training, output, n_epoch=20000, batch_size=10, show_metric=True)
    model.save('minh103')
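
# --- Illustrative sketch (an addition, not part of this 154-line commit) ---
# One way app.py could query the trained model through a hypothetical respond() helper:
# lemmatize the input with the same spacy pipeline, encode it against `words`, and map
# the highest-scoring class back to a label (whose replies live in ans_data).
def respond(user_input):
    cleaned = re.sub(r'[^\w\s]', '', user_input)
    tokens = [t.lemma_ for t in npl(cleaned)]
    bag = numpy.array([1 if w in tokens else 0 for w in words])
    scores = model.predict([bag])[0]
    return labels[numpy.argmax(scores)]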