File size: 4,392 Bytes
b20f5ed
 
ad93163
 
97aa13e
943a06b
 
 
 
4af6be2
943a06b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97aa13e
943a06b
 
 
 
 
 
 
 
 
 
 
 
 
 
de6415b
943a06b
 
 
 
 
 
 
f66ad4d
 
 
 
 
943a06b
 
 
 
 
 
 
 
 
 
 
 
 
0c9ae8f
58dc75b
 
 
 
 
 
 
695dec0
 
58dc75b
695dec0
 
58dc75b
695dec0
58dc75b
695dec0
58dc75b
 
 
 
 
 
 
 
 
 
 
 
f26fbb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3653538
f66ad4d
0c9ae8f
943a06b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b042db
 
943a06b
 
 
 
 
4af6be2
943a06b
4af6be2
943a06b
017f2df
943a06b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# spaCy's medium English model, loaded once at module import; `nlp` is the
# pipeline used below to tokenize and lemmatize question text.
import en_core_web_md
nlp = en_core_web_md.load()


#import spacy
import re
import numpy
import tflearn
import tensorflow
import os

def convert_txt_to_DataFrame(textFile):
    """Parse a dialogue text file into a question -> answers mapping.

    File format: a line containing '- -' starts a question; each following
    line beginning with ' -' is one answer for that question.

    Returns a dict mapping each question string to the list of its answer
    strings.  (Despite the historical name, this returns a plain dict, not
    a pandas DataFrame — kept for backward compatibility with callers.)

    Fixes over the previous version:
    - every ' -' answer line after a question is collected, not only the
      first one (the format explicitly allows several answers per question);
    - no IndexError when a question is the last line of the file.
    """
    with open(textFile) as f:
        # drop the trailing newline of every line
        lines = [line.strip('\n') for line in f]

    qa = {}
    current = None
    for line in lines:
        if '- -' in line:
            # Question line.  str.strip('- -') removes the character set
            # {'-', ' '} from both ends — this matches the original key
            # derivation, so existing data keys are unchanged.
            current = line.strip('- -')
            qa.setdefault(current, [])
        elif current is not None and line.strip():
            # Answer line for the most recent question.
            qa[current].append(line.strip(' -'))

    return qa

#npl = spacy.load('en_core_web_md')

def convert_to_list(file):
    """Lemmatize every question in *file* and collect its answers.

    Returns a pair ``(question, ans)`` where ``question`` is a list of
    lemma-token lists (one per question, punctuation removed before
    lemmatization) and ``ans`` is the matching list of answer lists.
    """
    parsed = convert_txt_to_DataFrame(file)

    lemmatized = []
    answers = []

    # Single pass: record the answers, then clean and lemmatize the
    # question text with spaCy.  Order matches dict insertion order, so
    # the two result lists stay aligned.
    for raw_question, answer_list in parsed.items():
        answers.append(answer_list)
        cleaned = re.sub(r'[^\w\s]', '', raw_question)
        doc = nlp(cleaned)
        lemmatized.append([token.lemma_ for token in doc])

    return lemmatized, answers

def addQuestion(file):
    """Load one question/answer file and register it under its base name
    in the module-level ``ml_data`` / ``ans_data`` dictionaries.

    Bug fix: the previous ``str(file).strip('.txt')`` stripped the
    character *set* {'.', 't', 'x'} from both ends of the filename, so
    e.g. 'trc.txt' became 'rc', 'tuffy.txt' became 'uffy' and 'tsu.txt'
    became 'su' — silently corrupting the topic labels.  Use
    ``os.path.splitext`` to remove only the extension.
    """
    name = os.path.splitext(str(file))[0]

    ques, ans = convert_to_list(file)

    # dict.update returns None, so this returns (None, None) exactly as
    # before; callers ignore the return value.
    return (ml_data.update({name: ques}), ans_data.update({name: ans}))

# Question tokens and answers per topic, filled in by addQuestion().
ml_data = {}
ans_data = {}


# Register every training topic file (one label per file).
for _topic_file in (
    'hello.txt',
    'how are you.txt',
    'interest.txt',
    'who are you.txt',
    'you a robot.txt',
    'tell me about yourself.txt',
    'what language python.txt',
    'What is AI.txt',
    'Tell me a joke.txt',
    'you are stupid.txt',
    'Pollak Library.txt',
    'Where is the building.txt',
    'hungry.txt',
    'What is your major.txt',
    'free time.txt',
    'I need help.txt',
    'your food.txt',
    'what time.txt',
    'weather.txt',
    'your job.txt',
    'old.txt',
    'love you.txt',
    'shut up.txt',
    'where is csuf.txt',
    'csuf mascot.txt',
    'school start.txt',
    'golden gate.txt',
    'trc.txt',
    'gwpac.txt',
    'lovelace.txt',
    'bathroom.txt',
    'starbucks.txt',
    'workout.txt',
    'tuffy.txt',
    'mccarthy.txt',
    'sgmh.txt',
    'david.txt',
    'microwave.txt',
    'arboretum.txt',
    'langdor.txt',
    'restroom.txt',
    'burger.txt',
    'tsu.txt',
    'park.txt',
    'dan.txt',
):
    addQuestion(_topic_file)




# Sorted topic labels — iterating the dict yields its keys.
labels = sorted(ml_data)

# Vocabulary: every distinct lemma across all tokenized questions, sorted.
# ml_data maps topic -> list of token lists; flatten two levels deep.
words = sorted({tok for qlists in ml_data.values() for z in qlists for tok in z})

#source: https://www.techwithtim.net/tutorials/ai-chatbot/part-2/
# Template one-hot row: one zero slot per label.
out_empty = [0] * len(labels)
training = []
output = []

for _idx, ques in enumerate(ml_data):
    print(f"question: {ques}\n\n")

    # Flatten every tokenized question for this topic into one word list.
    wrds = []
    for token_list in ml_data[ques]:
        wrds.extend(token_list)

    # Bag-of-words vector over the global vocabulary.
    bag = []
    for w in words:
        if w in wrds:
            bag.append(1)
            print(f"{w} = 1")
        else:
            bag.append(0)

    # One-hot label row for this topic.
    output_row = list(out_empty)
    output_row[labels.index(ques)] = 1
    print('\n', output_row)

    training.append(bag)
    output.append(output_row)
    print(labels)
    print("\n\n****\n\n")

training = numpy.array(training)
output = numpy.array(output)




tensorflow.compat.v1.reset_default_graph()

# Feed-forward classifier: bag-of-words input, two 32-unit hidden layers,
# softmax over the topic labels.
net = tflearn.input_data(shape=[None, len(training[0])])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, len(output[0]), activation="softmax")
net = tflearn.regression(net)

model = tflearn.DNN(net)

if os.path.exists('minh103.meta'):
    # Checkpoint already on disk: reuse the trained weights.
    model.load('minh103')
else:
    # Fix: the original rebuilt `model = tflearn.DNN(net)` here even though
    # an identical DNN was constructed just above — train the existing one.
    model.fit(training, output, n_epoch=50000, batch_size=10, show_metric=True)
    model.save('minh103')