|
import en_core_web_md |
|
nlp = en_core_web_md.load() |
|
|
|
|
|
|
|
import re |
|
import numpy |
|
import tflearn |
|
import tensorflow |
|
import os |
|
|
|
def convert_txt_to_DataFrame(textFile):
    """Parse a question/answer corpus text file into a dict.

    A line containing ``'- -'`` is treated as a question; the line that
    immediately follows it is taken as its answer. Despite the function
    name, a plain ``dict`` is returned, not a DataFrame.

    Args:
        textFile: Path of the text file to read.

    Returns:
        dict mapping each question (leading/trailing spaces and hyphens
        removed) to a list of answers (stripped the same way). A question
        that appears several times accumulates its answers in file order.
        A question on the very last line, with no line after it, maps to
        an empty list.
    """
    with open(textFile) as f:
        lines = [line.strip('\n') for line in f]

    temp_dict = {}
    for i, line in enumerate(lines):
        if '- -' not in line:
            continue
        # str.strip takes a *set* of characters: this trims any run of
        # spaces and hyphens from both ends of the question line.
        answers = temp_dict.setdefault(line.strip('- -'), [])
        # A question on the final line has no following answer line;
        # indexing lines[i + 1] there would raise IndexError.
        if i + 1 < len(lines):
            answers.append(lines[i + 1].strip(' -'))

    return temp_dict
|
|
|
|
|
|
|
def convert_to_list(file):
    """Load a corpus file and return lemmatized questions with their answers.

    Args:
        file: Path handed straight to ``convert_txt_to_DataFrame``.

    Returns:
        A ``(question, ans)`` tuple of parallel lists. ``question[i]`` is the
        list of ``lemma_`` strings of the tokens ``nlp`` yields for the i-th
        question once punctuation has been removed; ``ans[i]`` is the list
        of answers stored for that question.
    """
    parsed = convert_txt_to_DataFrame(file)

    # Answers in the same order the questions are iterated below.
    ans = list(parsed.values())

    # Remove every character that is neither a word character nor whitespace
    # before the text is handed to the nlp pipeline.
    cleaned = [re.sub(r'[^\w\s]', '', raw) for raw in parsed]

    question = [[token.lemma_ for token in nlp(text)] for text in cleaned]

    return question, ans
|
|
|
def addQuestion(file):
    """Register one corpus file in the module-level ``ml_data`` / ``ans_data``.

    The file name without its ``.txt`` extension becomes the label. Under
    that label, ``ml_data`` receives the lemmatized questions and
    ``ans_data`` the matching answers, both as returned by
    ``convert_to_list``.

    Args:
        file: Path of the corpus file; ``str(file)`` is used for the label.

    Returns:
        A tuple holding the results of the two ``dict.update`` calls, i.e.
        ``(None, None)``. Kept only for backward compatibility; the useful
        effect is the mutation of ``ml_data`` and ``ans_data``.

    NOTE: labels for names that start or end with 't', 'x' or '.' differ
    from what the old ``strip('.txt')`` produced. Anything keyed on the old,
    mangled labels (for example a model trained on them) has to be
    regenerated.
    """
    name = str(file)
    # Remove only a trailing ".txt". str.strip('.txt') treats its argument as
    # a set of characters and also ate leading/trailing 't', 'x' and '.' of
    # the name itself (e.g. 'trc.txt' -> 'rc', 'interest.txt' -> 'interes').
    if name.endswith('.txt'):
        name = name[:-len('.txt')]

    ques, ans = convert_to_list(file)

    return (ml_data.update({name: ques}), ans_data.update({name: ans}))
|
|
|
# label -> list of lemmatized questions (each a list of lemma strings)
ml_data = {}
# label -> list of answer lists, parallel to ml_data[label]
ans_data = {}

# Corpus files to load, in registration order.
_CORPUS_FILES = (
    'hello.txt',
    'how are you.txt',
    'interest.txt',
    'who are you.txt',
    'you a robot.txt',
    'tell me about yourself.txt',
    'what language python.txt',
    'What is AI.txt',
    'Tell me a joke.txt',
    'you are stupid.txt',
    'Pollak Library.txt',
    'Where is the building.txt',
    'hungry.txt',
    'What is your major.txt',
    'free time.txt',
    'I need help.txt',
    'your food.txt',
    'what time.txt',
    'weather.txt',
    'your job.txt',
    'old.txt',
    'love you.txt',
    'shut up.txt',
    'where is csuf.txt',
    'csuf mascot.txt',
    'school start.txt',
    'golden gate.txt',
    'trc.txt',
    'gwpac.txt',
    'lovelace.txt',
    'bathroom.txt',
    'starbucks.txt',
    'workout.txt',
    'tuffy.txt',
    'mccarthy.txt',
    'sgmh.txt',
    'david.txt',
    'microwave.txt',
    'arboretum.txt',
    'langdor.txt',
    'restroom.txt',
    'burger.txt',
    'tsu.txt',
    'park.txt',
    'dan.txt',
)

for _corpus_file in _CORPUS_FILES:
    addQuestion(_corpus_file)
|
|
|
|
|
|
|
# Class labels: every key of ml_data, in sorted order.
labels = sorted(ml_data)

# Vocabulary: every distinct lemma found in any question, in sorted order.
words = sorted({
    lemma
    for label_questions in ml_data.values()
    for lemmas in label_questions
    for lemma in lemmas
})

# Template for a one-hot target row (one slot per label).
out_empty = [0] * len(labels)
training = []
output = []

# One training row per label: all questions stored under a label are merged
# into a single bag-of-words vector over the vocabulary.
for label in ml_data:
    print(f"question: {label}\n\n")

    label_lemmas = set()
    for lemmas in ml_data[label]:
        label_lemmas.update(lemmas)

    bag = []
    for word in words:
        present = word in label_lemmas
        bag.append(1 if present else 0)
        if present:
            print(f"{word} = 1")

    # One-hot encode the label at its position in the sorted label list.
    output_row = list(out_empty)
    output_row[labels.index(label)] = 1
    print('\n', output_row)

    training.append(bag)
    output.append(output_row)
    print(labels)
    print("\n\n****\n\n")

training = numpy.array(training)
output = numpy.array(output)
|
|
|
|
|
|
|
|
|
tensorflow.compat.v1.reset_default_graph()

# Network: input sized to one bag-of-words row, two 32-unit fully connected
# layers, then a softmax layer with one unit per label, wrapped in a
# regression layer.
net = tflearn.input_data(shape=[None, len(training[0])])
for _ in range(2):
    net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, len(output[0]), activation="softmax")
net = tflearn.regression(net)

model = tflearn.DNN(net)

# Train and save only when no saved checkpoint is present; otherwise load it.
if not os.path.exists('minh103.meta'):
    model = tflearn.DNN(net)
    model.fit(training, output, n_epoch=50000, batch_size=10, show_metric=True)
    model.save('minh103')
else:
    model.load('minh103')
|
|