|
from statistics import mode |
|
import spacy |
|
import re |
|
import numpy |
|
import tflearn |
|
import tensorflow |
|
|
|
|
|
def convert_txt_to_DataFrame(textFile):
    """Parse a question/answer text file into a dictionary.

    Every line that contains the marker ``'- -'`` is treated as a question,
    and the line immediately following it is recorded as its answer.
    Despite the function name, a plain ``dict`` is returned.

    Args:
        textFile: Path of the text file to read.

    Returns:
        A dict mapping each question (with leading/trailing spaces and
        hyphens removed) to a list of answer strings, in file order. A
        question that repeats in the file accumulates one answer per
        occurrence. A question on the very last line of the file has no
        following line and maps to an empty list.
    """
    with open(textFile) as f:
        lines = [line.strip('\n') for line in f]

    temp_dict = {}
    for i, line in enumerate(lines):
        if '- -' not in line:
            continue
        # str.strip() takes a *set* of characters, so this removes any run of
        # '-' and ' ' from both ends of the line, not just a literal prefix.
        answers = temp_dict.setdefault(line.strip('- -'), [])
        # Guard the look-ahead: a marker on the final line has no answer line
        # after it, and indexing lines[i + 1] would raise IndexError.
        if i + 1 < len(lines):
            answers.append(lines[i + 1].strip(' -'))

    return temp_dict
|
|
|
# Shared spaCy pipeline; convert_to_list() calls it to lemmatise questions.
npl = spacy.load("en_core_web_md")
|
|
|
def convert_to_list(file):
    """Load a question file and return lemmatised questions with answers.

    Args:
        file: Path handed to ``convert_txt_to_DataFrame``.

    Returns:
        A ``(question, ans)`` tuple. ``question`` holds one list of lemma
        strings per question, produced by running the question text (with
        every character that is neither a word character nor whitespace
        removed) through the module-level ``npl`` pipeline. ``ans`` holds
        the matching answer lists in the same order.
    """
    parsed = convert_txt_to_DataFrame(file)

    ans = list(parsed.values())

    # Strip punctuation from every question before any of them is tokenised.
    cleaned = [re.sub(r'[^\w\s]', '', raw_question) for raw_question in parsed]

    question = [[token.lemma_ for token in npl(text)] for text in cleaned]

    return question, ans
|
|
|
def addQuestion(file, name):
    """Register the questions and answers of one file under ``name``.

    Reads ``file`` through ``convert_to_list`` and stores the lemmatised
    questions in the module-level ``ml_data`` and the answers in
    ``ans_data``, both keyed by ``name`` (an existing entry is replaced).

    Args:
        file: Path of the question file.
        name: Key under which the parsed data is stored.

    Returns:
        Always ``(None, None)``; callers rely on the side effects only.
    """
    ques, ans = convert_to_list(file)

    ml_data[name] = ques
    ans_data[name] = ans

    return (None, None)
|
|
|
# Registries filled by addQuestion(): both are keyed by the name passed to it.
# ml_data holds the lemmatised questions, ans_data the matching answers.
ml_data = {}
ans_data = {}

# Directory that contains every question file listed below.
_QUESTION_DIR = '/opt/homebrew/anaconda3/envs/tensorflow/Chatbot/question files/'

# (file name inside _QUESTION_DIR, name to register it under), in load order.
_QUESTION_FILES = (
    ('hello.txt', 'hello'),
    ('how are you.txt', 'how are you'),
    ('interest.txt', 'interest'),
    ('who are you.txt', 'who are you'),
    ('you a robot.txt', 'you a robot'),
    ('tell me about yourself.txt', 'tell me about yourself'),
    ('what language python.txt', 'what language python'),
    ('What is AI.txt', 'what is ai'),
    ('Tell me a joke.txt', 'tell me a joke'),
    ('you are stupid.txt', 'you are stupid'),
    ('Pollak Library.txt', 'pollak library'),
    ('Where is the building.txt', 'where is the building'),
    ('hungry.txt', 'hungry'),
    ('What is your major.txt', 'what is your major'),
    ('free time.txt', 'free time'),
    ('I need help.txt', 'i need help'),
    ('your food.txt', 'your food'),
    ('what time.txt', 'what time'),
    ('weather.txt', 'weather'),
    ('your job.txt', 'your job'),
    ('old.txt', 'age'),
)

for _file_name, _label in _QUESTION_FILES:
    addQuestion(_QUESTION_DIR + _file_name, _label)
|
|
|
# Sorted list of every key registered in ml_data.
labels = sorted(ml_data)

# Sorted vocabulary: every distinct lemma that occurs in any registered
# question, regardless of which key it belongs to.
words = sorted({
    lemma
    for question_lists in ml_data.values()
    for lemmas in question_lists
    for lemma in lemmas
})
|
|
|
|
|
# All-zero template row with one slot per label; copied for every sample.
out_empty = [0] * len(labels)

training = []
output = []

# Builds ONE training sample per ml_data key: a bag-of-words vector over
# `words` (1 when the word occurs in any question of that key) plus a one-hot
# label row. The print() calls trace the encoding as it is built.
for ques in ml_data:
    print(f"question: {ques}\n\n")

    # Every lemma used by any question registered under this key.
    seen = set()
    for lemmas in ml_data[ques]:
        seen.update(lemmas)

    bag = []
    for w in words:
        if w in seen:
            bag.append(1)
            print(f"{w} = 1")
        else:
            bag.append(0)

    output_row = list(out_empty)
    output_row[labels.index(ques)] = 1
    print('\n', output_row)

    training.append(bag)
    output.append(output_row)
    print(labels)
    print("\n\n****\n\n")

training = numpy.array(training)
output = numpy.array(output)
|
|
|
|
|
|
|
|
|
# Start from a clean default graph before defining the network.
tensorflow.compat.v1.reset_default_graph()

# Input width = length of one bag-of-words row; two 8-unit fully connected
# layers; softmax output with one unit per column of `output`.
net = tflearn.input_data(shape=[None, len(training[0])])
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, len(output[0]), activation="softmax")
net = tflearn.regression(net)

model = tflearn.DNN(net)

# Reuse the saved 'minh103' checkpoint when it loads; otherwise train from
# scratch and save. Only Exception is caught so that Ctrl-C (KeyboardInterrupt)
# or SystemExit during loading aborts the script instead of silently starting
# a 20000-epoch training run, as the previous bare `except:` did.
try:
    model.load('minh103')
except Exception:
    # Fresh wrapper, as in the original; presumably to discard whatever state
    # the failed load left behind (TODO confirm this is required).
    model = tflearn.DNN(net)
    model.fit(training, output, n_epoch=20000, batch_size=10, show_metric=True)
    model.save('minh103')
|
|