import en_core_web_md
# Load the `en_core_web_md` language pipeline once, at import time. The
# resulting `nlp` callable is used by convert_to_list() to turn each question
# string into tokens whose `lemma_` is collected.
nlp = en_core_web_md.load()


#import spacy
import re
import numpy
import tflearn
import tensorflow
import os

def convert_txt_to_DataFrame(textFile):
    """Parse a question/answer text file into a ``{question: [answers]}`` dict.

    Despite the name, no DataFrame is built; the result is a plain ``dict``.

    Expected layout, one entry per question::

        - - question text
          - first answer
          - second answer

    A line containing ``'- -'`` starts a new question.  Every following line
    whose first non-blank character is ``-`` is recorded as an answer to that
    question, until the next question line.  All other lines (headers, blank
    lines, anything before the first question) are ignored.

    Args:
        textFile: Path of the text file to read.

    Returns:
        dict mapping each question string to the list of its answer strings,
        in file order.  A question that occurs several times accumulates the
        answers of every occurrence; a question with no answer line (for
        example the last line of the file) maps to an empty list.
    """
    temp_dict = {}
    # Answer list of the most recently seen question; None until the first
    # question line so that header lines are not mistaken for answers.
    current_answers = None

    with open(textFile) as f:
        for raw_line in f:
            line = raw_line.strip()
            if '- -' in line:
                # str.strip() takes a *set* of characters: this removes every
                # leading/trailing '-' and ' ', not just the '- - ' prefix.
                question = line.strip('- ')
                current_answers = temp_dict.setdefault(question, [])
            elif current_answers is not None and line.startswith('-'):
                current_answers.append(line.strip('- '))

    return temp_dict

# Alternative (unused) way to load the same model: nlp = spacy.load('en_core_web_md')

def convert_to_list(file):
    """Read a question/answer file and lemmatise its questions.

    Args:
        file: Path handed to convert_txt_to_DataFrame().

    Returns:
        A ``(question, ans)`` tuple of two parallel lists, one entry per
        distinct question in the file:

        * ``question`` - the list of ``lemma_`` strings produced by ``nlp``
          for the question text after every character that is neither a word
          character nor whitespace has been removed;
        * ``ans`` - the list of answers stored for that question.
    """
    qa_pairs = convert_txt_to_DataFrame(file)

    ans = list(qa_pairs.values())

    # Drop punctuation first so it does not show up as tokens of its own.
    cleaned = [re.sub(r'[^\w\s]', '', text) for text in qa_pairs]

    question = [[token.lemma_ for token in nlp(text)] for text in cleaned]

    return question, ans

def addQuestion(file):
    """Load one question/answer file and register it under its label.

    The label is the file name with a trailing ``.txt`` removed.  The
    lemmatised questions are stored in the module-level ``ml_data`` dict and
    the matching answers in ``ans_data``, both keyed by that label; an
    existing entry with the same label is replaced.

    Args:
        file: Path of the question/answer text file.

    Returns:
        ``(None, None)``.  The original implementation returned the results
        of two ``dict.update()`` calls, which are always ``None``; the shape
        is kept so existing callers keep working.
    """
    name = str(file)
    # str.strip('.txt') would remove any of the characters '.', 't' and 'x'
    # from BOTH ends ('interest.txt' -> 'interes', 'test.txt' -> 'es'), so
    # cut the literal suffix instead.
    if name.endswith('.txt'):
        name = name[:-len('.txt')]

    ques, ans = convert_to_list(file)

    ml_data[name] = ques
    ans_data[name] = ans

    return None, None

# Filled in by addQuestion(), both keyed by label (file name without '.txt'):
#   ml_data  - the lemmatised questions of that file
#   ans_data - the answers belonging to those questions
ml_data = {}
ans_data = {}

# Register every conversation file, in this order.
for corpus_file in ('hello.txt', 'how are you.txt', 'interest.txt'):
    addQuestion(corpus_file)


# Class labels: the keys of ml_data in sorted order.
labels = sorted(ml_data)

# Vocabulary: every distinct lemma found in any question of any label, sorted.
words = sorted({
    lemma
    for questions in ml_data.values()
    for tokens in questions
    for lemma in tokens
})

#source: https://www.techwithtim.net/tutorials/ai-chatbot/part-2/
# Template for a one-hot output row: one zero per label.
out_empty = [0] * len(labels)
training = []
output = []

# One training sample per label: a bag-of-words vector over `words` built from
# all questions of that label together, paired with the label's one-hot row.
for label in ml_data:
    print(f"question: {label}\n\n")

    # Every lemma that occurs in at least one question of this label.
    label_lemmas = {lemma for tokens in ml_data[label] for lemma in tokens}

    bag = []
    for w in words:
        if w not in label_lemmas:
            bag.append(0)
            continue
        bag.append(1)
        print(f"{w} = 1")

    one_hot = list(out_empty)
    one_hot[labels.index(label)] = 1
    print('\n', one_hot)

    training.append(bag)
    output.append(one_hot)
    print(labels)
    print("\n\n****\n\n")

training = numpy.array(training)
output = numpy.array(output)


# Discard whatever is in the default TensorFlow graph before building the net.
tensorflow.compat.v1.reset_default_graph()

# Network: input as wide as one training row -> two fully connected layers of
# 8 units -> softmax layer with one unit per output column.
net = tflearn.input_data(shape=[None, len(training[0])])
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, len(output[0]), activation="softmax")
net = tflearn.regression(net)

model = tflearn.DNN(net)

# Reuse the saved model when its checkpoint exists; otherwise train and save.
# The model built above is used in both branches - constructing a second
# tflearn.DNN(net) before fitting only replaced it with an identical one.
# NOTE(review): a checkpoint trained with a different vocabulary or label set
# presumably will not match this network - remove the minh103.* files after
# changing the data files; confirm against tflearn's load() behaviour.
if os.path.exists('minh103.meta'):
    model.load('minh103')
else:
    model.fit(training, output, n_epoch=20000, batch_size=10, show_metric=True)
    model.save('minh103')