import json
import random
import string

import nltk
import numpy as np
import tensorflow as tf
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input

"""

nltk.download('omw-1.4')

nltk.download("stopwords", quiet=True)

nltk.download("punkt", quiet=True)

nltk.download("wordnet", quiet=True)

"""


class ModeleDeepLearning:
    """Intent-classification chatbot: a bag-of-words model over intent patterns."""

    def __init__(self, file_path, epochs=200):
        self.file_path = file_path
        self.epochs = epochs
        self.model = None
        self.words = []    # sorted vocabulary of lemmatized, lowercased tokens
        self.classes = []  # sorted list of intent tags
        self.lemmatizer = WordNetLemmatizer()
        self.run()  # load the data, build the training set, and train immediately

    def importer(self):
        """Load and return the intents JSON file."""
        with open(self.file_path, encoding="utf-8") as f:
            data = json.load(f)
        return data

    def preprocess_data(self):
        """Build the bag-of-words training matrix and one-hot label matrix."""
        doc_X = []  # raw patterns
        doc_y = []  # intent tag for each pattern

        for intent in self.data["intents"]:
            for pattern in intent["patterns"]:
                tokens = word_tokenize(pattern)
                self.words.extend(tokens)
                doc_X.append(pattern)
                doc_y.append(intent["tag"])

            if intent["tag"] not in self.classes:
                self.classes.append(intent["tag"])

        # Lemmatize, lowercase, and deduplicate the vocabulary and the tag list.
        self.words = sorted(set(
            self.lemmatizer.lemmatize(word.lower())
            for word in self.words
            if word not in string.punctuation
        ))
        self.classes = sorted(set(self.classes))

        training = []
        out_empty = [0] * len(self.classes)

        for idx, doc in enumerate(doc_X):
            # Lemmatize the pattern's tokens so they match the vocabulary.
            # Checking membership in the token list (rather than substring
            # containment in the raw string, as before) avoids false positives
            # such as "hi" matching inside "this".
            tokens = [self.lemmatizer.lemmatize(w.lower()) for w in word_tokenize(doc)]
            bow = [1 if word in tokens else 0 for word in self.words]

            # One-hot encode the intent tag for this pattern.
            output_row = list(out_empty)
            output_row[self.classes.index(doc_y[idx])] = 1
            training.append([bow, output_row])

        # Shuffle so training batches mix intents, then split features/labels.
        random.shuffle(training)
        training = np.array(training, dtype=object)

        train_X = np.array(list(training[:, 0]))
        train_y = np.array(list(training[:, 1]))

        return train_X, train_y

    def build_model(self, input_shape, output_shape):
        """Two dense hidden layers with dropout, softmax over intent classes."""
        model = Sequential()
        model.add(Input(shape=input_shape))
        model.add(Dense(128, activation="relu"))
        model.add(Dropout(0.5))
        model.add(Dense(64, activation="relu"))
        model.add(Dropout(0.3))
        model.add(Dense(output_shape, activation="softmax"))
        adam = tf.keras.optimizers.Adam(learning_rate=0.01)
        model.compile(loss="categorical_crossentropy", optimizer=adam, metrics=["accuracy"])
        return model

    def train_model(self):
        """Build the network and fit it on the preprocessed training data."""
        input_shape = (len(self.train_X[0]),)
        output_shape = len(self.train_y[0])

        self.model = self.build_model(input_shape, output_shape)
        self.model.fit(x=self.train_X, y=self.train_y, epochs=self.epochs, verbose=1)

    def clean_text(self, text):
        """Tokenize and lemmatize, lowercasing so tokens match the vocabulary
        built in preprocess_data (the original skipped the lowercasing)."""
        tokens = word_tokenize(text)
        tokens = [self.lemmatizer.lemmatize(word.lower()) for word in tokens]
        return tokens

    def bag_of_words(self, text):
        """Encode text as a binary vector over the training vocabulary."""
        tokens = self.clean_text(text)
        bow = [1 if word in tokens else 0 for word in self.words]
        return np.array(bow)

    def predict_class(self, text):
        """Return the intent tags whose predicted probability exceeds the
        threshold, most confident first (possibly an empty list)."""
        bow = self.bag_of_words(text)
        result = self.model.predict(np.array([bow]), verbose=0)[0]
        thresh = 0.2
        y_pred = [[idx, res] for idx, res in enumerate(result) if res > thresh]
        y_pred.sort(key=lambda x: x[1], reverse=True)
        return [self.classes[r[0]] for r in y_pred]

    def get_response(self, intents_list):
        """Pick a random response for the top predicted intent."""
        # Guards: predict_class can return an empty list when no class clears
        # the threshold, and an unknown tag would otherwise leave the result
        # unbound. The fallback text itself is an assumption.
        fallback = "Sorry, I didn't understand that."
        if not intents_list:
            return fallback
        tag = intents_list[0]
        for intent in self.data["intents"]:
            if intent["tag"] == tag:
                return random.choice(intent["responses"])
        return fallback

    def predict(self, question):
        """End-to-end inference: classify the question and pick a response."""
        intents = self.predict_class(question)
        return self.get_response(intents)

    def run(self):
        """Load the intents file, preprocess it, and train the model."""
        self.data = self.importer()
        self.train_X, self.train_y = self.preprocess_data()
        self.train_model()
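
# Minimal usage sketch. Assumptions: an "intents.json" file with the structure
# shown near the top of this module; the file name is illustrative, not fixed
# by the class. Construction trains the model immediately (run() is called
# from __init__), so predict() is available right after.
if __name__ == "__main__":
    bot = ModeleDeepLearning("intents.json", epochs=200)
    print(bot.predict("Hello"))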