import json
import random
import string

import nltk
import numpy as np
import tensorflow as tf
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
"""
nltk.download('omw-1.4')
nltk.download("stopwords", quiet=True)
nltk.download("punkt", quiet=True)
nltk.download("wordnet", quiet=True)
"""
class ModeleDeepLearning:
    """Intent-classification chatbot: bag-of-words features fed to a small dense network."""

    def __init__(self, file_path, epochs=200):
        self.file_path = file_path
        self.epochs = epochs
        self.model = None
        self.words = []    # sorted vocabulary of lemmatized, lowercased tokens
        self.classes = []  # sorted list of intent tags
        self.lemmatizer = WordNetLemmatizer()
        self.run()         # load the data, build the features, train
    def importer(self):
        # Load the intents file (expected layout sketched below).
        with open(self.file_path, encoding="utf-8") as f:
            data = json.load(f)
        return data
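    # Expected layout of the intents file, inferred from the keys used in the
    # methods below; the tag names and phrasing here are only illustrative:
    #
    # {
    #   "intents": [
    #     {
    #       "tag": "greeting",
    #       "patterns": ["Hello", "Hi there"],
    #       "responses": ["Hello!", "Hi, how can I help?"]
    #     }
    #   ]
    # }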
    def preprocess_data(self):
        doc_X = []  # raw training patterns
        doc_y = []  # intent tag of each pattern
        for intent in self.data["intents"]:
            for pattern in intent["patterns"]:
                tokens = word_tokenize(pattern)
                self.words.extend(tokens)
                doc_X.append(pattern)
                doc_y.append(intent["tag"])
            if intent["tag"] not in self.classes:
                self.classes.append(intent["tag"])
        # Lowercase and lemmatize the vocabulary, dropping bare punctuation.
        self.words = [
            self.lemmatizer.lemmatize(word.lower())
            for word in self.words
            if word not in string.punctuation
        ]
        self.words = sorted(set(self.words))
        self.classes = sorted(set(self.classes))
        # Encode each pattern as a bag-of-words vector over the vocabulary,
        # paired with a one-hot vector over the intent tags. The pattern is
        # tokenized and lemmatized the same way as the vocabulary so that
        # membership tests compare whole tokens, not substrings.
        training = []
        out_empty = [0] * len(self.classes)
        for idx, doc in enumerate(doc_X):
            tokens = [self.lemmatizer.lemmatize(w.lower()) for w in word_tokenize(doc)]
            bow = [1 if word in tokens else 0 for word in self.words]
            output_row = list(out_empty)
            output_row[self.classes.index(doc_y[idx])] = 1
            training.append([bow, output_row])
        random.shuffle(training)
        training = np.array(training, dtype=object)
        train_X = np.array(list(training[:, 0]))
        train_y = np.array(list(training[:, 1]))
        return train_X, train_y
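    # Illustrative sizes only (they depend on the intents file): with, say,
    # 120 distinct lemmas and 8 intent tags, train_X has shape
    # (n_patterns, 120) and train_y has shape (n_patterns, 8).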
    def build_model(self, input_shape, output_shape):
        # Two dense ReLU layers with dropout, softmax over the intent tags.
        model = Sequential()
        model.add(Dense(128, input_shape=input_shape, activation="relu"))
        model.add(Dropout(0.5))
        model.add(Dense(64, activation="relu"))
        model.add(Dropout(0.3))
        model.add(Dense(output_shape, activation="softmax"))
        adam = tf.keras.optimizers.Adam(learning_rate=0.01)
        model.compile(loss="categorical_crossentropy", optimizer=adam, metrics=["accuracy"])
        return model
    def train_model(self):
        input_shape = (len(self.train_X[0]),)
        output_shape = len(self.train_y[0])
        self.model = self.build_model(input_shape, output_shape)
        self.model.fit(x=self.train_X, y=self.train_y, epochs=self.epochs, verbose=1)
    def clean_text(self, text):
        # Lowercase before lemmatizing so tokens line up with the training vocabulary.
        tokens = word_tokenize(text)
        tokens = [self.lemmatizer.lemmatize(word.lower()) for word in tokens]
        return tokens
    def bag_of_words(self, text):
        tokens = self.clean_text(text)
        bow = [0] * len(self.words)
        for w in tokens:
            for idx, word in enumerate(self.words):
                if word == w:
                    bow[idx] = 1
        return np.array(bow)
    def predict_class(self, text):
        bow = self.bag_of_words(text)
        result = self.model.predict(np.array([bow]))[0]
        thresh = 0.2
        # Keep every tag scoring above the threshold, best match first.
        y_pred = [[idx, res] for idx, res in enumerate(result) if res > thresh]
        y_pred.sort(key=lambda x: x[1], reverse=True)
        return [self.classes[r[0]] for r in y_pred]
    def get_response(self, intents_list):
        # Fall back gracefully when nothing scored above the threshold or the
        # tag is missing from the data file (the reply text is arbitrary).
        fallback = "Sorry, I did not understand that."
        if not intents_list:
            return fallback
        tag = intents_list[0]
        for intent in self.data["intents"]:
            if intent["tag"] == tag:
                return random.choice(intent["responses"])
        return fallback
    def predict(self, question):
        intents = self.predict_class(question)
        return self.get_response(intents)
    def run(self):
        self.data = self.importer()
        self.train_X, self.train_y = self.preprocess_data()
        self.train_model()
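
# Minimal usage sketch. The file name "intents.json" and the sample question
# are assumptions; point file_path at your own intents file. Training runs in
# the constructor, so instantiation is the slow step.
if __name__ == "__main__":
    bot = ModeleDeepLearning("intents.json", epochs=200)
    print(bot.predict("Hello"))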