import nltk
import random
import numpy as np
import json
import pickle
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import SGD
# nltk.word_tokenize and WordNetLemmatizer need these corpora on first run
# (newer NLTK versions may also require 'punkt_tab')
nltk.download('punkt', quiet=True)
nltk.download('wordnet', quiet=True)

lemmatizer = WordNetLemmatizer()
# Load the intents file
with open('intents.json') as json_file:
    intents = json.load(json_file)
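# The code below assumes intents.json follows the common chatbot-tutorial
# shape (a hypothetical minimal example; only 'tag' and 'patterns' are read
# here, while 'responses' is typically used at inference time):
#   {"intents": [
#       {"tag": "greeting",
#        "patterns": ["Hi", "Hello there"],
#        "responses": ["Hello!"]}
#   ]}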
# Initialize lists
words = []
classes = []
documents = []
ignore_words = ['?', '!']
# Process the intents
for intent in intents['intents']:
    for pattern in intent['patterns']:
        word_list = nltk.word_tokenize(pattern)
        words.extend(word_list)
        documents.append((word_list, intent['tag']))
    if intent['tag'] not in classes:
        classes.append(intent['tag'])
words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words]
words = sorted(set(words))
classes = sorted(set(classes))
with open('words.pkl', 'wb') as f:
    pickle.dump(words, f)
with open('classes.pkl', 'wb') as f:
    pickle.dump(classes, f)
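# The pickled vocabulary and class list are needed again at inference time,
# to rebuild the exact same bag-of-words vector and to map the model's
# output index back to an intent tag.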
training = []
output_empty = [0] * len(classes)
# Debugging: Print lengths of words and classes
print(f'Number of words: {len(words)}')
print(f'Number of classes: {len(classes)}')
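# Each training example becomes a fixed-length bag-of-words vector plus a
# one-hot label. For illustration: with words = ['hello', 'how', 'you'] and
# the pattern "hello you", the bag would be [1, 0, 1].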
for doc in documents:
    # Bag-of-words: mark 1 for every vocabulary word present in this pattern
    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in doc[0]]
    bag = [1 if word in pattern_words else 0 for word in words]
    # One-hot encode the intent tag
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1
    training.append([bag, output_row])
# Debugging: Check for inconsistencies in training data
for i, t in enumerate(training):
    if len(t[0]) != len(words):
        print(f'Inconsistent length in training data at index {i}: {len(t[0])} != {len(words)}')
random.shuffle(training)
training = np.array(training, dtype=object)
# Debugging: Print shape of training data
print(f'Training data shape: {training.shape}')
train_x = list(training[:, 0])
train_y = list(training[:, 1])
# Debugging: Print shapes of train_x and train_y
print(f'Shape of train_x: {np.array(train_x).shape}')
print(f'Shape of train_y: {np.array(train_y).shape}')
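# A small feed-forward classifier: bag-of-words vector in, softmax over
# intent tags out. Dropout between the dense layers reduces overfitting on
# what is typically a tiny intents dataset.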
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))
# Note: the legacy `decay` argument was deprecated and later removed from
# Keras optimizers (Keras 3 / recent TF); use a learning-rate schedule
# instead if decay is needed.
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)
# model.save takes only the file path; passing the History object made it
# land in the `overwrite` parameter, so it is dropped here.
model.save('chatbotmodel.h5')
print("Model trained and saved.")