| import pandas as pd
|
| import numpy as np
|
| import tensorflow as tf
|
| from tensorflow.keras.preprocessing.text import Tokenizer
|
| from tensorflow.keras.preprocessing.sequence import pad_sequences
|
| from tensorflow.keras.models import Sequential
|
| from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D, Bidirectional, Dropout
|
| from sklearn.model_selection import train_test_split
|
| from sklearn.metrics import classification_report, confusion_matrix
|
| import pickle
|
| import os
|
|
|
| import pandas as pd
|
| import numpy as np
|
| import tensorflow as tf
|
| from tensorflow.keras.preprocessing.text import Tokenizer
|
| from tensorflow.keras.preprocessing.sequence import pad_sequences
|
| from tensorflow.keras.models import Model
|
| from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, SpatialDropout1D, Bidirectional, Dropout, Layer, Concatenate
|
| import tensorflow.keras.backend as K
|
| import pickle
|
| import os
|
|
|
|
|
class Attention(Layer):
    """Additive attention pooling over axis 1 of the input.

    In this file the layer is applied to the output of a bidirectional LSTM
    with ``return_sequences=True``, i.e. a (batch, timesteps, features)
    tensor.  Each timestep gets a scalar score, the scores are normalised
    with a softmax along the timestep axis, and the input is summed along
    that axis weighted by the normalised scores.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments unchanged to the base ``Layer``."""
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the trainable scoring weights from ``input_shape``.

        ``W`` has shape (input_shape[-1], 1) and ``b`` has shape
        (input_shape[1], 1), so the bias is tied to the size of axis 1 of
        the input the layer is built with.
        """
        step_count = input_shape[1]
        feature_count = input_shape[-1]

        self.W = self.add_weight(
            name='attention_weight',
            shape=(feature_count, 1),
            initializer='random_normal',
            trainable=True,
        )
        self.b = self.add_weight(
            name='attention_bias',
            shape=(step_count, 1),
            initializer='zeros',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        """Return the attention-weighted sum of ``x`` along axis 1."""
        # One scalar score per position on axis 1, squashed by tanh.
        scores = K.tanh(K.dot(x, self.W) + self.b)
        # Normalise the scores across axis 1 so they sum to one.
        weights = K.softmax(scores, axis=1)
        # The trailing size-1 axis of `weights` broadcasts over the features.
        return K.sum(x * weights, axis=1)
|
|
|
def train_advanced_model(file_path, *, max_words=15000, max_len=300,
                         batch_size=128, epochs=2):
    """Train the BiLSTM + attention binary classifier on a CSV file.

    The CSV is read with pandas and must provide the columns
    ``related_facts``, ``question``, ``engine_response`` and ``best``.
    The three text columns are joined into one string per row, tokenised,
    padded, and used to predict ``best`` (cast to int).

    Side effects:
        * writes the fitted tokenizer to ``tokenizer_advanced.pickle``;
        * writes the checkpoint with the best ``val_accuracy`` to
          ``chatbot_performance_advanced.h5``;
        * prints progress, the model summary and a held-out evaluation.

    Args:
        file_path: Path (or anything ``pd.read_csv`` accepts) of the dataset.
        max_words: Vocabulary size passed to the tokenizer and the embedding.
        max_len: Length every token sequence is padded/truncated to.
        batch_size: Mini-batch size used for training and prediction.
        epochs: Number of training epochs.

    Returns:
        None.

    Raises:
        ValueError: If the training split lacks label 0 or label 1, since the
            class weighting (and the classifier) need both.
    """
    print("Loading data for advanced model...")
    df = pd.read_csv(file_path)

    # Rows without facts get a fixed placeholder instead of NaN.
    df['related_facts'] = df['related_facts'].fillna("No context provided.")

    # One input string per row, with a marker in front of each section.
    # NOTE(review): the tokenizer's default filters presumably strip the
    # square brackets from these markers - confirm this is intended.
    df['text'] = (
        "[FACTS] " + df['related_facts'].astype(str)
        + " [QUERY] " + df['question'].astype(str)
        + " [RES] " + df['engine_response'].astype(str)
    )

    y = df['best'].astype(int).values
    X_text = df['text'].astype(str).str.lower().values

    # NOTE(review): the tokenizer is fitted on all rows, including those that
    # end up in the test split; kept as in the original pipeline.
    tokenizer = Tokenizer(num_words=max_words, lower=True, split=' ')
    tokenizer.fit_on_texts(X_text)
    X_seq = tokenizer.texts_to_sequences(X_text)
    X_pad = pad_sequences(X_seq, maxlen=max_len)

    X_train, X_test, y_train, y_test = train_test_split(
        X_pad, y, test_size=0.15, random_state=42, stratify=y
    )

    # Weight the positive class by the negative/positive ratio.  The counts
    # come from the training labels only, so test labels do not influence
    # training, and an absent class is reported explicitly instead of
    # surfacing as a ZeroDivisionError or a silent zero weight.
    n_negative = int(np.sum(y_train == 0))
    n_positive = int(np.sum(y_train == 1))
    if n_negative == 0 or n_positive == 0:
        raise ValueError(
            "Training data must contain both classes in 'best' "
            f"(found {n_negative} zeros and {n_positive} ones)."
        )
    class_weight = {0: 1.0, 1: n_negative / n_positive}

    with open('tokenizer_advanced.pickle', 'wb') as handle:
        pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print("Tokenizer saved.")

    # Embedding -> spatial dropout -> BiLSTM -> attention pooling -> dense head.
    inputs = Input(shape=(max_len,))
    embed = Embedding(max_words, 128)(inputs)
    drop1 = SpatialDropout1D(0.3)(embed)
    lstm = Bidirectional(LSTM(64, return_sequences=True))(drop1)
    attn = Attention()(lstm)
    dense1 = Dense(64, activation='relu')(attn)
    drop2 = Dropout(0.4)(dense1)
    outputs = Dense(1, activation='sigmoid')(drop2)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    # summary() prints the table itself; wrapping it in print() only adds a
    # stray "None" line.
    model.summary()

    # Keep only the weights of the epoch with the best validation accuracy.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        'chatbot_performance_advanced.h5',
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=1
    )

    print("Training advanced model with Attention...")
    model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.1,
        class_weight=class_weight,
        callbacks=[checkpoint],
        verbose=1
    )

    print("Training complete.")

    # Use the held-out split that was previously created but never touched.
    # This scores the in-memory (last-epoch) model, which may differ from the
    # best checkpoint written to disk.
    y_prob = model.predict(X_test, batch_size=batch_size, verbose=0)
    y_pred = (np.asarray(y_prob).ravel() >= 0.5).astype(int)
    print("Held-out test set evaluation:")
    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: train on the CSV at this hard-coded relative path.
    dataset_csv = 'BP_MHS_V1.csv'
    train_advanced_model(dataset_csv)
|
|
|