import pickle

import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import (LSTM, Bidirectional, Dense, Dropout,
                                     Embedding, Input, Layer, SpatialDropout1D)
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
# Custom additive (Bahdanau-style) attention layer: scores every timestep of
# the Bi-LSTM output and returns the attention-weighted sum over time.
class Attention(Layer):
    def __init__(self, **kwargs):
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        # input_shape: (batch, timesteps, features)
        self.W = self.add_weight(name='attention_weight',
                                 shape=(input_shape[-1], 1),
                                 initializer='random_normal',
                                 trainable=True)
        self.b = self.add_weight(name='attention_bias',
                                 shape=(input_shape[1], 1),
                                 initializer='zeros',
                                 trainable=True)
        super(Attention, self).build(input_shape)

    def call(self, x):
        # Unnormalized scores e: (batch, timesteps, 1); softmax over the
        # time axis yields the attention distribution a.
        e = K.tanh(K.dot(x, self.W) + self.b)
        a = K.softmax(e, axis=1)
        output = x * a
        return K.sum(output, axis=1)  # (batch, features)
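    def get_config(self):
        # No extra constructor arguments to serialize, so the base config is
        # enough. Note (an addition, not in the original script): reloading
        # the .h5 checkpoint still requires
        # tf.keras.models.load_model(..., custom_objects={'Attention': Attention}).
        return super(Attention, self).get_config()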
def train_advanced_model(file_path):
    print("Loading data for advanced model...")
    df = pd.read_csv(file_path)

    # Fill missing facts
    df['related_facts'] = df['related_facts'].fillna("No context provided.")

    # Advanced preprocessing: combine facts, question, and response into one
    # string: "[FACTS] facts [QUERY] question [RES] response".
    # Note: the default Tokenizer filters strip '[' and ']', so the markers
    # survive tokenization as the plain tokens 'facts', 'query', and 'res'.
    df['text'] = "[FACTS] " + df['related_facts'].astype(str) + \
                 " [QUERY] " + df['question'].astype(str) + \
                 " [RES] " + df['engine_response'].astype(str)

    y = df['best'].astype(int).values
    X_text = df['text'].astype(str).str.lower().values

    max_words = 15000
    max_len = 300

    tokenizer = Tokenizer(num_words=max_words, lower=True, split=' ')
    tokenizer.fit_on_texts(X_text)
    X_seq = tokenizer.texts_to_sequences(X_text)
    X_pad = pad_sequences(X_seq, maxlen=max_len)

    X_train, X_test, y_train, y_test = train_test_split(
        X_pad, y, test_size=0.15, random_state=42, stratify=y)

    # Save the tokenizer immediately so it is available as soon as the model
    # starts saving checkpoints.
    with open('tokenizer_advanced.pickle', 'wb') as handle:
        pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print("Tokenizer saved.")
    # Advanced architecture: Bi-LSTM + attention
    inputs = Input(shape=(max_len,))
    embed = Embedding(max_words, 128)(inputs)
    drop1 = SpatialDropout1D(0.3)(embed)
    lstm = Bidirectional(LSTM(64, return_sequences=True))(drop1)
    attn = Attention()(lstm)
    dense1 = Dense(64, activation='relu')(attn)
    drop2 = Dropout(0.4)(dense1)
    outputs = Dense(1, activation='sigmoid')(drop2)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()  # summary() prints directly and returns None
    # Training with checkpointing
    batch_size = 128
    epochs = 2

    # Inverse-frequency weighting for the positive class, e.g. with 9,000
    # negatives and 1,000 positives the positive class gets weight 9.0.
    class_weight = {0: 1.0, 1: len(y[y == 0]) / len(y[y == 1])}

    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        'chatbot_performance_advanced.h5',
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=1
    )

    print("Training advanced model with Attention...")
    model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.1,
        class_weight=class_weight,
        callbacks=[checkpoint],
        verbose=1
    )
    print("Training complete.")

if __name__ == "__main__":
    train_advanced_model('BP_MHS_V1.csv')
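
# Illustrative inference helper (an addition, not part of the original
# script). It assumes the checkpoint and tokenizer files written above exist
# on disk, mirrors the training-time "[FACTS] ... [QUERY] ... [RES] ..."
# template and max_len=300, and reloads the model on every call for
# simplicity.
def predict_best(facts, question, response,
                 model_path='chatbot_performance_advanced.h5',
                 tokenizer_path='tokenizer_advanced.pickle',
                 max_len=300):
    # compile=False: only the forward pass is needed, and skipping
    # compilation avoids restoring optimizer state from the checkpoint.
    model = tf.keras.models.load_model(
        model_path, custom_objects={'Attention': Attention}, compile=False)
    with open(tokenizer_path, 'rb') as handle:
        tokenizer = pickle.load(handle)
    text = f"[FACTS] {facts} [QUERY] {question} [RES] {response}".lower()
    seq = tokenizer.texts_to_sequences([text])
    pad = pad_sequences(seq, maxlen=max_len)
    return float(model.predict(pad, verbose=0)[0][0])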