| import pandas as pd
|
| import numpy as np
|
| import matplotlib.pyplot as plt
|
| import tensorflow as tf
|
| from tensorflow.keras.models import Sequential
|
| from tensorflow.keras.layers import SimpleRNN, Dense, LSTM, Dropout
|
| from sklearn.model_selection import train_test_split
|
| from sklearn.preprocessing import MinMaxScaler
|
| import joblib
|
| import os
|
|
|
|
|
def load_data(file_path):
    """Read the CSV at *file_path* into a DataFrame, echo its head, and return it.

    Parameters
    ----------
    file_path : str
        Path to the CSV file to load.

    Returns
    -------
    pandas.DataFrame
        The loaded dataset.
    """
    dataset = pd.read_csv(file_path)
    print("Dataset Head:")
    print(dataset.head())
    return dataset
|
|
|
|
|
def preprocess_data(df):
    """Split, scale, and reshape the dataset for the RNN.

    Fix over the original: the MinMax scalers are now fit on the
    *training* split only, then applied to both splits. The original fit
    them on the full dataset before splitting, which leaks test-set
    statistics (min/max) into the scaling — a classic data-leakage bug.
    Splitting the raw arrays with the same ``random_state`` produces the
    same row partition as before, so only the scaling statistics change.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'number_courses', 'time_study' and 'Marks' columns.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, scaler_X, scaler_y)`` where
        the X arrays have shape ``(samples, 1, 2)`` — one timestep of two
        features, as Keras recurrent layers expect — and the y arrays
        have shape ``(samples, 1)``.
    """
    X = df[['number_courses', 'time_study']].values
    y = df['Marks'].values.reshape(-1, 1)

    # Split FIRST so the scalers never see the test rows.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()

    # Fit on train only; transform (not fit_transform) the test split.
    X_train = scaler_X.fit_transform(X_train)
    X_test = scaler_X.transform(X_test)
    y_train = scaler_y.fit_transform(y_train)
    y_test = scaler_y.transform(y_test)

    # Reshape to (samples, timesteps=1, features) for the LSTM input.
    X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
    X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

    return X_train, X_test, y_train, y_test, scaler_X, scaler_y
|
|
|
|
|
def build_model(input_shape):
    """Construct and compile the stacked-LSTM regression model.

    Parameters
    ----------
    input_shape : tuple
        ``(timesteps, features)`` accepted by the first LSTM layer.

    Returns
    -------
    keras.Model
        Sequential model compiled with the Adam optimizer, MSE loss,
        and MAE as an extra metric; final Dense(1) emits the regression
        output.
    """
    model = Sequential()
    model.add(LSTM(64, activation='relu', input_shape=input_shape,
                   return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(32, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model
|
|
|
|
|
if __name__ == "__main__":
    file_path = 'Student_Marks.csv'
    if not os.path.exists(file_path):
        print(f"Error: {file_path} not found.")
        # Exit with a non-zero status so shells/CI detect the failure;
        # the original bare exit() terminated with status 0 (success).
        raise SystemExit(1)

    df = load_data(file_path)
    X_train, X_test, y_train, y_test, scaler_X, scaler_y = preprocess_data(df)

    print(f"X_train shape: {X_train.shape}")
    print(f"y_train shape: {y_train.shape}")

    # (timesteps, features) comes from the reshaped training tensor.
    model = build_model((X_train.shape[1], X_train.shape[2]))
    model.summary()

    print("\nStarting training...")
    history = model.fit(
        X_train, y_train,
        epochs=100,
        batch_size=8,
        validation_split=0.1,
        verbose=1
    )

    print("\nEvaluating model...")
    loss, mae = model.evaluate(X_test, y_test)
    print(f"Test Loss (MSE): {loss:.4f}")
    print(f"Test MAE: {mae:.4f}")

    # Save the train/validation loss curves for later inspection.
    plt.figure(figsize=(10, 5))
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Val Loss')
    plt.title('Model Loss (MSE)')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig('loss_plot.png')
    plt.close()  # release the figure; nothing else draws on it
    print("Loss plot saved as 'loss_plot.png'")

    # Invert the MinMax scaling so predictions are in original Marks units.
    y_pred_scaled = model.predict(X_test)
    y_pred = scaler_y.inverse_transform(y_pred_scaled)
    y_actual = scaler_y.inverse_transform(y_test)

    print("\nSample Predictions:")
    # Guard against a test split smaller than 5 rows (original indexed
    # range(5) unconditionally and would raise IndexError).
    for i in range(min(5, len(y_pred))):
        print(f"Actual: {y_actual[i][0]:.2f}, Predicted: {y_pred[i][0]:.2f}")

    # Persist model + scalers so inference can reproduce the preprocessing.
    model.save('student_marks_rnn_model.h5')
    joblib.dump(scaler_X, 'scaler_X.pkl')
    joblib.dump(scaler_y, 'scaler_y.pkl')
    print("\nModel saved as 'student_marks_rnn_model.h5'")
    print("Scalers saved as 'scaler_X.pkl' and 'scaler_y.pkl'")
|
|