Spaces:

SURESHBEEKHANI
/

StudentExamPerformancePrediction

Running

App Files Files Community

StudentExamPerformancePrediction / src /components /model_trainer.py

SURESHBEEKHANI

Upload 15 files

72475ed verified 10 months ago

raw

history blame contribute delete

4.46 kB

	import os
	import sys
	from dataclasses import dataclass

	from catboost import CatBoostRegressor
	from sklearn.ensemble import (
	AdaBoostRegressor,
	GradientBoostingRegressor,
	RandomForestRegressor,
	)
	from sklearn.linear_model import LinearRegression
	from sklearn.metrics import r2_score
	from sklearn.neighbors import KNeighborsRegressor
	from sklearn.tree import DecisionTreeRegressor
	from xgboost import XGBRegressor

	from src.exception import CustomException
	from src.logger import logging

	from src.utils import save_object,evaluate_models

	@dataclass
	class ModelTrainerConfig:
	    """Configuration for the model-training step.

	    Holds the location where the selected model is written by
	    ``ModelTrainer.initiate_model_trainer``.
	    """

	    # Destination path of the pickled best model. The type annotation is
	    # required: without it @dataclass does not treat the attribute as a
	    # field, so it could not be overridden via the constructor and would
	    # be ignored by the generated __repr__/__eq__.
	    trained_model_file_path: str = os.path.join("artifacts", "model.pkl")

	class ModelTrainer:
	    """Evaluate a set of candidate regressors and persist the best one."""

	    def __init__(self):
	        self.model_trainer_config = ModelTrainerConfig()

	    def initiate_model_trainer(self, train_array, test_array):
	        """Select, save and score the best regressor among the candidates.

	        The last column of each array is used as the target and all
	        preceding columns as features. Every candidate estimator is handed
	        to ``evaluate_models`` together with its hyper-parameter grid; the
	        name with the highest reported score is selected, the matching
	        estimator is saved to ``trained_model_file_path`` and then scored
	        on the test split.

	        Args:
	            train_array: 2-D array supporting ``[:, :-1]`` / ``[:, -1]``
	                slicing (features followed by the target column).
	            test_array: Same layout as ``train_array``, for the test split.

	        Returns:
	            The R2 score of the selected model's predictions on the test
	            features.

	        Raises:
	            CustomException: Wraps any exception raised during the run,
	                including the case where no model reaches a score of 0.6.
	        """
	        try:
	            logging.info("Split training and test input data")
	            X_train, y_train = train_array[:, :-1], train_array[:, -1]
	            X_test, y_test = test_array[:, :-1], test_array[:, -1]

	            models = {
	                "Random Forest": RandomForestRegressor(),
	                "Decision Tree": DecisionTreeRegressor(),
	                "Gradient Boosting": GradientBoostingRegressor(),
	                "Linear Regression": LinearRegression(),
	                "XGBRegressor": XGBRegressor(),
	                "CatBoosting Regressor": CatBoostRegressor(verbose=False),
	                "AdaBoost Regressor": AdaBoostRegressor(),
	            }

	            # Hyper-parameter grids keyed by the same names as `models`.
	            # Commented-out entries are search dimensions left disabled.
	            params = {
	                "Decision Tree": {
	                    'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
	                    # 'splitter':['best','random'],
	                    # 'max_features':['sqrt','log2'],
	                },
	                "Random Forest": {
	                    # 'criterion':['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
	                    # 'max_features':['sqrt','log2',None],
	                    'n_estimators': [8, 16, 32, 64, 128, 256],
	                },
	                "Gradient Boosting": {
	                    # 'loss':['squared_error', 'huber', 'absolute_error', 'quantile'],
	                    'learning_rate': [.1, .01, .05, .001],
	                    'subsample': [0.6, 0.7, 0.75, 0.8, 0.85, 0.9],
	                    # 'criterion':['squared_error', 'friedman_mse'],
	                    # 'max_features':['auto','sqrt','log2'],
	                    'n_estimators': [8, 16, 32, 64, 128, 256],
	                },
	                "Linear Regression": {},
	                "XGBRegressor": {
	                    'learning_rate': [.1, .01, .05, .001],
	                    'n_estimators': [8, 16, 32, 64, 128, 256],
	                },
	                "CatBoosting Regressor": {
	                    'depth': [6, 8, 10],
	                    'learning_rate': [0.01, 0.05, 0.1],
	                    'iterations': [30, 50, 100],
	                },
	                "AdaBoost Regressor": {
	                    'learning_rate': [.1, .01, 0.5, .001],
	                    # 'loss':['linear','square','exponential'],
	                    'n_estimators': [8, 16, 32, 64, 128, 256],
	                },
	            }

	            # NOTE(review): evaluate_models lives in src.utils and is not
	            # visible here. It is used as returning {model name: score};
	            # presumably it also fits the estimators in `models` in place,
	            # since the selected one is used for predict() below - confirm.
	            model_report: dict = evaluate_models(
	                X_train=X_train,
	                y_train=y_train,
	                X_test=X_test,
	                y_test=y_test,
	                models=models,
	                param=params,
	            )

	            if not model_report:
	                raise ValueError("evaluate_models returned no model scores")

	            # Pick the name with the highest score; on ties the first entry
	            # in report order wins, same as the previous index() lookup.
	            best_model_name = max(model_report, key=model_report.get)
	            best_model_score = model_report[best_model_name]
	            best_model = models[best_model_name]

	            if best_model_score < 0.6:
	                # Raised as a plain exception on purpose: the handler below
	                # wraps it as CustomException(e, sys), the same two-argument
	                # form used everywhere else in this class.
	                raise ValueError("No best model found")
	            logging.info("Best found model on both training and testing dataset")

	            save_object(
	                file_path=self.model_trainer_config.trained_model_file_path,
	                obj=best_model,
	            )

	            predicted = best_model.predict(X_test)
	            return r2_score(y_test, predicted)

	        except Exception as e:
	            raise CustomException(e, sys) from e