Spaces:

rahul2001
/

student_performance

Sleeping

App Files Files Community

student_performance / src /Components /model_tranier.py

rahul2001

predict pipeline

967f65c almost 2 years ago

raw

history blame contribute delete

4.43 kB

	# Basic Import
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	import os
	# Modelling
	from sklearn.metrics import mean_squared_error, r2_score
	from sklearn.neighbors import KNeighborsRegressor
	from sklearn.tree import DecisionTreeRegressor
	from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor,GradientBoostingRegressor
	from sklearn.svm import SVR
	from sklearn.linear_model import LinearRegression, Ridge,Lasso
	from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
	from sklearn.model_selection import RandomizedSearchCV
	from catboost import CatBoostRegressor
	from xgboost import XGBRegressor
	import warnings
	import sys
	from dataclasses import dataclass
	from src.utils import save_object,evaluate_model
	from src.logger import logging
	from src.exception import CustomException


	@dataclass
	class Model_training_config:
	    """Configuration for the model-training step.

	    Attributes:
	        trained_model_path: File path handed to ``save_object`` when the
	            selected model is persisted. Defaults to ``model.pkl`` inside
	            the ``artifact`` directory.
	    """

	    # The type annotation is what makes this a real dataclass field; without
	    # it ``@dataclass`` sees no fields and the generated ``__init__`` cannot
	    # accept an override. The default is still reachable on the class itself.
	    trained_model_path: str = os.path.join("artifact", "model.pkl")
	class Model_trainer:
	    """Evaluate a fixed set of regressors, keep the best one and persist it.

	    The candidate estimators and their hyper-parameter search spaces are
	    defined by the private helpers below; scoring is delegated to
	    ``evaluate_model`` and persistence to ``save_object``.
	    """

	    # Lowest score (as reported by ``evaluate_model``) the winning model must
	    # reach; anything below this is rejected.
	    MIN_ACCEPTABLE_SCORE = 0.6

	    def __init__(self) -> None:
	        self.model_trainer_config = Model_training_config()

	    @staticmethod
	    def _candidate_models() -> dict:
	        """Return newly constructed estimators keyed by display name."""
	        return {
	            "Random Forest": RandomForestRegressor(),
	            "Decision Tree": DecisionTreeRegressor(),
	            "Gradient Boosting": GradientBoostingRegressor(),
	            "Linear Regression": LinearRegression(),
	            "XGBRegressor": XGBRegressor(),
	            "CatBoosting Regressor": CatBoostRegressor(verbose=False),
	            "AdaBoost Regressor": AdaBoostRegressor(),
	        }

	    @staticmethod
	    def _search_space() -> dict:
	        """Return the hyper-parameter options per model name.

	        Keys match those of ``_candidate_models``. Commented-out entries are
	        search dimensions that are currently disabled.
	        """
	        return {
	            "Decision Tree": {
	                'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
	                # 'splitter':['best','random'],
	                # 'max_features':['sqrt','log2'],
	            },
	            "Random Forest": {
	                # 'criterion':['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
	                # 'max_features':['sqrt','log2',None],
	                'n_estimators': [8, 16, 32, 64, 128, 256],
	            },
	            "Gradient Boosting": {
	                # 'loss':['squared_error', 'huber', 'absolute_error', 'quantile'],
	                'learning_rate': [.1, .01, .05, .001],
	                'subsample': [0.6, 0.7, 0.75, 0.8, 0.85, 0.9],
	                # 'criterion':['squared_error', 'friedman_mse'],
	                # 'max_features':['auto','sqrt','log2'],
	                'n_estimators': [8, 16, 32, 64, 128, 256],
	            },
	            "Linear Regression": {},
	            "XGBRegressor": {
	                'learning_rate': [.1, .01, .05, .001],
	                'n_estimators': [8, 16, 32, 64, 128, 256],
	            },
	            "CatBoosting Regressor": {
	                'depth': [6, 8, 10],
	                'learning_rate': [0.01, 0.05, 0.1],
	                'iterations': [30, 50, 100],
	            },
	            "AdaBoost Regressor": {
	                'learning_rate': [.1, .01, 0.5, .001],
	                # 'loss':['linear','square','exponential'],
	                'n_estimators': [8, 16, 32, 64, 128, 256],
	            },
	        }

	    @staticmethod
	    def _select_best_model(model_report: dict) -> tuple:
	        """Return ``(name, score)`` for the highest-scoring report entry.

	        When several entries share the top score, the one that appears first
	        in the mapping wins.

	        Raises:
	            ValueError: If ``model_report`` is empty.
	        """
	        if not model_report:
	            raise ValueError("Model report is empty; no model was evaluated")
	        best_name = max(model_report, key=model_report.get)
	        return best_name, model_report[best_name]

	    def intiate_model_trainer(self, train_array, test_array):
	        """Pick the best-scoring candidate model, save it and score it.

	        Args:
	            train_array: 2-D array supporting ``[:, :-1]`` slicing; the last
	                column is used as the target, the rest as features.
	            test_array: Same layout as ``train_array``, used for testing.

	        Returns:
	            The R² score of the selected model's predictions on the test
	            features, computed with ``r2_score``.

	        Raises:
	            CustomException: Wraps any exception raised during this step,
	                including rejection of a best score below
	                ``MIN_ACCEPTABLE_SCORE``.
	        """
	        try:
	            logging.info("Split training and testing data ")
	            x_train, y_train = train_array[:, :-1], train_array[:, -1]
	            x_test, y_test = test_array[:, :-1], test_array[:, -1]

	            models = self._candidate_models()
	            params = self._search_space()

	            # ``evaluate_model`` is used here as returning a mapping of model
	            # name -> score. NOTE(review): presumably a test-set R²; confirm
	            # in src.utils.
	            model_report: dict = evaluate_model(
	                X=x_train,
	                Y=y_train,
	                X_test=x_test,
	                Y_test=y_test,
	                Models=models,
	                Param=params,
	            )

	            best_model_nm, best_model_score = self._select_best_model(model_report)
	            # NOTE(review): no ``fit`` is called here, so this assumes
	            # ``evaluate_model`` fitted the estimators in ``models`` in
	            # place - TODO confirm.
	            best_model = models[best_model_nm]

	            if best_model_score < self.MIN_ACCEPTABLE_SCORE:
	                # NOTE(review): every other call site passes ``(error, sys)``;
	                # confirm CustomException accepts a single argument.
	                raise CustomException("No best model found")
	            logging.info("Best model Found")

	            # Read the path from this instance's config rather than from the
	            # class, so the object built in ``__init__`` is actually honoured.
	            save_object(
	                file_path=self.model_trainer_config.trained_model_path,
	                obj=best_model,
	            )

	            predicted = best_model.predict(x_test)
	            return r2_score(y_test, predicted)

	        except Exception as e:
	            raise CustomException(e, sys) from e