# Spaces: Sleeping
# Basic imports
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import os | |
# Modelling
from sklearn.metrics import mean_squared_error, r2_score | |
from sklearn.neighbors import KNeighborsRegressor | |
from sklearn.tree import DecisionTreeRegressor | |
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor,GradientBoostingRegressor | |
from sklearn.svm import SVR | |
from sklearn.linear_model import LinearRegression, Ridge,Lasso | |
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error | |
from sklearn.model_selection import RandomizedSearchCV | |
from catboost import CatBoostRegressor | |
from xgboost import XGBRegressor | |
import warnings | |
import sys | |
from dataclasses import dataclass | |
from src.utils import save_object,evaluate_model | |
from src.logger import logging | |
from src.exception import CustomException | |
@dataclass
class Model_training_config:
    """Configuration for the model-training step.

    The field has a default, so ``Model_training_config()`` needs no
    arguments and ``Model_training_config.trained_model_path`` is still
    readable directly on the class.
    """

    # Destination path passed to ``save_object`` for the selected model.
    trained_model_path: str = os.path.join("artifact", "model.pkl")
class Model_trainer:
    """Train several candidate regressors and persist the best-scoring one.

    The candidates and their hyper-parameter grids are handed to
    ``evaluate_model``; the estimator with the highest reported score is
    saved to ``self.model_trainer_config.trained_model_path``.
    """

    # Lowest score (as reported by ``evaluate_model``) that is still accepted.
    MIN_ACCEPTABLE_SCORE = 0.6

    def __init__(self) -> None:
        self.model_trainer_config = Model_training_config()

    @staticmethod
    def _candidate_models() -> dict:
        """Return fresh estimator instances keyed by display name."""
        return {
            "Random Forest": RandomForestRegressor(),
            "Decision Tree": DecisionTreeRegressor(),
            "Gradient Boosting": GradientBoostingRegressor(),
            "Linear Regression": LinearRegression(),
            "XGBRegressor": XGBRegressor(),
            "CatBoosting Regressor": CatBoostRegressor(verbose=False),
            "AdaBoost Regressor": AdaBoostRegressor(),
        }

    @staticmethod
    def _search_space() -> dict:
        """Return the hyper-parameter grid for each candidate, keyed like the models."""
        return {
            "Decision Tree": {
                'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
                # 'splitter':['best','random'],
                # 'max_features':['sqrt','log2'],
            },
            "Random Forest": {
                # 'criterion':['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
                # 'max_features':['sqrt','log2',None],
                'n_estimators': [8, 16, 32, 64, 128, 256],
            },
            "Gradient Boosting": {
                # 'loss':['squared_error', 'huber', 'absolute_error', 'quantile'],
                'learning_rate': [.1, .01, .05, .001],
                'subsample': [0.6, 0.7, 0.75, 0.8, 0.85, 0.9],
                # 'criterion':['squared_error', 'friedman_mse'],
                # 'max_features':['auto','sqrt','log2'],
                'n_estimators': [8, 16, 32, 64, 128, 256],
            },
            "Linear Regression": {},
            "XGBRegressor": {
                'learning_rate': [.1, .01, .05, .001],
                'n_estimators': [8, 16, 32, 64, 128, 256],
            },
            "CatBoosting Regressor": {
                'depth': [6, 8, 10],
                'learning_rate': [0.01, 0.05, 0.1],
                'iterations': [30, 50, 100],
            },
            "AdaBoost Regressor": {
                'learning_rate': [.1, .01, 0.5, .001],
                # 'loss':['linear','square','exponential'],
                'n_estimators': [8, 16, 32, 64, 128, 256],
            },
        }

    # NOTE: the method name keeps its original spelling ("intiate") because
    # callers outside this file may depend on it.
    def intiate_model_trainer(self, train_array, test_array):
        """Select, save and score the best regressor.

        Args:
            train_array: 2-D array; every column but the last is a feature,
                the last column is the target.
            test_array: 2-D array laid out the same way as ``train_array``.

        Returns:
            The R^2 score of the selected model's predictions on the test
            features, computed with ``sklearn.metrics.r2_score``.

        Raises:
            CustomException: wraps any error raised during the run, including
                the case where no candidate reaches ``MIN_ACCEPTABLE_SCORE``.
        """
        try:
            logging.info("Split training and testing data ")
            x_train, y_train = train_array[:, :-1], train_array[:, -1]
            x_test, y_test = test_array[:, :-1], test_array[:, -1]

            models = self._candidate_models()
            params = self._search_space()

            model_report: dict = evaluate_model(
                X=x_train,
                Y=y_train,
                X_test=x_test,
                Y_test=y_test,
                Models=models,
                Param=params,
            )

            # Highest-scoring entry; on ties the first one in the report wins.
            best_model_nm = max(model_report, key=model_report.get)
            best_model_score = model_report[best_model_nm]
            # Assumes evaluate_model fitted the estimators in ``models`` in
            # place, so this instance is already trained -- TODO confirm.
            best_model = models[best_model_nm]

            if best_model_score < self.MIN_ACCEPTABLE_SCORE:
                # Raise a plain exception here: the handler below wraps it
                # with the two-argument CustomException(e, sys) form that is
                # used everywhere else in this file.
                raise ValueError("No best model found")
            logging.info("Best model Found")

            # Use the instance's config so the path set in __init__ is honoured.
            save_object(
                file_path=self.model_trainer_config.trained_model_path,
                obj=best_model,
            )

            predicted = best_model.predict(x_test)
            return r2_score(y_test, predicted)
        except Exception as e:
            raise CustomException(e, sys) from e