|
import os
|
|
import sys
|
|
from dataclasses import dataclass
|
|
|
|
from catboost import CatBoostRegressor
|
|
from sklearn.ensemble import (
|
|
AdaBoostRegressor,
|
|
GradientBoostingRegressor,
|
|
RandomForestRegressor,
|
|
)
|
|
from sklearn.linear_model import LinearRegression
|
|
from sklearn.metrics import r2_score
|
|
from sklearn.neighbors import KNeighborsRegressor
|
|
from sklearn.tree import DecisionTreeRegressor
|
|
from xgboost import XGBRegressor
|
|
|
|
from src.exception import CustomException
|
|
from src.logger import logging
|
|
|
|
from src.utils import save_object,evaluate_models
|
|
|
|
@dataclass
class ModelTrainerConfig:
    """Configuration for the model-training step."""

    # Destination of the serialized best model. The type annotation is what
    # makes this a real dataclass field: without it the attribute is a plain
    # class variable that the generated __init__/__repr__ ignore.
    trained_model_file_path: str = os.path.join("artifacts", "model.pkl")
|
|
|
|
class ModelTrainer:
    """Evaluate several candidate regressors, keep the best one and persist it."""

    def __init__(self):
        # Holds the path the selected model is written to.
        self.model_trainer_config = ModelTrainerConfig()

    def initiate_model_trainer(self, train_array, test_array):
        """Select the best-scoring regressor, save it and return its test R2.

        Both arrays are sliced as 2-D arrays whose last column is the target
        and whose remaining columns are the features.

        Args:
            train_array: Training data (features followed by target column).
            test_array: Test data with the same column layout.

        Returns:
            The R2 score of the selected model's predictions on the test
            features, as computed by ``r2_score``.

        Raises:
            CustomException: Wraps any exception raised during the step,
                including the one raised when the best score in the report
                is below 0.6.
        """
        try:
            logging.info("Split training and test input data")
            X_train, y_train, X_test, y_test = (
                train_array[:, :-1],
                train_array[:, -1],
                test_array[:, :-1],
                test_array[:, -1],
            )

            # Candidate estimators, keyed by the display name that is also
            # used to look up each one's search grid in `params`.
            models = {
                "Random Forest": RandomForestRegressor(),
                "Decision Tree": DecisionTreeRegressor(),
                "Gradient Boosting": GradientBoostingRegressor(),
                "Linear Regression": LinearRegression(),
                "XGBRegressor": XGBRegressor(),
                "CatBoosting Regressor": CatBoostRegressor(verbose=False),
                "AdaBoost Regressor": AdaBoostRegressor(),
            }

            # Hyper-parameter grids passed to evaluate_models, one per model.
            params = {
                "Decision Tree": {
                    'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
                },
                "Random Forest": {
                    'n_estimators': [8, 16, 32, 64, 128, 256],
                },
                "Gradient Boosting": {
                    'learning_rate': [.1, .01, .05, .001],
                    'subsample': [0.6, 0.7, 0.75, 0.8, 0.85, 0.9],
                    'n_estimators': [8, 16, 32, 64, 128, 256],
                },
                "Linear Regression": {},
                "XGBRegressor": {
                    'learning_rate': [.1, .01, .05, .001],
                    'n_estimators': [8, 16, 32, 64, 128, 256],
                },
                "CatBoosting Regressor": {
                    'depth': [6, 8, 10],
                    'learning_rate': [0.01, 0.05, 0.1],
                    'iterations': [30, 50, 100],
                },
                "AdaBoost Regressor": {
                    # NOTE(review): 0.5 here vs .05 in the other grids --
                    # possibly a typo; value kept as in the original.
                    'learning_rate': [.1, .01, 0.5, .001],
                    'n_estimators': [8, 16, 32, 64, 128, 256],
                },
            }

            model_report: dict = evaluate_models(
                X_train=X_train,
                y_train=y_train,
                X_test=X_test,
                y_test=y_test,
                models=models,
                param=params,
            )

            # Pick the name with the highest reported score in one pass
            # (the first such name wins on ties).
            best_model_name = max(model_report, key=model_report.get)
            best_model_score = model_report[best_model_name]
            best_model = models[best_model_name]

            if best_model_score < 0.6:
                # NOTE(review): CustomException is called with (e, sys) in the
                # handler below but with a single argument here -- confirm
                # that its constructor accepts this form.
                raise CustomException("No best model found")
            logging.info("Best found model on both training and testing dataset")
            logging.info(f"Selected model: {best_model_name} (score: {best_model_score})")

            save_object(
                file_path=self.model_trainer_config.trained_model_file_path,
                obj=best_model,
            )

            # NOTE(review): no fit() happens in this method, so predict()
            # presumably relies on evaluate_models having fitted the estimator
            # instances in `models` in place -- confirm against src.utils.
            predicted = best_model.predict(X_test)

            r2_square = r2_score(y_test, predicted)
            return r2_square

        except Exception as e:
            # Explicitly chain so the original failure stays attached as the cause.
            raise CustomException(e, sys) from e