uservipin committed on
Commit
0ecc8d7
1 Parent(s): b6b9d98

updating the regression module

Browse files

Update the regression module to handle categorical data types.

Files changed (3) hide show
  1. __pycache__/regression.cpython-310.pyc +0 -0
  2. app.py +6 -3
  3. regression.py +121 -0
__pycache__/regression.cpython-310.pyc CHANGED
Binary files a/__pycache__/regression.cpython-310.pyc and b/__pycache__/regression.cpython-310.pyc differ
 
app.py CHANGED
@@ -1,12 +1,15 @@
1
  from classification import ClassificationModels
2
  from regression import RegressionModels
3
  from resume import Resume
4
-
5
  from sklearn.impute import SimpleImputer
6
  from sklearn.pipeline import Pipeline
7
  from sklearn.compose import ColumnTransformer
8
  from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
9
 
 
 
 
10
 
11
  import pandas as pd
12
  import warnings
@@ -343,7 +346,7 @@ def classification():
343
  if max_key == "Random Forests":
344
  random_forests_model = random_forests_model.predict(X)
345
  X['Predict'] = random_forests_model
346
- st.write("Model used for Prediction is: Random Forests Model:\n Predictions are:", random_forests_model)
347
 
348
  if max_key == "SVM":
349
  svm_model = svm_model.predict(X)
@@ -374,7 +377,7 @@ def classification():
374
 
375
 
376
  def regressor():
377
- EDA, train, test = st.tabs(['Train','Test'])
378
 
379
  with train:
380
  st.title("Regression / Train data")
 
1
  from classification import ClassificationModels
2
  from regression import RegressionModels
3
  from resume import Resume
4
+ '''
5
  from sklearn.impute import SimpleImputer
6
  from sklearn.pipeline import Pipeline
7
  from sklearn.compose import ColumnTransformer
8
  from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
9
 
10
+ '''
11
+
12
+
13
 
14
  import pandas as pd
15
  import warnings
 
346
  if max_key == "Random Forests":
347
  random_forests_model = random_forests_model.predict(X)
348
  X['Predict'] = random_forests_model
349
+ st.write("Model used for Prediction is: Random Forests Model:", random_forests_model)
350
 
351
  if max_key == "SVM":
352
  svm_model = svm_model.predict(X)
 
377
 
378
 
379
  def regressor():
380
+ train, test = st.tabs(['Train','Test'])
381
 
382
  with train:
383
  st.title("Regression / Train data")
regression.py CHANGED
@@ -1,3 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, LogisticRegression
2
  from sklearn.preprocessing import PolynomialFeatures
3
  from sklearn.tree import DecisionTreeRegressor
@@ -69,3 +189,4 @@ class RegressionModels:
69
  def predict(self, model_name, X):
70
  model = self.models[model_name]
71
  return model.predict(X)
 
 
1
+ from sklearn.pipeline import Pipeline
2
+ from sklearn.compose import ColumnTransformer
3
+ from sklearn.preprocessing import OneHotEncoder, StandardScaler
4
+ from sklearn.impute import SimpleImputer
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, LogisticRegression
7
+ from sklearn.tree import DecisionTreeRegressor
8
+ from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
9
+ from sklearn.svm import SVR
10
+ from xgboost import XGBRegressor
11
+ from lightgbm import LGBMRegressor
12
+ from sklearn.metrics import mean_squared_error, r2_score
13
+
14
class RegressionModels:
    """Train, evaluate and apply a fixed registry of regression estimators.

    Typical usage is ``add_data`` -> ``split_data`` -> ``fit`` / ``train`` /
    ``evaluate`` -> ``predict``.  Every estimator is wrapped in a pipeline
    that first imputes/scales numeric columns and imputes/one-hot-encodes
    object (string) columns, so the feature frame may contain categorical
    data.

    Fitted pipelines are remembered per model name in ``self.pipelines`` so
    that ``predict`` reuses the preprocessing learned during training.
    """

    def __init__(self):
        # (X, y) tuple stored by add_data; None until data is supplied.
        self.data = None
        # Train/test partitions populated by split_data.
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        # Registry of estimators, keyed by the display name callers pass in.
        self.models = {
            'Linear Regression': LinearRegression(),
            # NOTE(review): this is a plain LinearRegression; no polynomial
            # feature expansion is applied anywhere in this class.
            'Polynomial Regression': LinearRegression(),
            'Ridge Regression': Ridge(),
            'Lasso Regression': Lasso(),
            'ElasticNet Regression': ElasticNet(),
            # NOTE(review): LogisticRegression is a classifier; it will
            # likely reject a continuous target - confirm it belongs here.
            'Logistic Regression': LogisticRegression(),
            'Decision Tree Regression': DecisionTreeRegressor(),
            'Random Forest Regression': RandomForestRegressor(),
            'Gradient Boosting Regression': GradientBoostingRegressor(),
            'Support Vector Regression (SVR)': SVR(),
            'XGBoost': XGBRegressor(),
            'LightGBM': LGBMRegressor(),
        }
        # Fitted pipelines (preprocessor + estimator), keyed by model name.
        self.pipelines = {}

    def add_data(self, X, y):
        """Store the feature frame ``X`` and target ``y`` for later splitting."""
        self.data = (X, y)

    def split_data(self, test_size=0.2, random_state=None):
        """Split the stored data into train and test partitions.

        Raises:
            ValueError: if ``add_data`` has not been called yet.
        """
        if self.data is None:
            raise ValueError("No data provided. Use add_data method to add data first.")
        X, y = self.data
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

    def build_preprocessor(self):
        """Return an unfitted ColumnTransformer derived from ``self.X_train``.

        Columns of dtype int64/float64 are mean-imputed and standardised;
        columns of dtype object are imputed with the most frequent value and
        one-hot encoded (unknown categories are ignored at transform time).
        Columns of any other dtype are not routed to either transformer.
        """
        # Separate numerical and categorical columns
        numeric_features = self.X_train.select_dtypes(include=['int64', 'float64']).columns
        categorical_features = self.X_train.select_dtypes(include=['object']).columns

        # Define transformers for numerical and categorical data
        numeric_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler()),
        ])
        categorical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('onehot', OneHotEncoder(handle_unknown='ignore')),
        ])

        # Combine transformers using ColumnTransformer
        return ColumnTransformer(
            transformers=[
                ('num', numeric_transformer, numeric_features),
                ('cat', categorical_transformer, categorical_features),
            ])

    def _build_pipeline(self, model_name):
        """Return an unfitted preprocessor + estimator pipeline for ``model_name``."""
        model = self.models[model_name]
        return Pipeline(steps=[
            ('preprocessor', self.build_preprocessor()),
            ('model', model),
        ])

    def _fit_pipeline(self, model_name):
        """Fit a pipeline on the training split, remember it, and return it."""
        model_pipeline = self._build_pipeline(model_name)
        model_pipeline.fit(self.X_train, self.y_train)
        # Keep the fitted pipeline so predict() can reuse the learned
        # preprocessing instead of building a new, unfitted one.
        self.pipelines[model_name] = model_pipeline
        return model_pipeline

    def fit(self, model_name):
        """Fit the named model on the training split.

        Raises:
            ValueError: if the data has not been split yet.
            KeyError: if ``model_name`` is not in the registry.
        """
        if self.X_train is None or self.y_train is None:
            raise ValueError("Data not split. Use split_data method to split data into training and testing sets.")
        self._fit_pipeline(model_name)

    def train(self, model_name):
        """Fit the named model and return its predictions on the test split.

        Raises:
            ValueError: if the data has not been split yet.
            KeyError: if ``model_name`` is not in the registry.
        """
        if self.X_train is None or self.y_train is None or self.X_test is None:
            raise ValueError("Data not split. Use split_data method to split data into training and testing sets.")
        model_pipeline = self._fit_pipeline(model_name)
        return model_pipeline.predict(self.X_test)

    def evaluate(self, model_name):
        """Fit the named model and score it on the test split.

        Returns:
            A ``(mse, r2)`` tuple computed from the test-split predictions.

        Raises:
            ValueError: if the data has not been split yet.
            KeyError: if ``model_name`` is not in the registry.
        """
        if self.X_test is None or self.y_test is None:
            raise ValueError("Data not split. Use split_data method to split data into training and testing sets.")
        model_pipeline = self._fit_pipeline(model_name)
        y_pred = model_pipeline.predict(self.X_test)
        mse = mean_squared_error(self.y_test, y_pred)
        r2 = r2_score(self.y_test, y_pred)
        return mse, r2

    def predict(self, model_name, X):
        """Predict targets for ``X`` with the previously fitted named model.

        The pipeline fitted by ``fit``, ``train`` or ``evaluate`` is reused,
        so ``X`` goes through the same imputation, scaling and encoding that
        was learned from the training split.

        Raises:
            KeyError: if ``model_name`` is not in the registry.
            NotFittedError: if the named model has not been fitted yet.
        """
        # Local import: the exception is only needed on this error path.
        from sklearn.exceptions import NotFittedError

        # Preserve the KeyError for names that are not registered at all.
        self.models[model_name]
        model_pipeline = self.pipelines.get(model_name)
        if model_pipeline is None:
            raise NotFittedError(
                f"Model '{model_name}' has not been fitted. "
                "Call fit, train or evaluate before predict."
            )
        return model_pipeline.predict(X)
115
+
116
+
117
+
118
+
119
+
120
+ '''
121
  from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, LogisticRegression
122
  from sklearn.preprocessing import PolynomialFeatures
123
  from sklearn.tree import DecisionTreeRegressor
 
189
  def predict(self, model_name, X):
190
  model = self.models[model_name]
191
  return model.predict(X)
192
+ '''