uservipin committed on
Commit
905c0d8
1 Parent(s): 0ecc8d7

Modified regressor model

Browse files
Files changed (3) hide show
  1. __pycache__/regression.cpython-310.pyc +0 -0
  2. app.py +47 -18
  3. regression.py +124 -2
__pycache__/regression.cpython-310.pyc CHANGED
Binary files a/__pycache__/regression.cpython-310.pyc and b/__pycache__/regression.cpython-310.pyc differ
 
app.py CHANGED
@@ -1,16 +1,12 @@
1
  from classification import ClassificationModels
2
  from regression import RegressionModels
3
  from resume import Resume
4
- '''
5
  from sklearn.impute import SimpleImputer
6
  from sklearn.pipeline import Pipeline
7
  from sklearn.compose import ColumnTransformer
8
  from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
9
 
10
- '''
11
-
12
-
13
-
14
  import pandas as pd
15
  import warnings
16
  import streamlit as st
@@ -312,16 +308,6 @@ def classification():
312
  #spectra_df1 = spectra_df1.drop(columns=['Disease'])
313
  st.write(spectra_df1.head(5))
314
  st.divider()
315
-
316
- model_dict ={
317
- "Naive Bayes Classifier":'GaussianNB()',
318
- "Logistic Regression":'LogisticRegression()',
319
- "Decision Tree":'DecisionTreeClassifier()',
320
- "Random Forests":'RandomForestClassifier()',
321
- "SVM":'SVC()',
322
- "KNN":'KNeighborsClassifier()',
323
- "K- Means Clustering":'KMeans()'
324
- }
325
 
326
  X= spectra_df1
327
  if max_key == "Naive Bayes Classifier":
@@ -453,15 +439,58 @@ def regressor():
453
  models.split_data()
454
 
455
  # Train and evaluate selected models
 
 
456
  for model_name in selected_models:
457
- st.subheader(f"Model: {model_name}")
458
  models.fit(model_name)
459
  y_pred = models.train(model_name)
460
  mse, r2 = models.evaluate(model_name)
461
- st.write(f"MSE: {mse}")
462
- st.write(f"R-squared: {r2}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
 
464
 
 
 
465
  def NLP():
466
  Gemini_Chat,Gemini_Vision,Gemini_PDF, Bert, = st.tabs(['Gemini-Chat','Gemini-Vision',"Gemini-PDF Chat",'ChatBot'])
467
 
 
1
  from classification import ClassificationModels
2
  from regression import RegressionModels
3
  from resume import Resume
4
+
5
  from sklearn.impute import SimpleImputer
6
  from sklearn.pipeline import Pipeline
7
  from sklearn.compose import ColumnTransformer
8
  from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
9
 
 
 
 
 
10
  import pandas as pd
11
  import warnings
12
  import streamlit as st
 
308
  #spectra_df1 = spectra_df1.drop(columns=['Disease'])
309
  st.write(spectra_df1.head(5))
310
  st.divider()
 
 
 
 
 
 
 
 
 
 
311
 
312
  X= spectra_df1
313
  if max_key == "Naive Bayes Classifier":
 
439
  models.split_data()
440
 
441
  # Train and evaluate selected models
442
+ best_model = None
443
+ best_metric = float('inf') # Initialize with a high value for MSE (lower is better)
444
  for model_name in selected_models:
445
+ # st.subheader(f"Model: {model_name}")
446
  models.fit(model_name)
447
  y_pred = models.train(model_name)
448
  mse, r2 = models.evaluate(model_name)
449
+ # st.write(f"MSE: {mse}")
450
+ # st.write(f"R-squared: {r2}")
451
+
452
+ # Update best model based on MSE (NOTE(review): the comparison below tests r2, not mse — confirm which metric is intended)
453
+ if r2 < best_metric:
454
+ best_model = model_name
455
+ best_metric = r2
456
+
457
+
458
+ # Perform testing based on the best model
459
+ if best_model:
460
+ st.subheader(f"Best Model: {best_model}")
461
+ test_mse, test_r2 = models.evaluate(best_model)
462
+ st.write(f"Test MSE: {test_mse}")
463
+ st.write(f"Test R-squared: {test_r2}")
464
+ # You can also visualize the predictions vs. true values, residual plots, etc. here
465
+ else:
466
+ st.write("No best model selected.")
467
+
468
+
469
+
470
+ with test:
471
+ st.title("Regression / Test")
472
+ spectra_1 = st.file_uploader("Upload file test the model", type={"csv", "txt"})
473
+ if spectra_1 is not None:
474
+ spectra_df1 = pd.read_csv(spectra_1)
475
+ st.write(spectra_df1.head(5))
476
+ st.divider()
477
+ st.write("models",models)
478
+ # models = RegressionModels()
479
+ if best_model:
480
+ # st.subheader(f"Best Model: {best_model}")
481
+ st.write("best model", best_model)
482
+ y_pred= models.predict(model_name = best_model,X = spectra_df1)
483
+ # st.write(f"Test MSE: {test_mse}")
484
+ st.write(f"Y pred is : {max(y_pred)}")
485
+ # You can also visualize the predictions vs. true values, residual plots, etc. here
486
+ else:
487
+ st.write("No best model selected.")
488
+
489
+
490
 
491
 
492
+
493
+
494
  def NLP():
495
  Gemini_Chat,Gemini_Vision,Gemini_PDF, Bert, = st.tabs(['Gemini-Chat','Gemini-Vision',"Gemini-PDF Chat",'ChatBot'])
496
 
regression.py CHANGED
@@ -10,6 +10,125 @@ from sklearn.svm import SVR
10
  from xgboost import XGBRegressor
11
  from lightgbm import LGBMRegressor
12
  from sklearn.metrics import mean_squared_error, r2_score
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  class RegressionModels:
15
  def __init__(self):
@@ -110,10 +229,13 @@ class RegressionModels:
110
  model_pipeline = Pipeline(steps=[
111
  ('preprocessor', preprocessor),
112
  ('model', model)
113
- ])
 
 
 
114
  return model_pipeline.predict(X)
115
 
116
-
117
 
118
 
119
 
 
10
  from xgboost import XGBRegressor
11
  from lightgbm import LGBMRegressor
12
  from sklearn.metrics import mean_squared_error, r2_score
13
+ import streamlit as st
14
+
15
+
16
+
17
+
18
+
19
+
20
+ class RegressionModels:
21
+ def __init__(self):
22
+ self.data = None
23
+ self.X_train = None
24
+ self.X_test = None
25
+ self.y_train = None
26
+ self.y_test = None
27
+ self.column_transformer = None # Initialize as None
28
+ self.models = {
29
+ 'Linear Regression': LinearRegression(),
30
+ 'Polynomial Regression': LinearRegression(),
31
+ 'Ridge Regression': Ridge(),
32
+ 'Lasso Regression': Lasso(),
33
+ 'ElasticNet Regression': ElasticNet(),
34
+ 'Logistic Regression': LogisticRegression(),
35
+ 'Decision Tree Regression': DecisionTreeRegressor(),
36
+ 'Random Forest Regression': RandomForestRegressor(),
37
+ 'Gradient Boosting Regression': GradientBoostingRegressor(),
38
+ 'Support Vector Regression (SVR)': SVR(),
39
+ 'XGBoost': XGBRegressor(),
40
+ 'LightGBM': LGBMRegressor()
41
+ }
42
+
43
+ def add_data(self, X, y):
44
+ self.data = (X, y)
45
+
46
+ def split_data(self, test_size=0.2, random_state=None):
47
+ if self.data is None:
48
+ raise ValueError("No data provided. Use add_data method to add data first.")
49
+ X, y = self.data
50
+ self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
51
+
52
+ def build_preprocessor(self):
53
+ if self.column_transformer is not None:
54
+ return self.column_transformer # Return the existing fitted ColumnTransformer
55
+ else:
56
+ # Separate numerical and categorical columns
57
+ numeric_features = self.X_train.select_dtypes(include=['int64', 'float64']).columns
58
+ categorical_features = self.X_train.select_dtypes(include=['object']).columns
59
+
60
+ # Define transformers for numerical and categorical data
61
+ numeric_transformer = Pipeline(steps=[
62
+ ('imputer', SimpleImputer(strategy='mean')),
63
+ ('scaler', StandardScaler())
64
+ ])
65
+ categorical_transformer = Pipeline(steps=[
66
+ ('imputer', SimpleImputer(strategy='most_frequent')),
67
+ ('onehot', OneHotEncoder(handle_unknown='ignore'))
68
+ ])
69
+
70
+ # Combine transformers using ColumnTransformer
71
+ self.column_transformer = ColumnTransformer(
72
+ transformers=[
73
+ ('num', numeric_transformer, numeric_features),
74
+ ('cat', categorical_transformer, categorical_features)
75
+ ])
76
+ return self.column_transformer
77
+
78
+ def fit(self, model_name):
79
+ if self.X_train is None or self.y_train is None:
80
+ raise ValueError("Data not split. Use split_data method to split data into training and testing sets.")
81
+ model = self.models[model_name]
82
+ preprocessor = self.build_preprocessor()
83
+ model_pipeline = Pipeline(steps=[
84
+ ('preprocessor', preprocessor),
85
+ ('model', model)
86
+ ])
87
+ model_pipeline.fit(self.X_train, self.y_train)
88
+
89
+ def train(self, model_name):
90
+ if self.X_train is None or self.y_train is None or self.X_test is None:
91
+ raise ValueError("Data not split. Use split_data method to split data into training and testing sets.")
92
+ model = self.models[model_name]
93
+ preprocessor = self.build_preprocessor()
94
+ model_pipeline = Pipeline(steps=[
95
+ ('preprocessor', preprocessor),
96
+ ('model', model)
97
+ ])
98
+ model_pipeline.fit(self.X_train, self.y_train)
99
+ y_pred = model_pipeline.predict(self.X_test)
100
+ return y_pred
101
+
102
+ def evaluate(self, model_name):
103
+ if self.X_test is None or self.y_test is None:
104
+ raise ValueError("Data not split. Use split_data method to split data into training and testing sets.")
105
+ model = self.models[model_name]
106
+ preprocessor = self.build_preprocessor()
107
+ model_pipeline = Pipeline(steps=[
108
+ ('preprocessor', preprocessor),
109
+ ('model', model)
110
+ ])
111
+ model_pipeline.fit(self.X_train, self.y_train)
112
+ y_pred = model_pipeline.predict(self.X_test)
113
+ mse = mean_squared_error(self.y_test, y_pred)
114
+ r2 = r2_score(self.y_test, y_pred)
115
+ return mse, r2
116
+
117
+ def predict(self, model_name, X):
118
+ model = self.models[model_name]
119
+ preprocessor = self.build_preprocessor() # Ensure that the ColumnTransformer is fitted
120
+ model_pipeline = Pipeline(steps=[
121
+ ('preprocessor', preprocessor),
122
+ ('model', model)
123
+ ])
124
+ return model_pipeline.predict(X)
125
+
126
+
127
+
128
+
129
+
130
+ '''
131
+
132
 
133
  class RegressionModels:
134
  def __init__(self):
 
229
  model_pipeline = Pipeline(steps=[
230
  ('preprocessor', preprocessor),
231
  ('model', model)
232
+ ])
233
+
234
+ st.write("Model", model)
235
+ st.write(X.head(4))
236
  return model_pipeline.predict(X)
237
 
238
+ '''
239
 
240
 
241