PreyPatel committed
Commit 127e0e7
1 Parent(s): 2a4f416
Files changed (1)
  1. app.py +43 -59
app.py CHANGED
@@ -1,44 +1,32 @@
  import streamlit as st
- import pandas as pd
  import numpy as np
  from sklearn.datasets import make_regression
  from sklearn.model_selection import train_test_split, KFold
  from sklearn.ensemble import BaggingRegressor, GradientBoostingRegressor, AdaBoostRegressor
- from sklearn.linear_model import LinearRegression, Lasso, Ridge
+ from sklearn.linear_model import LinearRegression
+ from sklearn.tree import DecisionTreeRegressor
  from sklearn.svm import SVR
  from sklearn.metrics import mean_squared_error
  import matplotlib.pyplot as plt
 
-
  st.title('Boosting in Regression')
-
- DATE_COLUMN = 'date/time'
- DATA_URL = ('https://s3-us-west-2.amazonaws.com/'
-             'streamlit-demo-data/uber-raw-data-sep14.csv.gz')
-
- @st.cache_data
- def load_data(nrows):
-     data = pd.read_csv(DATA_URL, nrows=nrows)
-     lowercase = lambda x: str(x).lower()
-     data.rename(lowercase, axis='columns', inplace=True)
-     data[DATE_COLUMN] = pd.to_datetime(data[DATE_COLUMN])
-     return data
-
+ st.write("Here we visualise the effect of the number of estimators in ensemble methods for regression; you can also try out different base estimators.")
+ st.write("The Magic button finds the best individual estimator on the selected dataset.")
  @st.cache_data
  def make_data(dataset_option):
      opt = dataset_option.split()[0]
      if opt == "100":
          X, y = make_regression(n_samples=100,
-                                n_features=10, n_informative=2,
-                                random_state=2)
+                                n_features=2,
+                                random_state=42)
      elif opt == "200":
          X, y = make_regression(n_samples=200,
-                                n_features=5, n_informative=2,
-                                random_state=4)
+                                n_features=2,
+                                random_state=56)
      elif opt == "150":
          X, y = make_regression(n_samples=150,
-                                n_features=7, n_informative=2,
-                                random_state=2)
+                                n_features=2,
+                                random_state=25)
      else:
          X, y = make_regression(random_state=10)
      return X, y
@@ -46,10 +34,8 @@ def make_data(dataset_option):
  def estimator_model(estimator_type):
      if estimator_type == "Linear regressor":
          model = LinearRegression()
-     elif estimator_type == "Ridge regressor":
-         model = Ridge()
-     elif estimator_type == "Lasso regressor":
-         model = Lasso()
+     elif estimator_type == "Decision Tree regressor":
+         model = DecisionTreeRegressor()
      elif estimator_type == "SVR":
          model = SVR()
      else:
@@ -67,7 +53,31 @@ plt.title("Dataset")
  plt.scatter(X[:,0], y)
  st.pyplot(fig)
 
- options = ['Linear regressor', 'Ridge regressor', 'Lasso regressor', 'SVR']
+ if st.button('Magic'):
+     loss = []
+     n_splits=5
+     opts = ['Linear regressor', 'Decision Tree regressor', 'SVR']
+     for opt in opts:
+         kf = KFold(n_splits=n_splits, shuffle=True, random_state=32)
+         cv_scores = []
+         for train_index, val_index in kf.split(X_train):
+             model = estimator_model(opt)
+             X_train_cv, X_val_cv = X_train[train_index], X_train[val_index]
+             y_train_cv, y_val_cv = y_train[train_index], y_train[val_index]
+             model.fit(X_train_cv, y_train_cv)
+             y_val_pred = model.predict(X_val_cv)
+             cv_scores.append(mean_squared_error(y_val_cv, y_val_pred))
+         loss.append(np.mean(cv_scores))
+     best_model = estimator_model(opts[np.argmin(loss)])
+     best_model.fit(X_train, y_train)
+     y_pred = best_model.predict(X_test)
+     fig = plt.figure()
+     plt.title(f"Best model fit is of {opts[np.argmin(loss)]}")
+     plt.scatter(X_test[:,0], y_pred)
+     plt.scatter(X_test[:,0], y_test)
+     st.pyplot(fig)
+
+ options = ['Linear regressor', 'Decision Tree regressor', 'SVR']
  model_type = st.selectbox('Select model type to use:', options)
  options = ['boosting', 'bagging', 'gradient descent']
  ensemble_type = st.selectbox('Select the ensemble type:', options)
@@ -75,11 +85,11 @@ estimator_number = st.slider('n_estimators', 1, 20, 4)
 
  fig = plt.figure()
  if ensemble_type == "bagging":
-     estimator_ = estimator_model(model_type)
+     estimator = estimator_model(model_type)
      test_loss = []
      train_loss = []
      for i in range(1, estimator_number):
-         model = BaggingRegressor(n_estimators=i, random_state=45)
+         model = BaggingRegressor(estimator=estimator, n_estimators=i, random_state=45)
          model.fit(X_train, y_train)
          y_pred = model.predict(X_test)
          temp = mean_squared_error(y_test, y_pred)
@@ -91,7 +101,7 @@ if ensemble_type == "bagging":
      plt.plot(range(1, estimator_number), train_loss, label="train loss")
  elif ensemble_type == "gradient descent":
      test_loss = []
-     estimator_ = estimator_model(model_type)
+     estimator = estimator_model(model_type)
      for i in range(1, estimator_number):
          model = GradientBoostingRegressor(n_estimators=i, learning_rate=0.1, random_state=45)
          model.fit(X_train, y_train)
@@ -100,9 +110,9 @@ elif ensemble_type == "gradient descent":
      plt.plot(range(1, estimator_number), test_loss, label="test loss")
  elif ensemble_type == "boosting":
      test_loss = []
-     estimator_ = estimator_model(model_type)
+     estimator = estimator_model(model_type)
      for i in range(1, estimator_number):
-         model = AdaBoostRegressor(n_estimators=i)
+         model = AdaBoostRegressor(n_estimators=i, estimator=estimator)
          model.fit(X_train, y_train)
          y_pred = model.predict(X_test)
          test_loss.append(mean_squared_error(y_test, y_pred))
@@ -111,31 +121,5 @@ elif ensemble_type == "boosting":
  plt.legend()
  plt.title("loss plot")
  plt.xlabel("n_estimators")
- plt.ylabel("loss")
+ plt.ylabel("mean squared error loss")
  st.pyplot(fig)
-
- if st.button('Magic'):
-     loss = []
-     n_splits=5
-     opts = ['Linear regressor', 'Ridge regressor', 'Lasso regressor', 'SVR']
-     for opt in opts:
-         kf = KFold(n_splits=n_splits, shuffle=True, random_state=32)
-         cv_scores = []
-         for train_index, val_index in kf.split(X_train):
-             model = estimator_model(opt)
-             X_train_cv, X_val_cv = X_train[train_index], X_train[val_index]
-             y_train_cv, y_val_cv = y_train[train_index], y_train[val_index]
-             model.fit(X_train_cv, y_train_cv)
-             y_val_pred = model.predict(X_val_cv)
-             cv_scores.append(mean_squared_error(y_val_cv, y_val_pred))
-         loss.append(np.mean(cv_scores))
-     best_model = estimator_model(opts[np.argmin(loss)])
-     best_model.fit(X_train, y_train)
-     y_pred = best_model.predict(X_test)
-     fig = plt.figure()
-     plt.title(f"Best model fit is of {opts[np.argmin(loss)]}")
-     plt.scatter(X_test[:,0], y_pred)
-     plt.scatter(X_test[:,0], y_test)
-     st.pyplot(fig)
-
-
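The substantive change in this commit is that the base model chosen in the UI is now actually wired into the ensembles through the `estimator` keyword (the name used by scikit-learn >= 1.2; older releases call it `base_estimator`). A minimal sketch of the same pattern outside Streamlit, with an assumed train/test split:

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingRegressor, AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

# Toy data mirroring make_data()'s "100 samples" branch.
X, y = make_regression(n_samples=100, n_features=2, random_state=42)
# random_state=0 is an arbitrary choice for this sketch.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

base = DecisionTreeRegressor()  # any regressor can serve as the base estimator

for n in range(1, 5):
    # Bagging averages n clones of `base`, each fit on a bootstrap resample.
    bag = BaggingRegressor(estimator=base, n_estimators=n, random_state=45)
    # AdaBoost fits the clones sequentially, reweighting poorly predicted samples.
    ada = AdaBoostRegressor(estimator=base, n_estimators=n, random_state=45)
    for name, model in (("bagging", bag), ("boosting", ada)):
        model.fit(X_train, y_train)
        mse = mean_squared_error(y_test, model.predict(X_test))
        print(f"{name} n_estimators={n}: test MSE {mse:.1f}")

GradientBoostingRegressor is the exception: it always grows its own regression trees and takes no `estimator` argument, which is why the base model selected in the app has no effect on the "gradient descent" branch.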
 
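The "Magic" block hand-rolls its k-fold loop; the same model selection can be sketched more compactly with cross_val_score (a hypothetical rewrite for illustration, not what the app ships):

from sklearn.datasets import make_regression
from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR

X, y = make_regression(n_samples=100, n_features=2, random_state=42)

models = {
    "Linear regressor": LinearRegression(),
    "Decision Tree regressor": DecisionTreeRegressor(),
    "SVR": SVR(),
}
kf = KFold(n_splits=5, shuffle=True, random_state=32)

# cross_val_score maximises its scorer, so MSE comes back negated.
mean_mse = {
    name: -cross_val_score(model, X, y, cv=kf, scoring="neg_mean_squared_error").mean()
    for name, model in models.items()
}
best = min(mean_mse, key=mean_mse.get)
print(f"Best model by 5-fold CV: {best}")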