Shafeek Saleem committed on
Commit
b3a91c7
1 Parent(s): 086dd3b
.idea/sonarlint/issuestore/6/0/603fc2a4019aac2f96f36d343c1617f2e625b0f6 DELETED
File without changes
.idea/sonarlint/issuestore/index.pb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f192a166326627fde11805cd80e2ecf025cd67b6c5340e631dce549929f014ac
3
- size 206
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c493d713dc16abecbcf2979862f605e2882a86c4027cec59edadf806b82f7144
3
+ size 130
pages/3_Training the Model.py CHANGED
@@ -24,9 +24,10 @@ LEVEL = 3
24
  File_PATH = 'datasets/Building_forcasting.csv'
25
 
26
  def process_file(csv_file):
27
- data = pd.read_csv(csv_file, index_col='Timestamp')
28
- data.index = pd.to_datetime(data.index)
29
- data = data.fillna(0)
 
30
  return data
31
 
32
 
@@ -41,38 +42,46 @@ def model_train(train_X, train_y, model_choice, train_size):
41
  return model, X_test, y_test
42
 
43
 
44
- def create_model_inputs(data, lag, mean_period):
45
  df_processed = data.copy()
46
- df_processed['PV_Output_lag'] = df_processed['PV_Output'].shift(lag)
47
- df_processed['PV_Output_mean'] = df_processed['PV_Output'].rolling(window=mean_period).mean()
48
-
49
- X = df_processed[['Solar_Irradiance', 'Temperature', 'Rain_Fall', 'Wind_speed', 'PV_Output_lag', 'PV_Output_mean']].dropna()
50
- y = df_processed[['PV_Output']].loc[X.index]
51
-
52
- return X, y
53
-
54
-
55
- def show_output(y_test, y_pred):
 
 
 
 
 
 
 
 
56
  st.subheader("Model Performance")
57
  st.write(f"Test R2 score: {r2_score(y_test, y_pred):.2f}")
58
 
59
  fig, axs = plt.subplots(3, figsize=(12, 18))
60
- axs[0].plot(y_test.index, y_pred/1000, label='Predicted')
61
- axs[0].plot(y_test.index, y_test['PV_Output']/1000, label='Actual')
62
  axs[0].legend()
63
- axs[0].set_title('Prediction vs Actual (Solar Power Generation)')
64
- axs[0].set_xlabel('Date')
65
- axs[0].set_ylabel('Solar Power Generation (kW)')
66
 
67
- axs[1].plot(y_test.index, y_pred/1000, label='Predicted')
68
- axs[1].set_title('Predicted Solar Power Generation')
69
- axs[1].set_xlabel('Date')
70
- axs[1].set_ylabel('Solar Power Generation (kW)')
71
 
72
- axs[2].plot(y_test.index, y_test['PV_Output']/1000, label='Actual')
73
- axs[2].set_title('Actual Solar Power Generation')
74
- axs[2].set_xlabel('Date')
75
- axs[2].set_ylabel('Solar Power Generation (kW)')
76
 
77
  fig.tight_layout()
78
  with _lock:
@@ -151,8 +160,10 @@ def step3_page():
151
  if state == "preprocessing":
152
  st.subheader("Step 2: Data Preprocessing and Feature Engineering")
153
  st.write("Now let's preprocess our dataset to handle missing values, outliers and inconsistencies and then perform feature engineering tasks to extract meaningful features from the raw data. Finally we need to separate training variables (X) and target variable (y).")
154
- # if st.button("Create Features and Target variable"):
155
- X, y = create_model_inputs(data, 288, 288)
 
 
156
  cols = st.columns(2)
157
  state = "splitting"
158
  with cols[0]:
@@ -199,7 +210,7 @@ def step3_page():
199
  st.subheader("Step 5: Model Evaluation")
200
  st.write("Now, let's evaluate our weather forecasting model's performance against the test data set.")
201
  y_pred = model.predict(X_test)
202
- fig = show_output(y_test, y_pred)
203
  # download_link(y_test, y_pred)
204
  #
205
  # download_plot(fig)
 
24
  File_PATH = 'datasets/Building_forcasting.csv'
25
 
26
  def process_file(csv_file):
27
+ data = pd.read_csv(csv_file)
28
+ data['index'] = data['Date'].str.cat(data['Location'], sep='_')
29
+ data.set_index('index', inplace=True)
30
+ data['Date'] = pd.to_datetime(data['Date'])
31
  return data
32
 
33
 
 
42
  return model, X_test, y_test
43
 
44
 
45
+ def create_model_inputs(data, lag, mean_period, target_variable):
46
  df_processed = data.copy()
47
+ selected_columns = ["MinTemp", "MaxTemp", "Rainfall", "WindGustSpeed", "WindSpeed9am", "WindSpeed3pm",
48
+ "Humidity9am", "Humidity3pm", "Temp9am", "Temp3pm", "Pressure9am", "Pressure3pm"]
49
+ for col in selected_columns:
50
+ df_processed[col].fillna(df_processed[col].mean(), inplace=True)
51
+ df_processed[target_variable + "Tomorrow"] = df_processed[target_variable].shift(-1 * lag)
52
+ df_processed[target_variable + "_mean"] = df_processed[target_variable].rolling(window=mean_period).mean()
53
+ df_processed.drop(columns=['Evaporation', 'Sunshine', 'Cloud9am', 'Cloud3pm'], inplace=True)
54
+ X = df_processed[
55
+ ["Location", "MinTemp", "MaxTemp", "Rainfall", "WindGustDir", "WindGustSpeed", "WindDir9am", "WindDir3pm",
56
+ "WindSpeed9am", "WindSpeed3pm", "Humidity9am", "Humidity3pm", "Pressure9am", "Pressure3pm", "Temp9am",
57
+ "Temp3pm", "RainToday", target_variable + "_mean"]]
58
+ X = pd.get_dummies(X, columns=['Location', 'WindGustDir', 'WindDir9am', 'WindDir3pm'])
59
+ y = df_processed[target_variable + "Tomorrow"].loc[X.index]
60
+
61
+ return X, y, target_variable + "Tomorrow"
62
+
63
+
64
+ def show_output(y_test, y_pred, target_variable_name):
65
  st.subheader("Model Performance")
66
  st.write(f"Test R2 score: {r2_score(y_test, y_pred):.2f}")
67
 
68
  fig, axs = plt.subplots(3, figsize=(12, 18))
69
+ axs[0].plot(y_test.index, y_pred, label='Predicted')
70
+ axs[0].plot(y_test.index, y_test[target_variable_name], label='Actual')
71
  axs[0].legend()
72
+ axs[0].set_title(f'Prediction vs Actual ({target_variable_name})')
73
+ axs[0].set_xlabel('Date and Location')
74
+ axs[0].set_ylabel(f'{target_variable_name}')
75
 
76
+ axs[1].plot(y_test.index, y_pred, label='Predicted')
77
+ axs[1].set_title(f'Predicted {target_variable_name}')
78
+ axs[1].set_xlabel('Date and Location')
79
+ axs[1].set_ylabel(f'{target_variable_name}')
80
 
81
+ axs[2].plot(y_test.index, y_test[target_variable_name], label='Actual')
82
+ axs[2].set_title(f'Actual {target_variable_name}')
83
+ axs[2].set_xlabel('Date and Location')
84
+ axs[2].set_ylabel(f'{target_variable_name}')
85
 
86
  fig.tight_layout()
87
  with _lock:
 
160
  if state == "preprocessing":
161
  st.subheader("Step 2: Data Preprocessing and Feature Engineering")
162
  st.write("Now let's preprocess our dataset to handle missing values, outliers and inconsistencies and then perform feature engineering tasks to extract meaningful features from the raw data. Finally we need to separate training variables (X) and target variable (y).")
163
+ st.info("You can select the weather attribute that you want to forecast (WindSpeed/ Humidity/ Pressure/ Temperature) and the time of the forecast (9am tomorrow/ 3pm tomorrow)")
164
+ target_variables = ['WindSpeed9amTomorrow', 'WindSpeed3pmTomorrow', 'Humidity9amTomorrow', 'Humidity3pmTomorrow', 'Pressure9amTomorrow', 'Pressure3pmTomorrow', 'Temp9amTomorrow', 'Temp3pmTomorrow']
165
+ target_variable = st.selectbox('Select Target Variable', target_variables)
166
+ X, y, target_variable_name = create_model_inputs(data, 1, 30, target_variable)
167
  cols = st.columns(2)
168
  state = "splitting"
169
  with cols[0]:
 
210
  st.subheader("Step 5: Model Evaluation")
211
  st.write("Now, let's evaluate our weather forecasting model's performance against the test data set.")
212
  y_pred = model.predict(X_test)
213
+ fig = show_output(y_test, y_pred, target_variable_name)
214
  # download_link(y_test, y_pred)
215
  #
216
  # download_plot(fig)
pages/{4_Congratulations.py → 5_Congratulations.py} RENAMED
File without changes