Spaces:

aieye
/

weather_forecasting_tutorial

Sleeping

App Files Community

Shafeek Saleem committed on Jul 25, 2023

Commit

b3a91c7

1 Parent(s): 086dd3b

ss

Browse files

Files changed (4) hide show

.idea/sonarlint/issuestore/6/0/603fc2a4019aac2f96f36d343c1617f2e625b0f6 +0 -0
.idea/sonarlint/issuestore/index.pb +2 -2
pages/3_Training the Model.py +41 -30
pages/{4_Congratulations.py → 5_Congratulations.py} +0 -0

.idea/sonarlint/issuestore/6/0/603fc2a4019aac2f96f36d343c1617f2e625b0f6 DELETED Viewed

File without changes

.idea/sonarlint/issuestore/index.pb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f192a166326627fde11805cd80e2ecf025cd67b6c5340e631dce549929f014ac
-size 206

 version https://git-lfs.github.com/spec/v1
+oid sha256:c493d713dc16abecbcf2979862f605e2882a86c4027cec59edadf806b82f7144
+size 130

pages/3_Training the Model.py CHANGED Viewed

@@ -24,9 +24,10 @@ LEVEL = 3
 File_PATH = 'datasets/Building_forcasting.csv'
 def process_file(csv_file):
-    data = pd.read_csv(csv_file, index_col='Timestamp')
-    data.index = pd.to_datetime(data.index)
-    data = data.fillna(0)
     return data
@@ -41,38 +42,46 @@ def model_train(train_X, train_y, model_choice, train_size):
     return model, X_test, y_test
-def create_model_inputs(data, lag, mean_period):
     df_processed = data.copy()
-    df_processed['PV_Output_lag'] = df_processed['PV_Output'].shift(lag)
-    df_processed['PV_Output_mean'] = df_processed['PV_Output'].rolling(window=mean_period).mean()
-    X = df_processed[['Solar_Irradiance', 'Temperature', 'Rain_Fall', 'Wind_speed', 'PV_Output_lag', 'PV_Output_mean']].dropna()
-    y = df_processed[['PV_Output']].loc[X.index]
-    return X, y
-def show_output(y_test, y_pred):
     st.subheader("Model Performance")
     st.write(f"Test R2 score: {r2_score(y_test, y_pred):.2f}")
     fig, axs = plt.subplots(3, figsize=(12, 18))
-    axs[0].plot(y_test.index, y_pred/1000, label='Predicted')
-    axs[0].plot(y_test.index, y_test['PV_Output']/1000, label='Actual')
     axs[0].legend()
-    axs[0].set_title('Prediction vs Actual (Solar Power Generation)')
-    axs[0].set_xlabel('Date')
-    axs[0].set_ylabel('Solar Power Generation (kW)')
-    axs[1].plot(y_test.index, y_pred/1000, label='Predicted')
-    axs[1].set_title('Predicted Solar Power Generation')
-    axs[1].set_xlabel('Date')
-    axs[1].set_ylabel('Solar Power Generation (kW)')
-    axs[2].plot(y_test.index, y_test['PV_Output']/1000, label='Actual')
-    axs[2].set_title('Actual Solar Power Generation')
-    axs[2].set_xlabel('Date')
-    axs[2].set_ylabel('Solar Power Generation (kW)')
     fig.tight_layout()
     with _lock:
@@ -151,8 +160,10 @@ def step3_page():
     if state == "preprocessing":
         st.subheader("Step 2: Data Preprocessing and Feature Engineering")
         st.write("Now let's preprocess our dataset to handle missing values, outliers and inconsistencies and then perform feature engineering tasks to extract meaningful features from the raw data. Finally we need to separate training variables (X) and target variable (y).")
-        # if st.button("Create Features and Target variable"):
-        X, y = create_model_inputs(data, 288, 288)
         cols = st.columns(2)
         state = "splitting"
         with cols[0]:
@@ -199,7 +210,7 @@ def step3_page():
         st.subheader("Step 5: Model Evaluation")
         st.write("Now, let's evaluate our weather forecasting model's performance against the test data set.")
         y_pred = model.predict(X_test)
-        fig = show_output(y_test, y_pred)
         # download_link(y_test, y_pred)
         #
         # download_plot(fig)

 File_PATH = 'datasets/Building_forcasting.csv'
 def process_file(csv_file):
+    data = pd.read_csv(csv_file)
+    data['index'] = data['Date'].str.cat(data['Location'], sep='_')
+    data.set_index('index', inplace=True)
+    data['Date'] = pd.to_datetime(data['Date'])
     return data
     return model, X_test, y_test
+def create_model_inputs(data, lag, mean_period, target_variable):
     df_processed = data.copy()
+    selected_columns = ["MinTemp", "MaxTemp", "Rainfall", "WindGustSpeed", "WindSpeed9am", "WindSpeed3pm",
+                        "Humidity9am", "Humidity3pm", "Temp9am", "Temp3pm", "Pressure9am", "Pressure3pm"]
+    for col in selected_columns:
+        df_processed[col].fillna(df_processed[col].mean(), inplace=True)
+    df_processed[target_variable + "Tomorrow"] = df_processed[target_variable].shift(-1 * lag)
+    df_processed[target_variable + "_mean"] = df_processed[target_variable].rolling(window=mean_period).mean()
+    df_processed.drop(columns=['Evaporation', 'Sunshine', 'Cloud9am', 'Cloud3pm'], inplace=True)
+    X = df_processed[
+        ["Location", "MinTemp", "MaxTemp", "Rainfall", "WindGustDir", "WindGustSpeed", "WindDir9am", "WindDir3pm",
+         "WindSpeed9am", "WindSpeed3pm", "Humidity9am", "Humidity3pm", "Pressure9am", "Pressure3pm", "Temp9am",
+         "Temp3pm", "RainToday", target_variable + "_mean"]]
+    X = pd.get_dummies(X, columns=['Location', 'WindGustDir', 'WindDir9am', 'WindDir3pm'])
+    y = df_processed[target_variable + "Tomorrow"].loc[X.index]
+    return X, y, target_variable + "Tomorrow"
+def show_output(y_test, y_pred, target_variable_name):
     st.subheader("Model Performance")
     st.write(f"Test R2 score: {r2_score(y_test, y_pred):.2f}")
     fig, axs = plt.subplots(3, figsize=(12, 18))
+    axs[0].plot(y_test.index, y_pred, label='Predicted')
+    axs[0].plot(y_test.index, y_test[target_variable_name], label='Actual')
     axs[0].legend()
+    axs[0].set_title(f'Prediction vs Actual ({target_variable_name})')
+    axs[0].set_xlabel('Date and Location')
+    axs[0].set_ylabel(f'{target_variable_name}')
+    axs[1].plot(y_test.index, y_pred, label='Predicted')
+    axs[1].set_title(f'Predicted {target_variable_name}')
+    axs[1].set_xlabel('Date and Location')
+    axs[1].set_ylabel(f'{target_variable_name}')
+    axs[2].plot(y_test.index, y_test[target_variable_name], label='Actual')
+    axs[2].set_title(f'Actual {target_variable_name}')
+    axs[2].set_xlabel('Date and Location')
+    axs[2].set_ylabel(f'{target_variable_name}')
     fig.tight_layout()
     with _lock:
     if state == "preprocessing":
         st.subheader("Step 2: Data Preprocessing and Feature Engineering")
         st.write("Now let's preprocess our dataset to handle missing values, outliers and inconsistencies and then perform feature engineering tasks to extract meaningful features from the raw data. Finally we need to separate training variables (X) and target variable (y).")
+        st.info("You can select the weather attribute that you want to forecast (WindSpeed/ Humidity/ Pressure/ Temperature) and the time of the forecast (9am tomorrow/ 3pm tomorrow)")
+        target_variables = ['WindSpeed9amTomorrow', 'WindSpeed3pmTomorrow', 'Humidity9amTomorrow', 'Humidity3pmTomorrow', 'Pressure9amTomorrow', 'Pressure3pmTomorrow', 'Temp9amTomorrow', 'Temp3pmTomorrow']
+        target_variable = st.selectbox('Select Target Variable', target_variables)
+        X, y, target_variable_name = create_model_inputs(data, 1, 30, target_variable)
         cols = st.columns(2)
         state = "splitting"
         with cols[0]:
         st.subheader("Step 5: Model Evaluation")
         st.write("Now, let's evaluate our weather forecasting model's performance against the test data set.")
         y_pred = model.predict(X_test)
+        fig = show_output(y_test, y_pred, target_variable_name)
         # download_link(y_test, y_pred)
         #
         # download_plot(fig)

pages/{4_Congratulations.py → 5_Congratulations.py} RENAMED Viewed

File without changes