Spaces:

crudcook
/

NYC_Energy_Consumption_Forecasting

Sleeping

App Files Files Community

Rahul-Crudcook committed on Nov 8

Commit

b89b1ca

•

1 Parent(s): 48db7e7

Upload 2 files

Browse files

Files changed (2) hide show

app.py +136 -0
nyc_energy_consumption.csv +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,136 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+from sklearn.preprocessing import MinMaxScaler
+from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import LSTM, Dense, Dropout
+import matplotlib.pyplot as plt
+from datetime import timedelta
+# Load and preprocess data
+@st.cache_data
+def load_data():
+    data = pd.read_csv("nyc_energy_consumption.csv")
+    data.columns = ['timeStamp', 'demand', 'precip', 'temp']
+    data['timeStamp'] = pd.to_datetime(data['timeStamp'])
+    data.set_index('timeStamp', inplace=True)
+    data = data.dropna()  # Drop any missing values
+    return data
+data = load_data()
+# Scale the data
+scaler = MinMaxScaler(feature_range=(0, 1))
+scaled_data = scaler.fit_transform(data[['demand', 'precip', 'temp']])
+# Create dataset function for LSTM
+def create_dataset(dataset, look_back=60):
+    X, y = [], []
+    for i in range(look_back, len(dataset)):
+        X.append(dataset[i-look_back:i])
+        y.append(dataset[i, 0])  # Predicting demand
+    return np.array(X), np.array(y)
+# Set look-back period
+look_back = 60
+X, y = create_dataset(scaled_data, look_back)
+# Split the dataset into train and test sets
+split_ratio = 0.8
+split_index = int(len(X) * split_ratio)
+X_train, X_test = X[:split_index], X[split_index:]
+y_train, y_test = y[:split_index], y[split_index:]
+# Build and compile LSTM model
+model = Sequential([
+    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
+    Dropout(0.2),
+    LSTM(units=50, return_sequences=False),
+    Dropout(0.2),
+    Dense(units=25),
+    Dense(units=1)
+])
+model.compile(optimizer='adam', loss='mean_squared_error')
+# Train the model with validation
+history = model.fit(X_train, y_train, epochs=20, batch_size=64, validation_data=(X_test, y_test))
+# Make predictions
+train_predict = model.predict(X_train)
+test_predict = model.predict(X_test)
+# Inverse transform predictions to original scale
+train_predict = scaler.inverse_transform(np.concatenate((train_predict, np.zeros((train_predict.shape[0], 2))), axis=1))[:, 0]
+test_predict = scaler.inverse_transform(np.concatenate((test_predict, np.zeros((test_predict.shape[0], 2))), axis=1))[:, 0]
+y_train_inv = scaler.inverse_transform(np.concatenate((y_train.reshape(-1, 1), np.zeros((y_train.shape[0], 2))), axis=1))[:, 0]
+y_test_inv = scaler.inverse_transform(np.concatenate((y_test.reshape(-1, 1), np.zeros((y_test.shape[0], 2))), axis=1))[:, 0]
+# Calculate error metrics
+rmse = np.sqrt(mean_squared_error(y_test_inv, test_predict))
+mape = mean_absolute_percentage_error(y_test_inv, test_predict) * 100
+accuracy = 100 - mape
+# Streamlit App with filter for future prediction periods
+st.title("NYC Energy Consumption Forecasting with LSTM")
+st.subheader("Dataset Preview")
+st.write(data.head())
+# Forecasting options
+st.subheader("Forecasting Options")
+forecast_period = st.slider("Select number of future hours to predict", min_value=1, max_value=365, value=30)
+# Future prediction
+future_X = scaled_data[-look_back:]
+future_X = np.reshape(future_X, (1, look_back, scaled_data.shape[1]))
+future_predictions = []
+for _ in range(forecast_period):
+    future_pred = model.predict(future_X)
+    future_predictions.append(future_pred[0, 0])
+    # Update future_X for the next prediction
+    future_pred_expanded = np.array([[future_pred[0, 0], 0, 0]])  # Expand future_pred to match the 3 features
+    future_X = np.append(future_X[:, 1:, :], [future_pred_expanded], axis=1)
+# Scale back future predictions
+future_predictions = scaler.inverse_transform(
+    np.concatenate((np.array(future_predictions).reshape(-1, 1), np.zeros((forecast_period, 2))), axis=1))[:, 0]
+# Generate dates for future predictions
+last_date = data.index[-1]
+future_dates = [last_date + timedelta(hours=i) for i in range(1, forecast_period + 1)]
+future_predictions_df = pd.DataFrame({
+    'DateTime': future_dates,
+    'Predicted Demand': future_predictions
+})
+# Display evaluation metrics
+st.subheader("Forecasting and Model Evaluation")
+st.write(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
+st.write(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
+st.write(f"Model Accuracy: {accuracy:.2f}%")
+# Plotting actual vs predicted
+st.subheader("Actual vs Predicted Demand")
+plt.figure(figsize=(14,5))
+plt.plot(y_test_inv, color='blue', label='Actual Demand')
+plt.plot(test_predict, color='orange', linestyle='--', label='Predicted Demand')
+plt.legend()
+plt.xlabel('Time')
+plt.ylabel('Demand')
+st.pyplot(plt)
+# Display future predictions in a DataFrame
+st.subheader("Future Predictions with Date and Time")
+st.write(future_predictions_df)
+# Plotting future predictions
+st.subheader("Future Predictions Plot")
+plt.figure(figsize=(14,5))
+plt.plot(range(len(y_test_inv), len(y_test_inv) + forecast_period), future_predictions, color='green', linestyle='--', label='Future Prediction')
+plt.legend()
+plt.xlabel('Future Time')
+plt.ylabel('Demand')
+st.pyplot(plt)

nyc_energy_consumption.csv ADDED Viewed

The diff for this file is too large to render. See raw diff