Spaces:
Runtime error
Runtime error
File size: 8,542 Bytes
0374998 4967b34 88eb618 4967b34 88eb618 4967b34 30e1100 4967b34 30e1100 0374998 9689258 0374998 cad7c06 0374998 cad7c06 0374998 4967b34 0374998 4967b34 0374998 4967b34 0374998 ff30085 0374998 4967b34 0374998 4967b34 0374998 4967b34 0374998 4967b34 0374998 4967b34 0374998 4967b34 0374998 4967b34 0374998 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
# -*- coding: utf-8 -*-
"""Huggingface_Prototype.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1i--A21QuJPKdv-HM2kUrFSwj89Qfv4cb
"""
#! mkdir ~/.kaggle
#! cp kaggle.json ~/.kaggle/
#! chmod 600 ~/.kaggle/kaggle.json
#!kaggle datasets download -d sercandikici/merged-dataset-electricty-weather-for-modelling
#! unzip merged-dataset-electricty-weather-for-modelling.zip
#pip install gradio
'''
Use the files.upload() function to upload files from the local system.
'''
#from google.colab import files
#uploaded = files.upload()
'''
Read the CSV file "merged_data.csv" into a DataFrame df.
Drop the column 'is_holiday' from the DataFrame df using the drop() function with axis=1.
Save the modified DataFrame df to a new CSV file named "merged_data_huggingface.csv" using the to_csv() function with index=False.
Download the CSV file "merged_data_huggingface.csv" using the files.download() function from the google.colab module.
'''
#df = pd.read_csv("merged_data.csv")
#df.drop('is_holiday', axis=1, inplace=True)
#df.to_csv('merged_data_huggingface.csv', index=False)
#from google.colab import files
#files.download('merged_data_huggingface.csv')
from prophet import Prophet
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import gradio as gr
def forecast_plot(forecast_days, test_days, days):
    '''
    Draw the forecast against the observed values and return the figure.

    Parameters:
    - forecast_days: table with a "ds" column (x values) and a "yhat" column
      (forecast values), drawn as a green line labelled "Forecast".
    - test_days: table with a "ds" column and a "y" column (observed values),
      drawn as orange points labelled "Actual".
    - days: horizon inserted into the plot title.

    Returns:
    The matplotlib figure holding the plot. The figure is not shown or saved
    here; the caller decides what to do with it.
    '''
    fig, ax = plt.subplots(figsize=(14, 4))
    ax.plot(forecast_days['ds'], forecast_days['yhat'], label='Forecast', color='green')
    ax.scatter(test_days['ds'], test_days['y'], label='Actual', color='orange')
    ax.set_xlabel('Date')
    ax.set_ylabel('MGW')
    # Title and legend are set on this Axes explicitly. plt.title()/plt.legend()
    # act on pyplot's global "current axes", which is not guaranteed to be this
    # one when several figures are being built by the web app.
    ax.set_title(f'Prophet Forecast - Model 3 - {days} days horizon')
    ax.legend()
    return fig
def mean_absolute_percentage_error(y_true, y_pred):
    '''
    Return the mean of |y_true - y_pred| / |y_true| over all elements.

    The result is a fraction (0.1 means 10%); it is not multiplied by 100.
    Both inputs are converted to NumPy arrays, so lists and pandas Series are
    compared position by position.
    '''
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = (actual - predicted) / actual
    return np.mean(np.abs(relative_error))
def root_mean_squared_error(y_true, y_pred):
    '''
    Return the Root Mean Squared Error (RMSE) between actual and predicted values.

    Both inputs are converted to NumPy arrays and compared position by
    position; the result is the square root of the mean squared difference.
    '''
    residuals = np.array(y_true) - np.array(y_pred)
    mean_squared = np.mean(residuals ** 2)
    return np.sqrt(mean_squared)
def r_squared(y_true, y_pred):
    '''
    Return the coefficient of determination (R-squared) of the predictions.

    Computed as 1 - SS_residual / SS_total, where SS_total is the squared
    deviation of y_true from its own mean and SS_residual is the squared
    difference between y_true and y_pred.

    Special cases:
    - Empty input returns NaN.
    - When y_true is constant, SS_total is zero and the ratio is undefined.
      The function then returns 1.0 if the predictions match exactly and 0.0
      otherwise, instead of dividing by zero and yielding NaN / -inf.
    '''
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    if actual.size == 0:
        return float("nan")

    ss_total = np.sum((actual - np.mean(actual)) ** 2)
    ss_residual = np.sum((actual - predicted) ** 2)

    if ss_total == 0:
        # Zero variance in the observations: avoid the 0/0 or x/0 division.
        return 1.0 if ss_residual == 0 else 0.0
    return 1 - (ss_residual / ss_total)
def predict_and_evaluate(csv_file, days_to_predict, freq, country_name):
    '''
    Fit a Prophet model on historical data, forecast a horizon that overlaps a
    held-out tail of the same data, and score the forecast against it.

    Parameters:
    - csv_file: Path to (or uploaded file object for) a CSV with exactly three
      columns, which are renamed by position to "ds" (datetime), "y" (target)
      and "temp" (temperature regressor).
    - days_to_predict: Forecast horizon in days; converted to an integer.
    - freq: Frequency string passed to Prophet's make_future_dataframe.
    - country_name: Country code passed to Prophet's add_country_holidays.

    Steps:
    1. Read the CSV into a DataFrame and parse the datetime column.
    2. Hold out the last 90 * 12 rows as test data; train on the rest.
    3. Fit Prophet (MCMC sampling, fixed prior scales) with country holidays
       and "temp" as an additive regressor.
    4. Build the future frame and fill "temp" from the training rows and from
       the first rows of the hold-out.
    5. Predict, plot forecast vs. actual, and compute MAPE, RMSE and R-squared.

    Returns:
    A tuple (metrics, plot) where metrics is a dict with keys "MAPE", "RMSE"
    and "R-squared" (rounded floats) and plot is the figure returned by
    forecast_plot.

    Raises:
    ValueError: if the horizon is shorter than one day or needs more rows
    than the hold-out contains.
    '''
    # Gradio's File input can hand over a temp-file wrapper instead of a path;
    # in that case read from the path stored in its ``name`` attribute.
    upload_path = getattr(csv_file, "name", None)
    if hasattr(csv_file, "read") and isinstance(upload_path, str):
        csv_file = upload_path

    df_model = pd.read_csv(csv_file)
    df_model.columns = ["ds", "y", "temp"]
    df_model['ds'] = pd.to_datetime(df_model['ds'])

    # NOTE(review): 12 rows per day matches the "2H" frequency used by the
    # defaults in this file; it is not derived from ``freq`` -- confirm before
    # using other frequencies.
    samples_per_day = 12
    holdout_days = 90
    split_from = holdout_days * samples_per_day
    train_data = df_model[:-split_from]
    test_data = df_model[-split_from:]

    # The slider value may arrive as a float; it is used as a period count
    # and as a slice bound below, both of which need an integer.
    periods = int(days_to_predict) * samples_per_day
    # Fail early, before the slow fit, rather than with a length-mismatch
    # error when the regressor values are assigned.
    if periods < 1:
        raise ValueError("days_to_predict must be at least 1")
    if periods > len(test_data):
        raise ValueError(
            f"days_to_predict={days_to_predict} needs {periods} rows of held-out "
            f"data but only {len(test_data)} are available"
        )

    #Set parameters for the Prophet model
    seasonality_prior_scale = 0.01
    changepoint_prior_scale = 0.05
    mcmc_samples = 50

    #Train and fit the Prophet model
    m = Prophet(mcmc_samples=mcmc_samples, changepoint_prior_scale=changepoint_prior_scale,
                seasonality_prior_scale=seasonality_prior_scale)
    m.add_country_holidays(country_name=country_name)
    m.add_regressor("temp", mode="additive")
    m.fit(train_data)

    #Create a future DataFrame for prediction, setting regressors for both training and testing data
    future = m.make_future_dataframe(periods=periods, freq=freq)
    train_idx = future["ds"].isin(train_data.ds)
    test_idx = ~train_idx
    for r in ["temp"]:
        future.loc[train_idx, r] = train_data[r].to_list()
        future.loc[test_idx, r] = test_data.iloc[:periods][r].to_list()

    forecast = m.predict(future)
    first_test_date = test_data["ds"].iloc[0]
    forecast_days = forecast[forecast["ds"] >= first_test_date]
    test_days = test_data[(test_data["ds"] >= first_test_date) & (
        test_data["ds"] <= forecast_days["ds"].iloc[-1])]

    #Plot the forecast using the forecast_plot function
    plot = forecast_plot(forecast_days, test_days, days_to_predict)

    #Calculate forecast metrics on the overlapping window
    mape = mean_absolute_percentage_error(test_days["y"], forecast_days["yhat"])
    rmse = root_mean_squared_error(test_days["y"], forecast_days["yhat"])
    rsqr = r_squared(test_days["y"], forecast_days["yhat"])
    # Plain floats keep the dict readable when it is rendered as text; NumPy
    # scalars can print as ``np.float64(...)``.
    metrics = {
        "MAPE": round(float(mape), 3),
        "RMSE": round(float(rmse), 1),
        "R-squared": round(float(rsqr), 3),
    }
    return metrics, plot
# Default inputs for a one-off run of the forecasting pipeline.
csv_name = "merged_data_huggingface.csv"
#df_merged['settlement_date'] = pd.to_datetime(df_merged['settlement_date'])
#df_model = df_merged[["tsd", "settlement_date", "temp"]]
#df_model.columns = ["y", "ds", "temp"]
days_to_predict = 15 # Set the default value for days to predict
country_name = "UK" # Set the default country code used for holidays
freq = "2H" # Set the default data frequency (2-hourly)
# NOTE(review): this call fits the model when the module is loaded and its
# return value (metrics, plot) is discarded -- confirm it is still needed.
predict_and_evaluate(csv_name, days_to_predict, freq, country_name)
'''
This Gradio interface uses the `predict_and_evaluate` function to forecast electricity demand and evaluate the forecast accuracy.
Users can upload a CSV file containing historical electricity demand data, specify the number of days to predict,
and provide the data frequency and country code for holidays.
The interface displays evaluation metrics (MAPE, RMSE, R-squared) and a plot comparing forecasted values against actual values.
Example usage:
- Upload the file "merged_data_huggingface.csv"
- Set "Days to Predict" to 30
- Enter "2H" for data frequency
- Enter "UK" for the country code
'''
# Web UI: the four inputs map positionally onto
# predict_and_evaluate(csv_file, days_to_predict, freq, country_name) and the
# two outputs onto its (metrics, plot) return value.
iface = gr.Interface(
    fn=predict_and_evaluate,
    inputs=[
        gr.File(label="CSV File"),
        gr.Slider(1, 90, value=30, step=1, label="Days to Predict"),
        # Pre-filled so that submitting without typing does not pass empty
        # strings as the frequency / country code.
        gr.Textbox(label="Data Frequency", value="2H",
                   placeholder="Enter frequency (e.g., 2H for 2 hourly)"),
        gr.Textbox(label="Country Code", value="UK",
                   placeholder="Enter country code (e.g., UK)"),
    ],
    outputs=[
        gr.Textbox(label=" Evaluation Metrics"),
        "plot",
    ],
    title="Prophet Electricity Load Forecasting Model",
    description="Upload a CSV file of time series data to generate electricity demand forecasts using Prophet. Update country code (e.g. UK or DE) for holidays and data frequency",
    examples=[
        ["merged_data_huggingface.csv", 30, "2H", "UK"],
    ],
)
iface.launch()