|
|
|
"""Huggingface_Prototype.ipynb |
|
|
|
Automatically generated by Colab. |
|
|
|
Original file is located at |
|
https://colab.research.google.com/drive/1i--A21QuJPKdv-HM2kUrFSwj89Qfv4cb |
|
""" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
''' |
|
Use the files.upload() function to upload files from the local system. |
|
''' |
|
|
|
|
|
|
|
|
|
''' |
|
Read the CSV file "merged_data.csv" into a DataFrame df. |
|
Drop the column 'is_holiday' from the DataFrame df using the drop() function with axis=1. |
|
Save the modified DataFrame df to a new CSV file named "merged_data_huggingface.csv" using the to_csv() function with index=False. |
|
Download the CSV file "merged_data_huggingface.csv" using the files.download() function from the google.colab module. |
|
''' |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from prophet import Prophet |
|
import numpy as np |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
import gradio as gr |
|
|
|
def forecast_plot(forecast_days, test_days, days):
    """Plot forecast values against the actual observations.

    Args:
        forecast_days: Table with a ``ds`` column (x values) and a ``yhat``
            column (forecast values); drawn as a green line labelled
            "Forecast".
        test_days: Table with a ``ds`` column and a ``y`` column (actual
            values); drawn as orange points labelled "Actual".
        days: Forecast horizon in days; only used in the plot title.

    Returns:
        The Matplotlib figure holding the plot.
    """
    fig, ax = plt.subplots(figsize=(14, 4))
    ax.plot(forecast_days['ds'], forecast_days['yhat'], label='Forecast', color='green')
    ax.scatter(test_days['ds'], test_days['y'], label='Actual', color='orange')
    ax.set_xlabel('Date')
    ax.set_ylabel('MGW')

    # Title and legend are set on this Axes object directly. plt.title() and
    # plt.legend() target whatever axes is "current" in pyplot's global
    # state, which is not guaranteed to be `ax` when several requests are
    # plotting at the same time.
    ax.set_title(f'Prophet Forecast - Model 3 - {days} days horizon')
    ax.legend()

    # NOTE(review): the figure stays registered with pyplot after it is
    # returned; in a long-running server consider closing it once rendered.
    return fig
|
|
|
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the Mean Absolute Percentage Error (MAPE) as a fraction.

    The result is a ratio, not a percentage: 0.05 means 5 %.

    Args:
        y_true: Actual values (array-like of numbers).
        y_pred: Predicted values, positionally matched to ``y_true``.

    Returns:
        The mean of ``|y_true - y_pred| / |y_true|`` over all points.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Clamp the denominator to machine epsilon so an actual value of exactly
    # zero no longer produces inf/nan through division by zero. For every
    # non-zero actual the result is unchanged.
    denominator = np.maximum(np.abs(actual), np.finfo(float).eps)
    return np.mean(np.abs(actual - predicted) / denominator)
|
|
|
def root_mean_squared_error(y_true, y_pred):
    """Return the Root Mean Squared Error (RMSE) of predictions vs. actuals.

    Both arguments are array-likes of numbers, matched by position.
    """
    residuals = np.array(y_true) - np.array(y_pred)
    return np.sqrt(np.mean(np.square(residuals)))
|
|
|
def r_squared(y_true, y_pred):
    """Return the coefficient of determination (R-squared).

    Computed as ``1 - SS_residual / SS_total``, where ``SS_total`` is the
    squared deviation of the actual values from their mean.

    Args:
        y_true: Actual values (array-like of numbers).
        y_pred: Predicted values, positionally matched to ``y_true``.

    Returns:
        The R-squared score. When the actual values are all identical
        (``SS_total`` is zero) the ratio is undefined; in that case 1.0 is
        returned for a perfect prediction and 0.0 otherwise, instead of
        nan/-inf. Empty input yields nan.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    if actual.size == 0:
        return float("nan")

    ss_total = np.sum((actual - np.mean(actual)) ** 2)
    ss_residual = np.sum((actual - predicted) ** 2)

    # Constant actuals: avoid dividing by zero and return a finite score.
    if ss_total == 0:
        return 1.0 if ss_residual == 0 else 0.0
    return 1 - (ss_residual / ss_total)
|
|
|
def predict_and_evaluate(csv_file, days_to_predict, freq, country_name):
    """Fit a Prophet model on historical demand and score it on a hold-out set.

    Steps:
        1. Read the CSV and parse its timestamp column.
        2. Hold out the last 90 * 12 rows as test data; train on the rest.
        3. Fit Prophet (MCMC sampling) with country holidays and temperature
           as an additive regressor.
        4. Forecast ``days_to_predict`` days past the training data, feeding
           the model the temperatures recorded in the hold-out rows.
        5. Compare the forecast with the hold-out demand and plot both.

    Args:
        csv_file: Path or file-like object accepted by ``pandas.read_csv``.
            The file must have exactly three columns, in this order:
            timestamp, demand, temperature. They are renamed to "ds", "y"
            and "temp".
        days_to_predict: Forecast horizon in days, from 1 to 90. Non-integer
            numbers (e.g. a float from a UI slider) are truncated to int.
        freq: pandas frequency string used to generate the future
            timestamps. A blank value falls back to "2H".
        country_name: Country code handed to Prophet's
            ``add_country_holidays``. A blank value falls back to "UK".

    Returns:
        A tuple ``(metrics, figure)``: ``metrics`` is a dict with the keys
        "MAPE", "RMSE" and "R-squared" (rounded plain floats) and ``figure``
        is the forecast-vs-actual plot from ``forecast_plot``.

    Raises:
        ValueError: If the horizon is outside 1-90 days or the file has too
            few rows to leave any training data.
    """
    # NOTE(review): the hold-out size and the horizon are counted in rows at
    # 12 rows per day, i.e. they assume 2-hourly data whatever `freq` says.
    rows_per_day = 12
    holdout_rows = 90 * rows_per_day

    # Blank UI fields arrive as empty strings; apply the documented defaults.
    if isinstance(freq, str):
        freq = freq.strip()
    if isinstance(country_name, str):
        country_name = country_name.strip()
    freq = freq or "2H"
    country_name = country_name or "UK"

    # A slider may deliver a float; the row counts below must be integers.
    days_to_predict = int(days_to_predict)
    periods = days_to_predict * rows_per_day
    if not 0 < periods <= holdout_rows:
        raise ValueError(
            f"days_to_predict must be between 1 and {holdout_rows // rows_per_day}, "
            f"got {days_to_predict}"
        )

    df_model = pd.read_csv(csv_file)
    df_model.columns = ["ds", "y", "temp"]
    df_model['ds'] = pd.to_datetime(df_model['ds'])

    train_data = df_model[:-holdout_rows]
    test_data = df_model[-holdout_rows:]
    if len(train_data) < 2:
        raise ValueError(
            f"Need more than {holdout_rows + 1} rows to train and evaluate, "
            f"got {len(df_model)}"
        )

    m = Prophet(
        mcmc_samples=50,
        changepoint_prior_scale=0.05,
        seasonality_prior_scale=0.01,
    )
    m.add_country_holidays(country_name=country_name)
    m.add_regressor("temp", mode="additive")
    m.fit(train_data)

    # The future frame holds the training timestamps followed by `periods`
    # new ones. Regressor values are filled positionally: training rows get
    # the training temperatures, the new rows get the first `periods`
    # hold-out temperatures.
    future = m.make_future_dataframe(periods=periods, freq=freq)
    train_idx = future["ds"].isin(train_data.ds)
    test_idx = ~train_idx
    for r in ["temp"]:
        future.loc[train_idx, r] = train_data[r].to_list()
        future.loc[test_idx, r] = test_data.iloc[:periods][r].to_list()

    forecast = m.predict(future)

    # Keep only the part of the forecast that overlaps the hold-out window,
    # and the hold-out rows that fall inside the forecast horizon.
    forecast_days = forecast[forecast["ds"] >= test_data["ds"].iloc[0]]
    test_days = test_data[(test_data["ds"] >= test_data["ds"].iloc[0]) & (
        test_data["ds"] <= forecast_days["ds"].iloc[-1])]

    plot = forecast_plot(forecast_days, test_days, days_to_predict)

    actual = test_days["y"]
    predicted = forecast_days["yhat"]

    # Convert NumPy scalars to plain floats so the dict prints as ordinary
    # numbers (NumPy >= 2 shows its scalars as "np.float64(...)").
    metrics = {
        "MAPE": round(float(mean_absolute_percentage_error(actual, predicted)), 3),
        "RMSE": round(float(root_mean_squared_error(actual, predicted)), 1),
        "R-squared": round(float(r_squared(actual, predicted)), 3),
    }

    return metrics, plot
|
|
|
# Settings for a one-off run against the bundled data set.
csv_name = "merged_data_huggingface.csv"
days_to_predict, country_name, freq = 15, "UK", "2H"

# Runs a full fit and evaluation once at import time; the returned
# (metrics, plot) pair is not kept.
predict_and_evaluate(
    csv_file=csv_name,
    days_to_predict=days_to_predict,
    freq=freq,
    country_name=country_name,
)
|
|
|
|
|
''' |
|
This Gradio interface uses the `predict_and_evaluate` function to forecast electricity demand and evaluate the forecast accuracy. |
|
Users can upload a CSV file containing historical electricity demand data, specify the number of days to predict, |
|
and provide the data frequency and country code for holidays. |
|
|
|
The interface displays evaluation metrics (MAPE, RMSE, R-squared) and a plot comparing forecasted values against actual values. |
|
|
|
Example usage: |
|
- Upload the file "merged_data_huggingface.csv" |
|
- Set "Days to Predict" to 30 |
|
- Enter "2H" for data frequency |
|
- Enter "UK" for the country code |
|
''' |
|
|
|
|
|
# Gradio front end: the four inputs map, in order, onto the parameters of
# predict_and_evaluate; its (metrics, figure) result fills the two outputs.
iface = gr.Interface(
    fn=predict_and_evaluate,
    inputs=[
        gr.File(label="CSV File"),
        gr.Slider(1, 90, value=30, step=1, label="Days to Predict"),
        gr.Textbox(label="Data Frequency", placeholder="Enter frequency (e.g., 2H for 2 hourly)"),
        gr.Textbox(label="Country Code", placeholder="Enter country code (e.g., UK)"),
    ],
    outputs=[
        gr.Textbox(label="Evaluation Metrics"),
        "plot",
    ],
    title="Prophet Electricity Load Forecasting Model",
    description=(
        "Upload a CSV file of time series data to generate electricity demand "
        "forecasts using Prophet. Update country code (e.g. UK or DE) for "
        "holidays and data frequency"
    ),
    # One ready-made example row: file, horizon in days, frequency, country.
    examples=[
        ["merged_data_huggingface.csv", 30, "2H", "UK"],
    ],
)

iface.launch()