# sdikici's picture
# Update app.py
# cad7c06 verified
# -*- coding: utf-8 -*-
"""Huggingface_Prototype.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1i--A21QuJPKdv-HM2kUrFSwj89Qfv4cb
"""
#! mkdir ~/.kaggle
#! cp kaggle.json ~/.kaggle/
#! chmod 600 ~/.kaggle/kaggle.json
#!kaggle datasets download -d sercandikici/merged-dataset-electricty-weather-for-modelling
#! unzip merged-dataset-electricty-weather-for-modelling.zip
#pip install gradio
'''
Use the files.upload() function to upload files from the local system.
'''
#from google.colab import files
#uploaded = files.upload()
'''
Read the CSV file "merged_data.csv" into a DataFrame df.
Drop the column 'is_holiday' from the DataFrame df using the drop() function with axis=1.
Save the modified DataFrame df to a new CSV file named "merged_data_huggingface.csv" using the to_csv() function with index=False.
Download the CSV file "merged_data_huggingface.csv" using the files.download() function from the google.colab module.
'''
#df = pd.read_csv("merged_data.csv")
#df.drop('is_holiday', axis=1, inplace=True)
#df.to_csv('merged_data_huggingface.csv', index=False)
#from google.colab import files
#files.download('merged_data_huggingface.csv')
from prophet import Prophet
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import gradio as gr
def forecast_plot(forecast_days, test_days, days):
    '''
    Draw the forecast against the observed values and return the figure.

    Parameters:
    - forecast_days: Table with a "ds" column (x values) and a "yhat" column (forecast),
      drawn as a green line labelled "Forecast".
    - test_days: Table with a "ds" column and a "y" column (observed values),
      drawn as orange points labelled "Actual".
    - days: Forecast horizon in days; only interpolated into the plot title.

    Returns:
    - The matplotlib figure containing the plot.
    '''
    fig, ax = plt.subplots(figsize=(14, 4))
    ax.plot(forecast_days['ds'], forecast_days['yhat'], label='Forecast', color='green')
    ax.scatter(test_days['ds'], test_days['y'], label='Actual', color='orange')
    ax.set_xlabel('Date')
    ax.set_ylabel('MGW')
    # Title and legend are set on this Axes object rather than through pyplot's
    # global "current axes", so they cannot end up on a different figure.
    ax.set_title(f'Prophet Forecast - Model 3 - {days} days horizon')
    ax.legend()
    return fig
def mean_absolute_percentage_error(y_true, y_pred):
    '''
    Return the Mean Absolute Percentage Error (MAPE) of y_pred against y_true.

    The result is the mean of |(y_true - y_pred) / y_true| and is expressed as a
    fraction (it is not multiplied by 100).
    '''
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = (actual - predicted) / actual
    return np.abs(relative_error).mean()
def root_mean_squared_error(y_true, y_pred):
    '''
    Return the Root Mean Squared Error (RMSE) of y_pred against y_true,
    i.e. the square root of the mean squared difference between the two.
    '''
    residuals = np.array(y_true) - np.array(y_pred)
    return np.sqrt(np.mean(np.square(residuals)))
def r_squared(y_true, y_pred):
    '''
    Return the coefficient of determination (R-squared) of y_pred against y_true.

    Computed as 1 - SS_residual / SS_total, where SS_total is the squared deviation
    of y_true from its own mean and SS_residual is the squared prediction error.
    '''
    observed = np.array(y_true)
    fitted = np.array(y_pred)
    total_ss = np.sum((observed - observed.mean()) ** 2)
    residual_ss = np.sum((observed - fitted) ** 2)
    return 1 - residual_ss / total_ss
def predict_and_evaluate(csv_file, days_to_predict, freq, country_name):
    '''
    Fit a Prophet model on historical demand data, forecast a hold-out window and score the forecast.

    Parameters:
    - csv_file: CSV source handed to pd.read_csv. Its three columns are renamed, in order,
      to "ds" (datetime), "y" (target variable) and "temp" (temperature regressor).
    - days_to_predict: Forecast horizon in days. It is converted to int and multiplied by 12
      to get the number of rows to forecast (this presumably assumes 2-hourly data - confirm
      before using another frequency).
    - freq: Pandas frequency string used to build the future timestamps; a blank value falls back to "2H".
    - country_name: Country whose holidays are added to the model; a blank value falls back to "UK".

    Returns:
    - (metrics, plot): metrics is a dict with the keys "MAPE", "RMSE" and "R-squared" holding rounded
      Python floats; plot is the figure returned by forecast_plot.

    Raises:
    - ValueError: If the requested horizon is shorter than one row or longer than the hold-out set.

    Steps:
    1. Read the CSV file into a DataFrame and parse the datetime column.
    2. Hold out the last 90 * 12 rows as the test set; train on everything before them.
    3. Fit Prophet (MCMC sampling, country holidays, additive temperature regressor) on the training set.
    4. Build the future DataFrame and fill the temperature regressor from the training and test rows.
    5. Predict, plot the forecast with forecast_plot, and compute MAPE, RMSE and R-squared on the horizon.
    '''
    # Blank inputs (for example an empty text box) fall back to the app defaults
    # instead of failing deep inside Prophet or pandas.
    freq = freq or "2H"
    country_name = country_name or "UK"
    # The horizon may arrive as a float; row counts and iloc slicing need an int.
    days_to_predict = int(days_to_predict)

    df_model = pd.read_csv(csv_file)
    df_model.columns = ["ds", "y", "temp"]
    df_model['ds'] = pd.to_datetime(df_model['ds'])

    # Split the data: the last 90 * 12 rows are held out for evaluation
    split_from = 90 * 12
    train_data = df_model[:-split_from]
    test_data = df_model[-split_from:]

    periods = days_to_predict * 12
    if not 0 < periods <= len(test_data):
        # The regressor values for the forecast rows come from the hold-out set,
        # so the horizon cannot be empty or extend past it.
        raise ValueError(
            f"days_to_predict={days_to_predict} needs {periods} hold-out rows, "
            f"but between 1 and {len(test_data)} rows are available"
        )

    # Set parameters for the Prophet model
    seasonality_prior_scale = 0.01
    changepoint_prior_scale = 0.05
    mcmc_samples = 50

    # Train and fit the Prophet model
    m = Prophet(mcmc_samples=mcmc_samples, changepoint_prior_scale=changepoint_prior_scale,
                seasonality_prior_scale=seasonality_prior_scale)
    m.add_country_holidays(country_name=country_name)
    m.add_regressor("temp", mode="additive")
    m.fit(train_data)

    # Create a future DataFrame for prediction, setting regressors for both training and testing data
    future = m.make_future_dataframe(periods=periods, freq=freq)
    train_idx = future["ds"].isin(train_data.ds)
    test_idx = ~train_idx
    reg = ["temp"]
    for r in reg:
        future.loc[train_idx, r] = train_data[r].to_list()
        future.loc[test_idx, r] = test_data.iloc[:periods][r].to_list()

    forecast = m.predict(future)

    # Keep only the forecast rows from the start of the hold-out set onwards,
    # and the hold-out rows that fall inside the forecast horizon.
    forecast_days = forecast[forecast["ds"] >= test_data["ds"].iloc[0]]
    test_days = test_data[(test_data["ds"] >= test_data["ds"].iloc[0]) & (
        test_data["ds"] <= forecast_days["ds"].iloc[-1])]

    # Plot the forecast using the forecast_plot function
    plot = forecast_plot(forecast_days, test_days, days_to_predict)

    # Calculate forecast metrics on the horizon
    mape = mean_absolute_percentage_error(test_days["y"], forecast_days["yhat"])
    rmse = root_mean_squared_error(test_days["y"], forecast_days["yhat"])
    rsqr = r_squared(test_days["y"], forecast_days["yhat"])

    # Convert to built-in floats so the dict prints as plain numbers rather than
    # NumPy scalar reprs when it is shown as text.
    metrics = {
        "MAPE": float(round(mape, 3)),
        "RMSE": float(round(rmse, 1)),
        "R-squared": float(round(rsqr, 3)),
    }
    return metrics, plot
# Default inputs for the start-up run below.
csv_name = "merged_data_huggingface.csv"
#df_merged['settlement_date'] = pd.to_datetime(df_merged['settlement_date'])
#df_model = df_merged[["tsd", "settlement_date", "temp"]]
#df_model.columns = ["y", "ds", "temp"]
freq = "2H"  # Default data frequency
country_name = "UK"  # Default country used for the holiday calendar
days_to_predict = 15  # Default number of days to predict
# Run one forecast with the defaults when the module is loaded; the result is discarded.
predict_and_evaluate(
    csv_file=csv_name,
    days_to_predict=days_to_predict,
    freq=freq,
    country_name=country_name,
)
'''
This Gradio interface uses the `predict_and_evaluate` function to forecast electricity demand and evaluate the forecast accuracy.
Users can upload a CSV file containing historical electricity demand data, specify the number of days to predict,
and provide the data frequency and country code for holidays.
The interface displays evaluation metrics (MAPE, RMSE, R-squared) and a plot comparing forecasted values against actual values.
Example usage:
- Upload the file "merged_data_huggingface.csv"
- Set "Days to Predict" to 30
- Enter "2H" for data frequency
- Enter "UK" for the country code
'''
# Web form around predict_and_evaluate: a CSV upload, a horizon slider and two text
# boxes go in; the metrics (as text) and the forecast plot come out.
iface = gr.Interface(
    fn=predict_and_evaluate,
    inputs=[
        gr.File(label="CSV File"),
        gr.Slider(1, 90, value=30, step=1, label="Days to Predict"),
        # Pre-filled so that submitting the form untouched does not send blank strings.
        gr.Textbox(label="Data Frequency", value="2H", placeholder="Enter frequency (e.g., 2H for 2 hourly)"),
        gr.Textbox(label="Country Code", value="UK", placeholder="Enter country code (e.g., UK)"),
    ],
    outputs=[
        gr.Textbox(label=" Evaluation Metrics"),
        "plot",
    ],
    title="Prophet Electricity Load Forecasting Model",
    description="Upload a CSV file of time series data to generate electricity demand forecasts using Prophet. Update country code(eg UK or DE) for holidays and data frequency",
    examples=[
        ["merged_data_huggingface.csv", 30, "2H", "UK"],
    ],
)
iface.launch()