# NOTE: web-viewer residue (file-size banner, commit hashes, line-number gutter) removed here; it was not part of the script.
# -*- coding: utf-8 -*-
"""Huggingface_Prototype.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1i--A21QuJPKdv-HM2kUrFSwj89Qfv4cb
"""

#! mkdir ~/.kaggle
#! cp kaggle.json ~/.kaggle/
#! chmod 600 ~/.kaggle/kaggle.json

#!kaggle datasets download -d sercandikici/merged-dataset-electricty-weather-for-modelling
#! unzip merged-dataset-electricty-weather-for-modelling.zip

#pip install gradio

'''
Use the files.upload() function to upload files from the local system.
'''

#from google.colab import files
#uploaded = files.upload()

'''
Read the CSV file "merged_data.csv" into a DataFrame df.
Drop the column 'is_holiday' from the DataFrame df using the drop() function with axis=1.
Save the modified DataFrame df to a new CSV file named "merged_data_huggingface.csv" using the to_csv() function with index=False.
Download the CSV file "merged_data_huggingface.csv" using the files.download() function from the google.colab module.
'''

#df = pd.read_csv("merged_data.csv")
#df.drop('is_holiday', axis=1, inplace=True)
#df.to_csv('merged_data_huggingface.csv', index=False)
#from google.colab import files
#files.download('merged_data_huggingface.csv')

from prophet import Prophet
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import gradio as gr

def forecast_plot(forecast_days, test_days, days):
    '''
    Draw the forecast against the actual observations and return the figure.

    Parameters:
    - forecast_days: Object indexable by 'ds' and 'yhat' (e.g. a DataFrame); drawn as a green line labelled "Forecast".
    - test_days: Object indexable by 'ds' and 'y' (e.g. a DataFrame); drawn as orange points labelled "Actual".
    - days: Forecast horizon in days; only interpolated into the plot title.

    Returns:
    - The matplotlib figure (14 x 4 inches) holding the plot. The x-axis is labelled "Date" and the
      y-axis "MGW"; the title text hard-codes "Model 3".
    '''
    fig, ax = plt.subplots(figsize=(14, 4))

    ax.plot(forecast_days['ds'], forecast_days['yhat'], label='Forecast', color='green')
    ax.scatter(test_days['ds'], test_days['y'], label='Actual', color='orange')

    ax.set_xlabel('Date')
    ax.set_ylabel('MGW')

    # Title and legend are set on this Axes object instead of via plt.title() /
    # plt.legend(): the pyplot functions act on pyplot's global "current" figure,
    # which is not guaranteed to be the one created above if another figure is
    # created in between (e.g. by a concurrent request).
    ax.set_title(f'Prophet Forecast - Model 3 - {days} days horizon')
    ax.legend()

    return fig

def mean_absolute_percentage_error(y_true, y_pred):
    '''
    Return the Mean Absolute Percentage Error (MAPE) of y_pred against y_true.

    The result is the mean of |(actual - predicted) / actual| and is returned as a
    fraction (it is not multiplied by 100). Both inputs are converted to NumPy arrays first.
    '''
    actual = np.array(y_true)
    predicted = np.array(y_pred)

    # Error of each prediction relative to the value it was supposed to match.
    relative_errors = (actual - predicted) / actual
    return np.mean(np.abs(relative_errors))

def root_mean_squared_error(y_true, y_pred):
    '''
    Return the Root Mean Squared Error (RMSE) of y_pred against y_true.

    Both inputs are converted to NumPy arrays; the result is the square root of the
    mean of the squared element-wise differences.
    '''
    residuals = np.array(y_true) - np.array(y_pred)
    mean_squared_error = np.mean(residuals ** 2)
    return np.sqrt(mean_squared_error)

def r_squared(y_true, y_pred):
    '''
    Return the coefficient of determination (R-squared) of y_pred against y_true.

    Computed as 1 - SS_residual / SS_total, where SS_residual is the sum of squared
    prediction errors and SS_total is the sum of squared deviations of y_true from its mean.
    '''
    actual = np.array(y_true)
    predicted = np.array(y_pred)

    residual_sum_of_squares = np.sum((actual - predicted) ** 2)
    total_sum_of_squares = np.sum((actual - np.mean(actual)) ** 2)

    return 1 - (residual_sum_of_squares / total_sum_of_squares)

def predict_and_evaluate(csv_file, days_to_predict, freq, country_name):
    '''
    Fit a Prophet model on historical electricity demand, forecast the held-out tail of the
    data, and score the forecast against the actual values.

    Parameters:
    - csv_file: Passed straight to pandas.read_csv. The file must have exactly three columns, which
      are renamed positionally to "ds" (datetime), "y" (target variable) and "temp" (temperature).
    - days_to_predict: Forecast horizon in days. Converted to int; every day counts as 12 rows,
      which matches 2-hourly data.
    - freq: pandas frequency string used to generate the future timestamps. An empty value falls back to "2H".
    - country_name: Country code for Prophet's built-in holidays. An empty value falls back to "UK".

    Steps:
    1. Apply the fallbacks for frequency and country code and coerce the horizon to int.
    2. Read the CSV file into a DataFrame and parse the datetime column.
    3. Hold out the last 90 * 12 rows as the test set; train on everything before them.
    4. Fit Prophet (MCMC sampling) with country holidays and "temp" as an additive regressor.
    5. Build the future DataFrame and fill "temp" from the training rows and the first held-out rows.
    6. Predict, then compare the forecast with the held-out rows covering the same time span.
    7. Plot the comparison with forecast_plot and compute MAPE, RMSE and R-squared.

    Returns:
    - (metrics, plot): metrics is a dict with keys "MAPE", "RMSE" and "R-squared" holding rounded
      plain Python floats; plot is the figure returned by forecast_plot.

    Raises:
    - ValueError: if the requested horizon needs more rows than the held-out test set contains.
    '''
    samples_per_day = 12  # rows per day; corresponds to one sample every 2 hours
    holdout_days = 90     # length of the test set taken from the end of the data

    # Fall back to defaults for blank inputs (e.g. a UI text field left empty),
    # and make sure the horizon is an int before it is used as a period count.
    freq = freq or "2H"
    country_name = country_name or "UK"
    days_to_predict = int(days_to_predict)

    df_model = pd.read_csv(csv_file)
    df_model.columns = ["ds", "y", "temp"]
    df_model['ds'] = pd.to_datetime(df_model['ds'])

    # Split into training data and the held-out tail.
    split_from = holdout_days * samples_per_day
    train_data = df_model[:-split_from]
    test_data = df_model[-split_from:]

    periods = days_to_predict * samples_per_day
    if periods > len(test_data):
        # Fail before the expensive model fit: the regressor values for the
        # future rows are taken from the held-out data, so it must be long enough.
        raise ValueError(
            f"Cannot evaluate a {days_to_predict}-day horizon: it needs {periods} "
            f"held-out rows but only {len(test_data)} are available."
        )

    # Parameters for the Prophet model.
    seasonality_prior_scale = 0.01
    changepoint_prior_scale = 0.05
    mcmc_samples = 50

    # Train and fit the Prophet model.
    m = Prophet(mcmc_samples=mcmc_samples, changepoint_prior_scale=changepoint_prior_scale,
                seasonality_prior_scale=seasonality_prior_scale)
    m.add_country_holidays(country_name=country_name)
    m.add_regressor("temp", mode="additive")
    m.fit(train_data)

    # Create the future DataFrame (history + forecast rows) and set the regressor
    # for both parts: training rows get the training temperatures, the remaining
    # rows get the temperatures of the first `periods` held-out rows.
    future = m.make_future_dataframe(periods=periods, freq=freq)
    train_idx = future["ds"].isin(train_data.ds)
    test_idx = ~train_idx

    future.loc[train_idx, "temp"] = train_data["temp"].to_list()
    future.loc[test_idx, "temp"] = test_data.iloc[:periods]["temp"].to_list()

    # Predict, then keep only the forecast rows from the start of the test set
    # and the held-out rows that fall inside the forecast's time span.
    forecast = m.predict(future)
    forecast_days = forecast[forecast["ds"] >= test_data["ds"].iloc[0]]
    test_days = test_data[(test_data["ds"] >= test_data["ds"].iloc[0]) & (
                test_data["ds"] <= forecast_days["ds"].iloc[-1])]

    # Plot the forecast against the actual values.
    plot = forecast_plot(forecast_days, test_days, days_to_predict)

    # Calculate the forecast metrics.
    mape = mean_absolute_percentage_error(test_days["y"], forecast_days["yhat"])
    rmse = root_mean_squared_error(test_days["y"], forecast_days["yhat"])
    rsqr = r_squared(test_days["y"], forecast_days["yhat"])

    # Convert to built-in floats so the dict does not carry NumPy scalar types
    # (their repr can leak into the text shown when the dict is stringified).
    metrics = {
        "MAPE": float(round(mape, 3)),
        "RMSE": float(round(rmse, 1)),
        "R-squared": float(round(rsqr, 3)),
    }

    return metrics, plot

# Default arguments for the start-up run below.
freq = "2H"  # Default data frequency (one sample every 2 hours)
country_name = "UK"  # Default country code for the holiday calendar
days_to_predict = 15  # Default forecast horizon in days
csv_name = "merged_data_huggingface.csv"  # Default input file
#df_merged['settlement_date'] = pd.to_datetime(df_merged['settlement_date'])
#df_model = df_merged[["tsd", "settlement_date", "temp"]]
#df_model.columns = ["y", "ds", "temp"]

# Run one forecast with the defaults when the module is executed.
# NOTE(review): the returned (metrics, plot) pair is discarded — confirm this
# start-up run is still wanted.
predict_and_evaluate(
    csv_file=csv_name,
    days_to_predict=days_to_predict,
    freq=freq,
    country_name=country_name,
)


''' 
This Gradio interface uses the `predict_and_evaluate` function to forecast electricity demand and evaluate the forecast accuracy. 
Users can upload a CSV file containing historical electricity demand data, specify the number of days to predict, 
and provide the data frequency and country code for holidays.

The interface displays evaluation metrics (MAPE, RMSE, R-squared) and a plot comparing forecasted values against actual values.

Example usage: 
- Upload the file "merged_data_huggingface.csv"
- Set "Days to Predict" to 30
- Enter "2H" for data frequency
- Enter "UK" for the country code
'''


# Input widgets, listed in the positional order of predict_and_evaluate's
# parameters: csv_file, days_to_predict, freq, country_name.
_input_components = [
    gr.File(label="CSV File"),
    gr.Slider(1, 90, value=30, step=1, label="Days to Predict"),
    gr.Textbox(label="Data Frequency", placeholder="Enter frequency (e.g., 2H for 2 hourly)"),
    gr.Textbox(label="Country Code", placeholder="Enter country code (e.g., UK)"),
]

# Output widgets, matching the (metrics, plot) pair predict_and_evaluate returns.
_output_components = [
    gr.Textbox(label=" Evaluation Metrics"),
    "plot",
]

# One ready-made example row; values follow the order of the input widgets.
_example_rows = [
    ["merged_data_huggingface.csv", 30, "2H", "UK"],
]

iface = gr.Interface(
    fn=predict_and_evaluate,
    inputs=_input_components,
    outputs=_output_components,
    title="Prophet Electricty Load Forecasting Model",
    description="Upload a CSV file of time series data to generate electricty demand forecasts using Prophet. Update country code(eg UK or DE) for holidays and data frequency",
    examples=_example_rows,
)

# Start the web app.
iface.launch()