# -*- coding: utf-8 -*-
"""Huggingface_Prototype.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1i--A21QuJPKdv-HM2kUrFSwj89Qfv4cb
"""

# One-off setup (run once in Colab; kept commented out for reference).

# Configure Kaggle credentials and download the source dataset:
#! mkdir ~/.kaggle
#! cp kaggle.json ~/.kaggle/
#! chmod 600 ~/.kaggle/kaggle.json

#! kaggle datasets download -d sercandikici/merged-dataset-electricty-weather-for-modelling
#! unzip merged-dataset-electricty-weather-for-modelling.zip

#! pip install gradio

# Build merged_data_huggingface.csv from the raw export; the 'is_holiday'
# column is dropped because holidays are added by Prophet itself below.
#from google.colab import files
#uploaded = files.upload()

#df = pd.read_csv("merged_data.csv")
#df.drop('is_holiday', axis=1, inplace=True)
#df.to_csv('merged_data_huggingface.csv', index=False)
#from google.colab import files
#files.download('merged_data_huggingface.csv')

from prophet import Prophet
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import gradio as gr
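
# The app expects the uploaded CSV to have exactly three columns, in the order
# timestamp, demand, temperature; predict_and_evaluate() renames them
# positionally to ["ds", "y", "temp"]. A minimal sketch of preparing such a
# file from a wider dataframe (the column names "settlement_date" and "tsd"
# below come from the original notebook and are only illustrative):
#
# raw = pd.read_csv("merged_data.csv")
# prepared = raw[["settlement_date", "tsd", "temp"]]  # timestamp, demand, temperature
# prepared.to_csv("merged_data_huggingface.csv", index=False)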

def forecast_plot(forecast_days, test_days, days):
    """Plot the Prophet forecast against the actual test observations."""
    fig, ax = plt.subplots(figsize=(14, 4))
    ax.plot(forecast_days['ds'], forecast_days['yhat'], label='Forecast', color='green')
    ax.scatter(test_days['ds'], test_days['y'], label='Actual', color='orange')
    ax.set_xlabel('Date')
    ax.set_ylabel('MW')
    ax.set_title(f'Prophet Forecast - Model 3 - {days} days horizon')
    ax.legend()

    return fig

def mean_absolute_percentage_error(y_true, y_pred):
    """Mean absolute percentage error, returned as a fraction (not multiplied by 100)."""
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / y_true))
    return mape

def root_mean_squared_error(y_true, y_pred):
    """Root mean squared error."""
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    mse = np.mean((y_true - y_pred) ** 2)
    rmse = np.sqrt(mse)
    return rmse

def r_squared(y_true, y_pred):
    """Coefficient of determination (R^2)."""
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    mean_y_true = np.mean(y_true)
    ss_total = np.sum((y_true - mean_y_true) ** 2)
    ss_residual = np.sum((y_true - y_pred) ** 2)
    r2 = 1 - (ss_residual / ss_total)
    return r2
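
# The hand-rolled metrics above keep the app free of a scikit-learn dependency.
# If scikit-learn is available, the same values could be cross-checked against
# its implementations (a sketch, assuming scikit-learn is installed):
#
# from sklearn.metrics import mean_absolute_percentage_error as sk_mape
# from sklearn.metrics import mean_squared_error, r2_score
# assert np.isclose(sk_mape(y_true, y_pred), mean_absolute_percentage_error(y_true, y_pred))
# assert np.isclose(np.sqrt(mean_squared_error(y_true, y_pred)), root_mean_squared_error(y_true, y_pred))
# assert np.isclose(r2_score(y_true, y_pred), r_squared(y_true, y_pred))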

def predict_and_evaluate(csv_file, days_to_predict, freq, country_name):
    """Fit Prophet on the training split of the uploaded CSV, forecast the
    requested horizon, and return evaluation metrics plus a comparison plot."""
    df_model = pd.read_csv(csv_file)
    df_model.columns = ["ds", "y", "temp"]
    df_model['ds'] = pd.to_datetime(df_model['ds'])

    # Hold out the last 90 * 12 observations as the test set
    # (12 two-hour samples per day at the default "2H" frequency).
    split_from = 90 * 12
    train_data = df_model[:-split_from]
    test_data = df_model[-split_from:]

    # Prophet hyperparameters ("Model 3" in the plot title).
    seasonality_prior_scale = 0.01
    changepoint_prior_scale = 0.05
    mcmc_samples = 50
    periods = days_to_predict * 12  # forecast horizon in 2-hour steps

    m = Prophet(mcmc_samples=mcmc_samples, changepoint_prior_scale=changepoint_prior_scale,
                seasonality_prior_scale=seasonality_prior_scale)
    m.add_country_holidays(country_name=country_name)
    m.add_regressor("temp", mode="additive")
    m.fit(train_data)

    future = m.make_future_dataframe(periods=periods, freq=freq)
    train_idx = future["ds"].isin(train_data["ds"])
    test_idx = ~train_idx

    # The "temp" regressor must be supplied for every future row: copy the
    # observed temperatures for the training dates and the held-out test
    # temperatures for the forecast dates.
    for r in ["temp"]:
        future.loc[train_idx, r] = train_data[r].to_list()
        future.loc[test_idx, r] = test_data.iloc[:periods][r].to_list()

    forecast = m.predict(future)
    # Keep only the forecast rows that fall inside the test period and align
    # the actual observations to the same date range.
    forecast_days = forecast[forecast["ds"] >= test_data["ds"].iloc[0]]
    test_days = test_data[(test_data["ds"] >= test_data["ds"].iloc[0]) &
                          (test_data["ds"] <= forecast_days["ds"].iloc[-1])]

    plot = forecast_plot(forecast_days, test_days, days_to_predict)

    mape = mean_absolute_percentage_error(test_days["y"], forecast_days["yhat"])
    rmse = root_mean_squared_error(test_days["y"], forecast_days["yhat"])
    rsqr = r_squared(test_days["y"], forecast_days["yhat"])

    metrics = {
        "MAPE": round(mape, 3),
        "RMSE": round(rmse, 1),
        "R-squared": round(rsqr, 3)
    }

    return metrics, plot

csv_name = "merged_data_huggingface.csv"
#df_merged['settlement_date'] = pd.to_datetime(df_merged['settlement_date'])
#df_model = df_merged[["tsd", "settlement_date", "temp"]]
#df_model.columns = ["y", "ds", "temp"]

days_to_predict = 15   # Default forecast horizon in days
country_name = "UK"    # Default country code for Prophet's built-in holidays
freq = "2H"            # Default data frequency (2-hourly)

# Quick local sanity check of the pipeline before launching the Gradio UI.
predict_and_evaluate(csv_name, days_to_predict, freq, country_name)
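
# Note: the call above refits the model at startup (with MCMC sampling, since
# mcmc_samples=50), which can be slow. Once the pipeline has been verified it
# could be commented out so the Gradio app starts faster.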

iface = gr.Interface(
    fn=predict_and_evaluate,
    inputs=[
        gr.File(label="CSV File"),
        gr.Slider(1, 90, value=30, step=1, label="Days to Predict"),
        gr.Textbox(label="Data Frequency", placeholder="Enter frequency (e.g., 2H for 2-hourly)"),
        gr.Textbox(label="Country Code", placeholder="Enter country code (e.g., UK)")
    ],
    outputs=[
        gr.Textbox(label="Evaluation Metrics"),
        gr.Plot(label="Forecast Plot")
    ],
    title="Prophet Electricity Load Forecasting Model",
    description="Upload a CSV file of time series data to generate electricity demand forecasts using Prophet. Set the country code (e.g., UK or DE) used for holidays and the data frequency (e.g., 2H).",
    examples=[
        ["merged_data_huggingface.csv", 30, "2H", "UK"]
    ]
)

iface.launch()
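
# For a shareable public link (e.g., when running from Colab), Gradio's
# launch() also accepts share=True:
#
# iface.launch(share=True)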