Spaces:
Runtime error
Runtime error
File size: 4,891 Bytes
0374998 015fb35 0374998 015fb35 0374998 30e1100 0374998 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
# -*- coding: utf-8 -*-
"""Huggingface_Prototype.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1i--A21QuJPKdv-HM2kUrFSwj89Qfv4cb
"""
#! mkdir ~/.kaggle
#! cp kaggle.json ~/.kaggle/
#! chmod 600 ~/.kaggle/kaggle.json
#!kaggle datasets download -d sercandikici/merged-dataset-electricty-weather-for-modelling
#! unzip merged-dataset-electricty-weather-for-modelling.zip
#pip install gradio
#from google.colab import files
#uploaded = files.upload()
#df = pd.read_csv("merged_data.csv")
#df.drop('is_holiday', axis=1, inplace=True)
#df.to_csv('merged_data_huggingface.csv', index=False)
#from google.colab import files
#files.download('merged_data_huggingface.csv')
from prophet import Prophet
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import gradio as gr
def forecast_plot(forecast_days, test_days, days):
    """Plot the forecast line against the actual observations.

    Args:
        forecast_days: Table with a ``ds`` column (x values) and a ``yhat``
            column (forecast values); drawn as a green line.
        test_days: Table with a ``ds`` column and a ``y`` column (actual
            values); drawn as orange scatter points.
        days: Forecast horizon in days; only used in the plot title.

    Returns:
        The matplotlib figure that holds the plot.
    """
    fig, ax = plt.subplots(figsize=(14, 4))
    ax.plot(forecast_days['ds'], forecast_days['yhat'], label='Forecast', color='green')
    ax.scatter(test_days['ds'], test_days['y'], label='Actual', color='orange')
    ax.set_xlabel('Date')
    ax.set_ylabel('MGW')
    # Title and legend are set on this Axes directly instead of through
    # pyplot's "current axes", so they always land on the figure returned here.
    ax.set_title(f'Prophet Forecast - Model 3 - {days} days horizon')
    ax.legend()
    return fig
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean of ``|(y_true - y_pred) / y_true|``.

    The result is a fraction (it is not multiplied by 100). Both inputs are
    converted with ``np.array`` before the element-wise arithmetic.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = (actual - predicted) / actual
    return np.mean(np.abs(relative_error))
def root_mean_squared_error(y_true, y_pred):
    """Return the square root of the mean squared difference of the inputs.

    Both inputs are converted with ``np.array`` before the element-wise
    arithmetic.
    """
    residuals = np.array(y_true) - np.array(y_pred)
    mean_squared = np.mean(residuals ** 2)
    return np.sqrt(mean_squared)
def r_squared(y_true, y_pred):
    """Return the coefficient of determination, ``1 - SS_res / SS_tot``.

    Args:
        y_true: Observed values (anything ``np.array`` accepts).
        y_pred: Predicted values, element-wise comparable with ``y_true``.

    Returns:
        ``1 - SS_res / SS_tot`` where ``SS_res`` is the sum of squared
        differences between the inputs and ``SS_tot`` is the sum of squared
        deviations of ``y_true`` from its mean.

        When ``SS_tot`` is zero (``y_true`` is constant) the ratio is
        undefined; 1.0 is returned if the prediction is exact and 0.0
        otherwise, rather than NaN or -inf. Empty input returns NaN.
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    if y_true.size == 0:
        # Nothing to score; keep the "undefined" result without tripping
        # NumPy's empty-mean warning.
        return float("nan")

    ss_total = np.sum((y_true - np.mean(y_true)) ** 2)
    ss_residual = np.sum((y_true - y_pred) ** 2)
    if ss_total == 0:
        # Avoid 0/0 and x/0, which would emit a RuntimeWarning and yield
        # NaN / -inf.
        return 1.0 if ss_residual == 0 else 0.0
    return 1 - (ss_residual / ss_total)
def predict_and_evaluate(csv_file, days_to_predict, freq, country_name):
    """Fit Prophet on a CSV time series and score it on the held-out tail.

    The CSV must have exactly three columns; they are renamed positionally to
    ``ds`` (timestamp), ``y`` (target) and ``temp`` (extra regressor). The
    last ``90 * 12`` rows are held out and the model is fitted on everything
    before them.

    Args:
        csv_file: Anything ``pd.read_csv`` accepts.
        days_to_predict: Forecast horizon in days. It is converted to rows
            with a fixed factor of 12 rows per day, independent of ``freq``.
        freq: Frequency string passed to ``make_future_dataframe``.
        country_name: Country code passed to ``add_country_holidays``.

    Returns:
        A ``(metrics, figure)`` tuple. ``metrics`` is a dict with the keys
        ``"MAPE"``, ``"RMSE"`` and ``"R-squared"`` (rounded plain floats);
        ``figure`` is the plot produced by ``forecast_plot``.

    Raises:
        ValueError: If the horizon needs more rows than the hold-out holds,
            or if no forecast timestamp matches a held-out timestamp.
    """
    # NOTE(review): 12 rows per day matches 2-hourly data; it is not derived
    # from `freq`, so other frequencies get a horizon of a different length.
    rows_per_day = 12
    holdout_rows = 90 * rows_per_day

    df_model = pd.read_csv(csv_file)
    df_model.columns = ["ds", "y", "temp"]
    df_model['ds'] = pd.to_datetime(df_model['ds'])

    train_data = df_model[:-holdout_rows]
    test_data = df_model[-holdout_rows:]

    # The horizon is used as a row count and as a slice bound, both of which
    # need an int (a UI widget may hand the value over as a float).
    periods = int(days_to_predict) * rows_per_day
    if periods > len(test_data):
        # The future regressor values are taken from the hold-out, so the
        # horizon cannot extend past it.
        raise ValueError(
            f"Horizon of {periods} rows exceeds the {len(test_data)} held-out rows"
        )

    m = Prophet(
        mcmc_samples=50,
        changepoint_prior_scale=0.05,
        seasonality_prior_scale=0.01,
    )
    m.add_country_holidays(country_name=country_name)
    m.add_regressor("temp", mode="additive")
    m.fit(train_data)

    # The future frame holds the training timestamps followed by `periods`
    # new ones; the regressor column is filled positionally for each part.
    future = m.make_future_dataframe(periods=periods, freq=freq)
    train_idx = future["ds"].isin(train_data.ds)
    test_idx = ~train_idx
    future.loc[train_idx, "temp"] = train_data["temp"].to_list()
    future.loc[test_idx, "temp"] = test_data.iloc[:periods]["temp"].to_list()

    forecast = m.predict(future)

    first_test_ts = test_data["ds"].iloc[0]
    forecast_days = forecast[forecast["ds"] >= first_test_ts]
    test_days = test_data[(test_data["ds"] >= first_test_ts) & (
        test_data["ds"] <= forecast_days["ds"].iloc[-1])]
    plot = forecast_plot(forecast_days, test_days, days_to_predict)

    # Pair actuals and forecasts by timestamp instead of by position, so a
    # gap in the data cannot shift one series against the other.
    scored = test_days[["ds", "y"]].merge(
        forecast_days[["ds", "yhat"]], on="ds", how="inner"
    )
    if scored.empty:
        raise ValueError("No forecast timestamp matches a held-out timestamp")

    mape = mean_absolute_percentage_error(scored["y"], scored["yhat"])
    rmse = root_mean_squared_error(scored["y"], scored["yhat"])
    rsqr = r_squared(scored["y"], scored["yhat"])

    # Plain Python floats keep the dict readable when it is rendered as text.
    metrics = {
        "MAPE": round(float(mape), 3),
        "RMSE": round(float(rmse), 1),
        "R-squared": round(float(rsqr), 3),
    }
    return metrics, plot
csv_name = "merged_data_huggingface.csv"
#df_merged['settlement_date'] = pd.to_datetime(df_merged['settlement_date'])
#df_model = df_merged[["tsd", "settlement_date", "temp"]]
#df_model.columns = ["y", "ds", "temp"]
days_to_predict = 15  # Default forecast horizon, in days
country_name = "UK"  # Default country code for the holiday calendar
freq = "2H"  # Default data frequency string

# Web UI: the four inputs map, in order, onto the parameters of
# predict_and_evaluate; its two return values fill the two outputs.
iface = gr.Interface(
    fn=predict_and_evaluate,
    inputs=[
        gr.File(label="CSV File"),
        gr.Slider(1, 90, value=30, step=1, label="Days to Predict"),
        gr.Textbox(label="Data Frequency", placeholder="Enter frequency (e.g., 2H for 2 hourly)"),
        gr.Textbox(label="Country Code", placeholder="Enter country code (e.g., UK)"),
    ],
    outputs=[
        gr.Textbox(label=" Evaluation Metrics"),
        "plot",
    ],
    title="Prophet Electricty Load Forecasting Model",
    description="Upload a CSV file of time series data to generate electricty demand forecasts using Prophet. Update country code(eg UK or DE) for holidays and data frequency",
    examples=[
        ["merged_data_huggingface.csv", 30, "2H", "UK"],
    ],
)

if __name__ == "__main__":
    # Run one forecast with the defaults before serving. The metrics are not
    # used; the figure is closed so it does not stay registered with pyplot.
    # NOTE(review): this is a complete model fit and delays start-up.
    _, _startup_fig = predict_and_evaluate(csv_name, days_to_predict, freq, country_name)
    plt.close(_startup_fig)

    # Guarded so that importing this module neither fits a model nor starts
    # the web server.
    iface.launch()