Spaces:

sdikici
/

Prophet_Electricty_Load_Forecasting

Running

App Files Files Community

Prophet_Electricty_Load_Forecasting / app.py

sdikici

Update app.py

cad7c06 verified 4 months ago

raw

history blame contribute delete

No virus

8.54 kB

	# -*- coding: utf-8 -*-
	"""Huggingface_Prototype.ipynb

	Automatically generated by Colab.

	Original file is located at
	https://colab.research.google.com/drive/1i--A21QuJPKdv-HM2kUrFSwj89Qfv4cb
	"""

	#! mkdir ~/.kaggle
	#! cp kaggle.json ~/.kaggle/
	#! chmod 600 ~/.kaggle/kaggle.json

	#!kaggle datasets download -d sercandikici/merged-dataset-electricty-weather-for-modelling
	#! unzip merged-dataset-electricty-weather-for-modelling.zip

	#pip install gradio

	'''
	Use the files.upload() function to upload files from the local system.
	'''

	#from google.colab import files
	#uploaded = files.upload()

	'''
	Read the CSV file "merged_data.csv" into a DataFrame df.
	Drop the column 'is_holiday' from the DataFrame df using the drop() function with axis=1.
	Save the modified DataFrame df to a new CSV file named "merged_data_huggingface.csv" using the to_csv() function with index=False.
	Download the CSV file "merged_data_huggingface.csv" using the files.download() function from the google.colab module.
	'''

	#df = pd.read_csv("merged_data.csv")
	#df.drop('is_holiday', axis=1, inplace=True)
	#df.to_csv('merged_data_huggingface.csv', index=False)
	#from google.colab import files
	#files.download('merged_data_huggingface.csv')

	from prophet import Prophet
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	import gradio as gr

	def forecast_plot(forecast_days, test_days, days):
	    '''
	    Draw the forecast against the observed values and return the figure.

	    Parameters:
	    - forecast_days: table with a 'ds' column (x values) and a 'yhat' column (forecast), drawn as a green line labelled "Forecast".
	    - test_days: table with a 'ds' column and a 'y' column (observations), drawn as orange points labelled "Actual".
	    - days: forecast horizon; only interpolated into the plot title.

	    Returns:
	    The matplotlib figure that was created. The figure is not closed here.
	    '''
	    fig, ax = plt.subplots(figsize=(14, 4))
	    ax.plot(forecast_days['ds'], forecast_days['yhat'], label='Forecast', color='green')
	    ax.scatter(test_days['ds'], test_days['y'], label='Actual', color='orange')
	    ax.set_xlabel('Date')
	    ax.set_ylabel('MGW')
	    # Title and legend are set on this figure's own axes. plt.title()/plt.legend()
	    # act on pyplot's global "current axes", which may belong to a different
	    # figure when several requests are being served at the same time.
	    ax.set_title(f'Prophet Forecast - Model 3 - {days} days horizon')
	    ax.legend()

	    return fig

	def mean_absolute_percentage_error(y_true, y_pred):
	    '''
	    Return the Mean Absolute Percentage Error (MAPE) between actual and predicted values.

	    Both inputs are converted to NumPy arrays and compared element by element.
	    The result is the mean of |(y_true - y_pred) / y_true|, i.e. a fraction;
	    it is not multiplied by 100.
	    '''
	    actual = np.array(y_true)
	    predicted = np.array(y_pred)
	    relative_errors = np.abs((actual - predicted) / actual)
	    return np.mean(relative_errors)

	def root_mean_squared_error(y_true, y_pred):
	    '''
	    Return the Root Mean Squared Error (RMSE) between actual and predicted values.

	    Both inputs are converted to NumPy arrays; the result is the square root
	    of the mean of the element-wise squared differences.
	    '''
	    actual = np.array(y_true)
	    predicted = np.array(y_pred)
	    squared_errors = (actual - predicted) ** 2
	    return np.sqrt(np.mean(squared_errors))

	def r_squared(y_true, y_pred):
	    '''
	    Return the coefficient of determination (R-squared) of y_pred against y_true.

	    Computed as 1 - SS_residual / SS_total, where SS_total is the squared
	    deviation of y_true from its own mean and SS_residual is the squared
	    prediction error.

	    Special cases:
	    - Empty input: returns nan (there is nothing to score).
	    - Constant y_true (SS_total == 0): the ratio is undefined, so the score is
	      1.0 when the predictions match exactly and 0.0 otherwise, instead of
	      dividing by zero and returning nan/-inf.
	    '''
	    y_true, y_pred = np.array(y_true), np.array(y_pred)
	    if y_true.size == 0:
	        return float("nan")

	    ss_total = np.sum((y_true - np.mean(y_true)) ** 2)
	    ss_residual = np.sum((y_true - y_pred) ** 2)
	    if ss_total == 0:
	        # No variance in the target: avoid the 0-division and report a finite score.
	        return 1.0 if ss_residual == 0 else 0.0
	    return 1 - (ss_residual / ss_total)

	def predict_and_evaluate(csv_file, days_to_predict, freq, country_name):
	    '''
	    Fit a Prophet model on the historical data and score its forecast on the held-out tail.

	    Parameters:
	    - csv_file: CSV source accepted by pandas.read_csv. Its three columns are renamed,
	      in order, to "ds" (datetime), "y" (target variable) and "temp" (temperature).
	    - days_to_predict: Number of days to forecast; converted to int before use.
	    - freq: Frequency string passed to Prophet's make_future_dataframe. An empty value falls back to "2H".
	    - country_name: Country code passed to Prophet's add_country_holidays. An empty value falls back to "UK".

	    Steps:
	    1. Read the CSV file into a DataFrame and parse the datetime column.
	    2. Hold out the last 90 * 12 rows as test data; everything before is training data.
	    3. Fit Prophet (MCMC sampling, fixed prior scales) with country holidays and "temp" as an additive regressor.
	    4. Build the future DataFrame and fill "temp" positionally from the training rows and the first test rows.
	    5. Predict, plot the forecast with forecast_plot, and compute MAPE, RMSE and R-squared on the overlapping window.

	    Returns:
	    A tuple (metrics, plot): metrics is a dict with keys "MAPE", "RMSE" and
	    "R-squared" holding rounded plain floats; plot is the figure from forecast_plot.

	    Raises:
	    ValueError: if the requested horizon needs more rows than the held-out test set contains.

	    NOTE(review): the factor 12 (rows per day) matches 2-hourly data and is applied
	    regardless of freq; other frequencies get a horizon that is not days_to_predict days.
	    '''
	    df_model = pd.read_csv(csv_file)
	    df_model.columns = ["ds", "y", "temp"]
	    df_model['ds'] = pd.to_datetime(df_model['ds'])

	    # Fall back to the defaults the script itself uses when an empty value is
	    # passed (e.g. a form field left blank) instead of handing "" to Prophet.
	    freq = freq or "2H"
	    country_name = country_name or "UK"

	    samples_per_day = 12
	    split_from = 90 * samples_per_day
	    # int() because periods is used both as a row count and as an iloc bound.
	    days = int(days_to_predict)
	    periods = days * samples_per_day

	    train_data = df_model[:-split_from]
	    test_data = df_model[-split_from:]
	    if periods > len(test_data):
	        # Fail before the slow fit: the regressor fill below needs one
	        # held-out temperature per forecast step.
	        raise ValueError(
	            f"days_to_predict={days} needs {periods} held-out rows, "
	            f"but only {len(test_data)} are available"
	        )

	    # Set parameters for the Prophet model
	    seasonality_prior_scale = 0.01
	    changepoint_prior_scale = 0.05
	    mcmc_samples = 50

	    # Train and fit the Prophet model
	    m = Prophet(mcmc_samples=mcmc_samples, changepoint_prior_scale=changepoint_prior_scale,
	                seasonality_prior_scale=seasonality_prior_scale)
	    m.add_country_holidays(country_name=country_name)
	    m.add_regressor("temp", mode="additive")
	    m.fit(train_data)

	    # Create a future DataFrame for prediction, setting regressors for both training and testing data
	    future = m.make_future_dataframe(periods=periods, freq=freq)
	    train_idx = future["ds"].isin(train_data.ds)
	    test_idx = ~train_idx

	    # Values are assigned by position, so rows are assumed to be in time order.
	    for r in ["temp"]:
	        future.loc[train_idx, r] = train_data[r].to_list()
	        future.loc[test_idx, r] = test_data.iloc[:periods][r].to_list()

	    forecast = m.predict(future)
	    first_test_ds = test_data["ds"].iloc[0]
	    forecast_days = forecast[forecast["ds"] >= first_test_ds]
	    test_days = test_data[(test_data["ds"] >= first_test_ds) & (
	        test_data["ds"] <= forecast_days["ds"].iloc[-1])]

	    # Plot the forecast using the forecast_plot function
	    plot = forecast_plot(forecast_days, test_days, days)

	    # Calculate forecast metrics on the overlapping window
	    mape = mean_absolute_percentage_error(test_days["y"], forecast_days["yhat"])
	    rmse = root_mean_squared_error(test_days["y"], forecast_days["yhat"])
	    rsqr = r_squared(test_days["y"], forecast_days["yhat"])

	    # Plain floats so the dict renders as numbers rather than NumPy scalar reprs.
	    metrics = {
	        "MAPE": round(float(mape), 3),
	        "RMSE": round(float(rmse), 1),
	        "R-squared": round(float(rsqr), 3),
	    }

	    return metrics, plot

	# Name of the CSV file used for the start-up run below.
	csv_name = "merged_data_huggingface.csv"
	#df_merged['settlement_date'] = pd.to_datetime(df_merged['settlement_date'])
	#df_model = df_merged[["tsd", "settlement_date", "temp"]]
	#df_model.columns = ["y", "ds", "temp"]

	days_to_predict = 15 # Set the default value for days to predict
	country_name = "UK" # Set the default value for the holiday country code
	freq = "2H" # Set the default value for the data frequency

	# NOTE(review): this runs a complete fit/predict when the module is executed
	# and the returned (metrics, plot) tuple is discarded - confirm it is still wanted.
	predict_and_evaluate(csv_name, days_to_predict, freq, country_name)


	# This Gradio interface uses the `predict_and_evaluate` function to forecast
	# electricity demand and evaluate the forecast accuracy.
	# Users can upload a CSV file containing historical electricity demand data,
	# specify the number of days to predict, and provide the data frequency and
	# country code for holidays.
	#
	# The interface displays evaluation metrics (MAPE, RMSE, R-squared) and a plot
	# comparing forecasted values against actual values.
	#
	# Example usage:
	# - Upload the file "merged_data_huggingface.csv"
	# - Set "Days to Predict" to 30
	# - Enter "2H" for data frequency
	# - Enter "UK" for the country code


	iface = gr.Interface(
	    fn=predict_and_evaluate,
	    inputs=[
	        gr.File(label="CSV File"),
	        gr.Slider(1, 90, value=30, step=1, label="Days to Predict"),
	        # Prefilled with the script defaults so a submit without typing does
	        # not pass an empty frequency or country code to the model.
	        gr.Textbox(label="Data Frequency", value="2H", placeholder="Enter frequency (e.g., 2H for 2 hourly)"),
	        gr.Textbox(label="Country Code", value="UK", placeholder="Enter country code (e.g., UK)"),
	    ],
	    # predict_and_evaluate returns (metrics, plot), matching these two outputs in order.
	    outputs=[
	        gr.Textbox(label=" Evaluation Metrics"),
	        "plot",
	    ],
	    title="Prophet Electricty Load Forecasting Model",
	    description="Upload a CSV file of time series data to generate electricty demand forecasts using Prophet. Update country code(eg UK or DE) for holidays and data frequency",
	    examples=[
	        ["merged_data_huggingface.csv", 30, "2H", "UK"]
	    ],
	)

	iface.launch()