Spaces:

RaulSalinasHerr
/

chileanModelPrediction

Sleeping

App Files Files Community

chileanModelPrediction / app.py

RaulSalinasHerr

change path, no load data

bfe58e1 over 1 year ago

raw

history blame contribute delete

8.15 kB

	import pandas as pd
	import numpy as np
	from sktime.forecasting.theta import ThetaForecaster
	from sktime.forecasting.base import ForecastingHorizon
	from sktime.utils.plotting import plot_series

	import matplotlib
	import matplotlib.pyplot as plt

	import gradio as gr
	from enum import Enum
	import pickle as pkl
	import os


	matplotlib.use("Agg")

	class Sector(Enum):
	Region = "Region"
	Provincia = "Provincia"
	Comuna = "Comuna"

	class TypePrediction(Enum):
	Origin = "Total Origin"
	Destiny = "Total Destiny"
	OriginDestiny = "Origin and destiny"

	type_choices = [x.value for x in TypePrediction]


	def load_data(path:str = "./data/trips.csv") -> pd.DataFrame:
	"""load trips.csv data from path"""

	read_params = {
	"encoding": "latin_1",
	"sep": ";",
	"decimal": ","
	}

	return pd.read_csv(path, **read_params)

	def to_date(month: int,year:int):
	return pd.Timestamp(day=1, month=month, year = year)

	def to_date_row(x):
	month = x["month_value"]
	year = x["Anio"]
	return to_date(month, year)

	def preprocess_data(data: pd.DataFrame, sector: Sector = Sector.Region) -> pd.DataFrame:
	"""preprocess data, choose sector to get value"""

	data.columns = [x.strip() for x in data.columns]

	col_melt = list(data.columns[-12:]) #months as cols

	mn = "month_name"

	code_month = pd.DataFrame.from_dict(
	data={
	mn: col_melt,
	"month_value": list(range(1,13))})

	col_maintain = list(data.columns[:-12])

	data_long = data.melt(
	id_vars=col_maintain,
	value_vars=col_melt,
	var_name=mn).merge(
	code_month,
	how="left",
	on=mn)

	data_long["time_stamp"] = data_long.apply(
	to_date_row,axis = 1)

	unused_date_cols = ["Anio", "month_name", "month_value"]

	data_long.drop(columns= unused_date_cols, inplace=True)

	sector_name = sector.name
	cut_sector = ["CUT {} Origen", "CUT {} Destino"]

	col_sector = [x.format(sector_name) for x in cut_sector]
	kv = ["time_stamp", "value"]

	cols = col_sector.copy()

	for lcol in kv:
	cols.append(lcol)
	# cols = col_sector.extend(kv)

	data_sector = data_long[cols].copy()

	col_sector.append("time_stamp")
	data_agg = data_sector.groupby(by = col_sector).sum()
	data_agg.value = np.int32(data_agg.value.values)
	data_agg.query("value > 0", inplace = True)


	data_agg.reset_index(inplace=True)
	data_agg.set_index("time_stamp", inplace=True)
	data_agg = data_agg.to_period("M")

	renamer = {
	data_agg.columns[0]: "sector_origin",
	data_agg.columns[1]: "sector_destiny"
	}

	data_agg.rename(columns=renamer, inplace=True)
	data_agg.reset_index(inplace=True)


	data_agg.set_index(["sector_origin", "sector_destiny","time_stamp"],inplace=True)

	return data_agg


	def predict_dataframe(data_agg: pd.DataFrame, h:int = 12, prd:int = 24) -> ThetaForecaster:
	"""predict dataframe

	Args:
	data_grouped (pd.DataFrame): grouped dataframe with values
	h (int, optional): Window to forecast, in months

	Returns:
	pd.Series: _description_
	"""




	data_flat = data_agg.reset_index().copy()
	periods = data_flat["time_stamp"].unique()
	last_2years = periods[-prd:]
	data_flat = data_flat[data_flat["time_stamp"].isin(last_2years)]
	data_flat.set_index(["sector_origin", "sector_destiny", "time_stamp"], inplace=True)


	# forecasters = [
	# # ("TBATS", TBATS(sp = 12)),
	# ("Theta", ThetaForecaster(sp= 12)),
	# ("ETS", AutoETS(sp = 12))
	# ]


	# forecaster = AutoEnsembleForecaster(
	# forecasters=forecasters, test_size= 0.15)

	forecaster = ThetaForecaster(sp=12)

	fh = ForecastingHorizon(np.arange(1,h), is_relative= True)

	forecaster = forecaster.fit(y = data_flat, fh=fh)
	return forecaster


	def create_plot(
	data_agg: pd.DataFrame,
	pred: pd.DataFrame,
	# pred_inter: pd.DataFrame,
	sector_origin: int \| None = 1,
	sector_destiny: int \| None = 1,
	type_prediction: str = TypePrediction.OriginDestiny.value):


	def to_series(
	data: pd.DataFrame,
	is_multi: bool = False) -> pd.DataFrame:

	df = data.reset_index().copy()
	if type_prediction == TypePrediction.Destiny.value:
	qry = "sector_destiny == {}".format(sector_destiny)

	elif type_prediction == TypePrediction.Origin.value:
	qry = "sector_origin == {}".format(sector_origin)

	else:
	qry = "sector_origin == {} & sector_destiny == {}".format(sector_origin, sector_destiny)

	if not is_multi:
	df.query(qry, inplace=True)
	else:
	df = df[(df.iloc[:, 0] == sector_origin) & (df.iloc[:, 1] == sector_destiny)]

	if type_prediction == TypePrediction.Origin.value:
	df = df.groupby(["sector_origin", "time_stamp"]).sum(numeric_only= True).reset_index()

	elif type_prediction == TypePrediction.Destiny.value:
	df = df.groupby(["sector_destiny", "time_stamp"]).sum(numeric_only=True).reset_index()


	drop_cols = ["sector_origin", "sector_destiny"]

	if is_multi:
	drop_cols = [(x, "", "") for x in drop_cols]

	return df.drop(columns=drop_cols).set_index("time_stamp").squeeze()


	x = to_series(data_agg)
	y = to_series(pred)

	if type_prediction == TypePrediction.Destiny.value:

	title = "Total monthly touristic travels to region {}".format(sector_destiny)


	if type_prediction == TypePrediction.Origin.value:
	title = "Total monthly touristic travels from region {}".format(sector_origin)


	elif type_prediction == TypePrediction.OriginDestiny.value:


	title = "Monthly touristic travels from region {} to region {}".format(
	sector_origin, sector_destiny)

	fig, _ = plot_series(x,y,labels=["value", "forecast"],title=title)

	return fig

	def save_object(object, path:str):
	with open(path, "wb") as file:
	pkl.dump(object, file)

	def load_object(path: str):
	with open(path, "rb") as file:
	return pkl.load(file)




	def run(argv = None):

	path_preprocessed = "./data_preprocessed.pkl"
	path_forecaster = "./forecaster.pkl"

	data_preprocessed = load_object(path_preprocessed)
	forecaster = predict_dataframe(data_preprocessed)
	pred = forecaster.predict()

	def wrapper(sector_origin, sector_destiny, type_prediction):

	sector_origin = int(sector_origin)
	sector_destiny = int(sector_destiny)

	return create_plot(
	data_preprocessed, pred,
	sector_origin, sector_destiny, type_prediction)



	params_slider = {
	"minimum": 1,
	"maximum": 16,
	"step": 1
	}


	port = int(os.environ.get("GRADIO_SERVER_PORT", 7860))



	with gr.Blocks() as app:

	input_type = gr.components.Radio(
	choices= type_choices,
	value = type_choices[-1],
	type = "value",
	label = "Prediction Aggregation")

	with gr.Tab("Region"):

	input_origin = gr.components.Slider(
	**params_slider, label = "Origin"
	)

	input_destiny = gr.components.Slider(
	**params_slider, label= "Destiny")

	predict_region_btn = gr.Button("Predict region")

	output_plot = gr.Plot()

	predict_region_btn.click(
	fn = wrapper,
	inputs = [input_origin, input_destiny, input_type],
	outputs = output_plot,
	api_name= "predict_region"
	)


	app.launch(
	server_name= "0.0.0.0",
	server_port=port,
	share=False)


	if __name__ == "__main__":
	run()