# RaulSalinasHerr's picture
# change path, no load data
# bfe58e1
import pandas as pd
import numpy as np
from sktime.forecasting.theta import ThetaForecaster
from sktime.forecasting.base import ForecastingHorizon
from sktime.utils.plotting import plot_series
import matplotlib
import matplotlib.pyplot as plt
import gradio as gr
from enum import Enum
import pickle as pkl
import os
# Select the non-interactive Agg backend so figures are rendered off-screen
# (no GUI toolkit or display is required).
matplotlib.use("Agg")
class Sector(Enum):
    """Territorial level at which trips can be aggregated.

    ``preprocess_data`` interpolates the member *name* into the
    ``CUT {} Origen`` / ``CUT {} Destino`` column templates, so each name
    has to match the corresponding column label of the input table.
    """

    Region = "Region"
    Provincia = "Provincia"
    Comuna = "Comuna"
class TypePrediction(Enum):
    """Aggregation modes selectable for the forecast plot.

    The member values are the labels compared against the
    ``type_prediction`` string in ``create_plot``.
    """

    # Everything leaving the selected origin.
    Origin = "Total Origin"
    # Everything arriving at the selected destiny.
    Destiny = "Total Destiny"
    # Only the selected origin/destiny pair.
    OriginDestiny = "Origin and destiny"
# Labels of every TypePrediction member, in declaration order; used as the
# choices of the aggregation radio button in the UI.
type_choices = [member.value for member in TypePrediction]
def load_data(path: str = "./data/trips.csv") -> pd.DataFrame:
    """Read the trips CSV stored at ``path`` into a DataFrame.

    The file is decoded as Latin-1, with ``;`` as the field separator and
    ``,`` as the decimal mark.
    """
    return pd.read_csv(path, encoding="latin_1", sep=";", decimal=",")
def to_date(month: int, year: int):
    """Return the timestamp of the first day of ``month`` in ``year``."""
    first_of_month = pd.Timestamp(year=year, month=month, day=1)
    return first_of_month
def to_date_row(x):
    """Build the month-start timestamp for a single record.

    ``x`` is anything indexable by the keys ``"month_value"`` (month
    number) and ``"Anio"`` (year), e.g. one row of a DataFrame.
    """
    return to_date(x["month_value"], x["Anio"])
def preprocess_data(data: pd.DataFrame, sector: Sector = Sector.Region) -> pd.DataFrame:
    """Reshape the wide trips table into monthly totals per origin/destiny pair.

    The last 12 columns of ``data`` are treated as the months 1 to 12, in
    that order; every column before them is kept as an identifier while the
    table is melted to long format.

    Args:
        data: Wide trips table. It must contain an ``Anio`` (year) column and
            the ``CUT <sector> Origen`` / ``CUT <sector> Destino`` columns
            for the requested sector. The caller's frame is not modified.
        sector: Territorial level whose code columns are used for grouping.

    Returns:
        A DataFrame with a single int32 ``value`` column holding the summed
        trips, restricted to rows with ``value > 0`` and indexed by
        ``(sector_origin, sector_destiny, time_stamp)``, where ``time_stamp``
        is a monthly period.
    """
    # Strip the labels on a renamed copy: assigning to ``data.columns`` would
    # silently rewrite the column labels of the caller's DataFrame.
    data = data.rename(columns=lambda label: label.strip())

    month_cols = list(data.columns[-12:])  # months as columns
    id_cols = list(data.columns[:-12])
    month_name = "month_name"

    # Lookup table: month column label -> month number (1..12).
    month_codes = pd.DataFrame.from_dict(
        data={
            month_name: month_cols,
            "month_value": list(range(1, 13)),
        })

    data_long = data.melt(
        id_vars=id_cols,
        value_vars=month_cols,
        var_name=month_name,
    ).merge(month_codes, how="left", on=month_name)

    data_long["time_stamp"] = data_long.apply(to_date_row, axis=1)
    # Year and month are now encoded in time_stamp.
    data_long = data_long.drop(columns=["Anio", "month_name", "month_value"])

    sector_cols = [
        template.format(sector.name)
        for template in ("CUT {} Origen", "CUT {} Destino")
    ]
    group_cols = sector_cols + ["time_stamp"]

    data_agg = data_long[group_cols + ["value"]].groupby(by=group_cols).sum()
    data_agg["value"] = np.int32(data_agg["value"].values)
    data_agg = data_agg.query("value > 0")

    # Move time_stamp alone into the index so it can be converted from
    # timestamps to monthly periods.
    data_agg = data_agg.reset_index().set_index("time_stamp").to_period("M")
    data_agg = data_agg.rename(
        columns=dict(zip(sector_cols, ["sector_origin", "sector_destiny"])))

    return data_agg.reset_index().set_index(
        ["sector_origin", "sector_destiny", "time_stamp"])
def predict_dataframe(data_agg: pd.DataFrame, h:int = 12, prd:int = 24) -> ThetaForecaster:
    """Fit a Theta forecaster on the most recent periods of the aggregated data.

    Args:
        data_agg (pd.DataFrame): aggregated values indexed by
            ``(sector_origin, sector_destiny, time_stamp)``.
        h (int, optional): Window to forecast, in months. The horizon covers
            the relative steps ``1..h``.
        prd (int, optional): Number of most recent distinct periods kept for
            fitting.

    Returns:
        ThetaForecaster: forecaster (``sp=12``) fitted together with the
        forecasting horizon.
    """
    data_flat = data_agg.reset_index()

    # ``unique`` keeps the order of first appearance, which is chronological
    # only if the first origin/destiny pair covers every month. Sort
    # explicitly so the tail really is the latest ``prd`` periods.
    periods = pd.Index(data_flat["time_stamp"].unique()).sort_values()
    recent_periods = periods[-prd:]

    data_flat = data_flat[data_flat["time_stamp"].isin(recent_periods)]
    data_flat = data_flat.set_index(
        ["sector_origin", "sector_destiny", "time_stamp"])

    forecaster = ThetaForecaster(sp=12)
    # ``np.arange`` excludes its stop value, so ``h + 1`` is required to
    # obtain h forecast steps (``np.arange(1, h)`` produced only h - 1).
    fh = ForecastingHorizon(np.arange(1, h + 1), is_relative=True)
    return forecaster.fit(y=data_flat, fh=fh)
def create_plot(
        data_agg: pd.DataFrame,
        pred: pd.DataFrame,
        sector_origin: int | None = 1,
        sector_destiny: int | None = 1,
        type_prediction: str = TypePrediction.OriginDestiny.value):
    """Plot the observed values and the forecast for the selected sectors.

    Args:
        data_agg: Observed values with ``sector_origin``, ``sector_destiny``
            and ``time_stamp`` available after ``reset_index``.
        pred: Forecast values laid out like ``data_agg``.
        sector_origin: Origin sector code used for filtering.
        sector_destiny: Destiny sector code used for filtering.
        type_prediction: One of the ``TypePrediction`` values. Any other
            string is handled like ``TypePrediction.OriginDestiny`` for both
            the filter and the title.

    Returns:
        The figure produced by ``plot_series``.
    """
    is_origin = type_prediction == TypePrediction.Origin.value
    is_destiny = type_prediction == TypePrediction.Destiny.value

    def to_series(
            data: pd.DataFrame,
            is_multi: bool = False) -> pd.Series:
        """Filter ``data`` to the selection and return it keyed by time_stamp."""
        df = data.reset_index()

        if is_multi:
            # Multi-level column labels: filter positionally, since the
            # first two columns hold origin and destiny.
            df = df[(df.iloc[:, 0] == sector_origin) & (df.iloc[:, 1] == sector_destiny)]
        elif is_destiny:
            df = df.query("sector_destiny == {}".format(sector_destiny))
        elif is_origin:
            df = df.query("sector_origin == {}".format(sector_origin))
        else:
            df = df.query(
                "sector_origin == {} & sector_destiny == {}".format(
                    sector_origin, sector_destiny))

        # Totals collapse the other side of the trip into one row per month.
        # NOTE(review): the non-grouped sector column is presumably numeric,
        # so it survives ``numeric_only`` and is discarded by the drop below.
        if is_origin:
            df = df.groupby(["sector_origin", "time_stamp"]).sum(numeric_only=True).reset_index()
        elif is_destiny:
            df = df.groupby(["sector_destiny", "time_stamp"]).sum(numeric_only=True).reset_index()

        drop_cols = ["sector_origin", "sector_destiny"]
        if is_multi:
            drop_cols = [(x, "", "") for x in drop_cols]
        return df.drop(columns=drop_cols).set_index("time_stamp").squeeze()

    x = to_series(data_agg)
    y = to_series(pred)

    if is_destiny:
        title = "Total monthly touristic travels to region {}".format(sector_destiny)
    elif is_origin:
        title = "Total monthly touristic travels from region {}".format(sector_origin)
    else:
        # Same fallback as the filter in to_series, so ``title`` is always
        # bound even for an unrecognised ``type_prediction``.
        title = "Monthly touristic travels from region {} to region {}".format(
            sector_origin, sector_destiny)

    fig, _ = plot_series(x, y, labels=["value", "forecast"], title=title)
    return fig
def save_object(object, path:str):
    """Pickle ``object`` into the file at ``path``, overwriting it if present."""
    with open(path, "wb") as handle:
        pkl.Pickler(handle).dump(object)
def load_object(path: str):
    """Return the object unpickled from the file at ``path``.

    Unpickling can execute arbitrary code, so only open files from a
    trusted source.
    """
    with open(path, "rb") as handle:
        return pkl.Unpickler(handle).load()
def run(argv = None):
    """Fit the forecaster on the stored data and serve the Gradio app.

    Loads the preprocessed data from ``./data_preprocessed.pkl``, fits the
    forecaster, computes the prediction once and launches the UI on
    ``0.0.0.0`` at the port given by ``GRADIO_SERVER_PORT`` (7860 when the
    variable is unset). ``argv`` is accepted but not used.
    """
    path_preprocessed = "./data_preprocessed.pkl"
    # NOTE(review): currently unused in this function.
    path_forecaster = "./forecaster.pkl"

    data_preprocessed = load_object(path_preprocessed)
    pred = predict_dataframe(data_preprocessed).predict()

    def wrapper(sector_origin, sector_destiny, type_prediction):
        """Cast the slider values to int and build the plot."""
        origin_code = int(sector_origin)
        destiny_code = int(sector_destiny)
        return create_plot(
            data_preprocessed, pred,
            origin_code, destiny_code, type_prediction)

    slider_kwargs = dict(minimum=1, maximum=16, step=1)
    port = int(os.environ.get("GRADIO_SERVER_PORT", 7860))

    with gr.Blocks() as app:
        # The last entry of type_choices is the default selection.
        input_type = gr.components.Radio(
            choices=type_choices,
            value=type_choices[-1],
            type="value",
            label="Prediction Aggregation")

        with gr.Tab("Region"):
            input_origin = gr.components.Slider(label="Origin", **slider_kwargs)
            input_destiny = gr.components.Slider(label="Destiny", **slider_kwargs)
            predict_region_btn = gr.Button("Predict region")
            output_plot = gr.Plot()

        predict_region_btn.click(
            fn=wrapper,
            inputs=[input_origin, input_destiny, input_type],
            outputs=output_plot,
            api_name="predict_region")

    app.launch(server_name="0.0.0.0", server_port=port, share=False)
# Start the app only when the file is executed as a script, not on import.
if __name__ == "__main__":
    run()