CSV / app.py
yokoha's picture
Create app.py
ab46b5d verified
raw
history blame
5.57 kB
import pandas as pd
import numpy as np
import datetime as dt
import warnings
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import plotly.graph_objects as go
import gradio as gr
warnings.filterwarnings("ignore")
# -----------------------------
# CONFIG
# -----------------------------
DATA_FILE = "202503-domae.parquet" # Parquet input; must sit in the same directory as this script
FORECAST_END_YEAR = 2030 # last forecast year (forecasts run through December of this year)
SEASONAL_PERIODS = 12 # seasonal cycle length in months (monthly seasonality)
# -----------------------------
# 1. Data loading & preprocessing
# -----------------------------
def load_data(path: str) -> pd.DataFrame:
    """Load the price Parquet file and pivot it into a monthly item/price table.

    Two date layouts are supported: a ``date`` column that ``pd.to_datetime``
    can parse, or a ``PRCE_REG_MM`` column in ``YYYYMM`` form.  Item names are
    read from ``PDLT_NM`` (otherwise ``item``) and prices from ``AVRG_PRCE``
    (otherwise ``price``).

    Every timestamp is snapped to the first day of its month *before* the
    prices are averaged, so rows that are finer than monthly collapse into a
    single mean per month and item and the returned index never contains the
    same month twice.

    Args:
        path: Location of the Parquet file.

    Returns:
        DataFrame indexed by ascending month-start timestamps, one column per
        item, holding the mean price of that item in that month.

    Raises:
        ValueError: If neither ``date`` nor ``PRCE_REG_MM`` is present.
    """
    df = pd.read_parquet(path)

    # Build a datetime column from whichever date layout the file uses.
    if "date" in df.columns:
        dates = pd.to_datetime(df["date"])
    elif "PRCE_REG_MM" in df.columns:
        dates = pd.to_datetime(df["PRCE_REG_MM"].astype(str), format="%Y%m")
    else:
        raise ValueError("์ง€์›๋˜์ง€ ์•Š๋Š” ๋‚ ์งœ ์ปฌ๋Ÿผ ํ˜•์‹์ž…๋‹ˆ๋‹ค.")

    # Snap to month start first: grouping on raw dates and normalising the
    # index afterwards would leave duplicate month labels for daily input.
    df["date"] = dates.dt.to_period("M").dt.to_timestamp()

    # Accept either the original column names or the generic ones.
    item_col = "PDLT_NM" if "PDLT_NM" in df.columns else "item"
    price_col = "AVRG_PRCE" if "AVRG_PRCE" in df.columns else "price"

    monthly = (
        df.groupby(["date", item_col])[price_col]
        .mean()
        .reset_index()
    )
    table = (
        monthly
        .pivot(index="date", columns=item_col, values=price_col)
        .sort_index()
    )
    # Kept from the original implementation; the labels are already month starts.
    table.index = pd.to_datetime(table.index).to_period("M").to_timestamp()
    return table
# Load the price table once at import time; its column labels are the
# selectable items used throughout the rest of the module.
pivot = load_data(DATA_FILE)
products = list(pivot.columns)
# -----------------------------
# 2. Model definition (Holt-Winters + fallback)
# -----------------------------
def _fit_forecast(series: pd.Series) -> pd.Series:
    """Forecast a monthly price series through December of FORECAST_END_YEAR.

    The series is re-indexed to month-start ("MS") frequency and a
    Holt-Winters model (additive trend, multiplicative seasonality,
    SEASONAL_PERIODS periods per cycle) is fitted.  If the fit raises, or the
    model returns non-finite values, the forecast falls back to compounding
    the mean month-over-month change from the last observed value.

    Args:
        series: Prices indexed by a DatetimeIndex.

    Returns:
        Forecast values indexed by month start, beginning the month after the
        last entry of the re-indexed series.  An empty float series with a
        DatetimeIndex is returned when there is nothing to forecast (empty
        input, or the series already reaches the end date).
    """
    # A datetime-indexed empty result keeps later concat/resample steps working.
    empty = pd.Series(dtype=float, index=pd.DatetimeIndex([]))
    if series.empty:
        return empty

    # Ensure month-start frequency; months missing from the input become NaN.
    series = series.asfreq("MS")
    if series.empty:
        return empty

    # Number of months between the last observation and the end date.
    last_date = series.index[-1]
    end_date = dt.datetime(FORECAST_END_YEAR, 12, 1)
    horizon = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)
    if horizon <= 0:
        return empty

    fc = None
    try:
        model = ExponentialSmoothing(
            series,
            trend="add",
            seasonal="mul",
            seasonal_periods=SEASONAL_PERIODS,
            initialization_method="estimated",
        )
        fc = model.fit(optimized=True).forecast(horizon)
    except Exception:
        # Deliberate best effort: any fitting failure routes to the fallback.
        fc = None

    # Also fall back when the fit "succeeded" but produced NaN/inf values.
    if fc is None or not np.isfinite(np.asarray(fc, dtype=float)).all():
        growth = series.pct_change().fillna(0).mean()
        fc = pd.Series(
            [series.iloc[-1] * (1 + growth) ** i for i in range(1, horizon + 1)],
            index=pd.date_range(
                last_date + pd.DateOffset(months=1),
                periods=horizon,
                freq="MS",
            ),
        )
    return fc
# Pre-build each item's full series (history + forecast) up front so the app responds quickly
# Per-item caches filled once at import time:
#   FORECASTS   - forecast-only series
#   FULL_SERIES - observed history followed by the forecast
FORECASTS, FULL_SERIES = {}, {}
for item in products:
    hist = pivot[item].dropna()
    fc = _fit_forecast(hist)
    FORECASTS[item] = fc
    FULL_SERIES[item] = pd.concat((hist, fc))
# -----------------------------
# 3. Next-day (tomorrow) price prediction
# -----------------------------
# Calendar dates captured once when the module is imported; `tomorrow` is the
# date the next-day price table is built and labelled for.
today = dt.date.today()
tomorrow = dt.date.fromordinal(today.toordinal() + 1)
def build_tomorrow_df(target_date=None, series_by_item=None) -> pd.DataFrame:
    """Return a one-column table with each item's expected price on one day.

    Each monthly series is upsampled to daily frequency with linear
    interpolation and the value at the target day is read off, rounded to two
    decimals.  Items whose series does not cover that day get NaN.

    Args:
        target_date: Day to predict (``datetime.date``, ``datetime.datetime``
            or ``pd.Timestamp``).  Defaults to the module-level ``tomorrow``.
        series_by_item: Mapping of item name to a datetime-indexed price
            series.  Defaults to the module-level ``FULL_SERIES``.

    Returns:
        DataFrame indexed by item name (sorted), with a single column whose
        label embeds the target date.
    """
    if target_date is None:
        target_date = tomorrow
    if series_by_item is None:
        series_by_item = FULL_SERIES

    # Look up with a Timestamp: recent pandas versions do not match a plain
    # datetime.date against a DatetimeIndex, which made every lookup miss and
    # filled the whole table with NaN.
    stamp = pd.Timestamp(target_date)

    preds = {}
    for item, series in series_by_item.items():
        if series.empty:
            # Nothing to interpolate for this item.
            preds[item] = np.nan
            continue
        # Daily linear interpolation between the monthly points.
        daily = series.resample("D").interpolate("linear")
        preds[item] = round(daily.loc[stamp], 2) if stamp in daily.index else np.nan

    column = f"๋‚ด์ผ({target_date}) ์˜ˆ์ƒ๊ฐ€(KRW)"
    return pd.DataFrame.from_dict(preds, orient="index", columns=[column]).sort_index()
# Build the next-day price table once at import time; the UI displays this snapshot.
tomorrow_df = build_tomorrow_df()
# -----------------------------
# 4. Visualization
# -----------------------------
def plot_item(item: str):
    """Build the Plotly figure for one item.

    Draws two line traces: the observed monthly prices taken from ``pivot``
    (NaNs dropped, re-indexed to month-start frequency) and the precomputed
    forecast taken from ``FORECASTS``.

    Args:
        item: Column name in ``pivot`` / key in ``FORECASTS``.

    Returns:
        The configured ``plotly.graph_objects.Figure``.
    """
    history = pivot[item].dropna().asfreq("MS")
    forecast = FORECASTS[item]

    fig = go.Figure()
    for label, data in (("Historical", history), ("Forecast", forecast)):
        fig.add_trace(go.Scatter(x=data.index, y=data.values, mode="lines", name=label))

    layout = {
        "title": f"{item} โ€“ Monthly Avg Price (1996โ€‘2025) & Forecast(2025โ€‘04โ†’2030โ€‘12)",
        "xaxis_title": "Date",
        "yaxis_title": "Price (KRW)",
        # Horizontal legend placed just above the plotting area.
        "legend": {"orientation": "h", "y": 1.02, "x": 0.01},
        "margin": {"l": 40, "r": 20, "t": 60, "b": 40},
    }
    fig.update_layout(**layout)
    return fig
# -----------------------------
# 5. Gradio UI
# -----------------------------
with gr.Blocks(title="๋„๋งค ๊ฐ€๊ฒฉ ์˜ˆ์ธกย App") as demo:
    gr.Markdown("## ๐Ÿ“ˆ ๋„๋งค ๊ฐ€๊ฒฉ ์˜ˆ์ธก ๋Œ€์‹œ๋ณด๋“œ (1996โ€‘2030)")

    # Item selector -> chart update.  Tolerate an empty product list instead of
    # failing on products[0] while the UI is being built.
    item_dd = gr.Dropdown(
        products,
        value=products[0] if products else None,
        label="ํ’ˆ๋ชฉ ์„ ํƒ",
    )
    chart_out = gr.Plot(label="๊ฐ€๊ฒฉ ์ถ”์„ธ")

    # Next-day price table (computed once at startup, not refreshed).
    gr.Markdown(f"### ๋‚ด์ผ({tomorrow}) ๊ฐ ํ’ˆ๋ชฉ ์˜ˆ์ƒ๊ฐ€ (KRW)")
    # Expose the item names as a regular column so they appear in the table:
    # the Dataframe component displays a frame's columns, not its pandas index.
    tomorrow_table = gr.Dataframe(
        tomorrow_df.rename_axis("item").reset_index(),
        interactive=False,
        height=400,
    )

    def update_chart(product):
        """Return the figure for the selected item, or None when nothing is selected."""
        if not product:
            return None
        return plot_item(product)

    item_dd.change(update_chart, inputs=item_dd, outputs=chart_out, queue=False)
    # Render the initially selected item on page load; otherwise the plot stays
    # empty until the user changes the dropdown.
    demo.load(update_chart, inputs=item_dd, outputs=chart_out, queue=False)
# -----------------------------
# 6. Script entry point
# -----------------------------
# Start the Gradio server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()