|
import pandas as pd |
|
import numpy as np |
|
import gradio as gr |
|
import datetime |
|
import calendar |
|
import matplotlib.pyplot as plt |
|
import japanize_matplotlib |
|
import matplotlib.dates as mdates |
|
from dateutil.relativedelta import relativedelta |
|
import datetime |
|
import datarobot as dr |
|
from function import get_fish_qty, get_estat, dr_prediction_deployment, prediction_func, train_modeling |
|
|
|
import yaml |
|
# Load the application settings once at import time; get_prediction_result() reads the
# 'oil_price_url' and 'fuel_procurement_cost_url' keys from this mapping.
# The encoding is pinned because open() otherwise uses a platform-dependent default,
# which can mis-decode non-ASCII text in the YAML file.
with open('config.yaml', encoding='utf-8') as file:
    config = yaml.safe_load(file)
|
|
|
def retrain():
    """Run the training pipeline and summarise the first model it reports.

    Calls ``train_modeling.modeling()`` and uses the first row of the table it
    returns (presumably ordered best-first -- confirm in ``train_modeling``).
    The row's ``model_url`` is split on ``'/'``: element 4 is used as the
    DataRobot project ID and the last element as the model ID.

    This function does not configure a DataRobot client itself, so a client
    must already be set up when it runs.

    Returns:
        tuple: ``(model_type, holdout_rmse, feature_impact)`` where
        ``model_type`` comes from the first row of the management table,
        ``holdout_rmse`` is ``model.metrics['RMSE']['holdout']`` and
        ``feature_impact`` is a DataFrame holding at most the 20 features with
        the highest ``impactNormalized``, whose ``featureName`` values are
        prefixed with a zero-padded rank (``01_``, ``02_``, ...).
    """
    model_management_df = train_modeling.modeling()
    first_row = model_management_df.iloc[0]

    # Split the URL once instead of once per ID.
    url_parts = first_row['model_url'].split('/')
    model = dr.Model.get(project=dr.Project.get(url_parts[4]), model_id=url_parts[-1])

    feature_impact = (
        pd.DataFrame(model.get_or_request_feature_impact())
        .sort_values('impactNormalized', ascending=False)
        .reset_index(drop=True)
        .iloc[:20]
    )
    # Build the ranked labels as a whole column. Element-wise chained assignment
    # (df['col'][i] = ...) on a slice triggers SettingWithCopyWarning and does not
    # update the frame at all when pandas Copy-on-Write is active.
    feature_impact = feature_impact.assign(
        featureName=[
            f'{rank:02d}_{name}'
            for rank, name in enumerate(feature_impact['featureName'], start=1)
        ]
    )

    return first_row['model_type'], model.metrics['RMSE']['holdout'], feature_impact
|
|
|
|
|
def get_prediction_result():
    """Return the daily electricity-bill predictions for next calendar month.

    A frame with one row per day of the month following the current one is
    left-joined with the output of ``prediction_func.prediction_to_dr``, which
    must provide the columns ``target_date``, ``forecast_point`` and ``電気代``
    (dates presumably formatted as ``YYYYMMDD`` -- confirm in
    ``prediction_func``).

    Days without a prediction get a ``forecast_point`` one month before their
    ``target_date``. Days that are earlier than "now + 1 month" and still have
    no prediction are dropped; later days are kept with a missing value.

    Returns:
        pandas.DataFrame: columns ``forecast_point``, ``target_date`` and
        ``電気代_予測``.
    """
    today = datetime.datetime.now()
    # Computed once; the same timestamp is the month selector and the cut-off below.
    next_month = today + relativedelta(months=1)
    prediction_month = next_month.strftime('%Y%m')

    days_in_month = calendar.monthrange(next_month.year, next_month.month)[1]
    month_days = [
        pd.to_datetime(f'{prediction_month}{day:02d}')
        for day in range(1, days_in_month + 1)
    ]
    dfc = pd.DataFrame({'target_date': month_days})

    df = prediction_func.prediction_to_dr(config['oil_price_url'], config['fuel_procurement_cost_url'])
    # .copy() so the column conversions below act on an independent frame
    # rather than on a filtered slice (avoids SettingWithCopyWarning).
    df = df.loc[df['target_date'].astype(str).str[:6] == prediction_month].copy()
    df['target_date'] = pd.to_datetime(df['target_date'].astype(str))
    df['forecast_point'] = pd.to_datetime(df['forecast_point'].astype(str))

    df = pd.merge(dfc, df, on='target_date', how='left')

    # Rows introduced by the left join have no forecast point yet; the right-hand
    # side is aligned on the index, so only the masked rows are filled.
    no_forecast_point = df['forecast_point'].isnull()
    df.loc[no_forecast_point, 'forecast_point'] = df['target_date'].apply(
        lambda x: x - relativedelta(months=1)
    )

    # Drop days before the cut-off that still have no prediction.
    unpredicted_before_cutoff = (df['target_date'] < next_month) & df['電気代'].isnull()
    df = df.loc[~unpredicted_before_cutoff]
    df = df.rename(columns={'電気代': '電気代_予測'})
    return df[['forecast_point', 'target_date', '電気代_予測']]
|
|
|
def plot_prediction_result():
    """Return a ``gr.LinePlot`` update that draws the current prediction table.

    The data comes from ``get_prediction_result()``: ``target_date`` is plotted
    on the x axis and ``電気代_予測`` on the y axis.
    """
    plot_options = {
        'value': get_prediction_result(),
        'x': "target_date",
        'y': "電気代_予測",
        'title': "昨日までの魚の卸売り量から予測された、来月の2人世帯の平均電気料金の推移",
        'width': 500,
        'height': 300,
    }
    return gr.LinePlot.update(**plot_options)
|
|
|
def _holdout_sort_key(candidate):
    """Sort key that orders models by holdout RMSE, with missing scores last."""
    holdout = candidate.metrics['RMSE']['holdout']
    missing = pd.isna(holdout)
    return (missing, 0.0 if missing else holdout)


def get_model_infomation():
    """Connect to DataRobot and describe the best model of the prediction project.

    The project is the first one returned by ``dr.Project.list()`` whose string
    representation contains ``'電気代予測'``. Among its datetime models, except
    the 'Baseline Predictions Using Most Recent Value' model, the one with the
    lowest holdout RMSE is selected.

    Credentials come from the environment: ``DATAROBOT_API_TOKEN`` is required
    and ``DATAROBOT_ENDPOINT`` is optional, defaulting to the DataRobot cloud
    API. Calling this function configures the DataRobot client via ``dr.Client``.

    Returns:
        tuple: ``(model_info, feature_impact)``. ``model_info`` is a dict with
        the keys ``'RMSE'`` (holdout RMSE of the selected model) and
        ``'model_type'``. ``feature_impact`` is a DataFrame holding at most the
        20 features with the highest ``impactNormalized``, in descending order.

    Raises:
        RuntimeError: if ``DATAROBOT_API_TOKEN`` is not set.
        IndexError: if no project matches ``'電気代予測'``.
        LookupError: if the project has no candidate model.
    """
    import os  # local import keeps this block self-contained

    # SECURITY: the API token must not live in source control. Supply it through
    # the environment (for example a Hugging Face Space secret) and revoke any
    # token that was previously committed.
    token = os.environ.get('DATAROBOT_API_TOKEN')
    if not token:
        raise RuntimeError(
            'DATAROBOT_API_TOKEN is not set; export the DataRobot API token before starting the app.'
        )
    endpoint = os.environ.get('DATAROBOT_ENDPOINT', 'https://app.datarobot.com/api/v2')
    dr.Client(endpoint=endpoint, token=token)

    matching_project = next((p for p in dr.Project.list() if '電気代予測' in str(p)), None)
    if matching_project is None:
        raise IndexError("no DataRobot project whose name contains '電気代予測' was found")
    project = dr.Project.get(matching_project.id)

    candidates = [
        candidate
        for candidate in project.get_datetime_models()
        if candidate.model_type != 'Baseline Predictions Using Most Recent Value'
    ]
    if not candidates:
        raise LookupError('the project has no candidate model to report on')
    model = min(candidates, key=_holdout_sort_key)

    model_info = {
        'RMSE': model.metrics['RMSE']['holdout'],
        'model_type': model.model_type,
    }

    feature_impact = (
        pd.DataFrame(model.get_or_request_feature_impact())
        .sort_values('impactNormalized', ascending=False)
        .reset_index(drop=True)
        .iloc[:20]
        .copy()  # independent frame, so callers can modify it without slice warnings
    )

    return model_info, feature_impact
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Dashboard: one tab with next month's predictions, one tab with model details
# and a retraining button.
# NOTE(review): the nesting of tabs/rows/columns below was reconstructed from the
# statement order -- confirm it against the intended layout.
with gr.Blocks() as electoric_ploting:
    gr.Markdown(
        """
        # その日の魚の卸売り量から、来月の家計データ月別支出の電気代を予測するAI
        使用データ
        * 東京卸売市場日報
        * 家計調査の月別支出
        * 原油価格データ
        * 燃料調達価格データ
        why
        電気代のtrendは原油価格などが大きく影響するが、細かい変化は気候に影響し、気候はある程度海水温に関連性があると考えられる。
        また、魚の卸売量は水揚げ量に関係し、水揚げ量は海水温に関係するという考えからモデルを作成。
        """
    )
    with gr.Tab("予測結果"):
        with gr.Row():
            with gr.Column():
                # Filled in by the load event registered at the bottom of this block.
                plot = gr.LinePlot(show_label=False)

            with gr.Column():
                # Summary statistics are computed once, while the layout is built.
                df = get_prediction_result()
                gr.Textbox(df['電気代_予測'].max(),
                           label='現在までの予測値の最大値')
                gr.Textbox(df['電気代_予測'].min(),
                           label='現在までの予測値の最小値')
                gr.Textbox(df['電気代_予測'].mean(),
                           label='現在までの予測値の平均値')
                gr.Textbox(df['電気代_予測'].median(),
                           label='現在までの予測値の中央値')
        with gr.Row():
            # The function itself is passed (not its result) so Gradio evaluates it
            # for the table's initial value.
            gr.DataFrame(get_prediction_result)

    with gr.Tab("モデル情報"):
        gr.Markdown(
            """
            注意:
            再学習後はモデルのデプロイが自動で行われます。
            huggingfaceの使用上csvを上書きできないため。
            """
        )
        retrain_btn = gr.Button(value="再学習")
        with gr.Row():
            with gr.Column():
                model_info, feature_impact_df = get_model_infomation()
                gr.Textbox(model_info['model_type'], label='現在のモデル')

            with gr.Column():
                output_model_type = gr.Textbox(label='再学習後のモデル')

        with gr.Row():
            with gr.Column():
                gr.Textbox(model_info['RMSE'], label='Holdout RMSE精度')
            with gr.Column():
                output_acc = gr.Textbox(label='再学習後のHoldout RMSE精度')

        with gr.Row():
            with gr.Column():
                # Prefix each feature with its zero-padded rank (01_, 02_, ...).
                # The labels are assigned as a whole column: element-wise chained
                # assignment (df['col'][i] = ...) triggers SettingWithCopyWarning and
                # is a no-op when pandas Copy-on-Write is active.
                feature_impact_df = feature_impact_df.assign(
                    featureName=[
                        f'{rank:02d}_{name}'
                        for rank, name in enumerate(feature_impact_df['featureName'], start=1)
                    ]
                )
                gr.BarPlot(value=feature_impact_df,
                           title='特徴量インパクト上位20',
                           x='featureName',
                           y='impactNormalized',
                           tooltip=['impactNormalized'],
                           x_title='特徴量名',
                           y_title='特徴量インパクト_相対値',
                           vertical=False,
                           y_lim=[0, 1.2],
                           width=400,
                           height=300)
            with gr.Column():
                output_plot = gr.BarPlot(title='再学習後特徴量インパクト上位20',
                                         x='featureName',
                                         y='impactNormalized',
                                         tooltip=['impactNormalized'],
                                         x_title='特徴量名',
                                         y_title='特徴量インパクト_相対値',
                                         vertical=False,
                                         y_lim=[0, 1.2],
                                         width=400,
                                         height=300)

    # retrain() returns (model type, holdout RMSE, feature-impact frame), matching
    # the three output components in order.
    retrain_btn.click(retrain, inputs=None, outputs=[output_model_type, output_acc, output_plot])

    # NOTE(review): this hourly event has no output component, so the timestamp it
    # returns is not displayed anywhere. Kept unchanged; confirm whether it is
    # still needed.
    electoric_ploting.load(lambda: datetime.datetime.now(),
                           None,
                           every=3600)
    # Draw the prediction plot on page load and refresh it every hour.
    dep = electoric_ploting.load(plot_prediction_result, None, plot, every=3600)

# queue() is called before launch() because the load events above use `every=`.
electoric_ploting.queue().launch()

plt.close()