Spaces:
Runtime error
Runtime error
import pandas as pd | |
import numpy as np | |
import gradio as gr | |
import datetime | |
import calendar | |
import matplotlib.pyplot as plt | |
import japanize_matplotlib | |
import matplotlib.dates as mdates | |
from dateutil.relativedelta import relativedelta | |
import datetime | |
import datarobot as dr | |
from function import get_fish_qty, get_estat, dr_prediction_deployment, prediction_func, train_modeling | |
import yaml | |
# Load the application settings once at import time; the functions below read
# their data-source URLs from this mapping via ``config[...]``.
with open('config.yaml') as file:
    # safe_load accepts an open stream directly, so no explicit read() is needed.
    config = yaml.safe_load(file)
def retrain():
    """Retrain the model and summarise the model picked by the retraining run.

    Calls ``train_modeling.modeling()``, resolves the DataRobot model that the
    ``model_url`` of the first returned row points to, and collects that
    model's 20 largest normalized feature impacts.

    Returns:
        tuple: ``(model_type, holdout_rmse, feature_impact)``.
        ``feature_impact`` is a DataFrame sorted by ``impactNormalized``
        (descending) whose ``featureName`` values carry a zero-padded rank
        prefix ("01_", "02_", ...).
    """
    model_management_df = train_modeling.modeling()
    first_row = model_management_df.iloc[0, :]

    # The URL is split on "/": element 4 is used as the project id and the
    # last element as the model id.
    url_parts = first_row['model_url'].split('/')
    project = dr.Project.get(url_parts[4])
    model = dr.Model.get(project=project, model_id=url_parts[-1])

    feature_impact = pd.DataFrame(model.get_or_request_feature_impact())
    feature_impact = (
        feature_impact
        .sort_values('impactNormalized', ascending=False)
        .reset_index(drop=True)
        .iloc[:20, :]
        # Own the data before editing it: writing into a slice through chained
        # indexing (df[col][i] = ...) warns on older pandas and is silently
        # lost under pandas Copy-on-Write.
        .copy()
    )
    # Prefix every name with its rank (presumably so the bar plot keeps the
    # impact order when it sorts labels alphabetically -- confirm).
    feature_impact['featureName'] = [
        str(rank).zfill(2) + '_' + name
        for rank, name in enumerate(feature_impact['featureName'], start=1)
    ]
    return first_row['model_type'], model.metrics['RMSE']['holdout'], feature_impact
def get_prediction_result():
    """Return the daily predictions whose target date falls in next month.

    Builds one row per calendar day of next month, left-joins the predictions
    returned by ``prediction_func.prediction_to_dr`` onto it, and fills a
    missing ``forecast_point`` with the target date minus one month.  Rows
    whose target date is earlier than "now + 1 month" and that have no
    predicted value are dropped; later days without a value are kept.

    Returns:
        pandas.DataFrame: columns ``forecast_point``, ``target_date`` and
        ``電気代_予測``.
    """
    today = datetime.datetime.now()
    # Computed once and reused; every date below is relative to this moment.
    next_month = today + relativedelta(months=1)
    prediction_month = next_month.strftime('%Y%m')

    days_in_month = calendar.monthrange(next_month.year, next_month.month)[1]
    month_days = [
        pd.to_datetime(prediction_month + str(day).zfill(2))
        for day in range(1, days_in_month + 1)
    ]
    dfc = pd.DataFrame({'target_date': month_days})

    df = prediction_func.prediction_to_dr(config['oil_price_url'], config['fuel_procurement_cost_url'])
    # Keep only rows whose target_date, rendered as text, starts with the
    # YYYYMM of next month.
    in_prediction_month = df['target_date'].astype(str).str[:6] == prediction_month
    # .copy() so the column assignments below act on an independent frame
    # instead of a filtered view (avoids SettingWithCopyWarning).
    df = df.loc[in_prediction_month].copy()
    df['target_date'] = pd.to_datetime(df['target_date'].astype(str))
    df['forecast_point'] = pd.to_datetime(df['forecast_point'].astype(str))

    # Left join keeps every calendar day, even those without a prediction.
    df = pd.merge(dfc, df, on='target_date', how='left')

    no_forecast_point = df['forecast_point'].isnull()
    df.loc[no_forecast_point, 'forecast_point'] = df['target_date'].apply(
        lambda x: x - relativedelta(months=1)
    )

    unpredicted_before_cutoff = (df['target_date'] < next_month) & df['電気代'].isnull()
    df = df.loc[~unpredicted_before_cutoff]
    df = df.rename(columns={'電気代': '電気代_予測'})
    return df[['forecast_point', 'target_date', '電気代_予測']]
def plot_prediction_result():
    """Build the ``gr.LinePlot`` update that shows the latest predictions.

    Returns:
        The object produced by ``gr.LinePlot.update`` with the current result
        of ``get_prediction_result()`` as its value.
    """
    # Fixed presentation settings of the chart, kept apart from the data.
    plot_options = {
        "x": "target_date",
        "y": "電気代_予測",
        "title": "昨日までの魚の卸売り量から予測された、来月の2人世帯の平均電気料金の推移",
        "width": 500,
        "height": 300,
    }
    return gr.LinePlot.update(value=get_prediction_result(), **plot_options)
def get_model_infomation():
    """Describe the model with the lowest holdout RMSE in the project.

    Connects to DataRobot, takes the first project whose string form contains
    '電気代予測', ranks its datetime models (excluding the
    'Baseline Predictions Using Most Recent Value' baseline) by holdout RMSE
    and reports on the first one.

    The API token is read from the ``DATAROBOT_API_TOKEN`` environment
    variable and the endpoint from ``DATAROBOT_ENDPOINT`` (defaulting to
    ``https://app.datarobot.com/api/v2``).  Credentials must not be committed
    to the source file.

    Returns:
        tuple: ``(model_info, feature_impact)`` where ``model_info`` is a dict
        with the keys ``'RMSE'`` (holdout RMSE) and ``'model_type'``, and
        ``feature_impact`` is a DataFrame holding the 20 largest normalized
        feature impacts in descending order.

    Raises:
        RuntimeError: if ``DATAROBOT_API_TOKEN`` is not set.
        IndexError: if no matching project exists.
    """
    import os  # local import: the file-level import block is left untouched

    token = os.environ.get('DATAROBOT_API_TOKEN')
    if not token:
        raise RuntimeError(
            'DATAROBOT_API_TOKEN is not set; configure it as a secret '
            'instead of hard-coding the token in the source.'
        )
    endpoint = os.environ.get('DATAROBOT_ENDPOINT', 'https://app.datarobot.com/api/v2')
    dr.Client(
        endpoint=endpoint,
        token=token
    )

    matching_projects = [p for p in dr.Project.list() if '電気代予測' in str(p)]
    if not matching_projects:
        # Same exception type the bare "[...][0]" lookup produced, but with
        # a message that says what is missing.
        raise IndexError("no DataRobot project matching '電気代予測' was found")
    project = dr.Project.get(matching_projects[0].id)

    model_df = pd.DataFrame(
        [[candidate.id,
          candidate.model_type,
          candidate.metrics['RMSE']['validation'],
          candidate.metrics['RMSE']['backtesting'],
          candidate.metrics['RMSE']['holdout'],
          candidate]
         for candidate in project.get_datetime_models()
         if candidate.model_type != 'Baseline Predictions Using Most Recent Value'],
        columns=['ID', 'モデル名', 'バックテスト1', '全てのバックテスト', 'holdout', 'model'])
    # Ascending sort puts the lowest holdout RMSE in row 0.
    model_df = model_df.sort_values('holdout').reset_index(drop=True)
    model = model_df['model'][0]

    model_info = {
        'RMSE': model.metrics['RMSE']['holdout'],
        'model_type': model.model_type,
    }

    feature_impact = pd.DataFrame(model.get_or_request_feature_impact())
    feature_impact = feature_impact.sort_values('impactNormalized', ascending=False).reset_index(drop=True)
    feature_impact = feature_impact.iloc[:20, :]
    return model_info, feature_impact
def get_featuredrift():
    """Collect target and feature drift figures for the deployment.

    Returns:
        tuple: ``(drift_df, start_point, end_point)``.  ``drift_df`` has the
        columns ``feature_name``, ``drift_score`` and ``feature_impact``; its
        first row describes the target (with ``feature_impact`` fixed to 1)
        and the remaining rows one feature each.  ``start_point`` and
        ``end_point`` are the bounds of the target-drift period, shifted by
        nine hours and formatted as ``"%Y / %m / %d %H:%M:%S"``.
    """
    def _format_shifted(moment):
        # +9 hours: presumably a UTC -> JST conversion -- confirm.
        return (moment + relativedelta(hours=9)).strftime("%Y / %m / %d %H:%M:%S")

    deployment = dr.Deployment.get(deployment_id='640d791796a6a52d92c368a0')
    target_drift = dr.models.TargetDrift.get(deployment.id)
    feature_drift_list = dr.models.FeatureDrift.list(deployment.id)

    target_frame = pd.DataFrame(
        {
            'feature_name': [target_drift.target_name],
            'drift_score': [target_drift.drift_score],
            'feature_impact': [1],
        }
    )
    feature_rows = [
        [entry.name, entry.drift_score, entry.feature_impact]
        for entry in feature_drift_list
    ]
    feature_frame = pd.DataFrame(
        feature_rows,
        columns=['feature_name', 'drift_score', 'feature_impact'],
    )
    # Target row first, feature rows after it; the original row labels are kept.
    drift_df = pd.concat([target_frame, feature_frame])

    period = target_drift.period
    start_point = _format_shifted(period['start'])
    end_point = _format_shifted(period['end'])
    # NOTE: labelling rows with an 'alert' column based on drift/impact
    # thresholds is currently disabled.
    return drift_df, start_point, end_point
# Dashboard layout: three tabs (predictions, model information, data drift).
# The Markdown texts are kept flush-left so that no source indentation ends up
# inside the rendered strings.
with gr.Blocks() as electoric_ploting:
    gr.Markdown(
        """
# その日の魚の卸売り量から、来月の家計データ月別支出の電気代を予測するAI
使用データ
* 東京卸売市場日報
* 家計調査の月別支出
* 原油価格データ
* 燃料調達価格データ
why
電気代のtrendは原油価格などが大きく影響するが、細かい変化は気候に影響し、気候はある程度海水温に関連性があると考えられる。
また、魚の卸売量は水揚げ量に関係し、水揚げ量は海水温に関係するという考えからモデルを作成。
"""
    )

    # --- Tab 1: prediction results ---------------------------------------
    with gr.Tab("予測結果"):
        with gr.Row():
            with gr.Column():
                # Filled by the periodic load event registered further down.
                plot = gr.LinePlot(show_label=False)
            with gr.Column():
                # Summary statistics are computed once, while the UI is built.
                df = get_prediction_result()
                gr.Textbox(df['電気代_予測'].max(),
                           label='現在までの予測値の最大値')
                gr.Textbox(df['電気代_予測'].min(),
                           label='現在までの予測値の最小値')
                gr.Textbox(df['電気代_予測'].mean(),
                           label='現在までの予測値の平均値')
                gr.Textbox(df['電気代_予測'].median(),
                           label='現在までの予測値の中央値')
        with gr.Row():
            # The function itself is passed as the value, not its result.
            gr.DataFrame(get_prediction_result)

    # --- Tab 2: current model vs. retrained model ------------------------
    with gr.Tab("モデル情報"):
        gr.Markdown(
            """
注意:
再学習後はモデルのデプロイが自動で行われます。
huggingfaceの使用上csvを上書きできないため。
"""
        )
        retrain_btn = gr.Button(value="再学習")
        with gr.Row():
            with gr.Column():
                model_info, feature_impact_df = get_model_infomation()
                gr.Textbox(model_info['model_type'], label='現在のモデル')
            with gr.Column():
                output_model_type = gr.Textbox(label='再学習後のモデル')
        with gr.Row():
            with gr.Column():
                gr.Textbox(model_info['RMSE'], label='Holdout RMSE精度')
            with gr.Column():
                output_acc = gr.Textbox(label='再学習後のHoldout RMSE精度')
        with gr.Row():
            with gr.Column():
                # Prefix each feature name with its zero-padded rank.  A new
                # column is assigned in one step: element-wise chained
                # indexing (df[col][i] = ...) on the sliced frame warns on
                # older pandas and is lost under pandas Copy-on-Write.
                ranked_names = [
                    str(rank).zfill(2) + '_' + name
                    for rank, name in enumerate(feature_impact_df['featureName'], start=1)
                ]
                feature_impact_df = feature_impact_df.assign(featureName=ranked_names)
                gr.BarPlot(value=feature_impact_df,
                           title='特徴量インパクト上位20',
                           x='featureName',
                           y='impactNormalized',
                           tooltip=['impactNormalized'],
                           x_title='特徴量名',
                           y_title='特徴量インパクト_相対値',
                           vertical=False,
                           y_lim=[0, 1.2],
                           width=400,
                           height=300)
            with gr.Column():
                # Empty until the retrain button delivers a feature-impact table.
                output_plot = gr.BarPlot(title='再学習後特徴量インパクト上位20',
                                         x='featureName',
                                         y='impactNormalized',
                                         tooltip=['impactNormalized'],
                                         x_title='特徴量名',
                                         y_title='特徴量インパクト_相対値',
                                         vertical=False,
                                         y_lim=[0, 1.2],
                                         width=400,
                                         height=300)

    # --- Tab 3: data drift ------------------------------------------------
    with gr.Tab("データドリフト情報"):
        result = get_featuredrift()
        with gr.Row():
            gr.Markdown(
                """
こちらの図はデータドリフトと特徴量の有用性を表した図になっています。
味方は以下の通り
* ドリフトスコア:予測データに含まれるデータが、どれぐらい過去のデータに比べてずれが発生しているかを表しており、上に行けば行くほどズレが大きい
* 特徴量の有用性:ターゲットの有用性を1とした時に、どれぐらいそれぞれの特徴量の有用性が高いかを表したもので、右に行くほど有用性が高い
"""
            )
        with gr.Row():
            drift_df = result[0]
            start_point = result[1]
            end_point = result[2]
            gr.Textbox(f"{start_point}〜{end_point}", label='データドリフト確認期間')
        with gr.Row():
            # The scatter plot is only drawn when the drift scores are not all
            # one and the same value; otherwise a notice is shown instead.
            if len(drift_df["drift_score"].unique()) != 1:
                gr.ScatterPlot(
                    drift_df,
                    x="feature_impact",
                    y="drift_score",
                    title="データドリフトとデータの有用性",
                    color_legend_title="Species",
                    x_title="特徴量の有用性",
                    y_title="ドリフトスコア",
                    x_lim=[-0.1, drift_df["feature_impact"].max()*1.4],
                    y_lim=[-0.1, drift_df["drift_score"].max()*1.4],
                    tooltip=["feature_name", "feature_impact", "drift_score"],
                    caption="",
                    height=500,
                    width=500
                )
            else:
                gr.Markdown(
                    """
モデルの入れ替え後に予測が実行されていないためdriftは表示できません。
"""
                )

    # --- Events -------------------------------------------------------------
    retrain_btn.click(retrain, inputs=None, outputs=[output_model_type, output_acc, output_plot])
    # Registered without outputs, so the returned timestamp is not shown in
    # any component.
    electoric_ploting.load(lambda: datetime.datetime.now(),
                           None,
                           every=3600)
    # Redraw the prediction line plot on page load and every 3600 seconds.
    dep = electoric_ploting.load(plot_prediction_result, None, plot, every=3600)

electoric_ploting.launch()
plt.close()