Spaces:

NaokiOkamoto
/

ESTYLEU_graduation_assignment

Runtime error

App Files Files Community

NaokiOkamoto committed on Mar 31, 2023

Commit

ec74bc1

•

1 Parent(s): 9ddce62

Create app.py

Browse files

Files changed (1) hide show

app.py +253 -0

app.py ADDED Viewed

	@@ -0,0 +1,253 @@

+import pandas as pd
+import numpy as np
+import gradio as gr
+import datetime
+import calendar
+import matplotlib.pyplot as plt
+import japanize_matplotlib
+import matplotlib.dates as mdates
+from dateutil.relativedelta import relativedelta
+import datetime
+import datarobot as dr
+from function import get_fish_qty, get_estat, dr_prediction_deployment, prediction_func, train_modeling
+import yaml
+with open('config.yaml') as file:
+    config = yaml.safe_load(file.read())
+def retrain():
+    model_management_df = train_modeling.modeling()
+    model = dr.Model.get(project = dr.Project.get(model_management_df.iloc[0, :]['model_url'].split('/')[4]),
+                                         model_id = model_management_df.iloc[0, :]['model_url'].split('/')[-1])
+    feature_impact = pd.DataFrame(model.get_or_request_feature_impact())
+    feature_impact = feature_impact.sort_values('impactNormalized', ascending=False).reset_index(drop=True)
+    feature_impact = feature_impact.iloc[:20, :]
+    for i in range(len(feature_impact)):
+                feature_impact['featureName'][i] = str(i+1).zfill(2) + '_' + feature_impact['featureName'][i]
+    return model_management_df.iloc[0, :]['model_type'], model.metrics['RMSE']['holdout'], feature_impact
+def get_prediction_result():
+    today = datetime.datetime.now()
+    prediction_month = (today+relativedelta(months=1)).strftime('%Y%m')
+    month_days = month_days = [pd.to_datetime(prediction_month + str(i+1).zfill(2)) for i in range(calendar.monthrange((today+relativedelta(months=1)).year, (today+relativedelta(months=1)).month)[1])]
+    dfc = pd.DataFrame({'target_date':month_days})
+    df = prediction_func.prediction_to_dr(config['oil_price_url'], config['fuel_procurement_cost_url'])
+    df = df.loc[df['target_date'].astype(str).str[:6]==prediction_month]
+    df['target_date'] = pd.to_datetime(df['target_date'].astype(str))
+    df['forecast_point'] = pd.to_datetime(df['forecast_point'].astype(str))
+    df = pd.merge(dfc,
+                  df,
+                  on='target_date',
+                  how='left')
+    df.loc[df['forecast_point'].isnull(), 'forecast_point'] = df['target_date'].apply(lambda x:x-relativedelta(months=1))
+    df = df.loc[~((df['target_date']<(today+relativedelta(months=1)))&(df['電気代'].isnull()))]
+    df = df.rename(columns={'電気代':'電気代_予測'})
+    return df[['forecast_point', 'target_date', '電気代_予測']]
+def plot_prediction_result():
+    update = gr.LinePlot.update(
+        value=get_prediction_result(),
+        x="target_date",
+        y="電気代_予測",
+        title="昨日までの魚の卸売り量から予測された、来月の2人世帯の平均電気料金の推移",
+        width=500,
+        height=300,
+    )
+    return update
+def get_model_infomation():
+    token = 'NjQwMDVmNGI0ZDQzZDFhYzI2YThmZDJiOnVZejljTXFNTXNoUnlKMStoUFhXSFdYMEZRck9lY3dobnEvRFZ1aVBHbVE9'
+    endpoint = 'https://app.datarobot.com/api/v2'
+    dr.Client(
+                        endpoint=endpoint,
+                        token=token
+                    )
+    project = dr.Project.get([i for i in dr.Project.list() if '電気代予測' in str(i)][0].id)
+    model_df = pd.DataFrame(
+        [[model.id,
+          model.model_type,
+          model.metrics['RMSE']['validation'],
+          model.metrics['RMSE']['backtesting'],
+          model.metrics['RMSE']['holdout'],
+          model] for model in project.get_datetime_models() if model.model_type != 'Baseline Predictions Using Most Recent Value'],
+        columns=['ID', 'モデル名', 'バックテスト1', '全てのバックテスト', 'holdout', 'model'])
+    model_df = model_df.sort_values('holdout').reset_index(drop=True)
+    model = model_df['model'][0]
+    model_info = {}
+    model_info['RMSE'] = model.metrics['RMSE']['holdout']
+    model_info['model_type'] = model.model_type
+    model_info['model_type'] = model.model_type
+    feature_impact = pd.DataFrame(model.get_or_request_feature_impact())
+    feature_impact = feature_impact.sort_values('impactNormalized', ascending=False).reset_index(drop=True)
+    feature_impact = feature_impact.iloc[:20, :]
+    return model_info, feature_impact
+# def get_featuredrift():
+#     deployment = dr.Deployment.get(deployment_id='640d791796a6a52d92c368a0')
+#     target_drift = dr.models.TargetDrift.get(deployment.id)
+#     feature_drift_list = dr.models.FeatureDrift.list(deployment.id)
+#     drift_df = pd.DataFrame(
+#                                             {
+#                                             'feature_name':[target_drift.target_name],
+#                                             'drift_score':[target_drift.drift_score],
+#                                             'feature_impact':[1]
+#                                             }
+#                                         )
+#     drift_df = pd.concat([
+#                                     drift_df,
+#                                     pd.DataFrame(
+#                                                         [[
+#                                                             feature_drift.name,
+#                                                             feature_drift.drift_score,
+#                                                             feature_drift.feature_impact
+#                                                          ] for feature_drift in feature_drift_list
+#                                     ],
+#                                         columns=[ 'feature_name', 'drift_score', 'feature_impact']
+#                                     )
+#     ])
+#     start_point = (target_drift.period['start']+relativedelta(hours=9)).strftime("%Y / %m / %d %H:%M:%S")
+#     end_point = (target_drift.period['end']+relativedelta(hours=9)).strftime("%Y / %m / %d %H:%M:%S")
+#     return drift_df, start_point, end_point
+with gr.Blocks() as electoric_ploting:
+    gr.Markdown(
+                            """
+                            # その日の魚の卸売り量から、来月の家計データ月別支出の電気代を予測するAI
+                            使用データ
+                             * 東京卸売市場日報
+                             * 家計調査の月別支出
+                             * 原油価格データ
+                             * 燃料調達価格データ
+                            why
+                            電気代のtrendは原油価格などが大きく影響するが、細かい変化は気候に影響し、気候はある程度海水温に関連性があると考えられる。
+                            また、魚の卸売量は水揚げ量に関係し、水揚げ量は海水温に関係するという考えからモデルを作成。
+                             """
+                        )
+    with gr.Tab("予測結果"):
+        with gr.Row():
+            with gr.Column():
+                plot = gr.LinePlot(show_label=False)
+    #             plot = gr.Plot(label="昨日までの魚の卸売り量から予測された、来月の2人世帯の平均電気料金の推移")
+            with gr.Column():
+                df = get_prediction_result()
+                gr.Textbox(df['電気代_予測'].max(),
+                                label='現在までの予測値の最大値')
+                gr.Textbox(df['電気代_予測'].min(),
+                                label='現在までの予測値の最小値')
+                gr.Textbox(df['電気代_予測'].mean(),
+                                label='現在までの予測値の平均値')
+                gr.Textbox(df['電気代_予測'].median(),
+                                label='現在までの予測値の中央値')
+        with gr.Row():
+            gr.DataFrame(get_prediction_result)
+    with gr.Tab("モデル情報"):
+        gr.Markdown(
+                            """
+                            注意：
+                            再学習後はモデルのデプロイが自動で行われます。
+                            huggingfaceの使用上csvを上書きできないため。
+                             """
+                        )
+        retrain_btn= gr.Button(value="再学習")
+        with gr.Row():
+            with gr.Column():
+                model_info, feature_impact_df = get_model_infomation()
+                gr.Textbox(model_info['model_type'], label='現在のモデル')
+            with gr.Column():
+                output_model_type = gr.Textbox(label='再学習後のモデル')
+        with gr.Row():
+            with gr.Column():
+                gr.Textbox(model_info['RMSE'],label=f'Holdout RMSE精度')
+            with gr.Column():
+                output_acc = gr.Textbox(label='再学習後のHoldout RMSE精度')
+        with gr.Row():
+            with gr.Column():
+                for i in range(len(feature_impact_df)):
+                    feature_impact_df['featureName'][i] = str(i+1).zfill(2) + '_' + feature_impact_df['featureName'][i]
+                gr.BarPlot(value = feature_impact_df,
+                                title = '特徴量インパクト上位20',
+                                x = 'featureName',
+                                y = 'impactNormalized',
+                                tooltip=['impactNormalized'],
+                                x_title = '特徴量名',
+                                y_title = '特徴量インパクト_相対値',
+                                vertical=False,
+                                y_lim=[0, 1.2],
+                                width=400,
+                                height=300)
+            with gr.Column():
+                 output_plot = gr.BarPlot(title = '再学習後特徴量インパクト上位20',
+                                                    x = 'featureName',
+                                                    y = 'impactNormalized',
+                                                    tooltip=['impactNormalized'],
+                                                    x_title = '特徴量名',
+                                                    y_title = '特徴量インパクト_相対値',
+                                                    vertical=False,
+                                                    y_lim=[0, 1.2],
+                                                    width=400,
+                                                    height=300)
+    # with gr.Tab("データドリフト情報"):
+    #     result = get_featuredrift()
+    #     with gr.Row():
+    #         gr.Markdown(
+    #                     """
+    #                     こちらの図はデータドリフトと特徴量の有用性を表した図になっています。
+    #                     味方は以下の通り
+    #                      * ドリフトスコア：予測データに含まれるデータが、どれぐらい過去のデータに比べてずれが発生しているかを表しており、上に行けば行くほどズレが大きい
+    #                      * 特徴量の有用性：ターゲットの有用性を1とした時に、どれぐらいそれぞれの特徴量の有用性が高いかを表したもので、右に行くほど有用性が高い
+    #                      """
+    #                 )
+    #     with gr.Row():
+    #         drift_df = result[0]
+    #         start_point = result[1]
+    #         end_point = result[2]
+    #         gr.Textbox(f"{start_point}〜{end_point}",label=f'データドリフト確認期間')
+    #     with gr.Row():
+    #         if len(drift_df["drift_score"].unique())!=1:
+    #             gr.ScatterPlot(
+    #                                     drift_df,
+    #                                     x="feature_impact",
+    #                                     y="drift_score",
+    #                                     title="データドリフトとデータの有用性",
+    #                                     color_legend_title="Species",
+    #                                     x_title="特徴量の有用性",
+    #                                     y_title="ドリフトスコア",
+    #                                     x_lim = [-0.1, drift_df["feature_impact"].max()*1.4],
+    #                                     y_lim = [-0.1, drift_df["drift_score"].max()*1.4],
+    #                                     tooltip=["feature_name", "feature_impact", "drift_score"],
+    #                                     caption="",
+    #                                     height=500,
+    #                                     width=500
+    #                                 )
+    #         else:
+    #             gr.Markdown(
+    #                 """
+    #                 モデルの入れ替え後に予測が実行されていないためdriftは表示できません。
+    #                  """
+    #             )
+    retrain_btn.click(retrain, inputs=None, outputs = [output_model_type, output_acc, output_plot])
+    electoric_ploting.load(lambda: datetime.datetime.now(),
+                           None,
+                           # c_time2,
+                           every=3600)
+    dep = electoric_ploting.load(plot_prediction_result, None, plot, every=3600)
+    electoric_ploting.queue().launch()
+plt.close()