"""Gradio dashboard that shows next month's electricity-bill prediction.

The script loads ``config.yaml``, defines helpers that fetch predictions and
model metadata, builds a Gradio ``Blocks`` UI and launches it. Importing or
running this module performs file I/O, calls external services and starts
the web server.
"""
import calendar
import datetime
import os

import datarobot as dr
import gradio as gr
import japanize_matplotlib
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yaml
from dateutil.relativedelta import relativedelta

from function import get_fish_qty, get_estat, dr_prediction_deployment, prediction_func, train_modeling

# YAML is read explicitly as UTF-8 so the result does not depend on the
# platform's default encoding (the file written below is UTF-8 as well).
with open('config.yaml', encoding='utf-8') as file:
    config = yaml.safe_load(file)

# Write the loaded configuration (plus an 'A' marker key) to
# modeling_config.yaml. NOTE(review): presumably read by the training code in
# `function.train_modeling` -- confirm before changing.
with open('modeling_config.yaml', 'w', encoding='utf-8') as f:
    config['A'] = 'A'
    yaml.dump(config, f, encoding='utf-8', allow_unicode=True)


def retrain() -> None:
    """Button callback: retrain the model, then refresh the prediction.

    The refreshed prediction frame is discarded; the callback has no outputs.
    """
    get_prediction_result(retrain=True)


def get_prediction_result(retrain=False) -> pd.DataFrame:
    """Return the daily predictions targeting next calendar month.

    Args:
        retrain: When true, ``train_modeling.modeling()`` is called before
            the predictions are requested.

    Returns:
        A frame with columns ``forecast_point``, ``target_date`` and
        ``電気代_予測``. It is built from one row per day of next month; rows
        whose target date is earlier than "now + 1 month" and that have no
        predicted value are dropped.
    """
    today = datetime.datetime.now()
    if retrain:
        train_modeling.modeling()

    next_month = today + relativedelta(months=1)
    prediction_month = next_month.strftime('%Y%m')
    days_in_month = calendar.monthrange(next_month.year, next_month.month)[1]
    month_days = [
        pd.to_datetime(f'{prediction_month}{day:02d}', format='%Y%m%d')
        for day in range(1, days_in_month + 1)
    ]
    dfc = pd.DataFrame({'target_date': month_days})

    df = prediction_func.prediction_to_dr(
        config['oil_price_url'], config['fuel_procurement_cost_url']
    )
    # .copy() so the column assignments below modify an independent frame
    # instead of a slice of the frame returned by prediction_to_dr.
    df = df.loc[df['target_date'].astype(str).str[:6] == prediction_month].copy()
    df['target_date'] = pd.to_datetime(df['target_date'].astype(str))
    df['forecast_point'] = pd.to_datetime(df['forecast_point'].astype(str))

    # Left-join onto the full calendar so days without a prediction still
    # appear; their forecast point defaults to one month before the target.
    df = pd.merge(dfc, df, on='target_date', how='left')
    df.loc[df['forecast_point'].isnull(), 'forecast_point'] = (
        df['target_date'].apply(lambda x: x - relativedelta(months=1))
    )

    is_past_without_value = (df['target_date'] < next_month) & df['電気代'].isnull()
    df = df.loc[~is_past_without_value]
    df = df.rename(columns={'電気代': '電気代_予測'})
    return df[['forecast_point', 'target_date', '電気代_予測']]


def plot_prediction_result():
    """Return a ``gr.LinePlot`` update carrying the latest prediction frame."""
    update = gr.LinePlot.update(
        value=get_prediction_result(),
        x="target_date",
        y="電気代_予測",
        title="昨日までの魚の卸売り量から予測された、来月の2人世帯の平均電気料金の推移",
        width=500,
        height=300,
    )
    return update


def get_train_newest_target_period() -> str:
    """Return the largest ``年月`` in ``data/train.csv`` formatted as ``%Y年%m月``."""
    df = pd.read_csv('data/train.csv')
    train_max_yearmonth = pd.to_datetime(
        str(df['年月'].max()), format='%Y%m'
    ).strftime('%Y年%m月')
    return train_max_yearmonth


def get_newest_target_period() -> str:
    """Return the newest month in the household survey as ``%Y年%m月``.

    The survey is fetched with ``get_estat.get_household_survey()``.
    """
    df = get_estat.get_household_survey()
    expense_df = pd.DataFrame({'年月': df['時間軸(月次)'].unique()})

    cate = '3.1 電気代'
    temp_df = df.loc[df['品目分類(2020年改定)'] == cate]
    unit = temp_df['unit'].unique()[0]
    value_column = f'{cate}_({unit})'
    temp_df = temp_df.rename(columns={'value': value_column})

    expense_df = pd.merge(
        expense_df,
        temp_df[['時間軸(月次)', value_column]].rename(columns={'時間軸(月次)': '年月'}),
        on='年月',
        how='left',
    )
    expense_df = expense_df.rename(columns={'3.1 電気代_(円)': '電気代'})

    # '2023年04月' -> 202304, so max() compares months numerically.
    expense_df['年月'] = (
        pd.to_datetime(expense_df['年月'], format='%Y年%m月')
        .dt.strftime('%Y%m')
        .astype(int)
    )
    target_max_yearmonth = pd.to_datetime(
        str(expense_df['年月'].max()), format='%Y%m'
    ).strftime('%Y年%m月')
    return target_max_yearmonth


def get_model_infomation():
    """Return the current model's metadata row and its top-20 feature impact.

    Credentials are taken from the ``DATAROBOT_API_TOKEN`` and
    ``DATAROBOT_ENDPOINT`` environment variables when they are set.

    Returns:
        ``(model_info, feature_impact)`` where ``model_info`` is the last row
        of ``data/model_management.csv`` and ``feature_impact`` is a frame
        sorted by ``impactNormalized`` descending, limited to 20 rows, with a
        fresh 0..n-1 index.
    """
    # SECURITY: an API token is committed in source. It is kept only as a
    # fallback so existing setups keep working; it should be rotated and this
    # literal removed once DATAROBOT_API_TOKEN is configured everywhere.
    # Original author's note: this looks like the demo-environment token.
    legacy_token = 'NjQwMDVmNGI0ZDQzZDFhYzI2YThmZDJiOnVZejljTXFNTXNoUnlKMStoUFhXSFdYMEZRck9lY3dobnEvRFZ1aVBHbVE9'
    token = os.environ.get('DATAROBOT_API_TOKEN', legacy_token)
    endpoint = os.environ.get('DATAROBOT_ENDPOINT', 'https://app.datarobot.com/api/v2')
    dr.Client(
        endpoint=endpoint,
        token=token
    )

    model_info = pd.read_csv('data/model_management.csv').iloc[-1, :]
    # The project id and model id are taken from fixed positions of the
    # '/'-separated model URL stored in the CSV.
    url_parts = model_info['model_url'].split('/')
    model = dr.Model.get(
        project=dr.Project.get(url_parts[4]),
        model_id=url_parts[-1],
    )

    feature_impact = pd.DataFrame(model.get_or_request_feature_impact())
    feature_impact = feature_impact.sort_values(
        'impactNormalized', ascending=False
    ).reset_index(drop=True)
    feature_impact = feature_impact.iloc[:20, :]
    return model_info, feature_impact


# NOTE(review): the nesting of the layout below and the line breaks inside the
# Markdown text were reconstructed from a whitespace-collapsed source --
# confirm against the rendered page.
with gr.Blocks() as electoric_ploting:
    print('out put')
    pd.DataFrame({'A': [1]}).to_pickle('data.pickle')
    gr.Markdown(
        """
        # その日の魚の卸売り量から、来月の家計データ月別支出の電気代を予測するAI
        使用データ
        * 東京卸売市場日報
        * 家計調査の月別支出
        * 原油価格データ
        * 燃料調達価格データ

        why
        電気代のtrendは原油価格などが大きく影響するが、細かい変化は気候に影響し、気候はある程度海水温に関連性があると考えられる。
        また、魚の卸売量は水揚げ量に関係し、水揚げ量は海水温に関係するという考えからモデルを作成。
        """
    )
    with gr.Row():
        with gr.Column():
            plot = gr.LinePlot(show_label=False)
        with gr.Column():
            # Summary statistics are computed once, when the UI is built.
            df = get_prediction_result()
            gr.Textbox(df['電気代_予測'].max(), label='現在までの予測値の最大値')
            gr.Textbox(df['電気代_予測'].min(), label='現在までの予測値の最小値')
            gr.Textbox(df['電気代_予測'].mean(), label='現在までの予測値の平均値')
            gr.Textbox(df['電気代_予測'].median(), label='現在までの予測値の中央値')
    with gr.Row():
        gr.DataFrame(get_prediction_result)
        with gr.Column():
            gr.Textbox(get_train_newest_target_period, label='現在の学習済みのターゲット値最新月')
            gr.Textbox(get_newest_target_period, label='現在の取得可能ターゲット値最新月')
            btn = gr.Button(value="再学習")
            btn.click(retrain, inputs=None, outputs=None)
    with gr.Row():
        model_info, feature_impact_df = get_model_infomation()
        gr.Textbox(model_info['model_type'], label='現在のモデル')
    with gr.Row():
        # Prefix each feature with its zero-padded rank ("01_", "02_", ...).
        # One column assignment replaces the per-element chained assignment,
        # which does not update the frame under pandas copy-on-write.
        feature_impact_df['featureName'] = [
            f'{rank:02d}_{name}'
            for rank, name in enumerate(feature_impact_df['featureName'], start=1)
        ]
        gr.BarPlot(
            value=feature_impact_df,
            title='特徴量インパクト上位20',
            x='featureName',
            y='impactNormalized',
            tooltip=['impactNormalized'],
            x_title='特徴量名',
            y_title='特徴量インパクト_相対値',
            vertical=False,
            y_lim=[0, 1.2],
            width=400,
            height=300,
        )
    # Registered with no outputs and the same hourly interval as the plot
    # refresh below.
    electoric_ploting.load(lambda: datetime.datetime.now(), None, every=3600)
    # Refresh the line plot on page load and every hour afterwards.
    dep = electoric_ploting.load(plot_prediction_result, None, plot, every=3600)

electoric_ploting.queue().launch()
plt.close()