Spaces:

NaokiOkamoto
/

ESTYLEU_graduation_assignment_a

Paused

File size: 8,077 Bytes

import pandas as pd
import numpy as np
import gradio as gr
import datetime
import calendar
import matplotlib.pyplot as plt
import japanize_matplotlib
import matplotlib.dates as mdates
from dateutil.relativedelta import relativedelta
import datetime
import datarobot as dr
from function import get_fish_qty, get_estat, dr_prediction_deployment, prediction_func, train_modeling

import yaml
# Load the application settings. The encoding is given explicitly so the read
# does not depend on the platform's default locale and matches the UTF-8
# write below.
with open('config.yaml', encoding='utf-8') as file:
    config = yaml.safe_load(file)

# NOTE(review): the 'A' entry looks like a placeholder; it is kept because
# modeling_config.yaml may be read elsewhere - confirm before removing.
# The in-memory config is updated before the output file is opened, so a
# failure here cannot leave a truncated modeling_config.yaml behind.
config['A'] = 'A'
with open('modeling_config.yaml', 'w', encoding='utf-8') as f:
    yaml.dump(config, f, encoding='utf-8', allow_unicode=True)
    
def retrain():
    """Run the prediction pipeline with retraining enabled.

    Delegates to ``get_prediction_result(retrain=True)``. The returned frame
    is discarded and this function returns ``None``.
    """
    get_prediction_result(retrain=True)
    return None

def get_prediction_result(retrain=False):
    """Return the daily electricity-bill predictions for next month.

    Builds one row per calendar day of the month following the current one,
    left-joins the predictions returned by ``prediction_func.prediction_to_dr``
    onto those days, and returns the joined frame.

    Args:
        retrain: When true, ``train_modeling.modeling()`` is called before the
            predictions are fetched.

    Returns:
        pandas.DataFrame with the columns ``forecast_point``, ``target_date``
        and ``電気代_予測``. Days earlier than "now + 1 month" that have no
        prediction are dropped; later days without a prediction are kept with
        a missing value.
    """
    today = datetime.datetime.now()
    if retrain:
        train_modeling.modeling()

    # "One month from now" is needed several times; compute it once.
    next_month = today + relativedelta(months=1)
    prediction_month = next_month.strftime('%Y%m')
    days_in_month = calendar.monthrange(next_month.year, next_month.month)[1]
    month_days = [
        pd.to_datetime(f'{prediction_month}{day:02d}')
        for day in range(1, days_in_month + 1)
    ]
    dfc = pd.DataFrame({'target_date': month_days})

    df = prediction_func.prediction_to_dr(config['oil_price_url'], config['fuel_procurement_cost_url'])
    # Keep only rows whose target date (as a YYYYMM prefix) is next month.
    # The explicit copy makes the column assignments below operate on an
    # independent frame instead of a slice of the prediction output.
    df = df.loc[df['target_date'].astype(str).str[:6] == prediction_month].copy()
    df['target_date'] = pd.to_datetime(df['target_date'].astype(str))
    df['forecast_point'] = pd.to_datetime(df['forecast_point'].astype(str))
    df = pd.merge(dfc,
                  df,
                  on='target_date',
                  how='left')

    # Days without a prediction get a forecast point one month before the
    # target date.
    missing_forecast = df['forecast_point'].isnull()
    df.loc[missing_forecast, 'forecast_point'] = df['target_date'].apply(
        lambda x: x - relativedelta(months=1)
    )

    # Drop days earlier than "now + 1 month" that still have no prediction.
    unpredicted_past = (df['target_date'] < next_month) & df['電気代'].isnull()
    df = df.loc[~unpredicted_past]
    df = df.rename(columns={'電気代': '電気代_予測'})
    return df[['forecast_point', 'target_date', '電気代_予測']]

def plot_prediction_result():
    """Build a ``gr.LinePlot`` update showing the latest prediction frame.

    Returns:
        The value produced by ``gr.LinePlot.update`` with the frame from
        ``get_prediction_result()`` plotted as ``電気代_予測`` over
        ``target_date``.
    """
    plot_options = {
        "x": "target_date",
        "y": "電気代_予測",
        "title": "昨日までの魚の卸売り量から予測された、来月の2人世帯の平均電気料金の推移",
        "width": 500,
        "height": 300,
    }
    return gr.LinePlot.update(value=get_prediction_result(), **plot_options)
    
def get_train_newest_target_period():
    """Return the newest ``年月`` value in ``data/train.csv`` as ``YYYY年MM月``.

    The maximum of the ``年月`` column is stringified, parsed with the
    ``%Y%m`` format, and re-formatted for display.
    """
    train_df = pd.read_csv('data/train.csv')
    newest_yearmonth = str(train_df['年月'].max())
    return pd.to_datetime(newest_yearmonth, format='%Y%m').strftime('%Y年%m月')
    
def get_newest_target_period():
    """Return the newest month in the household survey as ``YYYY年MM月``.

    Calls ``get_estat.get_household_survey()``, joins the ``3.1 電気代``
    category values onto the distinct survey months, converts the month
    labels to ``YYYYMM`` integers, and formats the largest one for display.
    """
    category = '3.1 電気代'
    survey_df = get_estat.get_household_survey()

    # One row per distinct month label found in the survey.
    months_df = pd.DataFrame({'年月': survey_df['時間軸（月次）'].unique()})

    # Rows for the electricity category, with the value column named after
    # the category and its unit.
    category_rows = survey_df.loc[survey_df['品目分類（2020年改定）'] == category]
    unit = category_rows['unit'].unique()[0]
    value_column = f'{category}_({unit})'
    category_values = category_rows.rename(columns={'value': value_column})
    category_values = category_values[['時間軸（月次）', value_column]]
    category_values = category_values.rename(columns={'時間軸（月次）': '年月'})

    expense_df = months_df.merge(category_values, on='年月', how='left')
    expense_df = expense_df.rename(columns={'3.1 電気代_(円)': '電気代'})

    # 'YYYY年MM月' -> 'YYYY-MM-DD' -> 'YYYYMM' -> int
    iso_dates = pd.to_datetime(expense_df['年月'], format='%Y年%m月').astype(str)
    expense_df['年月'] = iso_dates.str.replace('-', '', regex=False).str[:6].astype(int)

    newest_yearmonth = str(expense_df['年月'].max())
    return pd.to_datetime(newest_yearmonth, format='%Y%m').strftime('%Y年%m月')

def get_model_infomation():
    """Load the current model's record and its top feature impacts.

    Connects to DataRobot, reads the last row of ``data/model_management.csv``,
    resolves the model referenced by that row's ``model_url``, and requests
    its feature impact.

    Returns:
        tuple: ``(model_info, feature_impact)`` where ``model_info`` is the
        last row of the management CSV (a pandas Series) and
        ``feature_impact`` is a DataFrame of at most 20 rows sorted by
        ``impactNormalized`` in descending order.
    """
    import os

    # SECURITY: an API token is committed in source. Prefer supplying it via
    # the DATAROBOT_API_TOKEN environment variable; the literal below is kept
    # only as a backward-compatible fallback and should be rotated and removed.
    token = os.environ.get(
        'DATAROBOT_API_TOKEN',
        'NjQwMDVmNGI0ZDQzZDFhYzI2YThmZDJiOnVZejljTXFNTXNoUnlKMStoUFhXSFdYMEZRck9lY3dobnEvRFZ1aVBHbVE9',
    )
    # Original author's note: this appears to be the demo-environment endpoint.
    endpoint = 'https://app.datarobot.com/api/v2'
    dr.Client(endpoint=endpoint, token=token)

    model_info = pd.read_csv('data/model_management.csv').iloc[-1, :]
    # Presumably the URL has the form .../projects/<project_id>/models/<model_id>,
    # which puts the project id at index 4 and the model id last - confirm
    # against the rows written to model_management.csv.
    url_parts = model_info['model_url'].split('/')
    project = dr.Project.get(url_parts[4])
    model = dr.Model.get(project=project, model_id=url_parts[-1])

    feature_impact = pd.DataFrame(model.get_or_request_feature_impact())
    feature_impact = feature_impact.sort_values('impactNormalized', ascending=False).reset_index(drop=True)
    # Keep only the 20 most impactful features.
    feature_impact = feature_impact.iloc[:20, :]

    return model_info, feature_impact

# Dashboard layout: description, prediction plot with summary statistics,
# prediction table, retraining controls, and feature-impact chart.
with gr.Blocks() as electoric_ploting:
    # NOTE(review): the print and the pickle dump look like debugging residue;
    # they are kept because they are module-level side effects.
    print('out put')
    pd.DataFrame({'A':[1]}).to_pickle('data.pickle')
    gr.Markdown(
                            """
                            # その日の魚の卸売り量から、来月の家計データ月別支出の電気代を予測するAI
                            使用データ  
                             * 東京卸売市場日報
                             * 家計調査の月別支出
                             * 原油価格データ
                             * 燃料調達価格データ  
                            why  
                            電気代のtrendは原油価格などが大きく影響するが、細かい変化は気候に影響し、気候はある程度海水温に関連性があると考えられる。
                            また、魚の卸売量は水揚げ量に関係し、水揚げ量は海水温に関係するという考えからモデルを作成。
                             """
                        )
    with gr.Row():
        with gr.Column():
            # Filled in by the periodic load event registered further down.
            plot = gr.LinePlot(show_label=False)
        with gr.Column():
            # Summary statistics are computed once, when the layout is built.
            df = get_prediction_result()
            gr.Textbox(df['電気代_予測'].max(),
                            label='現在までの予測値の最大値')
            gr.Textbox(df['電気代_予測'].min(),
                            label='現在までの予測値の最小値')
            gr.Textbox(df['電気代_予測'].mean(),
                            label='現在までの予測値の平均値')
            gr.Textbox(df['電気代_予測'].median(),
                            label='現在までの予測値の中央値')
    with gr.Row():
        gr.DataFrame(get_prediction_result)

        with gr.Column():

            gr.Textbox(get_train_newest_target_period,
                            label='現在の学習済みのターゲット値最新月')
            gr.Textbox(get_newest_target_period,
                            label='現在の取得可能ターゲット値最新月')
            btn= gr.Button(value="再学習")
            btn.click(retrain, inputs=None, outputs=None)

            with gr.Row():
                model_info, feature_impact_df = get_model_infomation()
                gr.Textbox(model_info['model_type'],
                            label='現在のモデル')
            with gr.Row():
                # Prefix each feature name with its zero-padded rank. The
                # column is rebuilt in a single assignment rather than through
                # chained indexing (df[col][i] = ...), which pandas does not
                # apply to the frame under Copy-on-Write.
                feature_impact_df = feature_impact_df.assign(
                    featureName=[
                        f'{rank:02d}_{feature_name}'
                        for rank, feature_name in enumerate(feature_impact_df['featureName'], start=1)
                    ]
                )
                gr.BarPlot(value = feature_impact_df,
                                title = '特徴量インパクト上位20',
                                x = 'featureName',
                                y = 'impactNormalized',
                                tooltip=['impactNormalized'],
                                x_title = '特徴量名',
                                y_title = '特徴量インパクト_相対値',
                                vertical=False,
                                y_lim=[0, 1.2],
                                width=400,
                                height=300)

    # Hourly event with no outputs; kept as in the original.
    electoric_ploting.load(lambda: datetime.datetime.now(),
                           None,
                           every=3600)
    # Refresh the prediction plot on page load and then every hour.
    dep = electoric_ploting.load(plot_prediction_result, None, plot, every=3600)

    # Launched from inside the Blocks context, as in the original.
    electoric_ploting.queue().launch()

plt.close()