NaokiOkamoto's picture
Update function/prediction_func.py
71dcd58
raw
history blame
No virus
6.61 kB
import pandas as pd
import numpy as np
import gradio as gr
import datetime
from dateutil.relativedelta import relativedelta
from function import get_fish_qty, get_estat, dr_prediction_deployment
import yaml
with open('config.yaml') as file:
config = yaml.safe_load(file.read())
def create_prediction_data(fish_sell_ach, oil_price_url, fuel_procurement_cost_url):
oil_price_df = pd.read_excel(oil_price_url, header=5)
oil_price_df = oil_price_df.rename(columns={oil_price_df.columns[0]:'年'})
oil_price_df['年'] = oil_price_df['年'].interpolate(method='ffill')
oil_price_df['年月'] = oil_price_df['年'] + oil_price_df['月'].astype(str) + '月'
oil_price_df['年月'] = pd.to_datetime(oil_price_df['年月'], format='%Y年%m月')
oil_price_df['年月'] = oil_price_df['年月'].apply(lambda x:x+relativedelta(months=3))
oil_price_df['年月'] = oil_price_df['年月'].apply(lambda x:''.join(str(x).split('-'))[:6]).astype(int)
oil_price_df = oil_price_df.drop(['年', '月'], axis=1)
for i in oil_price_df.columns:
if i != '年月':
oil_price_df = oil_price_df.rename(columns={i:f'{i}_lag3'})
oil_price_df[f'{i}_lag3'] = oil_price_df[f'{i}_lag3'].shift(1)
fuel_procurement_cost_df = pd.read_excel(fuel_procurement_cost_url, header=4)
fuel_procurement_cost_df = fuel_procurement_cost_df.iloc[:, 3:]
for i in fuel_procurement_cost_df.columns:
if '\n' in i:
fuel_procurement_cost_df = fuel_procurement_cost_df.rename(columns={i:i.replace('\n', '')})
fuel_procurement_cost_df['燃料費調整単価適用期間'] = fuel_procurement_cost_df['燃料費調整単価適用期間'].interpolate(method='ffill')
fuel_procurement_cost_df['燃料費調整単価適用期間'] = pd.to_datetime(fuel_procurement_cost_df['燃料費調整単価適用期間'],
format='%Y年\n%m月').astype(str).apply(lambda x:''.join(x.split('-'))[:6]).astype(int)
col_list=['するめいか_卸売数量計(kg)',
'いわし_卸売数量計(kg)',
'ぶり・わらさ_卸売数量計(kg)',
'冷さけ_卸売数量計(kg)',
'塩さけ_卸売数量計(kg)',
'さけます類_卸売数量計(kg)',
'全卸売数量計(kg)']
for shift_i in [7, 14, 21, 28]:
change_col_list = [f'{i}_lag{shift_i}' for i in col_list]
fish_sell_ach[change_col_list] = fish_sell_ach[col_list].shift(shift_i)
fish_sell_ach['target_date'] = fish_sell_ach['date'].apply(lambda x:int((pd.to_datetime(str(x))+relativedelta(months=1)).strftime('%Y%m%d')))
fish_sell_ach['年月'] = fish_sell_ach['target_date'].astype(str).str[:6].astype(int)
prediction_df = pd.merge(fish_sell_ach,
oil_price_df,
on='年月',
how='left')
for kind in fuel_procurement_cost_df['種別'].unique():
temp_df = fuel_procurement_cost_df.loc[fuel_procurement_cost_df['種別']==kind].drop('種別', axis=1)
temp_df = temp_df.rename(columns={temp_df.columns[0]:'年月'})
for i in temp_df.columns:
if i != '年月':
temp_df = temp_df.rename(columns={i:f'{i}_{kind}_lag1'})
temp_df['年月'] = pd.to_datetime(temp_df['年月'], format='%Y%m')
temp_df['年月'] = temp_df['年月'].apply(lambda x:x+relativedelta(months=1))
temp_df['年月'] = temp_df['年月'].apply(lambda x:''.join(str(x).split('-'))[:6]).astype(int)
prediction_df = pd.merge(prediction_df,
temp_df,
on='年月')
prediction_df = prediction_df.rename(columns={'date':'forecast_point'})
return prediction_df
def prediction_to_dr(oil_price_url, fuel_procurement_cost_url):
today = datetime.datetime.now()
last_prediction_result = pd.read_csv('data/prediction_result.csv')
last_time_fish_arch = pd.read_csv('data/fish_sell_ach.csv')
if (str(last_prediction_result['forecast_point'].max()) == today.strftime('%Y%m%d'))|(str(last_time_fish_arch['date'].max()) == int(today.strftime('%Y%m%d'))):
pass
else:
start_date = pd.to_datetime(str(last_time_fish_arch['date'].max()))
end_date = pd.to_datetime(today + relativedelta(days=1))
use_fish_list = config['use_fish_list']
temp_sell_ach = get_fish_qty.get_fish_price_data(start_date, end_date, use_fish_list)
temp_sell_ach['date'] = temp_sell_ach['date'].astype(int)
if str(temp_sell_ach['date'].max()) != today.strftime('%Y%m%d'):
pass
else:
temp_sell_ach = pd.concat([last_time_fish_arch,
temp_sell_ach.loc[~temp_sell_ach['date'].isin(last_time_fish_arch['date'].unique())]])
temp_sell_ach.to_csv('data/fish_sell_ach.csv', index=False)
prediction_df = create_prediction_data(temp_sell_ach,
oil_price_url,
fuel_procurement_cost_url)
prediction_df = prediction_df.loc[(prediction_df['forecast_point'].astype(int)>last_prediction_result['forecast_point'].max())
&
(prediction_df['forecast_point'].astype(int)<=int(today.strftime('%Y%m%d')))].reset_index(drop=True)
DEPLOYMENT_ID = '640d791796a6a52d92c368a0'
prediction_df.to_csv('data/temp_prediction.csv', index=False)
prediction_json = dr_prediction_deployment.main('data/temp_prediction.csv', DEPLOYMENT_ID)
prediction_result = pd.DataFrame({
'target_date':prediction_df['target_date'],
'forecast_point':prediction_df['forecast_point'],
'電気代':pd.json_normalize(prediction_json['data'])['prediction']
})
prediction_result = pd.merge(prediction_df,
prediction_result,
on=['target_date', 'forecast_point'])
last_prediction_result = pd.concat([last_prediction_result,
prediction_result])
last_prediction_result.to_csv('data/prediction_result.csv', index=False)
return last_prediction_result