NaokiOkamoto's picture
Upload 6 files
a4b2e63
raw
history blame
No virus
6.65 kB
import pandas as pd
import numpy as np
import gradio as gr
import datetime
from dateutil.relativedelta import relativedelta
from function import get_fish_qty, get_estat, dr_prediction_deployment
import yaml
with open('config.yaml') as file:
config = yaml.safe_load(file.read())
def create_prediction_data(fish_sell_ach, oil_price_url, fuel_procurement_cost_url):
oil_price_df = pd.read_excel(oil_price_url, header=5)
oil_price_df = oil_price_df.rename(columns={oil_price_df.columns[0]:'年'})
oil_price_df['年'] = oil_price_df['年'].interpolate(method='ffill')
oil_price_df['年月'] = oil_price_df['年'] + oil_price_df['月'].astype(str) + '月'
oil_price_df['年月'] = pd.to_datetime(oil_price_df['年月'], format='%Y年%m月')
oil_price_df['年月'] = oil_price_df['年月'].apply(lambda x:x+relativedelta(months=3))
oil_price_df['年月'] = oil_price_df['年月'].apply(lambda x:''.join(str(x).split('-'))[:6]).astype(int)
oil_price_df = oil_price_df.drop(['年', '月'], axis=1)
for i in oil_price_df.columns:
if i != '年月':
oil_price_df = oil_price_df.rename(columns={i:f'{i}_lag3'})
oil_price_df[f'{i}_lag3'] = oil_price_df[f'{i}_lag3'].shift(1)
fuel_procurement_cost_df = pd.read_excel(fuel_procurement_cost_url, header=4)
fuel_procurement_cost_df = fuel_procurement_cost_df.iloc[:, 3:]
for i in fuel_procurement_cost_df.columns:
if '\n' in i:
fuel_procurement_cost_df = fuel_procurement_cost_df.rename(columns={i:i.replace('\n', '')})
fuel_procurement_cost_df['燃料費調整単価適用期間'] = fuel_procurement_cost_df['燃料費調整単価適用期間'].interpolate(method='ffill')
fuel_procurement_cost_df['燃料費調整単価適用期間'] = pd.to_datetime(fuel_procurement_cost_df['燃料費調整単価適用期間'],
format='%Y年\n%m月').astype(str).apply(lambda x:''.join(x.split('-'))[:6]).astype(int)
col_list=['するめいか_卸売数量計(kg)',
'いわし_卸売数量計(kg)',
'ぶり・わらさ_卸売数量計(kg)',
'冷さけ_卸売数量計(kg)',
'塩さけ_卸売数量計(kg)',
'さけます類_卸売数量計(kg)',
'全卸売数量計(kg)']
for shift_i in [7, 14, 21, 28]:
change_col_list = [f'{i}_lag{shift_i}' for i in col_list]
fish_sell_ach[change_col_list] = fish_sell_ach[col_list].shift(shift_i)
fish_sell_ach['target_date'] = fish_sell_ach['date'].apply(lambda x:int((pd.to_datetime(str(x))+relativedelta(months=1)).strftime('%Y%m%d')))
fish_sell_ach['年月'] = fish_sell_ach['target_date'].astype(str).str[:6].astype(int)
prediction_df = pd.merge(fish_sell_ach,
oil_price_df,
on='年月',
how='left')
for kind in fuel_procurement_cost_df['種別'].unique():
temp_df = fuel_procurement_cost_df.loc[fuel_procurement_cost_df['種別']==kind].drop('種別', axis=1)
temp_df = temp_df.rename(columns={temp_df.columns[0]:'年月'})
for i in temp_df.columns:
if i != '年月':
temp_df = temp_df.rename(columns={i:f'{i}_{kind}_lag1'})
temp_df['年月'] = pd.to_datetime(temp_df['年月'], format='%Y%m')
temp_df['年月'] = temp_df['年月'].apply(lambda x:x+relativedelta(months=1))
temp_df['年月'] = temp_df['年月'].apply(lambda x:''.join(str(x).split('-'))[:6]).astype(int)
prediction_df = pd.merge(prediction_df,
temp_df,
on='年月')
prediction_df = prediction_df.rename(columns={'date':'forecast_point'})
return prediction_df
def prediction_to_dr(oil_price_url, fuel_procurement_cost_url):
today = datetime.datetime.now()
last_prediction_result = pd.read_csv('data/prediction_result.csv')
last_time_fish_arch = pd.read_csv('data/fish_sell_ach.csv')
if (str(last_prediction_result['forecast_point'].max()) == today.strftime('%Y%m%d'))|(str(last_time_fish_arch['date'].max()) == int(today.strftime('%Y%m%d'))):
pass
else:
start_date = pd.to_datetime(str(last_time_fish_arch['date'].max()))
end_date = pd.to_datetime(today + relativedelta(days=1))
use_fish_list = config['use_fish_list']
temp_sell_ach = get_fish_qty.get_fish_price_data(start_date, end_date, use_fish_list)
temp_sell_ach['date'] = temp_sell_ach['date'].astype(int)
if str(temp_sell_ach['date'].max()) != today.strftime('%Y%m%d'):
pass
else:
temp_sell_ach = pd.concat([last_time_fish_arch,
temp_sell_ach.loc[~temp_sell_ach['date'].isin(last_time_fish_arch['date'].unique())]])
temp_sell_ach.to_csv('data/fish_sell_ach.csv', index=False)
prediction_df = create_prediction_data(temp_sell_ach,
oil_price_url,
fuel_procurement_cost_url)
prediction_df = prediction_df.loc[(prediction_df['forecast_point'].astype(int)>last_prediction_result['forecast_point'].max())
&
(prediction_df['forecast_point'].astype(int)<=int(today.strftime('%Y%m%d')))].reset_index(drop=True)
display(prediction_df)
DEPLOYMENT_ID = '640d791796a6a52d92c368a0'
prediction_df.to_csv('data/temp_prediction.csv', index=False)
prediction_json = dr_prediction_deployment.main('data/temp_prediction.csv', DEPLOYMENT_ID)
prediction_result = pd.DataFrame({
'target_date':prediction_df['target_date'],
'forecast_point':prediction_df['forecast_point'],
'電気代':pd.json_normalize(prediction_json['data'])['prediction']
})
prediction_result = pd.merge(prediction_df,
prediction_result,
on=['target_date', 'forecast_point'])
last_prediction_result = pd.concat([last_prediction_result,
prediction_result])
last_prediction_result.to_csv('data/prediction_result.csv', index=False)
return last_prediction_result