NaokiOkamoto committed
Commit a4b2e63
1 Parent(s): 1f8fd78

Upload 6 files

function/.DS_Store ADDED
Binary file (6.15 kB).
 
function/dr_prediction_deployment.py ADDED
@@ -0,0 +1,119 @@
+ import sys
+ import json
+ import requests
+
+
+ class DataRobotPredictionError(Exception):
+     """Raised if there are issues getting predictions from DataRobot"""
+
+
+ def make_datarobot_deployment_predictions(data, deployment_id):
+     """
+     Make predictions on the provided data using the provided DataRobot deployment_id.
+     See the docs for details:
+     https://app.datarobot.com/docs-jp/predictions/api/dr-predapi.html
+
+     Parameters
+     ----------
+     data : str
+         If using CSV as input:
+         Feature1,Feature2
+         numeric_value,string
+
+         Or if using JSON as input:
+         [{"Feature1":numeric_value,"Feature2":"string"}]
+
+     deployment_id : str
+         The ID of the deployment to make predictions with.
+
+     Returns
+     -------
+     Response schema:
+     https://app.datarobot.com/docs-jp/predictions/api/dr-predapi.html#response-schema
+
+     Raises
+     ------
+     DataRobotPredictionError if there are issues getting predictions from DataRobot
+     """
+     # Set HTTP headers. The charset should match the contents of the file.
+     headers = {
+         # By default, we expect CSV as input data.
+         # Should you wish to supply JSON instead,
+         # comment out the line below and use the line after it instead:
+         'Content-Type': 'text/plain; charset=UTF-8',
+         # 'Content-Type': 'application/json; charset=UTF-8',
+
+         'Authorization': 'Bearer {}'.format('NjQwMDVmNGI0ZDQzZDFhYzI2YThmZDJiOnVZejljTXFNTXNoUnlKMStoUFhXSFdYMEZRck9lY3dobnEvRFZ1aVBHbVE9'),
+         'DataRobot-Key': '84f96e49-d400-ec9c-92fc-30fc6e9329d1',
+     }
+     API_URL = 'https://jppdemo.orm.datarobot.com/predApi/v1.0/deployments/{deployment_id}/predictions'
+     url = API_URL.format(deployment_id=deployment_id)
+
+     # Prediction Explanations:
+     # See the documentation for more information:
+     # https://app.datarobot.com/docs-jp/predictions/api/dr-predapi.html#request-pred-explanations
+     # Should you wish to include Prediction Explanations or Prediction Warnings in the result,
+     # change the parameters below accordingly, and uncomment the params argument in the request below:
+
+     params = {
+         # If explanations are required, uncomment the lines below
+         # 'maxExplanations': 3,
+         # 'thresholdHigh': 0.5,
+         # 'thresholdLow': 0.15,
+         # If text explanations are required, uncomment the line below.
+         # 'maxNgramExplanations': 'all',
+         # Uncomment this for Prediction Warnings, if enabled for your deployment.
+         # 'predictionWarningEnabled': 'true',
+     }
+     # Make API request for predictions
+     predictions_response = requests.post(
+         url,
+         data=data,
+         headers=headers,
+         # Prediction Explanations:
+         # Uncomment this to include explanations in your prediction
+         # params=params,
+     )
+     _raise_dataroboterror_for_status(predictions_response)
+     # Return a Python dict following the schema in the documentation
+     return predictions_response.json()
+
+
+ def _raise_dataroboterror_for_status(response):
+     """Raise DataRobotPredictionError if the request fails, along with the response returned"""
+     try:
+         response.raise_for_status()
+     except requests.exceptions.HTTPError:
+         err_msg = '{code} Error: {msg}'.format(
+             code=response.status_code, msg=response.text)
+         raise DataRobotPredictionError(err_msg)
+
+
+ def main(filename, deployment_id):
+     """
+     Return the predictions on success, or an exit code on error. Codes > 0 are errors to the shell.
+     Also useful as a usage demonstration of
+     `make_datarobot_deployment_predictions(data, deployment_id)`.
+     """
+     MAX_PREDICTION_FILE_SIZE_BYTES = 52428800  # 50 MB
+     if not filename:
+         print(
+             'Input file is a required argument. '
+             'Usage: python datarobot-predict.py <input-file.csv>')
+         return 1
+     with open(filename, 'rb') as f:
+         data = f.read()
+     data_size = sys.getsizeof(data)
+     if data_size >= MAX_PREDICTION_FILE_SIZE_BYTES:
+         print((
+             'Input file is too large: {} bytes. '
+             'Max allowed size is: {} bytes.'
+         ).format(data_size, MAX_PREDICTION_FILE_SIZE_BYTES))
+         return 1
+     try:
+         predictions = make_datarobot_deployment_predictions(data, deployment_id)
+     except DataRobotPredictionError as exc:
+         print(exc)
+         return 1
+     return predictions
+
+ # def prediction_formatting_to_dataframe(prediction_json):
+ #     prediction_df = pd.json_normalize(prediction_json)[['rowId'
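
The commented-out formatter above is truncated in the commit. A minimal sketch of how it might be completed, assuming the deployment response layout that prediction_func.py also relies on (prediction_json['data'] records with rowId and prediction fields); the completed body is an assumption, not the committed code:

import pandas as pd

def prediction_formatting_to_dataframe(prediction_json):
    # Flatten the response's 'data' records and keep the row id and prediction.
    return pd.json_normalize(prediction_json['data'])[['rowId', 'prediction']]
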
function/get_estat.py ADDED
@@ -0,0 +1,31 @@
+ import pandas as pd
+ import datetime
+
+
+ def get_household_survey():
+     # Register as a user on e-Stat and obtain an API key beforehand.
+     # URL: https://www.e-stat.go.jp/api/
+     API_KEY = "ddc1349cf530bdee69ca6a7ad6c0e2301aeb0780"
+
+     # Build the year-month periods to fetch (the last five years)
+     latest_year = int(datetime.datetime.now().strftime('%Y'))
+     year_period = 5
+     years = list(range(latest_year, latest_year - year_period, -1))
+     months = range(1, 13)
+     periods = []
+     for y in years:
+         y = y * 1_000_000
+         for m in months:
+             # Produces e-Stat monthly time codes such as 2023000101 (January 2023)
+             ym = y + m * 100 + m
+             periods.append(str(ym))
+     periods = "%2C".join(periods)  # "%2C" is a URL-encoded comma
+
+     # Fetch the data (header=28 skips the metadata preamble of the CSV)
+     url = f"http://api.e-stat.go.jp/rest/3.0/app/getSimpleStatsData?cdTab=01&cdCat02=03&cdArea=00000&cdTime={periods}&appId={API_KEY}&lang=J&statsDataId=0003343671&metaGetFlg=Y&cntGetFlg=N&explanationGetFlg=Y&annotationGetFlg=Y&sectionHeaderFlg=1&replaceSpChars=0"
+     df = pd.read_csv(url, header=28)
+     return df
+
+
+ if __name__ == "__main__":
+     df = get_household_survey()
+     df.to_csv("household_survey.csv", index=False)
function/get_fish_qty.py ADDED
@@ -0,0 +1,107 @@
+ import codecs
+ import io
+ import random
+ import requests
+ import time
+ from datetime import date, timedelta
+ from tqdm import tqdm
+ from typing import Generator, Tuple
+
+ import numpy as np
+ import pandas as pd
+
+
+ def date_range(
+     start: date, stop: date, step: timedelta = timedelta(1)
+ ) -> Generator[date, None, None]:
+     """Generator that yields dates from start to stop, step days at a time."""
+     current = start
+     while current < stop:
+         yield current
+         current += step
+
+
+ def get_url(download_date: date) -> Tuple[str, str]:
+     """Return the URL to download and the date as a string."""
+     month = download_date.strftime("%Y%m")
+     day = download_date.strftime("%Y%m%d")
+     return (
+         f"https://www.shijou-nippo.metro.tokyo.lg.jp/SN/{month}/{day}/Sui/Sui_K1.csv",
+         day,
+     )
+
+
+ def content_wrap(content):
+     """Skip leading lines until the header row (the one containing 品名) appears."""
+     buffer = ""
+     first = True
+     for line in io.BytesIO(content):
+         line_str = codecs.decode(line, "shift-jis")
+         if first:
+             if "品名" in line_str:
+                 first = False
+                 buffer = line_str
+             else:
+                 continue
+         else:
+             buffer += line_str
+     return io.StringIO(buffer)
+
+
+ def to_numeric(x):
+     """Convert strings to numbers; pass other values through unchanged."""
+     if isinstance(x, str):
+         return float(x)
+     else:
+         return x
+
+
+ def get_fish_price_data(start_date: date, end_date: date, use_fish_list) -> pd.DataFrame:
+     """
+     Pull daily data from the Tokyo wholesale market reports.
+
+     :param start_date: first date to fetch
+     :param end_date: date to stop at (exclusive)
+     :param use_fish_list: fish names (品名) whose quantities to extract
+     :return: daily wholesale quantities for the requested fish
+     """
+     columns = ['date'] + [i + '_卸売数量計(kg)' for i in use_fish_list] + ['全卸売数量計(kg)']
+     fish_qty_df = pd.DataFrame(columns=columns)
+
+     iterator = tqdm(
+         date_range(start_date, end_date), total=(end_date - start_date).days
+     )
+
+     for download_date in iterator:
+         url, day = get_url(download_date)
+         iterator.set_description(day)
+         response = requests.get(url)
+
+         temp_df = pd.DataFrame([{'date': day}])
+         # Skip dates for which no report exists (e.g. market holidays)
+         if response.status_code == 404:
+             continue
+         assert (
+             response.status_code == 200
+         ), f"Unexpected HTTP response. Please check the website {url}."
+
+         df = pd.read_csv(content_wrap(response.content))
+
+         for i in use_fish_list:
+             temp = df.loc[df["品名"] == i, ['卸売数量計']]
+             if len(temp) == 0:
+                 # The fish was not traded on this day
+                 temp_df[f'{i}_卸売数量計(kg)'] = 0
+             else:
+                 # to_numeric guards against quantities parsed as strings
+                 temp_df[f'{i}_卸売数量計(kg)'] = temp['卸売数量計'].apply(to_numeric).sum()
+
+         # The last non-null entry of 卸売数量計 is the overall daily total
+         all_qty = df[['卸売数量計']].dropna().values[-1][0]
+         temp_df['全卸売数量計(kg)'] = all_qty
+
+         fish_qty_df = pd.concat([fish_qty_df, temp_df])
+         # Randomized pause between requests to avoid hammering the server
+         time.sleep(max(0.5 + random.normalvariate(0, 0.3), 0.1))
+     return fish_qty_df
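
A minimal usage sketch for get_fish_price_data; the date range is illustrative, and the fish names are taken from the list used elsewhere in this commit:

from datetime import date

df = get_fish_price_data(date(2023, 1, 1), date(2023, 1, 8), ['するめいか', 'いわし'])
print(df)
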
function/prediction_func.py ADDED
@@ -0,0 +1,119 @@
+ import pandas as pd
+ import numpy as np
+ import gradio as gr
+ import datetime
+ from dateutil.relativedelta import relativedelta
+ from function import get_fish_qty, get_estat, dr_prediction_deployment
+
+ import yaml
+ with open('config.yaml') as file:
+     config = yaml.safe_load(file.read())
+
+
+ def create_prediction_data(fish_sell_ach, oil_price_url, fuel_procurement_cost_url):
+     # Crude-oil prices: key each row by 年月 (YYYYMM), shifted three months forward
+     oil_price_df = pd.read_excel(oil_price_url, header=5)
+     oil_price_df = oil_price_df.rename(columns={oil_price_df.columns[0]: '年'})
+     oil_price_df['年'] = oil_price_df['年'].interpolate(method='ffill')
+     oil_price_df['年月'] = oil_price_df['年'] + oil_price_df['月'].astype(str) + '月'
+     oil_price_df['年月'] = pd.to_datetime(oil_price_df['年月'], format='%Y年%m月')
+     oil_price_df['年月'] = oil_price_df['年月'].apply(lambda x: x + relativedelta(months=3))
+     oil_price_df['年月'] = oil_price_df['年月'].apply(lambda x: ''.join(str(x).split('-'))[:6]).astype(int)
+     oil_price_df = oil_price_df.drop(['年', '月'], axis=1)
+     for i in oil_price_df.columns:
+         if i != '年月':
+             oil_price_df = oil_price_df.rename(columns={i: f'{i}_lag3'})
+             oil_price_df[f'{i}_lag3'] = oil_price_df[f'{i}_lag3'].shift(1)
+
+     # Fuel procurement costs
+     fuel_procurement_cost_df = pd.read_excel(fuel_procurement_cost_url, header=4)
+     fuel_procurement_cost_df = fuel_procurement_cost_df.iloc[:, 3:]
+     for i in fuel_procurement_cost_df.columns:
+         if '\n' in i:
+             fuel_procurement_cost_df = fuel_procurement_cost_df.rename(columns={i: i.replace('\n', '')})
+
+     fuel_procurement_cost_df['燃料費調整単価適用期間'] = fuel_procurement_cost_df['燃料費調整単価適用期間'].interpolate(method='ffill')
+     fuel_procurement_cost_df['燃料費調整単価適用期間'] = pd.to_datetime(fuel_procurement_cost_df['燃料費調整単価適用期間'],
+                                                     format='%Y年\n%m月').astype(str).apply(lambda x: ''.join(x.split('-'))[:6]).astype(int)
+
+     col_list = ['するめいか_卸売数量計(kg)',
+                 'いわし_卸売数量計(kg)',
+                 'ぶり・わらさ_卸売数量計(kg)',
+                 '冷さけ_卸売数量計(kg)',
+                 '塩さけ_卸売数量計(kg)',
+                 'さけます類_卸売数量計(kg)',
+                 '全卸売数量計(kg)']
+
+     # Lag features over the previous one to four weeks
+     for shift_i in [7, 14, 21, 28]:
+         change_col_list = [f'{i}_lag{shift_i}' for i in col_list]
+         fish_sell_ach[change_col_list] = fish_sell_ach[col_list].shift(shift_i)
+
+     fish_sell_ach['target_date'] = fish_sell_ach['date'].apply(lambda x: int((pd.to_datetime(str(x)) + relativedelta(months=1)).strftime('%Y%m%d')))
+     fish_sell_ach['年月'] = fish_sell_ach['target_date'].astype(str).str[:6].astype(int)
+
+     prediction_df = pd.merge(fish_sell_ach,
+                              oil_price_df,
+                              on='年月',
+                              how='left')
+
+     for kind in fuel_procurement_cost_df['種別'].unique():
+         temp_df = fuel_procurement_cost_df.loc[fuel_procurement_cost_df['種別'] == kind].drop('種別', axis=1)
+         temp_df = temp_df.rename(columns={temp_df.columns[0]: '年月'})
+         for i in temp_df.columns:
+             if i != '年月':
+                 temp_df = temp_df.rename(columns={i: f'{i}_{kind}_lag1'})
+         temp_df['年月'] = pd.to_datetime(temp_df['年月'], format='%Y%m')
+         temp_df['年月'] = temp_df['年月'].apply(lambda x: x + relativedelta(months=1))
+         temp_df['年月'] = temp_df['年月'].apply(lambda x: ''.join(str(x).split('-'))[:6]).astype(int)
+         prediction_df = pd.merge(prediction_df,
+                                  temp_df,
+                                  on='年月')
+     prediction_df = prediction_df.rename(columns={'date': 'forecast_point'})
+
+     return prediction_df
+
+
+ def prediction_to_dr(oil_price_url, fuel_procurement_cost_url):
+     today = datetime.datetime.now()
+     last_prediction_result = pd.read_csv('data/prediction_result.csv')
+     last_time_fish_arch = pd.read_csv('data/fish_sell_ach.csv')
+     # Nothing to do if we already predicted for today, or today's sales are already archived
+     if (str(last_prediction_result['forecast_point'].max()) == today.strftime('%Y%m%d')) \
+             or (last_time_fish_arch['date'].max() == int(today.strftime('%Y%m%d'))):
+         return last_prediction_result
+
+     start_date = pd.to_datetime(str(last_time_fish_arch['date'].max()))
+     end_date = pd.to_datetime(today + relativedelta(days=1))
+     use_fish_list = config['use_fish_list']
+     temp_sell_ach = get_fish_qty.get_fish_price_data(start_date, end_date, use_fish_list)
+     temp_sell_ach['date'] = temp_sell_ach['date'].astype(int)
+     if str(temp_sell_ach['date'].max()) != today.strftime('%Y%m%d'):
+         # Today's market report has not been published yet
+         return last_prediction_result
+
+     temp_sell_ach = pd.concat([last_time_fish_arch,
+                                temp_sell_ach.loc[~temp_sell_ach['date'].isin(last_time_fish_arch['date'].unique())]])
+     temp_sell_ach.to_csv('data/fish_sell_ach.csv', index=False)
+     prediction_df = create_prediction_data(temp_sell_ach,
+                                            oil_price_url,
+                                            fuel_procurement_cost_url)
+     # Keep only forecast points newer than the last prediction and no later than today
+     prediction_df = prediction_df.loc[
+         (prediction_df['forecast_point'].astype(int) > last_prediction_result['forecast_point'].max())
+         & (prediction_df['forecast_point'].astype(int) <= int(today.strftime('%Y%m%d')))
+     ].reset_index(drop=True)
+
+     print(prediction_df)
+     DEPLOYMENT_ID = '640d791796a6a52d92c368a0'
+     prediction_df.to_csv('data/temp_prediction.csv', index=False)
+     prediction_json = dr_prediction_deployment.main('data/temp_prediction.csv', DEPLOYMENT_ID)
+
+     prediction_result = pd.DataFrame({
+         'target_date': prediction_df['target_date'],
+         'forecast_point': prediction_df['forecast_point'],
+         '電気代': pd.json_normalize(prediction_json['data'])['prediction']
+     })
+
+     prediction_result = pd.merge(prediction_df,
+                                  prediction_result,
+                                  on=['target_date', 'forecast_point'])
+
+     last_prediction_result = pd.concat([last_prediction_result,
+                                         prediction_result])
+
+     last_prediction_result.to_csv('data/prediction_result.csv', index=False)
+
+     return last_prediction_result
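
A usage sketch for prediction_to_dr, assuming config.yaml also carries the two source URLs under the key names used in train_modeling.py:

result = prediction_to_dr(config['oil_price_url'], config['fuel_procurement_cost_url'])
print(result.tail())
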
function/train_modeling.py ADDED
@@ -0,0 +1,221 @@
+ import pandas as pd
+ import numpy as np
+ import gradio as gr
+ import datetime
+ from dateutil.relativedelta import relativedelta
+ import datarobot as dr
+ from function import get_fish_qty, get_estat, dr_prediction_deployment
+
+ import yaml
+ with open('config.yaml') as file:
+     config = yaml.safe_load(file.read())
+
+
+ def create_train_data():
+     # Extract the target
+     household_survey = get_estat.get_household_survey()
+     expence_df = pd.DataFrame({'年月': household_survey['時間軸(月次)'].unique()})
+     cate = '3.1 電気代'
+     temp_df = household_survey.loc[household_survey['品目分類(2020年改定)'] == cate]
+     unit = temp_df['unit'].unique()[0]
+     temp_df = temp_df.rename(columns={'value': f'{cate}_({unit})'})
+     expence_df = pd.merge(expence_df,
+                           temp_df[['時間軸(月次)', f'{cate}_({unit})']].rename(columns={'時間軸(月次)': '年月'}),
+                           on='年月',
+                           how='left')
+     expence_df = expence_df.rename(columns={'3.1 電気代_(円)': '電気代'})
+     expence_df['年月'] = pd.to_datetime(expence_df['年月'], format='%Y年%m月').astype(str).apply(lambda x: ''.join(x.split('-'))[:6]).astype(int)
+
+     # Extract and build the crude-oil price data
+     oil_price_df = pd.read_excel(config['oil_price_url'], header=5)
+     oil_price_df = oil_price_df.rename(columns={oil_price_df.columns[0]: '年'})
+     oil_price_df['年'] = oil_price_df['年'].interpolate(method='ffill')
+     oil_price_df['年月'] = oil_price_df['年'] + oil_price_df['月'].astype(str) + '月'
+     oil_price_df['年月'] = pd.to_datetime(oil_price_df['年月'], format='%Y年%m月').astype(str).apply(lambda x: ''.join(x.split('-'))[:6]).astype(int)
+
+     # Build the fuel procurement cost data
+     fuel_procurement_cost_df = pd.read_excel(config['fuel_procurement_cost_url'], header=4)
+     fuel_procurement_cost_df = fuel_procurement_cost_df.iloc[:, 3:]
+     for i in fuel_procurement_cost_df.columns:
+         if '\n' in i:
+             fuel_procurement_cost_df = fuel_procurement_cost_df.rename(columns={i: i.replace('\n', '')})
+
+     fuel_procurement_cost_df['燃料費調整単価適用期間'] = fuel_procurement_cost_df['燃料費調整単価適用期間'].interpolate(method='ffill')
+     fuel_procurement_cost_df['燃料費調整単価適用期間'] = pd.to_datetime(fuel_procurement_cost_df['燃料費調整単価適用期間'],
+                                                     format='%Y年\n%m月').astype(str).apply(lambda x: ''.join(x.split('-'))[:6]).astype(int)
+     for kind in fuel_procurement_cost_df['種別'].unique():
+         temp_df = fuel_procurement_cost_df.loc[fuel_procurement_cost_df['種別'] == kind].drop('種別', axis=1)
+         temp_df = temp_df.rename(columns={temp_df.columns[0]: '年月'})
+         for i in temp_df.columns:
+             if i != '年月':
+                 temp_df = temp_df.rename(columns={i: f'{i}_{kind}_lag1'})
+                 temp_df[f'{i}_{kind}_lag1'] = temp_df[f'{i}_{kind}_lag1'].shift(1)
+         expence_df = pd.merge(expence_df,
+                               temp_df,
+                               on='年月',
+                               how='left')
+
+     # Join the datasets
+     oil_price_df[['ブレント_lag3', 'ドバイ_lag3', 'WTI_lag3', 'OPECバスケット_lag3']] = oil_price_df[['ブレント', 'ドバイ', 'WTI', 'OPECバスケット']].shift(3)
+     expence_df = pd.merge(expence_df,
+                           oil_price_df[['ブレント_lag3', 'ドバイ_lag3', 'WTI_lag3', 'OPECバスケット_lag3', '年月']],
+                           on='年月',
+                           how='left')
+
+     # Load the fish wholesale data
+     last_time_fish_arch = pd.read_csv('data/fish_sell_ach.csv')
+     start_date = pd.to_datetime(str(int(last_time_fish_arch['date'].max())))
+     today = datetime.datetime.now()
+     end_date = pd.to_datetime(today + relativedelta(days=1))
+     use_fish_list = config['use_fish_list']
+     temp_sell_ach = get_fish_qty.get_fish_price_data(start_date, end_date, use_fish_list)
+     temp_sell_ach['date'] = temp_sell_ach['date'].astype(int)
+     sell_ach = pd.concat([last_time_fish_arch,
+                           temp_sell_ach.loc[~temp_sell_ach['date'].isin(last_time_fish_arch['date'].unique())]])
+     sell_ach.to_csv('data/fish_sell_ach.csv', index=False)
+
+     # Build the training data
+     sell_ach['target_date'] = sell_ach['date'].apply(lambda x: int((pd.to_datetime(str(x)) + relativedelta(months=1)).strftime('%Y%m%d')))
+     sell_ach['年月'] = sell_ach['target_date'].astype(str).str[:6].astype(int)
+
+     col_list = ['するめいか_卸売数量計(kg)',
+                 'いわし_卸売数量計(kg)',
+                 'ぶり・わらさ_卸売数量計(kg)',
+                 '冷さけ_卸売数量計(kg)',
+                 '塩さけ_卸売数量計(kg)',
+                 'さけます類_卸売数量計(kg)',
+                 '全卸売数量計(kg)']
+
+     for shift_i in [7, 14, 21, 28]:
+         change_col_list = [f'{i}_lag{shift_i}' for i in col_list]
+         sell_ach[change_col_list] = sell_ach[col_list].shift(shift_i)
+
+     sell_ach = sell_ach.rename(columns={'date': 'forecast_point'})
+     train_df = pd.merge(expence_df,
+                         sell_ach,
+                         on='年月')
+     train_df.to_csv('data/train.csv', index=False)
+
+     return train_df
+
+
+ def modeling():
+     train_df = create_train_data()
+     # Settings required for modeling
+     ## DataRobot connection settings
+     token = 'NjQwMDVmNGI0ZDQzZDFhYzI2YThmZDJiOnVZejljTXFNTXNoUnlKMStoUFhXSFdYMEZRck9lY3dobnEvRFZ1aVBHbVE9'
+     ### This appears to be the endpoint for the demo environment
+     endpoint = 'https://app.datarobot.com/api/v2'
+
+     ## Project name
+     project_name = f'{datetime.datetime.now().strftime("%Y%m%d")}_ESTYLEU_電気代予測_再学習'
+
+     ## Various settings
+     ### Feature settings
+     target = '電気代'
+     feature_timeline = 'target_date'  # time axis
+     not_use_feature = ['年月', 'forecast_point']
+     ### Optimization metric
+     metric = 'RMSE'
+     ### Gap (ISO 8601 duration; 'P0Y' means no gap)
+     gap = 'P0Y'
+     ### Number of backtests
+     number_of_backtests = 1
+     end_date = int(train_df[feature_timeline].max())
+     ### Dates
+     holdout_end_date = pd.to_datetime(str(end_date))
+     holdout_start_date = holdout_end_date - relativedelta(years=1)
+     backtest_end_date = holdout_start_date - relativedelta(days=1)
+     backtest_start_date = backtest_end_date - relativedelta(years=1)
+     train_end_date = backtest_start_date - relativedelta(days=1)
+     train_start_date = pd.to_datetime(str(int(train_df[feature_timeline].min())))
+
+     ### Modeling mode
+     # mode = dr.AUTOPILOT_MODE.QUICK
+     mode = dr.AUTOPILOT_MODE.FULL_AUTO
+     dr.Client(
+         endpoint=endpoint,
+         token=token
+     )
+
+     # Backtest configuration
+     backtests_setting = [dr.BacktestSpecification(
+         index=0,
+         primary_training_start_date=train_start_date,
+         primary_training_end_date=train_end_date,
+         validation_start_date=backtest_start_date,
+         validation_end_date=backtest_end_date
+     )]
+
+     spec = dr.DatetimePartitioningSpecification(
+         feature_timeline,
+         use_time_series=False,
+         disable_holdout=False,
+         holdout_start_date=holdout_start_date,
+         holdout_end_date=holdout_end_date,
+         gap_duration=gap,
+         number_of_backtests=number_of_backtests,
+         backtests=backtests_setting,
+     )
+
+     use_feature_list = train_df.columns.to_list()
+
+     print('now creating project')
+     project = dr.Project.create(
+         train_df,
+         project_name=project_name
+     )
+
+     # Date features DataRobot derives from the partition column, e.g. 'target_date (Year)'
+     raw = [feat_list for feat_list in project.get_featurelists() if feat_list.name == 'Informative Features'][0]
+     raw_features = [feat for feat in raw.features if f'{feature_timeline} ' in feat]
+
+     for i in not_use_feature:
+         if i in use_feature_list:
+             use_feature_list.remove(i)
+
+     use_feature_list.extend(raw_features)
+     print('start modeling')
+     project.analyze_and_model(
+         target=target,
+         mode=mode,
+         metric=metric,
+         partitioning_method=spec,
+         max_wait=3000,
+         worker_count=-1,
+         featurelist_id=project.create_featurelist('モデリング', use_feature_list).id
+     )
+     project.wait_for_autopilot()
+     project.unlock_holdout()
+
+     model_df = pd.DataFrame(
+         [[model.id,
+           model.model_type,
+           model.metrics['RMSE']['validation'],
+           model.metrics['RMSE']['backtesting'],
+           model.metrics['RMSE']['holdout'],
+           model] for model in project.get_datetime_models() if model.model_type != 'Baseline Predictions Using Most Recent Value'],
+         columns=['ID', 'モデル名', 'バックテスト1', '全てのバックテスト', 'holdout', 'model'])
+     model_df = model_df.sort_values('holdout').reset_index(drop=True)
+
+     # Best model by holdout RMSE
+     model = model_df['model'][0]
+
+     try:
+         model_management_df = pd.read_csv('data/model_management.csv')
+     except FileNotFoundError:
+         model_management_df = pd.DataFrame()
+
+     temp_model_management_df = pd.DataFrame({
+         '作成日': [int(datetime.datetime.now().strftime('%Y%m%d'))],
+         '作成時間': [int(datetime.datetime.now().strftime('%H%M%S'))],
+         'project_url': [project.get_uri()],
+         'model_url': [model.get_uri()],
+         'model_type': [model.model_type]
+     })
+
+     model_management_df = pd.concat([model_management_df,
+                                      temp_model_management_df])
+     model_management_df.to_csv('data/model_management.csv', index=False)
+
+     deployment = dr.Deployment.get(deployment_id='640d791796a6a52d92c368a0')
+
+     deployment.replace_model(model.id, dr.enums.MODEL_REPLACEMENT_REASON.SCHEDULED_REFRESH)
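
For reference, a minimal sketch of the config.yaml these modules load. The keys and fish names are the ones referenced in this commit; the URLs are placeholders:

# config.yaml (illustrative)
oil_price_url: https://example.com/oil_price.xlsx
fuel_procurement_cost_url: https://example.com/fuel_procurement_cost.xlsx
use_fish_list:
  - するめいか
  - いわし
  - ぶり・わらさ
  - 冷さけ
  - 塩さけ
  - さけます類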