NaokiOkamoto committed on
Commit
05fd13f
1 Parent(s): 953ca45

Delete function

function/dr_prediction_deployment.py DELETED
@@ -1,119 +0,0 @@
- import sys
- import json
- import requests
-
- class DataRobotPredictionError(Exception):
-     """Raised if there are issues getting predictions from DataRobot"""
-
-
- def make_datarobot_deployment_predictions(data, deployment_id):
-     """
-     Make predictions on the data provided using the DataRobot deployment_id provided.
-     See the docs for details:
-     https://app.datarobot.com/docs-jp/predictions/api/dr-predapi.html
-
-     Parameters
-     ----------
-     data : str
-         If using CSV as input:
-         Feature1,Feature2
-         numeric_value,string
-
-         Or if using JSON as input:
-         [{"Feature1":numeric_value,"Feature2":"string"}]
-
-     deployment_id : str
-         The ID of the deployment to make predictions with.
-
-     Returns
-     -------
-     Response schema:
-     https://app.datarobot.com/docs-jp/predictions/api/dr-predapi.html#response-schema
-
-     Raises
-     ------
-     DataRobotPredictionError if there are issues getting predictions from DataRobot
-     """
-     # Set HTTP headers. The charset should match the contents of the file.
-     headers = {
-         # By default, we expect CSV as input data.
-         # Should you wish to supply JSON instead,
-         # comment out the line below and use the line after it instead:
-         'Content-Type': 'text/plain; charset=UTF-8',
-         # 'Content-Type': 'application/json; charset=UTF-8',
-
-         'Authorization': 'Bearer {}'.format('NjQwMDVmNGI0ZDQzZDFhYzI2YThmZDJiOnVZejljTXFNTXNoUnlKMStoUFhXSFdYMEZRck9lY3dobnEvRFZ1aVBHbVE9'),
-         'DataRobot-Key': '84f96e49-d400-ec9c-92fc-30fc6e9329d1',
-     }
-     API_URL = 'https://jppdemo.orm.datarobot.com/predApi/v1.0/deployments/{deployment_id}/predictions'
-     url = API_URL.format(deployment_id=deployment_id)
-
-     # Prediction Explanations:
-     # See the documentation for more information:
-     # https://app.datarobot.com/docs-jp/predictions/api/dr-predapi.html#request-pred-explanations
-     # Should you wish to include Prediction Explanations or Prediction Warnings in the result,
-     # change the parameters below accordingly, and uncomment the params field in the request below:
-     params = {
-         # If explanations are required, uncomment the lines below
-         # 'maxExplanations': 3,
-         # 'thresholdHigh': 0.5,
-         # 'thresholdLow': 0.15,
-         # If text explanations are required, uncomment the line below.
-         # 'maxNgramExplanations': 'all',
-         # Uncomment this for Prediction Warnings, if enabled for your deployment.
-         # 'predictionWarningEnabled': 'true',
-     }
-     # Make the API request for predictions
-     predictions_response = requests.post(
-         url,
-         data=data,
-         headers=headers,
-         # Prediction Explanations:
-         # Uncomment this to include explanations in your prediction
-         # params=params,
-     )
-     _raise_dataroboterror_for_status(predictions_response)
-     # Return a Python dict following the schema in the documentation
-     return predictions_response.json()
-
-
- def _raise_dataroboterror_for_status(response):
-     """Raise DataRobotPredictionError if the request fails, along with the response returned"""
-     try:
-         response.raise_for_status()
-     except requests.exceptions.HTTPError:
-         err_msg = '{code} Error: {msg}'.format(
-             code=response.status_code, msg=response.text)
-         raise DataRobotPredictionError(err_msg)
-
-
- def main(filename, deployment_id):
-     """
-     Return the prediction response on success, or an exit code on error.
-     Codes > 0 are errors to the shell. Also useful as a usage demonstration of
-     `make_datarobot_deployment_predictions(data, deployment_id)`.
-     """
-     MAX_PREDICTION_FILE_SIZE_BYTES = 52428800  # 50 MB
-     if not filename:
-         print(
-             'Input file is a required argument. '
-             'Usage: python datarobot-predict.py <input-file.csv>')
-         return 1
-     with open(filename, 'rb') as f:
-         data = f.read()
-     data_size = sys.getsizeof(data)
-     if data_size >= MAX_PREDICTION_FILE_SIZE_BYTES:
-         print((
-             'Input file is too large: {} bytes. '
-             'Max allowed size is: {} bytes.'
-         ).format(data_size, MAX_PREDICTION_FILE_SIZE_BYTES))
-         return 1
-     try:
-         predictions = make_datarobot_deployment_predictions(data, deployment_id)
-     except DataRobotPredictionError as exc:
-         print(exc)
-         return 1
-     return predictions
-
-
- # def prediction_formatting_to_dataframe(prediction_json):
- #     prediction_df = pd.json_normalize(prediction_json)[['rowId'
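
For reference, a minimal sketch of how this module was invoked elsewhere in the repo (the CSV path and deployment ID below mirror the values hard-coded in function/prediction_func.py; treat them as illustrative):

import json
from function import dr_prediction_deployment

# main() returns the parsed prediction response on success, or 1 on error
predictions = dr_prediction_deployment.main('data/temp_prediction.csv', '640d791796a6a52d92c368a0')
if predictions != 1:
    print(json.dumps(predictions, indent=2, ensure_ascii=False))
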
function/get_estat.py DELETED
@@ -1,31 +0,0 @@
- import pandas as pd
- import datetime
-
-
- def get_household_survey():
-     # Register as an e-Stat user and obtain an API key beforehand
-     # URL: https://www.e-stat.go.jp/api/
-     API_KEY = "ddc1349cf530bdee69ca6a7ad6c0e2301aeb0780"
-
-     # Set the range of year-months to fetch (the last five years)
-     latest_year = int(datetime.datetime.now().strftime('%Y'))
-     year_period = 5
-     years = list(range(latest_year, latest_year - year_period, -1))
-     months = range(1, 13)
-     periods = []
-     for y in years:
-         y = y * 1_000_000
-         for m in months:
-             # Build monthly time codes such as 2023000101 (January 2023)
-             ym = y + m * 100 + m
-             periods.append(str(ym))
-     # "%2C" is a URL-encoded comma separating the time codes
-     periods = "%2C".join(periods)
-
-     # Fetch the data
-     url = f"http://api.e-stat.go.jp/rest/3.0/app/getSimpleStatsData?cdTab=01&cdCat02=03&cdArea=00000&cdTime={periods}&appId={API_KEY}&lang=J&statsDataId=0003343671&metaGetFlg=Y&cntGetFlg=N&explanationGetFlg=Y&annotationGetFlg=Y&sectionHeaderFlg=1&replaceSpChars=0"
-     df = pd.read_csv(url, header=28)
-     return df
-
-
- if __name__ == "__main__":
-     df = get_household_survey()
-     df.to_csv("household_survey.csv", index=False)
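
A quick sanity check of the cdTime codes the nested loop above generates (these appear to match e-Stat's monthly time-code format; the year 2023 is only an example):

# For 2023 the loop yields twelve codes, from 2023000101 (January) to 2023001212 (December)
y = 2023 * 1_000_000
codes = [str(y + m * 100 + m) for m in range(1, 13)]
print(codes[0], codes[-1])  # 2023000101 2023001212
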
function/get_fish_qty.py DELETED
@@ -1,107 +0,0 @@
- import codecs
- import io
- import random
- import requests
- import time
- from datetime import date, timedelta
- from tqdm import tqdm
- from typing import Generator, Tuple
-
- import numpy as np
- import pandas as pd
-
-
- def date_range(
-     start: date, stop: date, step: timedelta = timedelta(1)
- ) -> Generator[date, None, None]:
-     """Yield dates from start (inclusive) to stop (exclusive) in steps of step days."""
-     current = start
-     while current < stop:
-         yield current
-         current += step
-
-
- def get_url(download_date: date) -> Tuple[str, str]:
-     """Return the download URL and the date string for the given date."""
-     month = download_date.strftime("%Y%m")
-     day = download_date.strftime("%Y%m%d")
-     return (
-         f"https://www.shijou-nippo.metro.tokyo.lg.jp/SN/{month}/{day}/Sui/Sui_K1.csv",
-         day,
-     )
-
-
- def content_wrap(content):
-     """Skip leading lines until the header row (the one containing "品名") appears."""
-     buffer = ""
-     first = True
-     for line in io.BytesIO(content):
-         line_str = codecs.decode(line, "shift-jis")
-         if first:
-             if "品名" in line_str:
-                 first = False
-                 buffer = line_str
-             else:
-                 continue
-         else:
-             buffer += line_str
-     return io.StringIO(buffer)
-
-
- def to_numeric(x):
-     """Convert a string to a float, passing other values through unchanged."""
-     if isinstance(x, str):
-         return float(x)
-     else:
-         return x
-
-
- def get_fish_price_data(start_date: date, end_date: date, use_fish_list) -> pd.DataFrame:
-     """
-     Fetch daily wholesale data from the Tokyo Metropolitan Central Wholesale Market.
-
-     :param start_date: first date to fetch
-     :param end_date: last date to fetch (exclusive)
-     :param use_fish_list: fish names whose wholesale quantities are collected
-     :return: daily wholesale quantities for each fish, combined into one DataFrame
-     """
-     columns = ['date'] + [i + '_卸売数量計(kg)' for i in use_fish_list] + ['全卸売数量計(kg)']
-     fish_qty_df = pd.DataFrame(columns=columns)
-
-     iterator = tqdm(
-         date_range(start_date, end_date), total=(end_date - start_date).days
-     )
-
-     for download_date in iterator:
-         url, day = get_url(download_date)
-         iterator.set_description(day)
-         response = requests.get(url)
-
-         # Skip days for which no report was published (the URL does not exist)
-         if response.status_code == 404:
-             continue
-         assert (
-             response.status_code == 200
-         ), f"Unexpected HTTP response. Please check the website {url}."
-
-         df = pd.read_csv(content_wrap(response.content))
-
-         temp_df = pd.DataFrame([{'date': day}])
-         for i in use_fish_list:
-             temp = df.loc[df["品名"] == i, ['卸売数量計']]
-             if len(temp) == 0:
-                 temp_df[f'{i}_卸売数量計(kg)'] = 0
-             else:
-                 temp_df[f'{i}_卸売数量計(kg)'] = temp['卸売数量計'].sum()
-
-         # The last non-null row of 卸売数量計 holds the day's grand total
-         all_qty = df[['卸売数量計']].dropna().values[-1][0]
-         temp_df['全卸売数量計(kg)'] = all_qty
-
-         fish_qty_df = pd.concat([fish_qty_df, temp_df])
-         # Randomized polling interval to avoid hammering the server
-         time.sleep(max(0.5 + random.normalvariate(0, 0.3), 0.1))
-     return fish_qty_df
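
A minimal sketch of calling the scraper directly (the date range and fish names are illustrative; in the app the list comes from config['use_fish_list']):

from datetime import date
from function import get_fish_qty

df = get_fish_qty.get_fish_price_data(
    date(2023, 3, 1), date(2023, 3, 8),  # one week of daily reports
    use_fish_list=['するめいか', 'いわし'],
)
print(df.head())
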
function/prediction_func.py DELETED
@@ -1,119 +0,0 @@
- import pandas as pd
- import numpy as np
- import gradio as gr
- import datetime
- from dateutil.relativedelta import relativedelta
- from function import get_fish_qty, get_estat, dr_prediction_deployment
-
- import yaml
- with open('config/config.yaml') as file:
-     config = yaml.safe_load(file.read())
-
-
- def create_prediction_data(fish_sell_ach, oil_price_url, fuel_procurement_cost_url):
-     # Crude oil prices: applied with a three-month offset and a one-row lag
-     oil_price_df = pd.read_excel(oil_price_url, header=5)
-     oil_price_df = oil_price_df.rename(columns={oil_price_df.columns[0]: '年'})
-     oil_price_df['年'] = oil_price_df['年'].interpolate(method='ffill')
-     oil_price_df['年月'] = oil_price_df['年'] + oil_price_df['月'].astype(str) + '月'
-     oil_price_df['年月'] = pd.to_datetime(oil_price_df['年月'], format='%Y年%m月')
-     oil_price_df['年月'] = oil_price_df['年月'].apply(lambda x: x + relativedelta(months=3))
-     oil_price_df['年月'] = oil_price_df['年月'].apply(lambda x: ''.join(str(x).split('-'))[:6]).astype(int)
-     oil_price_df = oil_price_df.drop(['年', '月'], axis=1)
-     for i in oil_price_df.columns:
-         if i != '年月':
-             oil_price_df = oil_price_df.rename(columns={i: f'{i}_lag3'})
-             oil_price_df[f'{i}_lag3'] = oil_price_df[f'{i}_lag3'].shift(1)
-
-     # Fuel procurement costs: clean the headers and normalize the application period
-     fuel_procurement_cost_df = pd.read_excel(fuel_procurement_cost_url, header=4)
-     fuel_procurement_cost_df = fuel_procurement_cost_df.iloc[:, 3:]
-     for i in fuel_procurement_cost_df.columns:
-         if '\n' in i:
-             fuel_procurement_cost_df = fuel_procurement_cost_df.rename(columns={i: i.replace('\n', '')})
-
-     fuel_procurement_cost_df['燃料費調整単価適用期間'] = fuel_procurement_cost_df['燃料費調整単価適用期間'].interpolate(method='ffill')
-     fuel_procurement_cost_df['燃料費調整単価適用期間'] = pd.to_datetime(fuel_procurement_cost_df['燃料費調整単価適用期間'],
-                                                      format='%Y年\n%m月').astype(str).apply(lambda x: ''.join(x.split('-'))[:6]).astype(int)
-
-     # Lag features (7/14/21/28 days) for the fish wholesale quantities
-     col_list = ['するめいか_卸売数量計(kg)',
-                 'いわし_卸売数量計(kg)',
-                 'ぶり・わらさ_卸売数量計(kg)',
-                 '冷さけ_卸売数量計(kg)',
-                 '塩さけ_卸売数量計(kg)',
-                 'さけます類_卸売数量計(kg)',
-                 '全卸売数量計(kg)']
-
-     for shift_i in [7, 14, 21, 28]:
-         change_col_list = [f'{i}_lag{shift_i}' for i in col_list]
-         fish_sell_ach[change_col_list] = fish_sell_ach[col_list].shift(shift_i)
-
-     # The prediction target month is one month after the forecast point
-     fish_sell_ach['target_date'] = fish_sell_ach['date'].apply(lambda x: int((pd.to_datetime(str(x)) + relativedelta(months=1)).strftime('%Y%m%d')))
-     fish_sell_ach['年月'] = fish_sell_ach['target_date'].astype(str).str[:6].astype(int)
-
-     prediction_df = pd.merge(fish_sell_ach,
-                              oil_price_df,
-                              on='年月',
-                              how='left')
-
-     for kind in fuel_procurement_cost_df['種別'].unique():
-         temp_df = fuel_procurement_cost_df.loc[fuel_procurement_cost_df['種別'] == kind].drop('種別', axis=1)
-         temp_df = temp_df.rename(columns={temp_df.columns[0]: '年月'})
-         for i in temp_df.columns:
-             if i != '年月':
-                 temp_df = temp_df.rename(columns={i: f'{i}_{kind}_lag1'})
-         temp_df['年月'] = pd.to_datetime(temp_df['年月'], format='%Y%m')
-         temp_df['年月'] = temp_df['年月'].apply(lambda x: x + relativedelta(months=1))
-         temp_df['年月'] = temp_df['年月'].apply(lambda x: ''.join(str(x).split('-'))[:6]).astype(int)
-         prediction_df = pd.merge(prediction_df,
-                                  temp_df,
-                                  on='年月')
-     prediction_df = prediction_df.rename(columns={'date': 'forecast_point'})
-
-     return prediction_df
-
-
- def prediction_to_dr(oil_price_url, fuel_procurement_cost_url):
-     today = datetime.datetime.now()
-     last_prediction_result = pd.read_csv('data/prediction_result.csv')
-     last_time_fish_arch = pd.read_csv('data/fish_sell_ach.csv')
-     # Skip when a prediction with today's forecast point already exists
-     # or today's actuals have already been stored
-     if (str(last_prediction_result['forecast_point'].max()) == today.strftime('%Y%m%d')) or \
-             (str(last_time_fish_arch['date'].max()) == today.strftime('%Y%m%d')):
-         pass
-
-     else:
-         start_date = pd.to_datetime(str(last_time_fish_arch['date'].max()))
-         end_date = pd.to_datetime(today + relativedelta(days=1))
-         use_fish_list = config['use_fish_list']
-         temp_sell_ach = get_fish_qty.get_fish_price_data(start_date, end_date, use_fish_list)
-         temp_sell_ach['date'] = temp_sell_ach['date'].astype(int)
-         # Do nothing if today's market report has not been published yet
-         if str(temp_sell_ach['date'].max()) != today.strftime('%Y%m%d'):
-             pass
-
-         else:
-             temp_sell_ach = pd.concat([last_time_fish_arch,
-                                        temp_sell_ach.loc[~temp_sell_ach['date'].isin(last_time_fish_arch['date'].unique())]])
-             temp_sell_ach.to_csv('data/fish_sell_ach.csv', index=False)
-             prediction_df = create_prediction_data(temp_sell_ach,
-                                                    oil_price_url,
-                                                    fuel_procurement_cost_url)
-             # Keep only forecast points newer than the last stored prediction and not in the future
-             prediction_df = prediction_df.loc[(prediction_df['forecast_point'].astype(int) > last_prediction_result['forecast_point'].max())
-                                               & (prediction_df['forecast_point'].astype(int) <= int(today.strftime('%Y%m%d')))].reset_index(drop=True)
-
-             print(prediction_df)  # display() only exists in notebooks; print() works in scripts
-             DEPLOYMENT_ID = '640d791796a6a52d92c368a0'
-             prediction_df.to_csv('data/temp_prediction.csv', index=False)
-             prediction_json = dr_prediction_deployment.main('data/temp_prediction.csv', DEPLOYMENT_ID)
-
-             prediction_result = pd.DataFrame({
-                 'target_date': prediction_df['target_date'],
-                 'forecast_point': prediction_df['forecast_point'],
-                 '電気代': pd.json_normalize(prediction_json['data'])['prediction']
-             })
-
-             prediction_result = pd.merge(prediction_df,
-                                          prediction_result,
-                                          on=['target_date', 'forecast_point'])
-
-             last_prediction_result = pd.concat([last_prediction_result,
-                                                 prediction_result])
-
-             last_prediction_result.to_csv('data/prediction_result.csv', index=False)
-
-     return last_prediction_result
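
A minimal sketch of the daily batch run (the two URL keys are the ones read from config/config.yaml elsewhere in this repo):

import yaml
from function import prediction_func

with open('config/config.yaml') as file:
    config = yaml.safe_load(file.read())

result = prediction_func.prediction_to_dr(config['oil_price_url'],
                                          config['fuel_procurement_cost_url'])
print(result.tail())
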
function/train_modeling.py DELETED
@@ -1,221 +0,0 @@
- import pandas as pd
- import numpy as np
- import gradio as gr
- import datetime
- from dateutil.relativedelta import relativedelta
- import datarobot as dr
- from function import get_fish_qty, get_estat, dr_prediction_deployment
-
- import yaml
- with open('config/config.yaml') as file:
-     config = yaml.safe_load(file.read())
-
-
- def create_train_data():
-     # Extract the target (electricity cost) from the household survey
-     household_survey = get_estat.get_household_survey()
-     expence_df = pd.DataFrame({'年月': household_survey['時間軸(月次)'].unique()})
-     cate = '3.1 電気代'
-     temp_df = household_survey.loc[household_survey['品目分類(2020年改定)'] == cate]
-     unit = temp_df['unit'].unique()[0]
-     temp_df = temp_df.rename(columns={'value': f'{cate}_({unit})'})
-     expence_df = pd.merge(expence_df,
-                           temp_df[['時間軸(月次)', f'{cate}_({unit})']].rename(columns={'時間軸(月次)': '年月'}),
-                           on='年月',
-                           how='left')
-     expence_df = expence_df.rename(columns={'3.1 電気代_(円)': '電気代'})
-     expence_df['年月'] = pd.to_datetime(expence_df['年月'], format='%Y年%m月').astype(str).apply(lambda x: ''.join(x.split('-'))[:6]).astype(int)
-
-     # Build the crude oil price features
-     oil_price_df = pd.read_excel(config['oil_price_url'], header=5)
-     oil_price_df = oil_price_df.rename(columns={oil_price_df.columns[0]: '年'})
-     oil_price_df['年'] = oil_price_df['年'].interpolate(method='ffill')
-     oil_price_df['年月'] = oil_price_df['年'] + oil_price_df['月'].astype(str) + '月'
-     oil_price_df['年月'] = pd.to_datetime(oil_price_df['年月'], format='%Y年%m月').astype(str).apply(lambda x: ''.join(x.split('-'))[:6]).astype(int)
-
-     # Build the fuel procurement cost features
-     fuel_procurement_cost_df = pd.read_excel(config['fuel_procurement_cost_url'], header=4)
-     fuel_procurement_cost_df = fuel_procurement_cost_df.iloc[:, 3:]
-     for i in fuel_procurement_cost_df.columns:
-         if '\n' in i:
-             fuel_procurement_cost_df = fuel_procurement_cost_df.rename(columns={i: i.replace('\n', '')})
-
-     fuel_procurement_cost_df['燃料費調整単価適用期間'] = fuel_procurement_cost_df['燃料費調整単価適用期間'].interpolate(method='ffill')
-     fuel_procurement_cost_df['燃料費調整単価適用期間'] = pd.to_datetime(fuel_procurement_cost_df['燃料費調整単価適用期間'],
-                                                      format='%Y年\n%m月').astype(str).apply(lambda x: ''.join(x.split('-'))[:6]).astype(int)
-     for kind in fuel_procurement_cost_df['種別'].unique():
-         temp_df = fuel_procurement_cost_df.loc[fuel_procurement_cost_df['種別'] == kind].drop('種別', axis=1)
-         temp_df = temp_df.rename(columns={temp_df.columns[0]: '年月'})
-         for i in temp_df.columns:
-             if i != '年月':
-                 temp_df = temp_df.rename(columns={i: f'{i}_{kind}_lag1'})
-                 temp_df[f'{i}_{kind}_lag1'] = temp_df[f'{i}_{kind}_lag1'].shift(1)
-         expence_df = pd.merge(expence_df,
-                               temp_df,
-                               on='年月',
-                               how='left')
-
-     # Join the datasets
-     oil_price_df[['ブレント_lag3', 'ドバイ_lag3', 'WTI_lag3', 'OPECバスケット_lag3']] = oil_price_df[['ブレント', 'ドバイ', 'WTI', 'OPECバスケット']].shift(3)
-     expence_df = pd.merge(expence_df,
-                           oil_price_df[['ブレント_lag3', 'ドバイ_lag3', 'WTI_lag3', 'OPECバスケット_lag3', '年月']],
-                           on='年月',
-                           how='left')
-
-     # Load the fish wholesale data and append the latest days
-     last_time_fish_arch = pd.read_csv('data/fish_sell_ach.csv')
-     start_date = pd.to_datetime(str(int(last_time_fish_arch['date'].max())))
-     today = datetime.datetime.now()
-     end_date = pd.to_datetime(today + relativedelta(days=1))
-     use_fish_list = config['use_fish_list']
-     temp_sell_ach = get_fish_qty.get_fish_price_data(start_date, end_date, use_fish_list)
-     temp_sell_ach['date'] = temp_sell_ach['date'].astype(int)
-     sell_ach = pd.concat([last_time_fish_arch,
-                           temp_sell_ach.loc[~temp_sell_ach['date'].isin(last_time_fish_arch['date'].unique())]])
-     sell_ach.to_csv('data/fish_sell_ach.csv', index=False)
-
-     # Build the training data
-     sell_ach['target_date'] = sell_ach['date'].apply(lambda x: int((pd.to_datetime(str(x)) + relativedelta(months=1)).strftime('%Y%m%d')))
-     sell_ach['年月'] = sell_ach['target_date'].astype(str).str[:6].astype(int)
-
-     col_list = ['するめいか_卸売数量計(kg)',
-                 'いわし_卸売数量計(kg)',
-                 'ぶり・わらさ_卸売数量計(kg)',
-                 '冷さけ_卸売数量計(kg)',
-                 '塩さけ_卸売数量計(kg)',
-                 'さけます類_卸売数量計(kg)',
-                 '全卸売数量計(kg)']
-
-     for shift_i in [7, 14, 21, 28]:
-         change_col_list = [f'{i}_lag{shift_i}' for i in col_list]
-         sell_ach[change_col_list] = sell_ach[col_list].shift(shift_i)
-
-     sell_ach = sell_ach.rename(columns={'date': 'forecast_point'})
-     train_df = pd.merge(expence_df,
-                         sell_ach,
-                         on='年月')
-     train_df.to_csv('data/train.csv', index=False)
-
-     return train_df
-
-
- def modeling():
-     train_df = create_train_data()
-     # Settings required for modeling
-     ## DataRobot connection settings
-     token = 'NjQwMDVmNGI0ZDQzZDFhYzI2YThmZDJiOnVZejljTXFNTXNoUnlKMStoUFhXSFdYMEZRck9lY3dobnEvRFZ1aVBHbVE9'
-     ### This appears to be the demo environment endpoint
-     endpoint = 'https://app.datarobot.com/api/v2'
-
-     ## Project name
-     project_name = f'{datetime.datetime.now().strftime("%Y%m%d")}_ESTYLEU_電気代予測_再学習'
-
-     ## Settings
-     ### Features
-     target = '電気代'
-     feature_timeline = 'target_date'  # time axis
-     not_use_feature = ['年月', 'forecast_point']
-     ### Optimization metric
-     metric = 'RMSE'
-     ### Gap (ISO 8601 duration; P0Y means no gap)
-     gap = 'P0Y'
-     ### Number of backtests
-     number_of_backtests = 1
-     end_date = int(train_df[feature_timeline].max())
-     ### Partition dates
-     holdout_end_date = pd.to_datetime(str(end_date))
-     holdout_start_date = holdout_end_date - relativedelta(years=1)
-     backtest_end_date = holdout_start_date - relativedelta(days=1)
-     backtest_start_date = backtest_end_date - relativedelta(years=1)
-     train_end_date = backtest_start_date - relativedelta(days=1)
-     train_start_date = pd.to_datetime(str(int(train_df[feature_timeline].min())))
-
-     ### Modeling mode
-     # mode = dr.AUTOPILOT_MODE.QUICK
-     mode = dr.AUTOPILOT_MODE.FULL_AUTO
-     dr.Client(
-         endpoint=endpoint,
-         token=token
-     )
-
-     # Backtest configuration
-     backtests_setting = [dr.BacktestSpecification(
-         index=0,
-         primary_training_start_date=train_start_date,
-         primary_training_end_date=train_end_date,
-         validation_start_date=backtest_start_date,
-         validation_end_date=backtest_end_date
-     )]
-
-     spec = dr.DatetimePartitioningSpecification(
-         feature_timeline,
-         use_time_series=False,
-         disable_holdout=False,
-         holdout_start_date=holdout_start_date,
-         holdout_end_date=holdout_end_date,
-         gap_duration=gap,
-         number_of_backtests=number_of_backtests,
-         backtests=backtests_setting,
-     )
-
-     use_feature_list = train_df.columns.to_list()
-
-     print('now creating project')
-     project = dr.Project.create(
-         train_df,
-         project_name=project_name
-     )
-
-     # Collect the date features DataRobot derives from the time axis
-     raw = [feat_list for feat_list in project.get_featurelists() if feat_list.name == 'Informative Features'][0]
-     raw_features = [feat for feat in raw.features if f'{feature_timeline} ' in feat]
-
-     for i in not_use_feature:
-         if i in use_feature_list:
-             use_feature_list.remove(i)
-
-     # list.extend() mutates in place and returns None, so do not reassign the result
-     use_feature_list.extend(raw_features)
-     print("start modeling")
-     project.analyze_and_model(
-         target=target,
-         mode=mode,
-         metric=metric,  # previously defined but never passed; now applied as the optimization metric
-         partitioning_method=spec,
-         max_wait=3000,
-         worker_count=-1,
-         featurelist_id=project.create_featurelist('モデリング', use_feature_list).id
-     )
-     project.wait_for_autopilot()
-     project.unlock_holdout()
-
-     model_df = pd.DataFrame(
-         [[model.id,
-           model.model_type,
-           model.metrics['RMSE']['validation'],
-           model.metrics['RMSE']['backtesting'],
-           model.metrics['RMSE']['holdout'],
-           model] for model in project.get_datetime_models() if model.model_type != 'Baseline Predictions Using Most Recent Value'],
-         columns=['ID', 'モデル名', 'バックテスト1', '全てのバックテスト', 'holdout', 'model'])
-     model_df = model_df.sort_values('holdout').reset_index(drop=True)
-
-     # Pick the model with the best (lowest) holdout RMSE
-     model = model_df['model'][0]
-
-     try:
-         model_management_df = pd.read_csv('data/model_management.csv')
-     except FileNotFoundError:
-         model_management_df = pd.DataFrame()
-
-     temp_model_management_df = pd.DataFrame({
-         '作成日': [int(datetime.datetime.now().strftime('%Y%m%d'))],
-         '作成時間': [int(datetime.datetime.now().strftime('%H%M%S'))],
-         'project_url': [project.get_uri()],
-         'model_url': [model.get_uri()],
-         'model_type': [model.model_type]
-     })
-
-     model_management_df = pd.concat([model_management_df,
-                                      temp_model_management_df])
-     model_management_df.to_csv('data/model_management.csv', index=False)
-
-     # Replace the deployed model with the newly trained one
-     deployment = dr.Deployment.get(deployment_id='640d791796a6a52d92c368a0')
-
-     deployment.replace_model(model.id, dr.enums.MODEL_REPLACEMENT_REASON.SCHEDULED_REFRESH)
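
A minimal sketch of kicking off a retrain (this runs a FULL_AUTO Autopilot, so expect a long run; the deployment swap at the end happens automatically):

from function import train_modeling

# Rebuilds data/train.csv, runs Autopilot, then replaces the deployed model
train_modeling.modeling()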