Spaces:
Runtime error
Runtime error
import pandas as pd | |
import datetime as dt | |
import lightgbm as lgb | |
import pickle | |
import datetime | |
import json | |
import numpy as np | |
import requests | |
from bs4 import BeautifulSoup #ダウンロードしてなかったらpipでできるからやってね。 | |
import csv | |
def str2float(str):
    """Convert a scraped cell value to ``float``, mapping unparseable input to 0.0.

    The weather table this is used on marks unavailable observations with
    non-numeric placeholders such as "/"; those (and ``None`` or empty
    strings) become ``0.0`` instead of raising.

    Args:
        str: The raw value to convert. The parameter keeps its original name
            (which shadows the builtin) so keyword callers continue to work.

    Returns:
        ``float(str)`` when the conversion succeeds, otherwise ``0.0``.
    """
    try:
        return float(str)
    except (TypeError, ValueError):
        # TypeError: None / non-numeric objects; ValueError: text such as "/".
        return 0.0
def rename_multicol(df):
    """Flatten multi-level column labels into single strings.

    The level values of each column label are concatenated without a
    separator, e.g. ``("temp", "max")`` becomes ``"tempmax"`` and
    ``("year", "")`` becomes ``"year"``. Labels that are not tuples are
    kept unchanged.

    Args:
        df: DataFrame whose columns are typically a MultiIndex, such as the
            result of ``groupby(...).agg({...}).reset_index()``.

    Returns:
        A new DataFrame with the same rows, index and dtypes as ``df`` and
        flat column labels. ``df`` itself is not modified.
    """
    flattened = df.copy()
    # Assigning the labels directly avoids a transpose round trip, which
    # would cast every column to object dtype and depend on the column
    # levels being unnamed.
    flattened.columns = [
        "".join(map(str, label)) if isinstance(label, tuple) else label
        for label in df.columns
    ]
    return flattened
# Column layout shared by the observed-weather table and both forecast tables.
_WEATHER_COLUMNS = ['年月日', '平均気温', '最高気温', '最低気温', '天気概況(昼:06時〜18時)', '天気概況(夜:18時〜翌日06時)']
_TEMPERATURE_COLUMNS = ['平均気温', '最高気温', '最低気温']
_DAYTIME_CONDITION = '天気概況(昼:06時〜18時)'
_NIGHT_CONDITION = '天気概況(夜:18時〜翌日06時)'
# Keywords counted per month in the weather summaries; the order fixes the
# order of the resulting feature columns, so it must not be changed.
_WEATHER_KEYWORDS = ["曇", "快晴", "晴", "雨", "大雨", "霧雨", "雪", "雷", "みぞれ", "後", "一時"]

_JMA_DAILY_URL = "http://www.data.jma.go.jp/obd/stats/etrn/view/daily_s1.php"
_WEATHERAPI_FORECAST_URL = "https://api.weatherapi.com/v1/forecast.json"
_WEATHERAPI_FUTURE_URL = "https://api.weatherapi.com/v1/future.json"
# NOTE(security): this API key is committed in source. It should be rotated
# and supplied through configuration instead of living in the repository.
_WEATHERAPI_KEY = "9184b04d480140f8a3c133051232903"
_HTTP_TIMEOUT_SECONDS = 30


def _fetch_observed_weather(year, month):
    """Scrape the daily observation table for Tokyo for the given month from the JMA site."""
    response = requests.get(
        _JMA_DAILY_URL,
        params={"prec_no": 44, "block_no": 47662, "year": year, "month": month, "day": 1, "view": "p1"},
        timeout=_HTTP_TIMEOUT_SECONDS,
    )
    response.raise_for_status()
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, "html.parser")
    # The first four `tr.mtx` rows hold column headers; the rest are one row per day.
    day_rows = soup.find_all('tr', class_='mtx')[4:]
    records = []
    for row in day_rows:
        cells = row.find_all('td')
        records.append([
            f"{year}/{month}/{cells[0].string}",
            str2float(cells[6].string),   # mean temperature
            str2float(cells[7].string),   # maximum temperature
            str2float(cells[8].string),   # minimum temperature
            cells[19].string,             # daytime weather summary
            cells[20].string,             # night weather summary
        ])
    observed = pd.DataFrame(records, columns=_WEATHER_COLUMNS)
    observed["年月日"] = pd.to_datetime(observed["年月日"])
    return observed.add_prefix('天気実績_')


def _forecast_row(forecast_day, weather_condition_dict):
    """Convert one WeatherAPI `forecastday` entry into a row ordered like `_WEATHER_COLUMNS`."""
    day = forecast_day["day"]
    condition = weather_condition_dict[day["condition"]["code"]]
    # The forecast has a single condition per day, so it is used for both the
    # daytime and the night summary.
    return [forecast_day["date"], day["avgtemp_c"], day["maxtemp_c"], day["mintemp_c"], condition, condition]


def _forecast_frame(rows, prefix):
    """Build a prefixed forecast DataFrame with a parsed date column from row lists."""
    frame = pd.DataFrame(rows, columns=_WEATHER_COLUMNS)
    frame["年月日"] = pd.to_datetime(frame["年月日"])
    return frame.add_prefix(prefix)


def _fetch_short_forecast(weather_condition_dict, forecast_days=14):
    """Fetch the short-range Tokyo forecast (up to `forecast_days` days) from WeatherAPI."""
    response = requests.get(
        _WEATHERAPI_FORECAST_URL,
        params={"key": _WEATHERAPI_KEY, "q": "Tokyo", "days": forecast_days},
        timeout=_HTTP_TIMEOUT_SECONDS,
    )
    response.raise_for_status()
    # Iterate over what was actually returned: the service may send fewer
    # days than requested.
    returned_days = response.json()["forecast"]["forecastday"]
    rows = [_forecast_row(day, weather_condition_dict) for day in returned_days]
    return _forecast_frame(rows, '短期天気予報_')


def _fetch_long_forecast(weather_condition_dict, now, necessary_days=20):
    """Fetch per-day long-range Tokyo forecasts starting 14 days after `now`.

    Only dates inside the month of `now` are requested, because later dates
    can never match a row of the current month's observation table.
    """
    rows = []
    for offset in range(necessary_days):
        target = now + datetime.timedelta(days=14 + offset)
        if (target.year, target.month) != (now.year, now.month):
            break
        response = requests.get(
            _WEATHERAPI_FUTURE_URL,
            params={"key": _WEATHERAPI_KEY, "q": "Tokyo", "hour": 24, "dt": target.strftime('%Y-%m-%d')},
            timeout=_HTTP_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        rows.append(_forecast_row(response.json()["forecast"]["forecastday"][0], weather_condition_dict))
    return _forecast_frame(rows, '長期天気予報_')


def _coalesce_sources(merged, column, is_missing):
    """Per day, take the observation, else the short-range, else the long-range forecast value."""
    combined = merged[f'天気実績_{column}']
    for prefix in ('短期天気予報_', '長期天気予報_'):
        combined = combined.where(~is_missing(combined), merged[prefix + column])
    return combined


def _monthly_keyword_counts(daily, column, label):
    """Count, per (year, month), the days whose summary in `column` contains each weather keyword."""
    grouped = daily.groupby(["年", "月"])[column]
    counts = None
    for keyword in _WEATHER_KEYWORDS:
        # na=False: days without any summary count as "does not contain".
        one = (
            grouped.apply(lambda summaries, kw=keyword: summaries.str.contains(kw, na=False).sum())
            .reset_index()
            .rename(columns={column: f'{label}_{keyword}'})
        )
        counts = one if counts is None else pd.merge(counts, one, on=["年", "月"])
    return counts


def predict(*, renewable_energy_levy=3.45, fuel_cost_adjustment=4.69):
    """Predict the electricity bill value for the current month.

    Combines this month's observed Tokyo weather (JMA site) with short- and
    long-range forecasts (WeatherAPI), aggregates them into monthly features,
    joins the previous year's value for the same month, and feeds the result
    to the pickled model.

    Reads from the working directory: "tokyo_monthly_electiric_bill.csv",
    "weather_conditions.csv" and "nowcast_electriic_bill_model.pickle".
    Performs HTTP requests.

    Args:
        renewable_energy_levy: Value written to the "再生エネルギー賦課金"
            feature column.
        fuel_cost_adjustment: Value written to the "燃料費調整(低圧)"
            feature column.

    Returns:
        The model's first prediction truncated to ``int``.

    Raises:
        requests.HTTPError: If one of the remote services answers with an
            error status.
        KeyError: If a forecast condition code is missing from
            "weather_conditions.csv".
    """
    # --- Local reference data -------------------------------------------
    tokyo_monthly_electric_bill_df = pd.read_csv("tokyo_monthly_electiric_bill.csv")
    weather_condition_df = pd.read_csv("weather_conditions.csv")

    month_axis = pd.to_datetime(tokyo_monthly_electric_bill_df["時間軸(月次)"], format="%Y年%m月")
    tokyo_monthly_electric_bill_df["時間軸(月次)"] = month_axis
    # Shift the year forward by one so that a join on (year, month) attaches
    # the previous year's value for the same month.
    tokyo_monthly_electric_bill_df["年"] = month_axis.dt.year + 1
    tokyo_monthly_electric_bill_df["月"] = month_axis.dt.month

    weather_condition_dict = dict(zip(weather_condition_df["code"], weather_condition_df["天候"]))

    # NOTE(security): pickle executes arbitrary code on load; only ever load
    # a model file that ships with the application.
    with open('nowcast_electriic_bill_model.pickle', 'rb') as model_file:
        model = pickle.load(model_file)

    # --- Remote weather data --------------------------------------------
    dt_now = datetime.datetime.now()
    observed_df = _fetch_observed_weather(dt_now.year, dt_now.month)
    short_forecast_weather_df = _fetch_short_forecast(weather_condition_dict)
    long_forecast_weather_df = _fetch_long_forecast(weather_condition_dict, dt_now)

    # One row per day of the observation table, forecasts attached by date.
    merged = pd.merge(observed_df, short_forecast_weather_df, left_on="天気実績_年月日", right_on="短期天気予報_年月日", how="left")
    merged = pd.merge(merged, long_forecast_weather_df, left_on="天気実績_年月日", right_on="長期天気予報_年月日", how="left")

    # --- Daily table: observation first, then forecasts ------------------
    total_weather_df = pd.DataFrame({"年月日": merged["天気実績_年月日"]})
    for column in _TEMPERATURE_COLUMNS:
        # str2float stores unavailable observations as 0.0, so 0.0 is treated
        # as missing alongside NaN (a date absent from a forecast table).
        # NOTE(review): a genuine 0.0 degree observation is therefore also
        # replaced by a forecast value.
        total_weather_df[column] = _coalesce_sources(
            merged, column, lambda values: values.isna() | (values == 0.0)
        ).astype(float)
    for column in (_DAYTIME_CONDITION, _NIGHT_CONDITION):
        total_weather_df[column] = _coalesce_sources(merged, column, pd.isna)
    total_weather_df["年"] = total_weather_df["年月日"].dt.year
    total_weather_df["月"] = total_weather_df["年月日"].dt.month

    # --- Monthly features -------------------------------------------------
    first_half_weather_df = _monthly_keyword_counts(total_weather_df, _DAYTIME_CONDITION, '日中')
    latter_half_weather_df = _monthly_keyword_counts(total_weather_df, _NIGHT_CONDITION, '夜間')
    latter_half_weather_df = latter_half_weather_df.drop(["年", "月"], axis=1)
    weather_df = pd.concat([first_half_weather_df, latter_half_weather_df], axis=1)

    temperature_agg_list = ['max', 'mean', 'min']
    total_weather_agg_df = (
        total_weather_df.groupby(["年", "月"])
        .agg({column: temperature_agg_list for column in _TEMPERATURE_COLUMNS})
        .reset_index()
    )
    total_weather_agg_df = rename_multicol(total_weather_agg_df).astype(float)

    total_weather_agg_df = pd.merge(total_weather_agg_df, tokyo_monthly_electric_bill_df, on=["年", "月"], how="left")
    total_weather_agg_df = total_weather_agg_df.rename(columns={"value": "前年同月実績"})

    # --- Model input ------------------------------------------------------
    predict_df = pd.merge(weather_df, total_weather_agg_df, on=["年", "月"])
    predict_df["再生エネルギー賦課金"] = renewable_energy_levy
    predict_df["燃料費調整(低圧)"] = fuel_cost_adjustment
    predict_df = predict_df.drop("時間軸(月次)", axis=1)

    this_month_electric_price = int(model.predict(predict_df)[0])
    return this_month_electric_price