masa729406 committed on
Commit
359e36e
·
1 Parent(s): 23d8057

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -22
app.py CHANGED
@@ -8,37 +8,131 @@ matplotlib.use("Agg")
8
 
9
  import matplotlib.pyplot as plt
10
  import numpy as np
11
- # import plotly.express as px
12
  import pandas as pd
13
 
14
- df_pre = pd.read_csv('data')
15
- y = df_pre['audience']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- filename = 'model.pkl'
18
- loaded_rf_model = pickle.load(open(filename, 'rb'))
19
- X_test = pd.DataFrame(
20
- data={'saba': [300, 200, 3030, 400],
21
- 'date_ymd': [20230328, 20230329, 20230330, 20230331]}
22
- )
23
- y_pred_ = loaded_rf_model.predict(X_test, num_iteration=gbm.best_iteration)
24
 
25
  def outbreak(date):
26
  if date:
27
- # start_day = start_day
28
- # final_day = date
29
- # x = np.arange(start_day, final_day + 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  fig = plt.figure()
31
- plt.plot(y_pred_)
32
- plt.title("audience")
 
33
  plt.ylabel("audience")
34
  plt.xlabel("Days since Day 0")
35
  return fig
36
 
37
- demo = gr.Interface(
38
- fn=outbreak,
39
- inputs='checkbox',
40
- outputs=gr.Plot()
41
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
- if __name__ == "__main__":
44
- demo.launch()
 
8
 
9
  import matplotlib.pyplot as plt
10
  import numpy as np
11
+ import plotly.express as px
12
  import pandas as pd
13
 
14
+ # Webページを取得して解析する
15
+ load_url = "https://www.football-lab.jp/kyot/match/"
16
+ html = requests.get(load_url)
17
+ soup = bs(html.content, "html.parser")
18
+
19
+ !pip install beautifulsoup4
20
+ !pip install requests_html
21
+ import requests
22
+ from bs4 import BeautifulSoup as bs
23
+ from requests_html import AsyncHTMLSession
24
+
25
+ import pandas as pd
26
+ from datetime import datetime
27
+ from IPython.display import display
28
+
29
+ url23 = 'https://www.football-lab.jp/ka-f/match/'
30
+ dfs23 = pd.read_html(url23)
31
+ url22 = 'https://www.football-lab.jp/ka-f/match/?year=2022'
32
+ dfs22 = pd.read_html(url22)
33
+ url21 = 'https://www.football-lab.jp/ka-f/match/?year=2021'
34
+ dfs21 = pd.read_html(url21)
35
+ url20 = 'https://www.football-lab.jp/ka-f/match/?year=2020'
36
+ dfs20 = pd.read_html(url20)
37
+
38
+ #シーズン毎に分類
39
+ res23 = pd.DataFrame([['S2023']]*len(dfs23[0])).join(dfs23, lsuffix='0')
40
+ res22 = pd.DataFrame([['S2022']]*len(dfs22[0])).join(dfs22, lsuffix='0')
41
+ res21 = pd.DataFrame([['S2021']]*len(dfs21[0])).join(dfs21, lsuffix='0')
42
+ res20 = pd.DataFrame([['S2020']]*len(dfs20[0])).join(dfs20, lsuffix='0')
43
+
44
+ df_tmp = pd.concat([res23, res22, res21, res20])
45
+ df = df_tmp
46
+
47
+ df = df.rename(columns={'会場': 'stadium', 0: 'year', '開催日': 'date', '観客数': 'audience'})
48
+ df = df.query('stadium=="等々力"').reset_index()
49
+ df = df.query('audience.notna()', engine='python').reset_index()
50
+ df = df[['audience', 'year', 'date']]
51
+ #seasonカラムから年を抽出
52
+ df["year"] = df["year"].apply(lambda x: str(x)[1:5])
53
+ #開催日から月と日を分割
54
+ df['month'] = df['date'].str.split(pat='.', expand=True)[0]
55
+ df['day'] = df['date'].str.split(pat='.', expand=True)[1]
56
+ #数値データを日付データに変換
57
+ df['date'] = pd.to_datetime({'year': df['year'], 'month': df['month'], 'day': df['day']})
58
+ #日付昇順に並び替える
59
+ df = df.sort_values('date', ascending=True)
60
+ df['date_ymd'] = pd.to_datetime(df['date']).dt.strftime('%Y%m%d')
61
+ df['date_ym'] = pd.to_datetime(df['date']).dt.strftime('%Y%m')
62
+ df["date_ymd"] = df["date_ymd"].astype(int)
63
+ df['date_before'] = df['date_ymd'] - 1
64
+ df["date_before"] = df["date_before"]
65
+ df = df[['audience', 'date_ymd', 'date_before']]
66
+ df['last_audience'] = df['audience'].shift(1)
67
+
68
+ df_aji = pd.read_csv('fish_price.csv')
69
+
70
+ df_train = pd.merge(df, df_aji, left_on='date_before', right_on='date', how='left')
71
+
72
+ df_train = df_train.query('date > 20201202')
73
+ df_train = df_train.drop(['date_before', 'date_ymd'], axis=1)
74
+ df_train["audience"] = df_train["audience"].str.replace(",", "").astype(int)
75
+ df_train["last_audience"] = df_train["last_audience"].str.replace(",", "").astype(int)
76
+
77
+ X = df_train.drop('audience', axis=1)
78
+ y = df_train['audience']
79
+
80
+ from sklearn.linear_model import LinearRegression
81
+ from sklearn.metrics import log_loss
82
+ from sklearn.preprocessing import StandardScaler
83
+ linear_regression = LinearRegression()
84
+ linear_regression.fit(X,y)
85
 
 
 
 
 
 
 
 
86
 
87
  def outbreak(date):
88
  if date:
89
+
90
+
91
+
92
+ if __name__ == "__main__":
93
+ start_date = d_today
94
+ end_date = d_tom
95
+ df_aji_pre = get_fish_price_data(start_date=start_date, end_date=end_date)
96
+ # df_aji_pre.to_csv("fish_price_pre.csv", index=False)
97
+
98
+ df_pre = df.tail(1).reset_index()
99
+ df_pre = df_pre.drop('index', axis=1)
100
+ df_aji_ft_pre = pd.concat([df_pre, df_aji_pre], axis=1)
101
+ df_aji_ft_pre = df_aji_ft_pre[['audience', 'date', 'low_price', 'center_price', 'high_price', 'quantity']]
102
+ df_aji_ft_pre = df_aji_ft_pre.rename(columns={'audience': 'last_audience', 0: 'year', '開催日': 'date', '観客数': 'audience'})
103
+
104
+ pred = linear_regression.predict(df_aji_ft_pre)
105
+ df_aji_ft_pre['audience_pred'] = pred
106
+ df_aji_ft_pre['date'] = df_aji_ft_pre['date'].astype(int)
107
+
108
  fig = plt.figure()
109
+ plt.plot(df_train['date'], df_train['audience'], label='original')
110
+ plt.plot(df_aji_ft_pre['date'], df_aji_ft_pre['audience_pred'], '*', label='predict')
111
+ plt.title("prediction of audince")
112
  plt.ylabel("audience")
113
  plt.xlabel("Days since Day 0")
114
  return fig
115
 
116
+ with gr.Blocks() as demo:
117
+ gr.Markdown(
118
+ """
119
+ # 川崎フロンターレの観客動員数の予測
120
+ 川崎フロンターレの等々力陸上競技場での試合の観客数を「あじ」の価格をもとに予測する。
121
+ ## 使用データ
122
+ * 東京卸売市場日報
123
+ * Football Lab
124
+ ## 予測ロジック
125
+ 観客動員数は雨天か否かで左右されると考えられる。そこで雨天の可能性をあじの価格を利用し表した。
126
+ 一般的に雨天の場合、低気圧の影響で海面が上昇し漁に出ることが難しくなる。
127
+ そのため漁獲量が減少し、あじの価格が上昇すると考えられる。
128
+ """
129
+ )
130
+ with gr.Row():
131
+ with gr.Column():
132
+ date_input = gr.Checkbox(label='please input date')
133
+ prediction_btn = gr.Button(value="predict")
134
+ with gr.Column():
135
+ prediction = gr.Plot(label = "時系列プロット")
136
+ prediction_btn.click(outbreak, inputs=date_input, outputs=prediction)
137
 
138
+ demo.launch()