Spaces:
Runtime error
Runtime error
masa729406
committed on
Commit
·
83738aa
1
Parent(s):
e683c9b
Update app.py
Browse files
app.py
CHANGED
@@ -20,6 +20,160 @@ import requests
|
|
20 |
from bs4 import BeautifulSoup as bs
|
21 |
from requests_html import AsyncHTMLSession
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
# Fetch the web page to parse (Kyoto Sanga match list on football-lab)
load_url = "https://www.football-lab.jp/kyot/match/"
html = requests.get(load_url)  # NOTE(review): holds a requests.Response object, not HTML text
|
|
|
20 |
from bs4 import BeautifulSoup as bs
|
21 |
from requests_html import AsyncHTMLSession
|
22 |
|
23 |
+
def date_range(
    start: date, stop: date, step: timedelta = timedelta(1)
) -> Generator[date, None, None]:
    """Yield dates from *start* (inclusive) up to *stop* (exclusive), *step* days apart."""
    offset = timedelta(0)
    while start + offset < stop:
        yield start + offset
        offset += step
|
31 |
+
|
32 |
+
|
33 |
+
def get_url(download_date: date) -> Tuple[str, str]:
    """Return the daily-report CSV URL and the yyyymmdd string for *download_date*."""
    day_str = download_date.strftime("%Y%m%d")
    # The month path segment is simply the first six characters (yyyymm).
    month_str = day_str[:6]
    url = f"https://www.shijou-nippo.metro.tokyo.lg.jp/SN/{month_str}/{day_str}/Sui/Sui_K1.csv"
    return url, day_str
|
41 |
+
|
42 |
+
|
43 |
+
def content_wrap(content):
    """Decode a Shift-JIS CSV payload, dropping everything before the header row.

    The header row is the first line containing "品名"; that line and every
    line after it are returned wrapped in a StringIO (empty if never found).
    """
    kept = []
    header_seen = False
    for raw_line in io.BytesIO(content):
        text = codecs.decode(raw_line, "shift-jis")
        if not header_seen:
            if "品名" not in text:
                continue
            header_seen = True
        kept.append(text)
    return io.StringIO("".join(kept))
|
58 |
+
|
59 |
+
|
60 |
+
def insert_data(data, day, low_price, center_price, high_price, quantity):
    """Append one day's record to each list in the *data* dict (in place)."""
    record = {
        "date": day,
        "low_price": low_price,
        "center_price": center_price,
        "high_price": high_price,
        "quantity": quantity,
    }
    for key, value in record.items():
        data[key].append(value)
|
67 |
+
|
68 |
+
|
69 |
+
def to_numeric(x):
    """Convert a string to float; pass every other value through unchanged."""
    return float(x) if isinstance(x, str) else x
|
75 |
+
|
76 |
+
|
77 |
+
def get_fish_price_data(start_date: date, end_date: date) -> pd.core.frame.DataFrame:
    """
    Download daily fish-market reports from the Tokyo wholesale market site
    and collect the horse-mackerel ("あじ") price/quantity per day.

    :param start_date: first day to download (inclusive)
    :param end_date: end of the range (exclusive, see date_range)
    :return: DataFrame with columns date, low_price, center_price, high_price, quantity
    """
    data = {
        "date": [],
        "low_price": [],
        "center_price": [],
        "high_price": [],
        "quantity": [],
    }
    # tqdm progress bar over the day range; description shows the current yyyymmdd.
    iterator = tqdm(
        date_range(start_date, end_date), total=(end_date - start_date).days
    )

    for download_date in iterator:
        url, day = get_url(download_date)
        iterator.set_description(day)
        response = requests.get(url)

        # URL does not exist (no report published for that day) -> record as missing
        if response.status_code == 404:
            insert_data(data, day, np.nan, np.nan, np.nan, 0)
            continue
        # NOTE(review): assert is stripped under `python -O`; a raise would be safer.
        assert (
            response.status_code == 200
        ), f"Unexpected HTTP response. Please check the website {url}."

        df = pd.read_csv(content_wrap(response.content))

        # Missing-value cleanup: "-" / "−" (ASCII hyphen and minus sign) placeholders -> NaN
        price_cols = ["安値(円)", "中値(円)", "高値(円)"]
        for c in price_cols:
            df[c].mask(df[c] == "-", np.nan, inplace=True)
            df[c].mask(df[c] == "−", np.nan, inplace=True)
        df["卸売数量"].mask(df["卸売数量"] == "-", np.nan, inplace=True)
        df["卸売数量"].mask(df["卸売数量"] == "−", np.nan, inplace=True)

        # Keep only rows whose item name (品名) is あじ: quantity + the three price columns
        df_aji = df.loc[df["品名"] == "あじ", ["卸売数量"] + price_cols]

        # No あじ sales that day -> record as missing
        if len(df_aji) == 0:
            insert_data(data, day, np.nan, np.nan, np.nan, 0)
            continue

        isnan = lambda x: isinstance(x, float) and np.isnan(x)
        # Walk the per-row sales records (presumably one row per production area)
        low_prices = []
        center_prices = []
        high_prices = []
        quantities = []
        for i, row in enumerate(df_aji.iloc):
            lp, cp, hp, q = row[price_cols + ["卸売数量"]]
            lp, cp, hp, q = (
                to_numeric(lp),
                to_numeric(cp),
                to_numeric(hp),
                to_numeric(q),
            )

            # Only the center price recorded -> single price band; reuse it for low/high
            if isnan(lp) and isnan(hp) and (not isnan(cp)):
                low_prices.append(cp)
                center_prices.append(cp)
                high_prices.append(cp)

            # Low and high present but no center -> two bands; use their mean as the center
            elif (not isnan(lp)) and (not isnan(hp)) and isnan(cp):
                low_prices.append(lp)
                center_prices.append((lp + hp) / 2)
                high_prices.append(hp)
            else:
                low_prices.append(lp)
                center_prices.append(cp)
                high_prices.append(hp)

            # Missing quantity counts as zero
            if isnan(row["卸売数量"]):
                quantities.append(0)
            else:
                quantities.append(q)

        # Aggregate across rows: overall min/max, mean of centers, total quantity
        low_price = int(min(low_prices))
        center_price = int(sum(center_prices) / len(center_prices))
        high_price = int(max(high_prices))
        quantity = int(float(sum(quantities)))

        # Save this day's aggregated record
        insert_data(data, day, low_price, center_price, high_price, quantity)
        # Cool-down so requests are not fired in rapid succession
        time.sleep(max(0.5 + random.normalvariate(0, 0.3), 0.1))
    # Build the result DataFrame
    df = pd.DataFrame(data)
    return df
|
176 |
+
|
177 |
# Fetch the web page to parse (Kyoto Sanga match list on football-lab)
load_url = "https://www.football-lab.jp/kyot/match/"
html = requests.get(load_url)  # NOTE(review): holds a requests.Response object, not HTML text
|