Spaces:
Runtime error
Runtime error
import codecs | |
import io | |
import random | |
import requests | |
import time | |
from datetime import date, timedelta | |
from tqdm import tqdm | |
from typing import Generator, Tuple | |
import numpy as np | |
import pandas as pd | |
def date_range(
    start: date, stop: date, step: timedelta = timedelta(1)
) -> Generator[date, None, None]:
    """Yield dates from ``start`` up to, but not including, ``stop``.

    :param start: first date yielded (only if it is earlier than ``stop``)
    :param stop: exclusive upper bound of the range
    :param step: increment between consecutive dates; must be positive
    :raises ValueError: if ``step`` is zero or negative. Because this is a
        generator, the error surfaces when iteration starts.
    """
    # A non-positive step never moves ``current`` past ``stop``, which would
    # turn the loop below into an infinite generator.
    if step <= timedelta(0):
        raise ValueError("step must be a positive timedelta")
    current = start
    while current < stop:
        yield current
        current += step
def get_url(download_date: date) -> Tuple[str, str]:
    """Return the CSV download URL for a date together with the date string.

    The second element of the tuple is ``download_date`` formatted as
    ``%Y%m%d``; the URL embeds both that string and the ``%Y%m`` month string.
    """
    day = format(download_date, "%Y%m%d")
    month = format(download_date, "%Y%m")
    url = f"https://www.shijou-nippo.metro.tokyo.lg.jp/SN/{month}/{day}/Sui/Sui_K1.csv"
    return url, day
def content_wrap(content):
    """Skip leading lines until the header row and return the rest as text.

    Every line of ``content`` is decoded as Shift-JIS. Lines are dropped until
    the first one that contains "品名"; that line and everything after it are
    kept. If no line contains "品名", the returned buffer is empty.
    """
    kept_lines = []
    header_seen = False
    for raw_line in io.BytesIO(content):
        text = codecs.decode(raw_line, "shift-jis")
        # Still in the preamble: nothing is kept until the header shows up.
        if not header_seen and "品名" not in text:
            continue
        header_seen = True
        kept_lines.append(text)
    return io.StringIO("".join(kept_lines))
def to_numeric(x):
    """Convert a string to ``float``; return any non-string value unchanged."""
    return float(x) if isinstance(x, str) else x
def get_fish_price_data(start_date: date, end_date: date, use_fish_list) -> pd.DataFrame:
    """
    Download daily wholesale quantities from the Tokyo wholesale market site.

    One CSV is requested for each date produced by
    ``date_range(start_date, end_date)``. Dates whose URL answers 404 are
    skipped and contribute no row.

    :param start_date: first date to download
    :param end_date: date at which downloading stops
    :param use_fish_list: item names, compared for equality against the
        "品名" column of each CSV
    :return: one row per downloaded day with the columns ``date`` (the date
        string returned by ``get_url``), ``<name>_卸売数量計(kg)`` for each
        requested name, and ``全卸売数量計(kg)``
    :raises AssertionError: if the server answers with a status other than
        200 or 404
    """
    columns = ['date'] + [i + '_卸売数量計(kg)' for i in use_fish_list] + ['全卸売数量計(kg)']
    fish_qty_df = pd.DataFrame(columns=columns)
    # Per-day frames are collected and concatenated once at the end rather
    # than re-copying the growing result on every iteration.
    daily_frames = []
    iterator = tqdm(
        date_range(start_date, end_date), total=(end_date - start_date).days
    )
    for download_date in iterator:
        url, day = get_url(download_date)
        iterator.set_description(day)
        # A timeout keeps one stalled connection from blocking the whole run.
        response = requests.get(url, timeout=30)
        # The URL does not exist for this date.
        if response.status_code == 404:
            continue
        # Raised explicitly (not via ``assert``) so the check survives ``-O``.
        if response.status_code != 200:
            raise AssertionError(
                f"Unexpected HTTP response. Please check the website {url}."
            )
        df = pd.read_csv(content_wrap(response.content))
        temp_df = pd.DataFrame([{'date': day}])
        for i in use_fish_list:
            # The sum over an empty selection is already 0, so an item that
            # is absent on this day needs no special case.
            temp_df[f'{i}_卸売数量計(kg)'] = df.loc[df["品名"] == i, '卸売数量計'].sum()
        # Last non-null value of the column.
        # NOTE(review): presumably the grand-total row of the CSV - confirm.
        all_qty = df[['卸売数量計']].dropna().values[-1][0]
        temp_df['全卸売数量計(kg)'] = all_qty
        daily_frames.append(temp_df)
        # Randomised pause between successful downloads (at least 0.1 s).
        time.sleep(max(0.5 + random.normalvariate(0, 0.3), 0.1))
    return pd.concat([fish_qty_df, *daily_frames])