import codecs
import io
import random
import time
from datetime import date, timedelta
from typing import Generator, Sequence, Tuple

import numpy as np
import pandas as pd
import requests
from tqdm import tqdm


def date_range(
    start: date, stop: date, step: timedelta = timedelta(1)
) -> Generator[date, None, None]:
    """Yield dates from ``start`` (inclusive) to ``stop`` (exclusive).

    :param start: first date produced
    :param stop: upper bound; never produced itself
    :param step: distance between consecutive dates (one day by default)
    :raises ValueError: if ``step`` is zero or negative, which would
        otherwise make the loop run forever
    """
    if step <= timedelta(0):
        raise ValueError("step must be a positive timedelta")
    current = start
    while current < stop:
        yield current
        current += step


def get_url(download_date: date) -> Tuple[str, str]:
    """Return the CSV URL for ``download_date`` and the date as ``YYYYMMDD``."""
    month = download_date.strftime("%Y%m")
    day = download_date.strftime("%Y%m%d")
    return (
        f"https://www.shijou-nippo.metro.tokyo.lg.jp/SN/{month}/{day}/Sui/Sui_K1.csv",
        day,
    )


def content_wrap(content: bytes) -> io.StringIO:
    """Decode Shift-JIS bytes and drop every line before the header row.

    The header row is the first line containing "品名". That line and all
    lines after it are kept; if no such line exists the returned buffer is
    empty.

    :param content: raw response body
    :return: text buffer positioned at the header row
    """
    kept_lines = []
    header_seen = False
    for raw_line in io.BytesIO(content):
        line_str = codecs.decode(raw_line, "shift-jis")
        if not header_seen:
            if "品名" not in line_str:
                continue
            header_seen = True
        kept_lines.append(line_str)
    return io.StringIO("".join(kept_lines))


def to_numeric(x):
    """Convert ``x`` to ``float`` when it is a string; return it unchanged otherwise."""
    if isinstance(x, str):
        return float(x)
    return x


def get_fish_price_data(
    start_date: date,
    end_date: date,
    use_fish_list: Sequence[str],
    *,
    timeout: float = 30.0,
) -> pd.DataFrame:
    """Download daily wholesale quantities from the Tokyo wholesale market site.

    One CSV is requested per day in ``[start_date, end_date)``. Days whose
    URL answers 404 are skipped. For every remaining day one row is
    produced holding the summed "卸売数量計" of each requested item plus the
    overall total.

    :param start_date: first day to download (inclusive)
    :param end_date: day to stop at (exclusive)
    :param use_fish_list: item names matched exactly against the "品名" column
    :param timeout: seconds to wait for each HTTP request before giving up
    :return: one row per downloaded day with columns ``date``,
        ``<item>_卸売数量計(kg)`` for each item, and ``全卸売数量計(kg)``
    :raises AssertionError: if the server answers with a status other than
        200 or 404
    """
    columns = (
        ["date"]
        + [name + "_卸売数量計(kg)" for name in use_fish_list]
        + ["全卸売数量計(kg)"]
    )
    fish_qty_df = pd.DataFrame(columns=columns)
    daily_frames = []

    iterator = tqdm(
        date_range(start_date, end_date),
        total=max((end_date - start_date).days, 0),
    )
    for download_date in iterator:
        url, day = get_url(download_date)
        iterator.set_description(day)
        response = requests.get(url, timeout=timeout)

        # Nothing is published at this URL for the date: skip it.
        if response.status_code == 404:
            continue
        # Raised explicitly (not via ``assert``) so the check survives
        # ``python -O``; the exception type and message are unchanged.
        if response.status_code != 200:
            raise AssertionError(
                f"Unexpected HTTP response. Please check the website {url}."
            )

        df = pd.read_csv(content_wrap(response.content))

        row = {"date": day}
        for name in use_fish_list:
            # Summing an empty selection yields 0, so items missing from
            # the day's report need no special case.
            row[f"{name}_卸売数量計(kg)"] = df.loc[
                df["品名"] == name, "卸売数量計"
            ].sum()
        # Last non-missing value of the column; presumably the grand-total
        # row at the bottom of the report -- TODO confirm against the CSV.
        row["全卸売数量計(kg)"] = df["卸売数量計"].dropna().iloc[-1]
        daily_frames.append(pd.DataFrame([row]))

        # Randomised pause between downloads, never shorter than 0.1 s.
        time.sleep(max(0.5 + random.normalvariate(0, 0.3), 0.1))

    # Concatenate once instead of on every iteration to avoid re-copying
    # the accumulated frame for each day.
    return pd.concat([fish_qty_df, *daily_frames])