File size: 41,541 Bytes
ea70b68 d18e287 ea70b68 d18e287 ea70b68 d18e287 ea70b68 d18e287 ea70b68 d18e287 ea70b68 d18e287 ea70b68 d18e287 ea70b68 d18e287 ea70b68 d18e287 ea70b68 d18e287 ea70b68 d18e287 ea70b68 d18e287 ea70b68 d18e287 ea70b68 d18e287 ea70b68 d18e287 ea70b68 d18e287 ea70b68 d18e287 ea70b68 d18e287 ea70b68 d18e287 ea70b68 |
|
{
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"id": "ccdd0729-ec00-4e51-94f2-b4808d26796c",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import numpy as np\n",
"import statsmodels.api as sm\n",
"from statsmodels.tsa.seasonal import seasonal_decompose\n",
"import sys\n",
"import datetime as dt\n",
"sys.path.append(\"../\")\n",
"from model import SarimaModel\n",
"import get_fish_price"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "02a5171f-5556-440d-88c2-65aeb0215932",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"20230327: 100%|██████████| 11/11 [00:05<00:00, 1.94it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n",
"0\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/3q/40y_c73d30ndpj0wrsh_pts00000gn/T/ipykernel_9785/2639658167.py:56: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" train.dropna(subset = [\"value\"], inplace=True)\n"
]
}
],
"source": [
"# Load the stored hamachi (yellowtail) wholesale data and parse its dates.\n",
"df_hamachi = pd.read_csv(r'../data/hamachi_price.csv', encoding='utf_8_sig')\n",
"df_hamachi[\"date\"] = df_hamachi[\"date\"].apply(lambda x: pd.to_datetime(str(x)))\n",
"\n",
"today = dt.date.today()\n",
"# Kept as notebook-level names in case other cells read them; the gap filling\n",
"# further down is anchored on the newest wholesale row, not on today's date.\n",
"year = today.year\n",
"month = today.month\n",
"\n",
"# Fetch wholesale rows newer than the stored CSV and append them.\n",
"latest_stored_date = df_hamachi['date'].max().date()\n",
"if latest_stored_date < today:\n",
"    start_date = latest_stored_date + dt.timedelta(days=1)\n",
"    temp_df = get_fish_price.get_fish_price_data(start_date=start_date, end_date=today)\n",
"    temp_df[\"date\"] = temp_df[\"date\"].apply(lambda x: pd.to_datetime(str(x)))\n",
"    df_hamachi = pd.concat([df_hamachi, temp_df])\n",
"    # NOTE(review): the refreshed frame is written to hamachi_price2.csv, not to the\n",
"    # file read above, so the next run fetches the same range again - confirm intended.\n",
"    df_hamachi.to_csv(r'../data/hamachi_price2.csv', encoding='utf_8_sig', index=False)\n",
"\n",
"df_hamachi[\"month\"] = df_hamachi[\"date\"].dt.month\n",
"df_hamachi[\"year\"] = df_hamachi[\"date\"].dt.year\n",
"# Newest wholesale row (assumes the frame is in ascending date order - TODO confirm).\n",
"df_hamachi_latest = df_hamachi.tail(1)\n",
"latest_year = int(df_hamachi_latest[\"year\"].iloc[0])\n",
"latest_month = int(df_hamachi_latest[\"month\"].iloc[0])\n",
"\n",
"# Retail price survey data (one row per month).\n",
"df_FEH = pd.read_csv(\"../data/FEH_buri.csv\")\n",
"df_FEH[\"時間軸(月)\"] = pd.to_datetime(df_FEH[\"時間軸(月)\"], format='%Y年%m月')\n",
"df_FEH[\"year\"] = df_FEH[\"時間軸(月)\"].dt.year\n",
"df_FEH[\"month\"] = df_FEH[\"時間軸(月)\"].dt.month\n",
"df_FEH = df_FEH.sort_values(by=[\"year\", \"month\"], ascending=False)\n",
"# Newest retail survey row: first row after the descending sort.\n",
"df_FEH_latest = df_FEH.head(1)\n",
"# Positional access on purpose: after sorting, the newest row keeps whatever index\n",
"# label it had in the CSV, so a label lookup with 0 is not guaranteed to find it.\n",
"latest_feh_value = df_FEH_latest[\"value\"].iloc[0]\n",
"# Number of months by which the wholesale data runs ahead of the retail survey.\n",
"# Computed as a signed difference so that year boundaries are handled correctly\n",
"# (2023-01 vs 2022-12 is 1 month), and clamped at 0 when the survey is newer.\n",
"delta = max(\n",
"    0,\n",
"    (latest_year - int(df_FEH_latest[\"year\"].iloc[0])) * 12\n",
"    + (latest_month - int(df_FEH_latest[\"month\"].iloc[0])),\n",
")\n",
"\n",
"# Tokyo Metropolitan Central Wholesale Market closure calendar.\n",
"df_calender = pd.read_csv(\"../data/toyosu_calender_2023.csv\")\n",
"df_calender[\"date\"] = pd.to_datetime(df_calender[\"date\"])\n",
"df_calender[\"week_day\"] = df_calender[\"date\"].dt.weekday\n",
"\n",
"# Attach the monthly retail value to every wholesale row of the same year and month.\n",
"df_hamachi = pd.merge(left=df_hamachi, right=df_FEH[[\"year\", \"month\", \"value\"]], on=[\"year\", \"month\"],\n",
"                      how=\"left\")\n",
"# Recent months not yet covered by the retail survey get the newest survey value.\n",
"for months_back in range(delta - 1, -1, -1):\n",
"    target_month = latest_month - months_back\n",
"    if target_month > 0:\n",
"        target_year = latest_year\n",
"    elif target_month > -12:\n",
"        # Wrapped into the previous calendar year.\n",
"        target_year = latest_year - 1\n",
"        target_month += 12\n",
"    else:\n",
"        # The survey file is too far behind the wholesale data to extrapolate from.\n",
"        raise ValueError(\"小売物価統計調査データを更新してください\")\n",
"    # Each comparison is parenthesised: & binds tighter than ==.\n",
"    df_hamachi.loc[\n",
"        (df_hamachi[\"year\"] == target_year) & (df_hamachi[\"month\"] == target_month),\n",
"        \"value\",\n",
"    ] = latest_feh_value\n",
"df_hamachi = df_hamachi.set_index(df_hamachi[\"date\"])\n",
"# dropna returns a new frame, which avoids mutating a slice of df_hamachi in place.\n",
"train = df_hamachi[[\"quantity\", \"value\"]].dropna(subset=[\"value\"])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "6178e933-c376-468f-844e-58149c3a54fc",
"metadata": {},
"outputs": [],
"source": [
"# Instantiate the project-local SarimaModel with the training frame, the market\n",
"# calendar frame and \"value\" as the exog argument (presumably the name of the\n",
"# exogenous column - confirm against model.py).\n",
"sarima = SarimaModel(\n",
"    train=train,\n",
"    df_calender=df_calender,\n",
"    exog=\"value\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "1367ac5b-6d4b-4d83-8f4c-c9d96c054f99",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/estyle-074/opt/anaconda3/envs/sub1/lib/python3.9/site-packages/statsmodels/tsa/base/tsa_model.py:471: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
" self._init_dates(dates, freq)\n",
"/Users/estyle-074/opt/anaconda3/envs/sub1/lib/python3.9/site-packages/statsmodels/tsa/base/tsa_model.py:471: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
" self._init_dates(dates, freq)\n",
" This problem is unconstrained.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"RUNNING THE L-BFGS-B CODE\n",
"\n",
" * * *\n",
"\n",
"Machine precision = 2.220D-16\n",
" N = 6 M = 10\n",
"\n",
"At X0 0 variables are exactly at the bounds\n",
"\n",
"At iterate 0 f= 1.08969D+01 |proj g|= 7.35336D-02\n",
"\n",
"At iterate 5 f= 1.08053D+01 |proj g|= 4.19363D-03\n",
"\n",
"At iterate 10 f= 1.08045D+01 |proj g|= 1.92586D-03\n",
"\n",
"At iterate 15 f= 1.08041D+01 |proj g|= 1.12754D-03\n",
"\n",
" * * *\n",
"\n",
"Tit = total number of iterations\n",
"Tnf = total number of function evaluations\n",
"Tnint = total number of segments explored during Cauchy searches\n",
"Skip = number of BFGS updates skipped\n",
"Nact = number of active bounds at final generalized Cauchy point\n",
"Projg = norm of the final projected gradient\n",
"F = final function value\n",
"\n",
" * * *\n",
"\n",
" N Tit Tnf Tnint Skip Nact Projg F\n",
" 6 19 22 1 0 0 2.972D-05 1.080D+01\n",
" F = 10.804142602405426 \n",
"\n",
"CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH \n"
]
}
],
"source": [
"# Fit the model built in the previous cell; the optimizer log and statsmodels\n",
"# warnings in this cell's output are emitted during this call.\n",
"sarima_fit = sarima.fit()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "18c61fc7-a2bd-4f12-bb7a-64340a9e9199",
"metadata": {},
"outputs": [],
"source": [
"# Produce predictions from the fitted result; the forecast horizon is decided\n",
"# inside SarimaModel.predict, which is not visible from this notebook.\n",
"test_pred = sarima.predict(sarima_fit)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "7921b93b-79b2-482f-abb8-0fa90e398103",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Compare the 2023 wholesale quantities with the model's predictions on one axis.\n",
"fig, ax = plt.subplots()\n",
"ax.plot(df_hamachi.loc[\"2023\"][\"quantity\"], label=\"actual\")\n",
"ax.plot(test_pred, label=\"predicted\")\n",
"# Labels and legend so the figure can be read without the surrounding code.\n",
"ax.set_xlabel(\"date\")\n",
"ax.set_ylabel(\"quantity\")\n",
"ax.set_title(\"Hamachi wholesale quantity, 2023: actual vs. SARIMA prediction\")\n",
"ax.legend()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36527aa8-fdfb-446f-beb3-df8a18e02492",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
|