{ "cells": [ { "cell_type": "code", "execution_count": 64, "id": "a94c4760-bcad-4c09-83e7-e5391b059b59", "metadata": {}, "outputs": [], "source": [ "import json\n", "import requests\n", "from misc import nearest_mrt\n", "import pickle\n", "import os\n", "import pandas as pd\n", "import datetime\n", "from datetime import datetime" ] }, { "cell_type": "code", "execution_count": 5, "id": "dfd76296-5048-433b-a29a-cc073dd9d814", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "loaded model\n" ] } ], "source": [ "filename = 'finalized_model2.sav'\n", "\n", "if os.path.exists(\"./finalized_model2.sav\"):\n", " model = pickle.load(open(filename, 'rb'))\n", " print('loaded model')\n", "else:\n", " print('failed loading model')" ] }, { "cell_type": "code", "execution_count": 8, "id": "361df0d9-1659-42ac-9dca-8cdde2ac3a15", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Pipeline(steps=[('columntransformer',\n",
       "                 ColumnTransformer(transformers=[('standardscaler',\n",
       "                                                  StandardScaler(),\n",
       "                                                  ['distance_mrt',\n",
       "                                                   'age_transation',\n",
       "                                                   'transaction_yr', 'Postal',\n",
       "                                                   'storey_height']),\n",
       "                                                 ('pipeline',\n",
       "                                                  Pipeline(steps=[('onehotencoder',\n",
       "                                                                   OneHotEncoder(handle_unknown='ignore',\n",
       "                                                                                 sparse_output=False))]),\n",
       "                                                  ['town', 'flat_num'])])),\n",
       "                ('xgbregressor',\n",
       "                 XGBRegressor(base_scor...\n",
       "                              feature_types=None, gamma=1, grow_policy=None,\n",
       "                              importance_type=None,\n",
       "                              interaction_constraints=None, learning_rate=None,\n",
       "                              max_bin=None, max_cat_threshold=None,\n",
       "                              max_cat_to_onehot=None, max_delta_step=None,\n",
       "                              max_depth=7, max_leaves=None,\n",
       "                              min_child_weight=None, missing=nan,\n",
       "                              monotone_constraints=None, multi_strategy=None,\n",
       "                              n_estimators=None, n_jobs=None,\n",
       "                              num_parallel_tree=None, random_state=None, ...))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "Pipeline(steps=[('columntransformer',\n", " ColumnTransformer(transformers=[('standardscaler',\n", " StandardScaler(),\n", " ['distance_mrt',\n", " 'age_transation',\n", " 'transaction_yr', 'Postal',\n", " 'storey_height']),\n", " ('pipeline',\n", " Pipeline(steps=[('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore',\n", " sparse_output=False))]),\n", " ['town', 'flat_num'])])),\n", " ('xgbregressor',\n", " XGBRegressor(base_scor...\n", " feature_types=None, gamma=1, grow_policy=None,\n", " importance_type=None,\n", " interaction_constraints=None, learning_rate=None,\n", " max_bin=None, max_cat_threshold=None,\n", " max_cat_to_onehot=None, max_delta_step=None,\n", " max_depth=7, max_leaves=None,\n", " min_child_weight=None, missing=nan,\n", " monotone_constraints=None, multi_strategy=None,\n", " n_estimators=None, n_jobs=None,\n", " num_parallel_tree=None, random_state=None, ...))])" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model" ] }, { "cell_type": "code", "execution_count": 20, "id": "e4764df8-efdf-42e9-ade6-ff8062b5bac3", "metadata": {}, "outputs": [], "source": [ "#extract feature names#\n", "feature_names = model.feature_names_in_.tolist()\n", "input = [0]*len(feature_names)" ] }, { "cell_type": "code", "execution_count": 21, "id": "9eb9aa6a-4e67-4f51-9566-775fed6ac4ff", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['distance_mrt',\n", " 'age_transation',\n", " 'transaction_yr',\n", " 'Postal',\n", " 'storey_height',\n", " 'town',\n", " 'flat_num']" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "feature_names" ] }, { "cell_type": "code", "execution_count": 22, "id": "3f2fd14c-2df7-481f-b837-502d717a892b", "metadata": {}, "outputs": [], "source": [ "#Set up mrt_list\n", "mrt_name = []\n", "mrt_loc = []\n", "with open('data/mrt_list.json', 'r') as file:\n", " for line in file:\n", " item = json.loads(line)\n", " mrt_name.append(item['MRT'])\n", " loc = tuple([float(i) for i in item['location']])\n", " mrt_loc.append(loc)" ] }, { "cell_type": "code", "execution_count": 23, "id": "b2d0339f-91bb-4514-890c-b561857af14c", "metadata": {}, "outputs": [], "source": [ "#Test input\n", "Postal_,age_,town_,storey_,room_ = 680705, 30, 'CHOA CHU KANG', 12, '5 ROOM'" ] }, { "cell_type": "code", "execution_count": 24, "id": "30e85e47-70f7-4b2a-a242-b25b00449276", "metadata": {}, "outputs": [], "source": [ "##POSTAL\n", "Postal_input = int(Postal_)\n", "# Postal_input = 680705\n", "input[feature_names.index('Postal')] = Postal_input" ] }, { "cell_type": "code", "execution_count": 45, "id": "f02d1a92-fc2a-49ed-a3e3-87d976e779c9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "https://www.onemap.gov.sg/api/common/elastic/search?searchVal=680705&returnGeom=Y&getAddrDetails=Y&pageNum=1\n", "{'found': 1, 'totalNumPages': 1, 'pageNum': 1, 'results': [{'SEARCHVAL': '705 CHOA CHU KANG STREET 53 SINGAPORE 680705', 'BLK_NO': '705', 'ROAD_NAME': 'CHOA CHU KANG STREET 53', 'BUILDING': 'NIL', 'ADDRESS': '705 CHOA CHU KANG STREET 53 SINGAPORE 680705', 'POSTAL': '680705', 'X': '18296.4178872742', 'Y': '41364.999289671', 'LATITUDE': '1.39036325274643', 'LONGITUDE': '103.746124351793'}]}\n" ] }, { "data": { "text/plain": [ "'Choa Chu Kang MRT Station'" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "##DISTANCE TO MRT\n", "search_term = Postal_\n", "query_string= 'https://www.onemap.gov.sg/api/common/elastic/search?searchVal={}&returnGeom=Y&getAddrDetails=Y&pageNum=1'.format(search_term)\n", "resp = requests.get(query_string)\n", "data = json.loads(resp.content)\n", "print(query_string)\n", "print(data)\n", "chosen_result = data['results'][0]\n", "\n", "#Calculate the distance to nearest MRT\n", "distance_km, nearest_mr = nearest_mrt(chosen_result['LATITUDE'], chosen_result['LONGITUDE'], mrt_name, mrt_loc)\n", "input[feature_names.index('distance_mrt')] = distance_km\n", "nearest_mr" ] }, { "cell_type": "code", "execution_count": 62, "id": "c3c84b64-3932-4226-bb32-d7dfc3551c6d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0.5863143456991471, 30, 2024, 680705, 4, 'CHOA CHU KANG', '5 ROOM']" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "##STOREY\n", "#Height is input, but then converted to the scale we used for iterating model\n", "height_input = int(storey_)\n", "# height_input = 51\n", "Height = (height_input+2)//3\n", "input[feature_names.index('storey_height')] = Height\n", "\n", "##Town\n", "input[feature_names.index(\"town\")]=town_\n", "\n", "##Room\n", "input[feature_names.index(\"flat_num\")]=room_\n", "\n", "##AGE/ TRANSACTION YEAR [Current default to 2024]\n", "age_input = int(age_)\n", "# age_input = 30\n", "\n", "# Get the current date\n", "current_date = datetime.now()\n", "\n", "input[feature_names.index('age_transation')] = age_input\n", "input[feature_names.index('transaction_yr')] = current_date.year #Default to 2024 first\n", "\n", "input" ] }, { "cell_type": "code", "execution_count": 69, "id": "8b5702ee-3891-4373-b2cf-97c1b1b23e66", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "468224.38" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Create final_dataframe as input to model\n", "\n", "Actual = dict(zip(feature_names,input))\n", "Actual_df = pd.DataFrame(Actual, index=[0])\n", "resale_adj_price = model.predict(Actual_df)[0]\n", "resale_adj_price" ] }, { "cell_type": "code", "execution_count": 70, "id": "e289a971-ca3b-47ac-95db-19c5c97f0ccb", "metadata": {}, "outputs": [], "source": [ "# Calculate the quarter\n", "quarter = (current_date.month - 1) // 3 + 1\n", "# Format the quarter in the desired format\n", "formatted_quarter = f\"{quarter}Q{current_date.year}\"" ] }, { "cell_type": "code", "execution_count": 71, "id": "8b6c863c-cf92-4fe8-964a-8cfbb779dd0f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'1Q2024'" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "formatted_quarter" ] }, { "cell_type": "code", "execution_count": 74, "id": "c0286b33-90a1-40bd-85ef-9dcc13fd0f9a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "636421.4805825242" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "RPI_pd = pd.read_csv('data/RPI_dict.csv', header=None)\n", "RPI_dict = dict(zip(RPI_pd[0], RPI_pd[1]))\n", "RPI = float(RPI_dict[formatted_quarter])\n", "price = resale_adj_price*(RPI/133.9) \n", "price" ] }, { "cell_type": "code", "execution_count": null, "id": "05a4a4c1-cbe7-4623-ace3-2b6c61f1575c", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "686ef72d-5f24-4ed9-bf0f-4fcbc5b3138a", "metadata": {}, "source": [ "## Find last 10 as dataframe" ] }, { "cell_type": "code", "execution_count": 106, "id": "7e9134ec-a778-4ca7-accf-4f500c1a493b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
transactionareastorey_heightresale_price
02023-04114.007 TO 09510000
12022-06132.010 TO 12585000
22022-12109.001 TO 03470000
32021-03121.007 TO 09455000
42020-02109.007 TO 09329000
52019-05114.004 TO 06330000
62017-09109.010 TO 12355000
\n", "
" ], "text/plain": [ " transaction area storey_height resale_price\n", "0 2023-04 114.0 07 TO 09 510000\n", "1 2022-06 132.0 10 TO 12 585000\n", "2 2022-12 109.0 01 TO 03 470000\n", "3 2021-03 121.0 07 TO 09 455000\n", "4 2020-02 109.0 07 TO 09 329000\n", "5 2019-05 114.0 04 TO 06 330000\n", "6 2017-09 109.0 10 TO 12 355000" ] }, "execution_count": 106, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_json(\"data/data_features.json\", lines=True)\n", "\n", "Postal_input= 680705\n", "df_filtered = df[df['Postal']==str(Postal_input)]\n", "\n", "df_output=df_filtered.sort_values(by='transaction_yr', ascending=False).head(10).reset_index(drop=True)\n", "\n", "storey_dict = {\n", " '01 TO 03': 1, '04 TO 06': 2, '07 TO 09': 3, '10 TO 12': 4,\n", " '13 TO 15': 5, '16 TO 18': 6, '19 TO 21': 7, '22 TO 24': 8,\n", " '25 TO 27': 9, '28 TO 30': 10, '31 TO 33': 11, '34 TO 36': 12,\n", " '37 TO 39': 13, '40 TO 42': 14, '43 TO 45': 15, '46 TO 48': 16,\n", " '49 TO 51': 17\n", "}\n", "\n", "# Swap keys and values using dictionary comprehension\n", "swapped_dict = {value: key for key, value in storey_dict.items()}\n", "\n", "df_output['storey_height']=df_output['storey_height'].apply(lambda x: swapped_dict[x])\n", "df_out = df_output[['transaction','area','storey_height','resale_price']]\n", "\n", "df_out" ] }, { "cell_type": "code", "execution_count": 96, "id": "667f879d-a891-4bf2-9f62-ffd96f3e809f", "metadata": {}, "outputs": [], "source": [ "\n" ] } ], "metadata": { "kernelspec": { "display_name": "HDB_pred", "language": "python", "name": "hdb_pred" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.7" } }, "nbformat": 4, "nbformat_minor": 5 }