sivakornchong committed on
Commit
b9ed1ac
1 Parent(s): ca08c8e

Enter new model (using XGBoost pipeline instead)

Browse files
Files changed (6) hide show
  1. .gitignore +2 -1
  2. data/RPI_dict.csv +30 -0
  3. finalized_model.sav +0 -3
  4. main.py +33 -30
  5. main_old.py +96 -0
  6. test.ipynb +428 -0
.gitignore CHANGED
@@ -1,3 +1,4 @@
1
  __pycache__
2
  model/
3
- *sav
 
 
1
  __pycache__
2
  model/
3
+ *sav
4
+ .ipynb_checkpoints
data/RPI_dict.csv ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2Q2024,184
2
+ 1Q2024,182
3
+ 4Q2023,180.2
4
+ 3Q2023,178.5
5
+ 2Q2023,176.2
6
+ 1Q2023,173.6
7
+ 4Q2022,171.9
8
+ 3Q2022,168.1
9
+ 2Q2022,163.9
10
+ 1Q2022,159.5
11
+ 4Q2021,155.7
12
+ 3Q2021,150.6
13
+ 2Q2021,146.4
14
+ 1Q2021,142.2
15
+ 4Q2020,138.1
16
+ 3Q2020,133.9
17
+ 2Q2020,131.9
18
+ 1Q2020,131.5
19
+ 4Q2019,131.5
20
+ 3Q2019,130.9
21
+ 2Q2019,130.8
22
+ 1Q2019,131.0
23
+ 4Q2018,131.4
24
+ 3Q2018,131.6
25
+ 2Q2018,131.7
26
+ 1Q2018,131.6
27
+ 4Q2017,132.6
28
+ 3Q2017,132.8
29
+ 2Q2017,133.7
30
+ 1Q2017,133.9
finalized_model.sav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fcee802bc380de56e88aee0b2fee8a6586391ee036fa11f9e16eba6d21ffa6f
3
- size 813445176
 
 
 
 
main.py CHANGED
@@ -4,24 +4,21 @@ from misc import nearest_mrt
4
  import pickle
5
  import os
6
  import pandas as pd
7
-
8
- ###This is to create MRT names and MRT locations
9
-
10
 
11
  def main_fn(Postal_,age_,town_,storey_,room_):
12
- ##Input structure into model is##
13
- filename = 'finalized_model.sav'
14
-
15
- if os.path.exists("./finalized_model.sav"):
16
  model = pickle.load(open(filename, 'rb'))
17
  print('loaded model')
18
  else:
19
  print('failed loading model')
20
 
21
- #extract feature names#
22
- feature_names = model.feature_names
23
  input = [0]*len(feature_names)
24
- # print(feature_names)
25
 
26
  #Set up mrt_list
27
  mrt_name = []
@@ -33,16 +30,17 @@ def main_fn(Postal_,age_,town_,storey_,room_):
33
  loc = tuple([float(i) for i in item['location']])
34
  mrt_loc.append(loc)
35
 
36
- #Query for latitude and longitude
37
-
 
38
  ##POSTAL
39
  Postal_input = int(Postal_)
40
  # Postal_input = 680705
41
  input[feature_names.index('Postal')] = Postal_input
42
 
43
  ##DISTANCE TO MRT
44
- search_term = Postal_input
45
- query_string='https://developers.onemap.sg/commonapi/search?searchVal={}&returnGeom=Y&getAddrDetails=Y&pageNum=1'.format(search_term)
46
  resp = requests.get(query_string)
47
  data = json.loads(resp.content)
48
  print(query_string)
@@ -60,33 +58,38 @@ def main_fn(Postal_,age_,town_,storey_,room_):
60
  Height = (height_input+2)//3
61
  input[feature_names.index('storey_height')] = Height
62
 
63
- ##TOWN
64
- town_input = town_
65
- # town_input = 'CHOA CHU KANG'
66
- input[feature_names.index("town_"+town_input)] = 1
67
 
68
- ##ROOM
69
- room_input = room_
70
- # room_input = '4 ROOM'
71
- input[feature_names.index("flat_num_"+room_input)] = 1
72
 
73
- ##AGE/ TRANSACTION YEAR [Current default to 2022]
74
  age_input = int(age_)
75
  # age_input = 30
76
- input[feature_names.index('age_transation')] = age_input
77
- input[feature_names.index('transaction_yr')] = 2022 #Default to 2022 first
78
 
79
- #Create final_dataframe as input to model
 
 
 
 
80
 
 
81
  Actual = dict(zip(feature_names,input))
82
  Actual_df = pd.DataFrame(Actual, index=[0])
83
 
 
84
  resale_adj_price = model.predict(Actual_df)[0]
85
 
86
- #New resale index is set arbitrarily as 170
87
- resale_index = 170
88
- price = resale_adj_price*resale_index/133.9
89
- print(Actual_df)
 
 
 
 
 
90
 
91
  return int(price)
92
 
 
4
  import pickle
5
  import os
6
  import pandas as pd
7
+ import datetime
8
+ from datetime import datetime
 
9
 
10
  def main_fn(Postal_,age_,town_,storey_,room_):
11
+ #Load model
12
+ filename = 'finalized_model2.sav'
13
+ if os.path.exists("./finalized_model2.sav"):
 
14
  model = pickle.load(open(filename, 'rb'))
15
  print('loaded model')
16
  else:
17
  print('failed loading model')
18
 
19
+ #extract feature names
20
+ feature_names = model.feature_names_in_.tolist()
21
  input = [0]*len(feature_names)
 
22
 
23
  #Set up mrt_list
24
  mrt_name = []
 
30
  loc = tuple([float(i) for i in item['location']])
31
  mrt_loc.append(loc)
32
 
33
+ # #Test input
34
+ # Postal_,age_,town_,storey_,room_ = 680705, 30, 'CHOA CHU KANG', 12, '5 ROOM'
35
+
36
  ##POSTAL
37
  Postal_input = int(Postal_)
38
  # Postal_input = 680705
39
  input[feature_names.index('Postal')] = Postal_input
40
 
41
  ##DISTANCE TO MRT
42
+ search_term = Postal_
43
+ query_string= 'https://www.onemap.gov.sg/api/common/elastic/search?searchVal={}&returnGeom=Y&getAddrDetails=Y&pageNum=1'.format(search_term)
44
  resp = requests.get(query_string)
45
  data = json.loads(resp.content)
46
  print(query_string)
 
58
  Height = (height_input+2)//3
59
  input[feature_names.index('storey_height')] = Height
60
 
61
+ ##Town
62
+ input[feature_names.index("town")]=town_
 
 
63
 
64
+ ##Room
65
+ input[feature_names.index("flat_num")]=room_
 
 
66
 
67
+ ##AGE/ TRANSACTION YEAR [Current default to 2024]
68
  age_input = int(age_)
69
  # age_input = 30
 
 
70
 
71
+ # Get the current date
72
+ current_date = datetime.now()
73
+
74
+ input[feature_names.index('age_transation')] = age_input
75
+ input[feature_names.index('transaction_yr')] = current_date.year #Default to 2024 first
76
 
77
+ # Create final_dataframe as input to model
78
  Actual = dict(zip(feature_names,input))
79
  Actual_df = pd.DataFrame(Actual, index=[0])
80
 
81
+ # Use model to predict adjusted price
82
  resale_adj_price = model.predict(Actual_df)[0]
83
 
84
+ # Readjust back to actual price
85
+ # Calculate the quarter
86
+ quarter = (current_date.month - 1) // 3 + 1
87
+ # Format the quarter in the desired format
88
+ formatted_quarter = f"{quarter}Q{current_date.year}"
89
+ RPI_pd = pd.read_csv('data/RPI_dict.csv', header=None)
90
+ RPI_dict = dict(zip(RPI_pd[0], RPI_pd[1]))
91
+ RPI = float(RPI_dict[formatted_quarter])
92
+ price = resale_adj_price*(RPI/133.9)
93
 
94
  return int(price)
95
 
main_old.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import requests
3
+ from misc import nearest_mrt
4
+ import pickle
5
+ import os
6
+ import pandas as pd
7
+
8
+ ###This is to create MRT names and MRT locations
9
+
10
+
11
def main_fn(Postal_,age_,town_,storey_,room_):
    """Estimate a flat's resale price with the pickled legacy model.

    Builds a single-row feature frame in the column order given by
    ``model.feature_names``, predicts an index-adjusted price and rescales
    it with a fixed resale price index.

    Args:
        Postal_: Postal code of the flat (int or numeric string).
        age_: Value stored in the model's 'age_transation' feature
            (int or numeric string).
        town_: Town name; "town_<town_>" must be one of the model's
            one-hot feature names, e.g. 'CHOA CHU KANG'.
        storey_: Storey number (int or numeric string); converted to a
            3-storey band before being fed to the model.
        room_: Flat type; "flat_num_<room_>" must be one of the model's
            one-hot feature names, e.g. '4 ROOM'.

    Returns:
        The estimated price, truncated to an int.

    Raises:
        FileNotFoundError: If 'finalized_model.sav' is not in the working
            directory.
        IndexError: If the address search returns no result for the
            postal code.
        ValueError: If a required feature name (including the town / room
            one-hot column) is missing from the model's feature names;
            this assumes ``feature_names`` is a list.
    """
    # Load the legacy model.
    # NOTE(review): pickle can execute arbitrary code while loading; only
    # ever point this at a trusted, locally produced model file.
    filename = 'finalized_model.sav'
    if not os.path.exists(filename):
        print('failed loading model')
        # Fail here with a clear error instead of crashing later on an
        # unbound `model` variable.
        raise FileNotFoundError(
            "model file '{}' not found in the working directory".format(filename)
        )
    with open(filename, 'rb') as model_file:
        model = pickle.load(model_file)
    print('loaded model')

    # One slot per model feature, all zero until filled in below.
    feature_names = model.feature_names
    features = [0]*len(feature_names)

    # Set up mrt_list: the file holds one JSON object per line.
    mrt_name = []
    mrt_loc = []
    with open('data/mrt_list.json', 'r') as file:
        for line in file:
            item = json.loads(line)
            mrt_name.append(item['MRT'])
            mrt_loc.append(tuple(float(i) for i in item['location']))

    ##POSTAL
    Postal_input = int(Postal_)
    features[feature_names.index('Postal')] = Postal_input

    ##DISTANCE TO MRT
    # Search with the zero-padded six-digit code: int() drops leading zeros,
    # so a code such as 018956 would otherwise be searched as "18956".
    search_term = '{:06d}'.format(Postal_input)
    # Same search endpoint that main.py uses in this commit; it replaces the
    # former developers.onemap.sg/commonapi/search URL.
    query_string = 'https://www.onemap.gov.sg/api/common/elastic/search?searchVal={}&returnGeom=Y&getAddrDetails=Y&pageNum=1'.format(search_term)
    # A timeout keeps the caller from hanging forever on a stalled request.
    resp = requests.get(query_string, timeout=10)
    data = json.loads(resp.content)
    print(query_string)
    print(data)
    results = data['results']
    if not results:
        raise IndexError(
            'address search returned no results for postal code {}'.format(search_term)
        )
    chosen_result = results[0]

    #Calculate the distance to nearest MRT
    distance_km, nearest_mr = nearest_mrt(chosen_result['LATITUDE'], chosen_result['LONGITUDE'], mrt_name, mrt_loc)
    features[feature_names.index('distance_mrt')] = distance_km

    ##STOREY
    # Storeys are grouped into bands of three: 1-3 -> 1, 4-6 -> 2, ...
    height_input = int(storey_)
    Height = (height_input+2)//3
    features[feature_names.index('storey_height')] = Height

    ##TOWN (one-hot column)
    features[feature_names.index("town_"+town_)] = 1

    ##ROOM (one-hot column)
    features[feature_names.index("flat_num_"+room_)] = 1

    ##AGE/ TRANSACTION YEAR
    age_input = int(age_)
    features[feature_names.index('age_transation')] = age_input
    features[feature_names.index('transaction_yr')] = 2022 #Fixed at 2022 for this legacy model

    #Create final_dataframe as input to model
    Actual = dict(zip(feature_names,features))
    Actual_df = pd.DataFrame(Actual, index=[0])

    resale_adj_price = model.predict(Actual_df)[0]

    #New resale index is set arbitrarily as 170
    # NOTE(review): 133.9 is presumably the index value the training prices
    # were normalised to - confirm against the training pipeline.
    resale_index = 170
    price = resale_adj_price*resale_index/133.9
    print(Actual_df)

    return int(price)
92
+
93
if __name__ == "__main__":
    # Manual smoke run: price one sample flat and print the estimate.
    Postal_ = 680705
    age_ = 30
    town_ = 'CHOA CHU KANG'
    storey_ = 12
    room_ = '5 ROOM'
    price = main_fn(Postal_, age_, town_, storey_, room_)
    print(price)
test.ipynb ADDED
@@ -0,0 +1,428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 64,
6
+ "id": "a94c4760-bcad-4c09-83e7-e5391b059b59",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import json\n",
11
+ "import requests\n",
12
+ "from misc import nearest_mrt\n",
13
+ "import pickle\n",
14
+ "import os\n",
15
+ "import pandas as pd\n",
16
+ "import datetime\n",
17
+ "from datetime import datetime"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "code",
22
+ "execution_count": 5,
23
+ "id": "dfd76296-5048-433b-a29a-cc073dd9d814",
24
+ "metadata": {},
25
+ "outputs": [
26
+ {
27
+ "name": "stdout",
28
+ "output_type": "stream",
29
+ "text": [
30
+ "loaded model\n"
31
+ ]
32
+ }
33
+ ],
34
+ "source": [
35
+ "filename = 'finalized_model2.sav'\n",
36
+ "\n",
37
+ "if os.path.exists(\"./finalized_model2.sav\"):\n",
38
+ " model = pickle.load(open(filename, 'rb'))\n",
39
+ " print('loaded model')\n",
40
+ "else:\n",
41
+ " print('failed loading model')"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": 8,
47
+ "id": "361df0d9-1659-42ac-9dca-8cdde2ac3a15",
48
+ "metadata": {},
49
+ "outputs": [
50
+ {
51
+ "data": {
52
+ "text/html": [
53
+ "<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;columntransformer&#x27;,\n",
54
+ " ColumnTransformer(transformers=[(&#x27;standardscaler&#x27;,\n",
55
+ " StandardScaler(),\n",
56
+ " [&#x27;distance_mrt&#x27;,\n",
57
+ " &#x27;age_transation&#x27;,\n",
58
+ " &#x27;transaction_yr&#x27;, &#x27;Postal&#x27;,\n",
59
+ " &#x27;storey_height&#x27;]),\n",
60
+ " (&#x27;pipeline&#x27;,\n",
61
+ " Pipeline(steps=[(&#x27;onehotencoder&#x27;,\n",
62
+ " OneHotEncoder(handle_unknown=&#x27;ignore&#x27;,\n",
63
+ " sparse_output=False))]),\n",
64
+ " [&#x27;town&#x27;, &#x27;flat_num&#x27;])])),\n",
65
+ " (&#x27;xgbregressor&#x27;,\n",
66
+ " XGBRegressor(base_scor...\n",
67
+ " feature_types=None, gamma=1, grow_policy=None,\n",
68
+ " importance_type=None,\n",
69
+ " interaction_constraints=None, learning_rate=None,\n",
70
+ " max_bin=None, max_cat_threshold=None,\n",
71
+ " max_cat_to_onehot=None, max_delta_step=None,\n",
72
+ " max_depth=7, max_leaves=None,\n",
73
+ " min_child_weight=None, missing=nan,\n",
74
+ " monotone_constraints=None, multi_strategy=None,\n",
75
+ " n_estimators=None, n_jobs=None,\n",
76
+ " num_parallel_tree=None, random_state=None, ...))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;columntransformer&#x27;,\n",
77
+ " ColumnTransformer(transformers=[(&#x27;standardscaler&#x27;,\n",
78
+ " StandardScaler(),\n",
79
+ " [&#x27;distance_mrt&#x27;,\n",
80
+ " &#x27;age_transation&#x27;,\n",
81
+ " &#x27;transaction_yr&#x27;, &#x27;Postal&#x27;,\n",
82
+ " &#x27;storey_height&#x27;]),\n",
83
+ " (&#x27;pipeline&#x27;,\n",
84
+ " Pipeline(steps=[(&#x27;onehotencoder&#x27;,\n",
85
+ " OneHotEncoder(handle_unknown=&#x27;ignore&#x27;,\n",
86
+ " sparse_output=False))]),\n",
87
+ " [&#x27;town&#x27;, &#x27;flat_num&#x27;])])),\n",
88
+ " (&#x27;xgbregressor&#x27;,\n",
89
+ " XGBRegressor(base_scor...\n",
90
+ " feature_types=None, gamma=1, grow_policy=None,\n",
91
+ " importance_type=None,\n",
92
+ " interaction_constraints=None, learning_rate=None,\n",
93
+ " max_bin=None, max_cat_threshold=None,\n",
94
+ " max_cat_to_onehot=None, max_delta_step=None,\n",
95
+ " max_depth=7, max_leaves=None,\n",
96
+ " min_child_weight=None, missing=nan,\n",
97
+ " monotone_constraints=None, multi_strategy=None,\n",
98
+ " n_estimators=None, n_jobs=None,\n",
99
+ " num_parallel_tree=None, random_state=None, ...))])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">columntransformer: ColumnTransformer</label><div class=\"sk-toggleable__content\"><pre>ColumnTransformer(transformers=[(&#x27;standardscaler&#x27;, StandardScaler(),\n",
100
+ " [&#x27;distance_mrt&#x27;, &#x27;age_transation&#x27;,\n",
101
+ " &#x27;transaction_yr&#x27;, &#x27;Postal&#x27;,\n",
102
+ " &#x27;storey_height&#x27;]),\n",
103
+ " (&#x27;pipeline&#x27;,\n",
104
+ " Pipeline(steps=[(&#x27;onehotencoder&#x27;,\n",
105
+ " OneHotEncoder(handle_unknown=&#x27;ignore&#x27;,\n",
106
+ " sparse_output=False))]),\n",
107
+ " [&#x27;town&#x27;, &#x27;flat_num&#x27;])])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">standardscaler</label><div class=\"sk-toggleable__content\"><pre>[&#x27;distance_mrt&#x27;, &#x27;age_transation&#x27;, &#x27;transaction_yr&#x27;, &#x27;Postal&#x27;, &#x27;storey_height&#x27;]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">StandardScaler</label><div class=\"sk-toggleable__content\"><pre>StandardScaler()</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">pipeline</label><div class=\"sk-toggleable__content\"><pre>[&#x27;town&#x27;, &#x27;flat_num&#x27;]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder(handle_unknown=&#x27;ignore&#x27;, 
sparse_output=False)</pre></div></div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">XGBRegressor</label><div class=\"sk-toggleable__content\"><pre>XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
108
+ " colsample_bylevel=None, colsample_bynode=None,\n",
109
+ " colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
110
+ " enable_categorical=False, eval_metric=None, feature_types=None,\n",
111
+ " gamma=1, grow_policy=None, importance_type=None,\n",
112
+ " interaction_constraints=None, learning_rate=None, max_bin=None,\n",
113
+ " max_cat_threshold=None, max_cat_to_onehot=None,\n",
114
+ " max_delta_step=None, max_depth=7, max_leaves=None,\n",
115
+ " min_child_weight=None, missing=nan, monotone_constraints=None,\n",
116
+ " multi_strategy=None, n_estimators=None, n_jobs=None,\n",
117
+ " num_parallel_tree=None, random_state=None, ...)</pre></div></div></div></div></div></div></div>"
118
+ ],
119
+ "text/plain": [
120
+ "Pipeline(steps=[('columntransformer',\n",
121
+ " ColumnTransformer(transformers=[('standardscaler',\n",
122
+ " StandardScaler(),\n",
123
+ " ['distance_mrt',\n",
124
+ " 'age_transation',\n",
125
+ " 'transaction_yr', 'Postal',\n",
126
+ " 'storey_height']),\n",
127
+ " ('pipeline',\n",
128
+ " Pipeline(steps=[('onehotencoder',\n",
129
+ " OneHotEncoder(handle_unknown='ignore',\n",
130
+ " sparse_output=False))]),\n",
131
+ " ['town', 'flat_num'])])),\n",
132
+ " ('xgbregressor',\n",
133
+ " XGBRegressor(base_scor...\n",
134
+ " feature_types=None, gamma=1, grow_policy=None,\n",
135
+ " importance_type=None,\n",
136
+ " interaction_constraints=None, learning_rate=None,\n",
137
+ " max_bin=None, max_cat_threshold=None,\n",
138
+ " max_cat_to_onehot=None, max_delta_step=None,\n",
139
+ " max_depth=7, max_leaves=None,\n",
140
+ " min_child_weight=None, missing=nan,\n",
141
+ " monotone_constraints=None, multi_strategy=None,\n",
142
+ " n_estimators=None, n_jobs=None,\n",
143
+ " num_parallel_tree=None, random_state=None, ...))])"
144
+ ]
145
+ },
146
+ "execution_count": 8,
147
+ "metadata": {},
148
+ "output_type": "execute_result"
149
+ }
150
+ ],
151
+ "source": [
152
+ "model"
153
+ ]
154
+ },
155
+ {
156
+ "cell_type": "code",
157
+ "execution_count": 20,
158
+ "id": "e4764df8-efdf-42e9-ade6-ff8062b5bac3",
159
+ "metadata": {},
160
+ "outputs": [],
161
+ "source": [
162
+ "#extract feature names#\n",
163
+ "feature_names = model.feature_names_in_.tolist()\n",
164
+ "input = [0]*len(feature_names)"
165
+ ]
166
+ },
167
+ {
168
+ "cell_type": "code",
169
+ "execution_count": 21,
170
+ "id": "9eb9aa6a-4e67-4f51-9566-775fed6ac4ff",
171
+ "metadata": {},
172
+ "outputs": [
173
+ {
174
+ "data": {
175
+ "text/plain": [
176
+ "['distance_mrt',\n",
177
+ " 'age_transation',\n",
178
+ " 'transaction_yr',\n",
179
+ " 'Postal',\n",
180
+ " 'storey_height',\n",
181
+ " 'town',\n",
182
+ " 'flat_num']"
183
+ ]
184
+ },
185
+ "execution_count": 21,
186
+ "metadata": {},
187
+ "output_type": "execute_result"
188
+ }
189
+ ],
190
+ "source": [
191
+ "feature_names"
192
+ ]
193
+ },
194
+ {
195
+ "cell_type": "code",
196
+ "execution_count": 22,
197
+ "id": "3f2fd14c-2df7-481f-b837-502d717a892b",
198
+ "metadata": {},
199
+ "outputs": [],
200
+ "source": [
201
+ "#Set up mrt_list\n",
202
+ "mrt_name = []\n",
203
+ "mrt_loc = []\n",
204
+ "with open('data/mrt_list.json', 'r') as file:\n",
205
+ " for line in file:\n",
206
+ " item = json.loads(line)\n",
207
+ " mrt_name.append(item['MRT'])\n",
208
+ " loc = tuple([float(i) for i in item['location']])\n",
209
+ " mrt_loc.append(loc)"
210
+ ]
211
+ },
212
+ {
213
+ "cell_type": "code",
214
+ "execution_count": 23,
215
+ "id": "b2d0339f-91bb-4514-890c-b561857af14c",
216
+ "metadata": {},
217
+ "outputs": [],
218
+ "source": [
219
+ "#Test input\n",
220
+ "Postal_,age_,town_,storey_,room_ = 680705, 30, 'CHOA CHU KANG', 12, '5 ROOM'"
221
+ ]
222
+ },
223
+ {
224
+ "cell_type": "code",
225
+ "execution_count": 24,
226
+ "id": "30e85e47-70f7-4b2a-a242-b25b00449276",
227
+ "metadata": {},
228
+ "outputs": [],
229
+ "source": [
230
+ "##POSTAL\n",
231
+ "Postal_input = int(Postal_)\n",
232
+ "# Postal_input = 680705\n",
233
+ "input[feature_names.index('Postal')] = Postal_input"
234
+ ]
235
+ },
236
+ {
237
+ "cell_type": "code",
238
+ "execution_count": 45,
239
+ "id": "f02d1a92-fc2a-49ed-a3e3-87d976e779c9",
240
+ "metadata": {},
241
+ "outputs": [
242
+ {
243
+ "name": "stdout",
244
+ "output_type": "stream",
245
+ "text": [
246
+ "https://www.onemap.gov.sg/api/common/elastic/search?searchVal=680705&returnGeom=Y&getAddrDetails=Y&pageNum=1\n",
247
+ "{'found': 1, 'totalNumPages': 1, 'pageNum': 1, 'results': [{'SEARCHVAL': '705 CHOA CHU KANG STREET 53 SINGAPORE 680705', 'BLK_NO': '705', 'ROAD_NAME': 'CHOA CHU KANG STREET 53', 'BUILDING': 'NIL', 'ADDRESS': '705 CHOA CHU KANG STREET 53 SINGAPORE 680705', 'POSTAL': '680705', 'X': '18296.4178872742', 'Y': '41364.999289671', 'LATITUDE': '1.39036325274643', 'LONGITUDE': '103.746124351793'}]}\n"
248
+ ]
249
+ },
250
+ {
251
+ "data": {
252
+ "text/plain": [
253
+ "'Choa Chu Kang MRT Station'"
254
+ ]
255
+ },
256
+ "execution_count": 45,
257
+ "metadata": {},
258
+ "output_type": "execute_result"
259
+ }
260
+ ],
261
+ "source": [
262
+ "##DISTANCE TO MRT\n",
263
+ "search_term = Postal_\n",
264
+ "query_string= 'https://www.onemap.gov.sg/api/common/elastic/search?searchVal={}&returnGeom=Y&getAddrDetails=Y&pageNum=1'.format(search_term)\n",
265
+ "resp = requests.get(query_string)\n",
266
+ "data = json.loads(resp.content)\n",
267
+ "print(query_string)\n",
268
+ "print(data)\n",
269
+ "chosen_result = data['results'][0]\n",
270
+ "\n",
271
+ "#Calculate the distance to nearest MRT\n",
272
+ "distance_km, nearest_mr = nearest_mrt(chosen_result['LATITUDE'], chosen_result['LONGITUDE'], mrt_name, mrt_loc)\n",
273
+ "input[feature_names.index('distance_mrt')] = distance_km\n",
274
+ "nearest_mr"
275
+ ]
276
+ },
277
+ {
278
+ "cell_type": "code",
279
+ "execution_count": 62,
280
+ "id": "c3c84b64-3932-4226-bb32-d7dfc3551c6d",
281
+ "metadata": {},
282
+ "outputs": [
283
+ {
284
+ "data": {
285
+ "text/plain": [
286
+ "[0.5863143456991471, 30, 2024, 680705, 4, 'CHOA CHU KANG', '5 ROOM']"
287
+ ]
288
+ },
289
+ "execution_count": 62,
290
+ "metadata": {},
291
+ "output_type": "execute_result"
292
+ }
293
+ ],
294
+ "source": [
295
+ "##STOREY\n",
296
+ "#Height is input, but then converted to the scale we used for iterating model\n",
297
+ "height_input = int(storey_)\n",
298
+ "# height_input = 51\n",
299
+ "Height = (height_input+2)//3\n",
300
+ "input[feature_names.index('storey_height')] = Height\n",
301
+ "\n",
302
+ "##Town\n",
303
+ "input[feature_names.index(\"town\")]=town_\n",
304
+ "\n",
305
+ "##Room\n",
306
+ "input[feature_names.index(\"flat_num\")]=room_\n",
307
+ "\n",
308
+ "##AGE/ TRANSACTION YEAR [Current default to 2024]\n",
309
+ "age_input = int(age_)\n",
310
+ "# age_input = 30\n",
311
+ "\n",
312
+ "# Get the current date\n",
313
+ "current_date = datetime.now()\n",
314
+ "\n",
315
+ "input[feature_names.index('age_transation')] = age_input\n",
316
+ "input[feature_names.index('transaction_yr')] = current_date.year #Default to 2024 first\n",
317
+ "\n",
318
+ "input"
319
+ ]
320
+ },
321
+ {
322
+ "cell_type": "code",
323
+ "execution_count": 69,
324
+ "id": "8b5702ee-3891-4373-b2cf-97c1b1b23e66",
325
+ "metadata": {},
326
+ "outputs": [
327
+ {
328
+ "data": {
329
+ "text/plain": [
330
+ "468224.38"
331
+ ]
332
+ },
333
+ "execution_count": 69,
334
+ "metadata": {},
335
+ "output_type": "execute_result"
336
+ }
337
+ ],
338
+ "source": [
339
+ "#Create final_dataframe as input to model\n",
340
+ "\n",
341
+ "Actual = dict(zip(feature_names,input))\n",
342
+ "Actual_df = pd.DataFrame(Actual, index=[0])\n",
343
+ "resale_adj_price = model.predict(Actual_df)[0]\n",
344
+ "resale_adj_price"
345
+ ]
346
+ },
347
+ {
348
+ "cell_type": "code",
349
+ "execution_count": 70,
350
+ "id": "e289a971-ca3b-47ac-95db-19c5c97f0ccb",
351
+ "metadata": {},
352
+ "outputs": [],
353
+ "source": [
354
+ "# Calculate the quarter\n",
355
+ "quarter = (current_date.month - 1) // 3 + 1\n",
356
+ "# Format the quarter in the desired format\n",
357
+ "formatted_quarter = f\"{quarter}Q{current_date.year}\""
358
+ ]
359
+ },
360
+ {
361
+ "cell_type": "code",
362
+ "execution_count": 71,
363
+ "id": "8b6c863c-cf92-4fe8-964a-8cfbb779dd0f",
364
+ "metadata": {},
365
+ "outputs": [
366
+ {
367
+ "data": {
368
+ "text/plain": [
369
+ "'1Q2024'"
370
+ ]
371
+ },
372
+ "execution_count": 71,
373
+ "metadata": {},
374
+ "output_type": "execute_result"
375
+ }
376
+ ],
377
+ "source": [
378
+ "formatted_quarter"
379
+ ]
380
+ },
381
+ {
382
+ "cell_type": "code",
383
+ "execution_count": 74,
384
+ "id": "c0286b33-90a1-40bd-85ef-9dcc13fd0f9a",
385
+ "metadata": {},
386
+ "outputs": [
387
+ {
388
+ "data": {
389
+ "text/plain": [
390
+ "636421.4805825242"
391
+ ]
392
+ },
393
+ "execution_count": 74,
394
+ "metadata": {},
395
+ "output_type": "execute_result"
396
+ }
397
+ ],
398
+ "source": [
399
+ "RPI_pd = pd.read_csv('data/RPI_dict.csv', header=None)\n",
400
+ "RPI_dict = dict(zip(RPI_pd[0], RPI_pd[1]))\n",
401
+ "RPI = float(RPI_dict[formatted_quarter])\n",
402
+ "price = resale_adj_price*(RPI/133.9) \n",
403
+ "price"
404
+ ]
405
+ }
406
+ ],
407
+ "metadata": {
408
+ "kernelspec": {
409
+ "display_name": "HDB_pred",
410
+ "language": "python",
411
+ "name": "hdb_pred"
412
+ },
413
+ "language_info": {
414
+ "codemirror_mode": {
415
+ "name": "ipython",
416
+ "version": 3
417
+ },
418
+ "file_extension": ".py",
419
+ "mimetype": "text/x-python",
420
+ "name": "python",
421
+ "nbconvert_exporter": "python",
422
+ "pygments_lexer": "ipython3",
423
+ "version": "3.11.7"
424
+ }
425
+ },
426
+ "nbformat": 4,
427
+ "nbformat_minor": 5
428
+ }