File size: 4,176 Bytes
ed2b5ab 2a7ee55 b9ed1ac 2a7ee55 7c9d88c 5638308 b9ed1ac 2a7ee55 b9ed1ac 2a7ee55 b9ed1ac 2a7ee55 2304f1b 2a7ee55 2304f1b 2a7ee55 9157140 b9ed1ac 2a7ee55 a392acb 420f797 2a7ee55 2304f1b 2a7ee55 b9ed1ac 2a7ee55 b9ed1ac 2a7ee55 b9ed1ac 2304f1b 2a7ee55 b9ed1ac 2a7ee55 b9ed1ac 2a7ee55 b9ed1ac 2a7ee55 b9ed1ac 2a7ee55 7c9d88c 2a7ee55 7c9d88c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
import json
import requests
from misc import nearest_mrt
import pickle
import os
import pandas as pd
import datetime
from datetime import datetime
def findlast10(postal):
    """Return up to ten of the most recent recorded transactions for a postal code.

    Reads ``data/data_features.json`` (JSON lines), keeps the rows whose
    ``Postal`` matches *postal*, orders them by ``transaction_yr`` (newest
    first), keeps the first ten and turns the numeric ``storey_height`` band
    back into its text label (``1`` -> ``'01 TO 03'`` ... ``17`` -> ``'49 TO 51'``).

    Args:
        postal: Postal code as ``int`` or ``str``. Numeric codes are compared
            by value, so ``50005``, ``"50005"`` and ``"050005"`` all select the
            same rows regardless of whether the data column was loaded as text
            or as numbers.

    Returns:
        pandas.DataFrame with the columns ``transaction``, ``area``,
        ``storey_height`` and ``resale_price`` and a fresh 0..n-1 index.
        The frame is empty when nothing matches.
    """
    df = pd.read_json("data/data_features.json", lines=True)

    # Compare numerically when possible: callers pass ints (which have lost any
    # leading zero) and pandas may have inferred the column as numeric, so a
    # plain string comparison silently matches nothing in those cases.
    try:
        postal_number = int(str(postal).strip())
    except ValueError:
        matches = df['Postal'].astype(str) == str(postal)
    else:
        matches = pd.to_numeric(df['Postal'], errors='coerce') == postal_number

    recent = (
        df[matches]
        .sort_values(by='transaction_yr', ascending=False)
        .head(10)
        .reset_index(drop=True)
    )

    # Band n covers storeys 3n-2 .. 3n, written zero-padded: 1 -> '01 TO 03'.
    band_labels = {
        band: f"{3 * band - 2:02d} TO {3 * band:02d}" for band in range(1, 18)
    }
    # Values outside the known bands are passed through unchanged instead of
    # aborting the whole lookup with a KeyError.
    recent['storey_height'] = recent['storey_height'].apply(
        lambda band: band_labels.get(band, band)
    )

    return recent[['transaction', 'area', 'storey_height', 'resale_price']]
def _load_model(path='finalized_model2.sav'):
    """Unpickle and return the trained model stored at *path*.

    Raises:
        FileNotFoundError: if *path* does not exist.
    """
    if not os.path.exists(path):
        print('failed loading model')
        raise FileNotFoundError(f"model file not found: {path}")
    # NOTE(security): pickle runs arbitrary code while loading; the model file
    # must come from a trusted source.
    with open(path, 'rb') as model_file:
        model = pickle.load(model_file)
    print('loaded model')
    return model


def _load_mrt_list(path='data/mrt_list.json'):
    """Read the JSON-lines MRT file into parallel lists (names, (float, float, ...) locations)."""
    mrt_name = []
    mrt_loc = []
    with open(path, 'r') as mrt_file:
        for line in mrt_file:
            if not line.strip():
                # Tolerate blank / trailing empty lines in the JSON-lines file.
                continue
            item = json.loads(line)
            mrt_name.append(item['MRT'])
            mrt_loc.append(tuple(float(i) for i in item['location']))
    return mrt_name, mrt_loc


def _geocode_postal(search_term):
    """Query the OneMap search API for *search_term* and return its first result dict.

    Raises:
        requests.RequestException: on timeout, connection failure or HTTP error status.
        IndexError: if the API returns no results for the search term.
    """
    query_string = 'https://www.onemap.gov.sg/api/common/elastic/search?searchVal={}&returnGeom=Y&getAddrDetails=Y&pageNum=1'.format(search_term)
    # A timeout keeps a stalled API call from hanging the caller forever.
    resp = requests.get(query_string, timeout=10)
    resp.raise_for_status()
    data = json.loads(resp.content)
    print(query_string)
    print(data)
    results = data['results']
    if not results:
        raise IndexError(f"OneMap returned no results for postal code {search_term}")
    return results[0]


def main_fn(Postal_, age_, town_, storey_, room_):
    """Predict a resale price for a flat and fetch its recent transaction history.

    Builds a single-row feature frame in the model's own feature order
    (features not set here stay 0), predicts an index-adjusted price, and
    rescales it with the index value of the current quarter.

    Args:
        Postal_: Postal code (anything ``int()`` accepts).
        age_: Value stored in the model's ``age_transation`` feature.
        town_: Value stored in the model's ``town`` feature.
        storey_: Storey number; converted to a 3-storey band via ``(storey + 2) // 3``.
        room_: Value stored in the model's ``flat_num`` feature.

    Returns:
        tuple: ``(price, history)`` where ``price`` is the predicted price
        truncated to ``int`` and ``history`` is the DataFrame returned by
        ``findlast10`` for the same postal code.

    Raises:
        FileNotFoundError: if the pickled model file is missing.
        ValueError: if a numeric argument cannot be converted, or the model
            lacks one of the expected feature names.
        IndexError: if the postal code cannot be geocoded.
        KeyError: if ``data/RPI_dict.csv`` has no entry for the current quarter.
    """
    # Load model and start from an all-zero feature row in the model's order.
    model = _load_model()
    feature_names = model.feature_names_in_.tolist()
    feature_row = [0] * len(feature_names)

    def set_feature(name, value):
        # list.index raises ValueError if the model does not know the feature.
        feature_row[feature_names.index(name)] = value

    mrt_name, mrt_loc = _load_mrt_list()

    ## POSTAL
    Postal_input = int(Postal_)
    set_feature('Postal', Postal_input)

    ## DISTANCE TO MRT
    # int() drops leading zeros; restore the 6-digit form before searching.
    chosen_result = _geocode_postal(f"{Postal_input:06d}")
    distance_km, nearest_mr = nearest_mrt(
        chosen_result['LATITUDE'], chosen_result['LONGITUDE'], mrt_name, mrt_loc
    )
    set_feature('distance_mrt', distance_km)

    ## STOREY
    # The model works on 3-storey bands: storeys 1-3 -> 1, 4-6 -> 2, ...
    set_feature('storey_height', (int(storey_) + 2) // 3)

    ## TOWN / ROOM
    set_feature("town", town_)
    set_feature("flat_num", room_)

    ## AGE / TRANSACTION YEAR (transaction year is the current calendar year)
    current_date = datetime.now()
    set_feature('age_transation', int(age_))
    set_feature('transaction_yr', current_date.year)

    # Single-row frame in the column order the model expects.
    Actual_df = pd.DataFrame(dict(zip(feature_names, feature_row)), index=[0])
    resale_adj_price = model.predict(Actual_df)[0]

    # Rescale the adjusted price with the index of the current quarter,
    # keyed like "1Q2024" in the CSV.
    quarter = (current_date.month - 1) // 3 + 1
    formatted_quarter = f"{quarter}Q{current_date.year}"
    RPI_pd = pd.read_csv('data/RPI_dict.csv', header=None)
    RPI_dict = dict(zip(RPI_pd[0], RPI_pd[1]))
    if formatted_quarter not in RPI_dict:
        raise KeyError(f"no RPI entry for quarter {formatted_quarter} in data/RPI_dict.csv")
    RPI = float(RPI_dict[formatted_quarter])
    # NOTE(review): 133.9 is presumably the index value the training prices
    # were normalised to - confirm against the training pipeline.
    price = resale_adj_price * (RPI / 133.9)

    # Last 10 transactions recorded for the same postal code.
    df = findlast10(Postal_input)
    return (int(price), df)
if __name__ == "__main__":
    # Manual smoke run with one sample flat; prints the (price, history) tuple.
    sample_args = (680705, 30, 'CHOA CHU KANG', 12, '5 ROOM')
    print(main_fn(*sample_args))