# Residue from the hosting page (not part of the program), kept for reference:
# sivakornchong's picture
# add code to calculate df
# 7c9d88c
import json
import requests
from misc import nearest_mrt
import pickle
import os
import pandas as pd
import datetime
from datetime import datetime
def findlast10(postal, data_path="data/data_features.json"):
    """Return up to ten of the most recent transactions for a postal code.

    Args:
        postal: Postal code to look up (int or str). It is compared to the
            ``Postal`` column as a string.
        data_path: Path of the JSON-lines file holding the transaction
            records. Defaults to the location this function always used.

    Returns:
        A DataFrame with the columns ``transaction``, ``area``,
        ``storey_height`` and ``resale_price``, sorted by ``transaction_yr``
        in descending order and limited to ten rows. ``storey_height`` is
        converted from its numeric code (1..17) to a label such as
        ``'10 TO 12'``; a code without a label becomes NaN.
    """
    df = pd.read_json(data_path, lines=True)

    # read_json may infer an integer dtype for an all-digit column, in which
    # case comparing against a string would match nothing. Normalising the
    # column to strings makes the filter work for either dtype.
    matches = df[df['Postal'].astype(str) == str(postal)]
    recent = (
        matches.sort_values(by='transaction_yr', ascending=False)
        .head(10)
        .reset_index(drop=True)
    )

    # Code n stands for the three-storey band ending at storey 3*n
    # (1 -> '01 TO 03', ..., 17 -> '49 TO 51').
    storey_labels = {
        code: f"{3 * code - 2:02d} TO {3 * code:02d}" for code in range(1, 18)
    }
    # map() leaves NaN for unknown codes instead of raising KeyError.
    recent['storey_height'] = recent['storey_height'].map(storey_labels)

    return recent[['transaction', 'area', 'storey_height', 'resale_price']]
def main_fn(Postal_,age_,town_,storey_,room_):
    """Predict a resale price for a flat and fetch its recent transactions.

    Args:
        Postal_: Postal code of the flat (anything ``int()`` accepts).
        age_: Age value stored in the model's ``age_transation`` feature.
        town_: Town name, passed to the model's ``town`` feature unchanged.
        storey_: Actual storey number; converted to a three-storey band code.
        room_: Flat type (e.g. ``'5 ROOM'``), passed to the ``flat_num``
            feature unchanged.

    Returns:
        A tuple ``(price, df)`` where ``price`` is the predicted price as an
        int and ``df`` is the DataFrame returned by ``findlast10`` for the
        same postal code.

    Raises:
        FileNotFoundError: If the pickled model file is missing.
        IndexError: If the OneMap search returns no results.
        KeyError: If ``data/RPI_dict.csv`` has no entry for the current quarter.
        requests.HTTPError: If the OneMap request returns an error status.
    """
    # Load model
    model_path = 'finalized_model2.sav'
    if not os.path.exists(model_path):
        print('failed loading model')
        # Fail here with a clear error instead of a NameError further down.
        raise FileNotFoundError(f"model file not found: {model_path}")
    # NOTE(review): pickle.load can execute arbitrary code; only ship a model
    # file that comes from a trusted source.
    with open(model_path, 'rb') as model_file:
        model = pickle.load(model_file)
    print('loaded model')

    # One slot per model feature, filled in by name below.
    feature_names = model.feature_names_in_.tolist()
    features = [0] * len(feature_names)

    # Set up the MRT station names and coordinates (one JSON object per line).
    mrt_name = []
    mrt_loc = []
    with open('data/mrt_list.json', 'r') as file:
        for line in file:
            if not line.strip():
                continue  # tolerate blank lines, e.g. a trailing newline
            item = json.loads(line)
            mrt_name.append(item['MRT'])
            mrt_loc.append(tuple(float(i) for i in item['location']))

    ## POSTAL
    Postal_input = int(Postal_)
    features[feature_names.index('Postal')] = Postal_input

    ## DISTANCE TO MRT
    query_string = 'https://www.onemap.gov.sg/api/common/elastic/search?searchVal={}&returnGeom=Y&getAddrDetails=Y&pageNum=1'.format(Postal_input)
    # A timeout keeps a stalled connection from hanging the caller forever.
    resp = requests.get(query_string, timeout=10)
    resp.raise_for_status()
    data = json.loads(resp.content)
    print(query_string)
    print(data)
    results = data['results']
    if not results:
        raise IndexError(
            f"OneMap returned no results for postal code {Postal_input}"
        )
    chosen_result = results[0]

    # Distance to the nearest MRT; the station name itself is not used.
    distance_km, _ = nearest_mrt(chosen_result['LATITUDE'], chosen_result['LONGITUDE'], mrt_name, mrt_loc)
    features[feature_names.index('distance_mrt')] = distance_km

    ## STOREY
    # Convert the actual storey to the band code used by the model:
    # storeys 1-3 -> 1, 4-6 -> 2, and so on.
    height_input = int(storey_)
    features[feature_names.index('storey_height')] = (height_input + 2) // 3

    ## TOWN
    features[feature_names.index("town")] = town_

    ## ROOM
    features[feature_names.index("flat_num")] = room_

    ## AGE / TRANSACTION YEAR (transaction year is the current year)
    age_input = int(age_)
    current_date = datetime.now()
    # 'age_transation' is spelled as the model's feature name; do not correct.
    features[feature_names.index('age_transation')] = age_input
    features[feature_names.index('transaction_yr')] = current_date.year

    # Single-row frame with the columns in the order the model expects.
    Actual_df = pd.DataFrame(dict(zip(feature_names, features)), index=[0])

    # The model predicts an index-adjusted price.
    resale_adj_price = model.predict(Actual_df)[0]

    # Scale back to an actual price with the RPI of the current quarter,
    # keyed like '2Q2024'.
    quarter = (current_date.month - 1) // 3 + 1
    formatted_quarter = f"{quarter}Q{current_date.year}"
    RPI_pd = pd.read_csv('data/RPI_dict.csv', header=None)
    RPI_dict = dict(zip(RPI_pd[0], RPI_pd[1]))
    try:
        RPI = float(RPI_dict[formatted_quarter])
    except KeyError:
        raise KeyError(
            f"no RPI entry for {formatted_quarter} in data/RPI_dict.csv"
        ) from None
    # NOTE(review): 133.9 is presumably the RPI of the reference quarter the
    # training prices were adjusted to - confirm against the training code.
    price = resale_adj_price * (RPI / 133.9)

    # Last 10 transactions recorded for the same postal code.
    df = findlast10(Postal_input)
    return (int(price), df)
if __name__ == "__main__":
    # Manual smoke run: postal code, age, town, storey, flat type.
    sample_args = (680705, 30, 'CHOA CHU KANG', 12, '5 ROOM')
    print(main_fn(*sample_args))